From 8179a6893ce46cbdd7d8499b85a606596047db46 Mon Sep 17 00:00:00 2001 From: Alex Ultra Date: Tue, 16 Jun 2026 23:34:51 +0200 Subject: [PATCH 1/2] feat(errors): structured what/why/fix errors + stable exit codes (error-system v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Born from two same-day prod failures whose errors were thin and undiagnosable: `unknown mcp item(s): review (known: none)` (no hint it was a REMOVED slot) and a dead hook path surfacing only as a generic harness "PreToolUse error". This makes every rig error a consistent WHAT / WHY / HOW-to-fix block with a stable per-class exit code (a PUBLIC CONTRACT documented in `rig --help`). What's in: - riglib/errors.py: RigError(what/why/fix/exit_code) + per-class codes (2 config, 3 drift, 4 unknown-item, 5 missing-target, 6 not-a-repo, 127 missing-dep), a single top-level `guard()` renderer, did-you-mean (Levenshtein), and a removed-slot registry (mcp.items.review -> names agent-tools #32 + the fix). - riglib/layers.py: GLOBAL vs REPO classification — single source of truth so `rig status` groups drift by layer and names WHICH config file declares each item. - riglib/missing_target.py: proactively scans the harness settings.json for hook commands pointing at a script gone from disk (the dead-rtk-hook case). Resolves the invoked SCRIPT (skips bare/absolute interpreters like /usr/bin/env, honors `-c` only as an interpreter flag) and checks only that token, so a runtime output arg never false-flags. - cli.py: `rig status` renders the 3-part errors + layer grouping; a non-git dir shows "not a git repository — repo layer N/A" (never the "should be committed" nag), and degrades gracefully when the agent-tools catalog can't be resolved. plan.py raises the structured UnknownItemError. config.py adds primary_config_path for provenance. `rig doctor` honors the documented 127 missing-dependency exit code. Tests: 504 pytest passing; tests/smoke.sh fully green (incl. the non-git + dead-hook + removed-slot legs). New: test_errors, test_missing_target, test_plan_errors, test_status_layers; smoke gains non-git / clean-sample / removed-slot assertions. Co-Authored-By: Claude Opus 4.8 --- riglib/cli.py | 256 +++++++++++++++++++++++++++++--- riglib/config.py | 14 ++ riglib/errors.py | 273 +++++++++++++++++++++++++++++++++++ riglib/layers.py | 52 +++++++ riglib/missing_target.py | 136 +++++++++++++++++ riglib/plan.py | 53 ++++--- tests/smoke.sh | 75 +++++++++- tests/test_catalog_plan.py | 25 +++- tests/test_cli.py | 42 +++++- tests/test_errors.py | 193 +++++++++++++++++++++++++ tests/test_missing_target.py | 208 ++++++++++++++++++++++++++ tests/test_plan_errors.py | 77 ++++++++++ tests/test_status_layers.py | 232 +++++++++++++++++++++++++++++ 13 files changed, 1582 insertions(+), 54 deletions(-) create mode 100644 riglib/errors.py create mode 100644 riglib/layers.py create mode 100644 riglib/missing_target.py create mode 100644 tests/test_errors.py create mode 100644 tests/test_missing_target.py create mode 100644 tests/test_plan_errors.py create mode 100644 tests/test_status_layers.py diff --git a/riglib/cli.py b/riglib/cli.py index 0d07961..f19ec0a 100644 --- a/riglib/cli.py +++ b/riglib/cli.py @@ -56,8 +56,22 @@ def _bold(s: str) -> str: def build_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser( prog="rig", + formatter_class=argparse.RawDescriptionHelpFormatter, description="rig — the dev-environment umbrella driver. Set up a repo from a " "committed rig.yaml by applying agent-tools content (skills, hooks, CI gates, MCP).", + # Exit codes are a PUBLIC CONTRACT (scripts/CI branch on them). Documented here per the + # structured-exit-codes skill; the constants live in riglib/errors.py. + epilog=( + "exit codes:\n" + " 0 success\n" + " 1 internal error (an unexpected failure / bug)\n" + " 2 invalid config (a malformed value, type, or unknown key)\n" + " 3 drift (rig status found config↔disk drift)\n" + " 4 unknown item (config names a catalog item that doesn't exist / was removed)\n" + " 5 missing target (config references a path/binary that's gone on disk)\n" + " 6 not a git repository (a repo-scoped command run outside a repo)\n" + " 127 missing dependency (a required external tool isn't installed)\n" + ), ) p.add_argument("--version", action="version", version=f"rig {__version__}") sub = p.add_subparsers(dest="command", metavar="") @@ -75,7 +89,14 @@ def _add_setup_args(parser: argparse.ArgumentParser) -> None: ip = sub.add_parser("init", help="first-run onboarding: scaffold rig.yaml + wire the catalog in (the front door)") _add_setup_args(ip) - ap = sub.add_parser("apply", help="reconcile the repo to rig.yaml (idempotent)") + ap = sub.add_parser( + "apply", + help="reconcile the repo to rig.yaml (idempotent)", + description="Reconcile disk to rig.yaml (idempotent). apply only ADDS/UPDATES the " + "items your config declares — it NEVER deletes on-disk extras. Items present on disk " + "but not declared in any layer (e.g. a hand-added skill) are reported by `rig status` " + "and left untouched; apply will not nuke them.", + ) ap.add_argument("-C", "--cwd", default=".", help="repo root (default: cwd)") ap.add_argument("--config", help="config file to apply (default: ./rig.yaml + global)") ap.add_argument("--dry-run", action="store_true", help="print the resolved plan, write nothing") @@ -155,7 +176,12 @@ def main(argv: list[str] | None = None) -> int: "install-skill": cmd_install_skill, "stats": cmd_stats, } - return handlers[args.command](args) + # The single top-level error handler (error-system v2): any structured RigError a command + # raises is rendered as the consistent what/why/fix block + its stable per-class exit code. + # A non-RigError (a real bug) propagates so its traceback is visible. + from . import errors + + return errors.guard(lambda: handlers[args.command](args)) # ── shared helpers ──────────────────────────────────────────────────────────────── @@ -352,7 +378,61 @@ def cmd_apply(args: argparse.Namespace) -> int: return 1 if report.errors else 0 +def _scan_missing_targets() -> list: + """Scan the harness settings.json for hook commands pointing at files that are gone. + + Resolves the claude-code harness settings file under HOME (``~/.claude/settings.json``) and + returns the missing-target findings. Kept a thin wrapper so cmd_status reads cleanly and a + future multi-harness expansion has one place to add settings paths. + """ + from .missing_target import scan_settings_hooks + + settings = Path(_os.path.expanduser("~/.claude/settings.json")) + return scan_settings_hooks(settings) + + +def _print_non_git_note() -> None: + """The one-line 'this is not a git repository' note shown under the status header. + + Single source for the wording so the normal status path and the catalog-failed fallback + (``_print_non_git_status``) can never drift apart — the smoke greps for "not a git + repository", and only one of two phrasings would be a regression waiting to happen. + """ + print(_dim(" (not a git repository — repo layer / rig.yaml N/A here)")) + + +def _print_non_git_status(env, config: str | None = None) -> int: + """Render a minimal `rig status` for a non-git dir when the catalog couldn't be resolved. + + The catalog scan feeds the REPO layer (what this repo would install); a non-git dir has no + repo layer, so its failure is irrelevant here. We still owe the user the header + the + explicit "not a git repository" note (never the "should be committed" nag). Returns 0 — a + non-git dir is a valid place to ask "what's my status?", not an error. + + Config loading precedes the (failed) catalog scan, so the layer provenance is still + reportable; we re-load it here rather than thread `loaded` through the exception path. + """ + from .config import ConfigError, load + + print(_bold("rig status")) + print(f" repo: {env.repo_root}") + print(f" stack: {env.stack} type: {env.project_type}") + layers = "(none — built-in defaults)" + try: + explicit = None + if config: + cp = Path(config) + explicit = (cp if cp.is_absolute() else env.repo_root / cp).resolve() + layers = ", ".join(load(env.repo_root, explicit_config=explicit).layers) or layers + except ConfigError: + pass # a malformed config still shouldn't stop us reporting "not a git repository" + print(f" config layers: {layers}") + _print_non_git_note() + return 0 + + def cmd_status(args: argparse.Namespace) -> int: + from . import errors from .catalog import CatalogError from .config import ConfigError from .drift import detect @@ -360,7 +440,20 @@ def cmd_status(args: argparse.Namespace) -> int: try: plan, loaded, env = _load_plan(args.cwd, args.config) - except (ConfigError, CatalogError, PlanError) as exc: + except CatalogError as exc: + # The catalog only matters for the REPO layer (what this repo would install). A non-git + # dir (e.g. ~) has no repo layer at all, so an unresolved agent-tools checkout must NOT + # mask the one thing status owes that user: "you're not in a git repository". Detecting + # the env is independent of (and cheap relative to) the catalog scan that just failed. + from .detect import detect_environment + + env = detect_environment(Path(args.cwd).resolve()) + if not env.is_git_repo: + return _print_non_git_status(env, args.config) + # A real repo with a broken/missing agent-tools source IS a genuine config failure. + print(_err(f"error: {exc}")) + return 2 + except (ConfigError, PlanError) as exc: print(_err(f"error: {exc}")) return 2 @@ -369,8 +462,19 @@ def cmd_status(args: argparse.Namespace) -> int: print(f" stack: {env.stack} type: {env.project_type}") cfg_src = ", ".join(loaded.layers) or "(none — built-in defaults)" print(f" config layers: {cfg_src}") - if loaded.repo_path is None: - print(_warn(" warning: no rig.yaml in this repo (it should be committed)")) + + # The REPO layer only exists inside a git repository. In a non-git dir (e.g. ~) there is no + # repo at all — do NOT nag "no rig.yaml, should be committed" (that advice applies only in a + # real repo). Show that the repo layer is N/A and report the GLOBAL layer only. + if not env.is_git_repo: + _print_non_git_note() + elif loaded.repo_path is None: + # A REAL repo with no committed rig.yaml: make the fix PROMINENT, not a one-liner + # buried above a long drift dump. + print() + print(_warn(_bold(" ▸ no committed rig.yaml in this repository"))) + print(_warn(" rig.yaml is the committed source of truth for this repo's setup.")) + print(_ok(" fix: run `rig init` to create one")) # scan the configured target dirs for extras even if no action targets them. # CI + MCP targets are REPO-LOCAL with clear ownership → scan even when the category is @@ -389,10 +493,13 @@ def cmd_status(args: argparse.Namespace) -> int: h = resolve_category_target(loaded, "agent_hooks") if h and loaded.category("agent_hooks").get("enabled") is not False: scan_hook_dirs.append(h) - d = resolve_category_target(loaded, "ci") # CI: scan unconditionally (repo-local) - if d: - scan_ci_dirs.append(d) - d = resolve_category_target(loaded, "mcp") # MCP: scan unconditionally + # CI + MCP are REPO-LOCAL — only scan them when this IS a repo (a non-git dir has no repo + # layer; scanning a cwd-relative .github there would be meaningless). + if env.is_git_repo: + d = resolve_category_target(loaded, "ci") # CI: scan unconditionally (repo-local) + if d: + scan_ci_dirs.append(d) + d = resolve_category_target(loaded, "mcp") # MCP: scan unconditionally (global) if d: scan_mcp_files.append(d if d.suffix == ".json" else d / "mcp.json") report = detect( @@ -414,22 +521,98 @@ def cmd_status(args: argparse.Namespace) -> int: # so `rig status` answers "is the daily checker cron there?" at a glance. _print_schedule_status(plan, report) - if report.in_sync: + # missing-target: a hook command in the harness settings.json that points at a file gone + # from disk (the dead-rtk-hook case) surfaces PROACTIVELY here, before it bites at runtime + # as a generic "PreToolUse error". This is independent of config↔disk drift. + dead_targets = _scan_missing_targets() + if dead_targets: + print() + print(_err(_bold(f" ▸ missing targets ({len(dead_targets)}) — config points at files that are gone:"))) + for f in dead_targets: + print(f" {_err('✗')} {f.what}") + print(f" {_dim('why:')} {f.why}") + print(f" {_ok('fix:')} {f.fix}") + + if report.in_sync and not dead_targets: print(_ok("\n in sync — config and disk agree")) return 0 + if dead_targets and report.in_sync: + # no config↔disk drift, but a dead hook reference IS a problem worth a non-zero exit. + print(_dim("\n (no config↔disk drift, but a missing target above needs attention)")) + return errors.EXIT_MISSING_TARGET + + _render_drift_by_layer(report, loaded, env) + # LOUD reassurance: apply NEVER deletes on-disk-not-declared items. A user must not fear + # `rig apply` will nuke a hand-added skill (it won't — extras are surfaced, never removed). + print() + print(_ok(" ✔ safe: `rig apply` NEVER deletes on-disk extras — items present on disk but")) + print(_ok(" not declared in any layer are left for you to decide. apply only ADDS/UPDATES.")) + print(_dim("\n run `rig apply` to converge config→disk (extras above are left as-is)")) + return 3 + + +def _declaring_config(category: str, loaded) -> str: + """Name the config FILE that declares a drift item's layer (or 'not declared in any layer'). + + GLOBAL categories are declared in ``~/.config/rig/config.yaml``; REPO categories in the + repo's ``./rig.yaml``. If that layer file isn't loaded (the category drifted from a default + or an orphaned on-disk extra with no backing config), say so plainly instead of pointing at + a file that doesn't declare it. + """ + from .layers import GLOBAL, layer_for_category + + layer = layer_for_category(category) + path = loaded.global_path if layer == GLOBAL else loaded.repo_path + return str(path) if path is not None else "not declared in any layer" + + +def _render_drift_by_layer(report, loaded, env) -> None: + """Render drift GROUPED by GLOBAL vs REPO, each item naming its declaring config file. + + The pre-v2 status flattened machine-wide GLOBAL drift (skills/hooks/harness/mcp) together + with this repo's REPO drift (CI/symlinks) into one undifferentiated dump — so a global + skills drift looked like the repo's problem. Grouping by layer + naming the source file + makes each item's ownership unambiguous. + """ + from .layers import GLOBAL, REPO, layer_for_category + + # bucket every drift item by its owning layer, preserving missing-before-extra ordering. missing = report.by_direction("missing") + report.by_direction("modified") extra = report.by_direction("extra") - if missing: - print(_warn(f"\n config→disk drift ({len(missing)}) — declared but missing/modified:")) - for d in missing: - print(f" {_warn('▸')} {d.category}/{d.item}: {d.detail}") - if extra: - print(_warn(f"\n disk→config drift ({len(extra)}) — on disk, not declared:")) - for d in extra: - print(f" {_warn('▸')} {d.category}/{d.item}: {d.detail} [{d.target}]") - print(_dim("\n run `rig apply` to converge config→disk (extras are left for you to decide)")) - return 3 + buckets: dict[str, dict[str, list]] = { + GLOBAL: {"missing": [], "extra": []}, + REPO: {"missing": [], "extra": []}, + } + for d in missing: + buckets[layer_for_category(d.category)]["missing"].append(d) + for d in extra: + buckets[layer_for_category(d.category)]["extra"].append(d) + + headers = { + GLOBAL: "GLOBAL — machine-wide (from ~/.config/rig/config.yaml)", + REPO: "REPO — this repository (from ./rig.yaml)", + } + for layer in (GLOBAL, REPO): + miss = buckets[layer]["missing"] + ext = buckets[layer]["extra"] + if not miss and not ext: + continue + # the REPO layer is meaningless outside a git repo — never print it there. + if layer == REPO and not env.is_git_repo: + continue + print() + print(_bold(f" {headers[layer]}")) + if miss: + print(_warn(f" config→disk drift ({len(miss)}) — declared but missing/modified:")) + for d in miss: + src = _declaring_config(d.category, loaded) + print(f" {_warn('▸')} {d.category}/{d.item}: {d.detail} {_dim('[declared in ' + src + ']')}") + if ext: + print(_warn(f" disk→config drift ({len(ext)}) — on disk, not declared:")) + for d in ext: + src = _declaring_config(d.category, loaded) + print(f" {_warn('▸')} {d.category}/{d.item}: {d.detail} {_dim('[' + str(d.target) + '; ' + src + ']')}") def _print_schedule_status(plan, report) -> None: @@ -468,22 +651,49 @@ def cmd_doctor(args: argparse.Namespace) -> int: else: print(f" {_dim('install: (no package mapping for this OS — install manually)')}") + # missing-target: proactively flag a dead hook reference in the harness settings.json (the + # rtk-hook case) so it's caught here, not at runtime as a generic harness error. + dead_targets = _scan_missing_targets() + if dead_targets: + print(_err(_bold(f"\n ▸ missing targets ({len(dead_targets)}) — config points at files that are gone:"))) + for f in dead_targets: + print(f" {_err('✗')} {f.what}") + print(f" {_ok('fix:')} {f.fix}") + missing_req = report.missing_required - if not missing_req and not (args.optional and report.missing_optional): + if not missing_req and not (args.optional and report.missing_optional) and not dead_targets: print(_ok("\n all required dependencies present")) return 0 + # a dead target (but no missing dep) is still a problem worth a non-zero exit. + if not missing_req and not (args.optional and report.missing_optional) and dead_targets: + from . import errors + + print(_warn("\n a missing target above needs attention (re-run `rig apply` or remove the stale entry)")) + return errors.EXIT_MISSING_TARGET if not args.yes: + from . import errors + print(_warn("\n missing dependencies above. Re-run with --yes to install them")) print(_dim(" (add --optional to also install optional deps)")) - return 1 + # honor the documented exit-code contract: a missing REQUIRED dep is the 127 class + # (shell convention). A shortfall of only OPTIONAL deps (under --optional) is advisory, + # not a hard "required tool absent", so it stays the generic non-zero. + return errors.EXIT_MISSING_DEP if missing_req else 1 print(_bold("\n installing missing dependencies...")) results = bootstrap(report, assume_yes=True, include_optional=args.optional) failed = [name for name, rc in results if rc not in (0,)] for name, rc in results: print(f" {_ok('✔') if rc == 0 else _err('✗')} {name} (rc={rc})") - return 1 if failed else 0 + if failed: + from . import errors + + # an install that left a REQUIRED dep absent is still the missing-dependency class; + # a failed optional install is advisory (generic non-zero). + req_names = {st.dep.name for st in report.statuses if st.dep.required and not st.present} + return errors.EXIT_MISSING_DEP if (set(failed) & req_names) else 1 + return 0 def cmd_export(args: argparse.Namespace) -> int: diff --git a/riglib/config.py b/riglib/config.py index cd91785..7a1ec5d 100644 --- a/riglib/config.py +++ b/riglib/config.py @@ -120,6 +120,20 @@ def defaults(self) -> dict[str, Any]: d = self.data.get("defaults") return d if isinstance(d, dict) else {} + @property + def primary_config_path(self) -> Path: + """The most-specific config file backing this config, for naming in error messages. + + The repo ``rig.yaml`` OVERRIDES the global config, so an invalid key is most likely + the repo one — name it first; fall back to the global path, then to a notional repo + path so an error always points somewhere concrete to edit. + """ + if self.repo_path is not None: + return self.repo_path + if self.global_path is not None: + return self.global_path + return repo_config_path(self.repo_root) + def load( repo_root: Path, diff --git a/riglib/errors.py b/riglib/errors.py new file mode 100644 index 0000000..264ce34 --- /dev/null +++ b/riglib/errors.py @@ -0,0 +1,273 @@ +"""Error system v2 — every rig error is WHAT / WHY / HOW-to-fix, with a stable exit code. + +Runtime reach: raised by config/catalog/plan/status code paths; rendered by the top-level +CLI handler (:func:`guard`, wired into ``cli.main``). Centralizing the shape here means a +human staring at a failure always gets the same three things — what happened, why (the root +cause, the offending config FILE PATH + key), and a concrete command/edit to fix it — and a +script always gets a meaningful, *stable* exit code per failure class. + +Invariants: +- The exit-code constants below are a PUBLIC CONTRACT. Scripts/CI branch on them; changing a + value is a breaking change. They are documented in ``rig --help`` and ``docs/config-schema.md``. +- They follow the ``structured-exit-codes`` skill: 0 success, 2 invalid-config/usage, + 127 missing-dependency (shell convention), plus rig-specific classes (drift/unknown-item/ + missing-target/not-a-repo) that a caller wants to distinguish. +- Stdlib-only at import time (AGENTS.md hard rule) — no third-party imports here. + +History: born from two same-day prod failures whose errors were thin and undiagnosable — +``unknown mcp item(s): review (known: none)`` (no hint it was a REMOVED slot or how to remove +it) and a dead hook path surfacing only as a generic harness "PreToolUse error". This module +exists so neither recurs: the removed-slot registry names ``mcp.items.review`` and prints the +exact removal PR + fix, and missing-target names the gone file + how to regenerate it. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +# ── stable, per-class exit codes (PUBLIC CONTRACT — do not renumber) ─────────────── +EXIT_OK = 0 +# 1 is reserved for an UNEXPECTED/internal failure (an unhandled exception): a caller can +# tell "rig itself crashed" (1) from "rig diagnosed a known problem" (>=2). +EXIT_INTERNAL = 1 +EXIT_CONFIG = 2 # malformed / invalid config value (usage class, per the skill) +EXIT_DRIFT = 3 # `rig status` found drift (config and disk disagree) +EXIT_UNKNOWN_ITEM = 4 # config names a catalog item that doesn't exist (typo / removed slot) +EXIT_MISSING_TARGET = 5 # config references a path/binary that's gone on disk +EXIT_NOT_A_REPO = 6 # a repo-scoped command run outside a git repository +EXIT_MISSING_DEP = 127 # a required external tool/binary isn't installed (shell convention) + + +@dataclass +class RigError(Exception): + """A structured, renderable error: WHAT happened / WHY / HOW to fix + an exit code. + + ``what`` — the symptom, one line ("unknown mcp item: reviewr"). + ``why`` — the root cause + context: the offending CONFIG FILE PATH + key when relevant. + ``fix`` — a concrete command or edit the user can run/make right now. + ``exit_code`` — the failure class (one of the EXIT_* constants); subclasses pin it. + """ + + what: str + why: str = "" + fix: str = "" + exit_code: int = EXIT_INTERNAL + + def __post_init__(self) -> None: + # Exception's own machinery wants args set; keep str(e) == the WHAT line. + super().__init__(self.what) + + def __str__(self) -> str: + return self.what + + +@dataclass +class ConfigError(RigError): + """Malformed/invalid config — a bad value, type, or unknown key. Fail-closed (exit 2).""" + + exit_code: int = EXIT_CONFIG + + +@dataclass +class UnknownItemError(RigError): + """Config names a catalog item that doesn't exist (typo or a removed slot). Exit 4.""" + + exit_code: int = EXIT_UNKNOWN_ITEM + + +@dataclass +class MissingTargetError(RigError): + """Config references a path/binary that's gone on disk (a dead hook path, …). Exit 5.""" + + exit_code: int = EXIT_MISSING_TARGET + + +@dataclass +class NotARepoError(RigError): + """A repo-scoped command was run outside a git repository. Exit 6.""" + + exit_code: int = EXIT_NOT_A_REPO + + +@dataclass +class MissingDepError(RigError): + """A required external tool/binary isn't installed. Exit 127 (shell convention).""" + + exit_code: int = EXIT_MISSING_DEP + + +# ── rendering ────────────────────────────────────────────────────────────────────── +def _c(code: str, s: str, color: bool) -> str: + return f"\033[{code}m{s}\033[0m" if color else s + + +def render(err: RigError, *, color: bool = True) -> str: + """Render a :class:`RigError` as the consistent 3-part block (what / why / fix). + + Always shows the WHAT (prefixed ``error:``); shows WHY and FIX only when populated, so a + terse error doesn't print empty labels. The label words ``why``/``fix`` always appear when + their field is set — the contract the CLI handler and tests rely on. + """ + lines = [_c("31", f"error: {err.what}", color)] + if err.why: + lines.append(_c("2", " why: ", color) + err.why) + if err.fix: + lines.append(_c("32", " fix: ", color) + err.fix) + return "\n".join(lines) + + +def guard(fn) -> int: + """Run ``fn`` and translate any :class:`RigError` into render() + its exit code. + + The single top-level CLI handler: a command body raises a structured error and this turns + it into a consistent printed block + the stable per-class exit code. A non-RigError + (a real bug) is NOT swallowed — it propagates so the stack trace is visible. + """ + try: + return fn() + except RigError as exc: + import sys + + print(render(exc, color=sys.stdout.isatty())) + return exc.exit_code + + +# ── did-you-mean (Levenshtein) ────────────────────────────────────────────────────── +def _levenshtein(a: str, b: str) -> int: + """Classic edit distance, stdlib-only (small strings — item names — so O(n*m) is fine).""" + if a == b: + return 0 + if not a: + return len(b) + if not b: + return len(a) + prev = list(range(len(b) + 1)) + for i, ca in enumerate(a, 1): + cur = [i] + for j, cb in enumerate(b, 1): + cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb))) + prev = cur + return prev[-1] + + +def did_you_mean(bad: str, candidates: set[str]) -> str | None: + """The nearest catalog name to ``bad`` within a sensible edit-distance threshold. + + Returns ``None`` when the catalog is empty or nothing is close enough — so we never + fabricate a bogus suggestion for a wildly different token. The threshold scales with the + typo length (a longer name tolerates more edits) but is capped so "zzzzzzzz" → "review" + is rejected. + """ + if not candidates: + return None + best = min(candidates, key=lambda c: (_levenshtein(bad, c), c)) + dist = _levenshtein(bad, best) + # allow up to ~40% of the longer string's length, capped at 3 — enough for a real typo, + # tight enough to reject an unrelated word. + threshold = min(3, max(1, round(0.4 * max(len(bad), len(best))))) + return best if dist <= threshold else None + + +# ── removed / deprecated slot registry ────────────────────────────────────────────── +@dataclass(frozen=True) +class RemovedSlot: + """A catalog slot that USED to exist and was removed — so its config key is now invalid. + + ``reason`` names WHY/WHEN it went (the PR + the rationale); the error builder turns it + into a precise "remove ```` from ````" fix instead of a useless + "unknown item (known: none)". + """ + + category: str + name: str + reason: str + + +# Seeded with the motivating case. Keyed (category, name). Add an entry whenever a catalog +# slot is removed so a lingering config reference explains itself instead of looking like a typo. +_REMOVED_SLOTS: dict[tuple[str, str], RemovedSlot] = { + ("mcp", "review"): RemovedSlot( + category="mcp", + name="review", + reason="removed in agent-tools #32: review is a CLI + skill, not an MCP server " + "(the `review --mcp` entrypoint was dropped in the subcommand refactor)", + ), +} + + +def removed_slot(category: str, name: str) -> RemovedSlot | None: + """Look up a (category, name) in the removed-slot registry; ``None`` if it was never a slot.""" + return _REMOVED_SLOTS.get((category, name)) + + +# ── error builders (the heuristics, assembled into structured errors) ─────────────── +def unknown_item_error( + *, + category: str, + key: str, + bad: str, + known: set[str], + config_path: str, +) -> UnknownItemError: + """Build the error for a config that names a non-existent catalog item. + + Priority of explanation (most specific first): + 1. **removed slot** — the name was a real slot that got removed: cite the PR + tell the + user to remove ```` from ````. + 2. **empty category** — the catalog has NO slots in this category: tell them to remove the + whole ```` block, NOT "known: none". + 3. **did-you-mean** — the catalog has slots and one is close: suggest it. + 4. **fallthrough** — list the known names so they can pick a valid one. + + ``key`` is the dotted config key (``mcp.items.review``), ``bad`` the bare item name + (``review``), ``config_path`` the offending file — all three appear in the output. + """ + removed = removed_slot(category, bad) + if removed is not None: + return UnknownItemError( + what=f"removed {category} slot: {bad}", + why=f"`{key}` ({removed.reason}); declared in {config_path}", + fix=f"remove `{key}` from {config_path}", + ) + + if not known: + return UnknownItemError( + what=f"unknown {category} item: {bad}", + why=f"the {category} catalog has no slots (declared in {config_path})", + fix=f"remove the `{category}` block from {config_path} — there are no {category} " + f"slots to enable", + ) + + suggestion = did_you_mean(bad, known) + if suggestion is not None: + return UnknownItemError( + what=f"unknown {category} item: {bad}", + why=f"`{key}` names an item not in the {category} catalog (declared in {config_path})", + fix=f"did you mean `{suggestion}`? fix `{key}` in {config_path}", + ) + + known_list = ", ".join(sorted(known)) + return UnknownItemError( + what=f"unknown {category} item: {bad}", + why=f"`{key}` names an item not in the {category} catalog (declared in {config_path})", + fix=f"use one of: {known_list} — or remove `{key}` from {config_path}", + ) + + +def missing_target_error( + *, + what_kind: str, + target: str, + why: str, + regen: str, +) -> MissingTargetError: + """Build the error for a config that points at a path/binary that's gone on disk. + + ``what_kind`` is a short noun ("hook", "binary", "skill"); ``target`` the missing path; + ``why`` the root cause; ``regen`` how to recreate it (a concrete command). + """ + return MissingTargetError( + what=f"missing {what_kind}: {target}", + why=why, + fix=regen, + ) diff --git a/riglib/layers.py b/riglib/layers.py new file mode 100644 index 0000000..03e21eb --- /dev/null +++ b/riglib/layers.py @@ -0,0 +1,52 @@ +"""Config-layer classification — which drift category belongs to which layer. + +`rig status` mixes two kinds of state: + +- **GLOBAL** — machine-wide artifacts a developer carries across every repo, declared in + ``~/.config/rig/config.yaml``: the installed skills (``~/.agents/skills`` + the harness + discovery symlinks), agent-hooks, the global git-hook dispatcher, the harness auto/permission + mode, the model-freshness schedule, and rig-managed tmux config. +- **REPO** — this specific repository's artifacts, declared in its committed ``./rig.yaml``: + the CI workflows under ``.github/``, the AGENTS.md/CLAUDE.md symlinks, and the GitHub ruleset. + +Grouping drift by layer (and naming WHICH config file declares each item) is what makes +``rig status`` legible: a global-skills drift is not the repo's problem, and a non-git dir has +no repo layer at all. This module is the single source of truth for that classification, so the +status renderer and any future consumer agree. + +Stdlib-only; no imports beyond the standard library. +""" + +from __future__ import annotations + +GLOBAL = "GLOBAL" +REPO = "REPO" + +# Each drift/action category → the config layer that OWNS it. Keep this exhaustive: a category +# missing here would silently render under the wrong/no heading. The tmux agent owns the tmux +# rows in drift.py; the classification of "tmux" as GLOBAL lives here (a single dict entry) so +# this file does not collide with the tmux provisioning work. +_CATEGORY_LAYER = { + # GLOBAL — machine-wide, from ~/.config/rig/config.yaml + "skills": GLOBAL, + "agent_hooks": GLOBAL, + "mcp": GLOBAL, + "harness": GLOBAL, + "models": GLOBAL, + "git_hooks": GLOBAL, + "tmux": GLOBAL, + # REPO — this repo, from ./rig.yaml + "ci": REPO, + "agents_md": REPO, + "github": REPO, +} + + +def layer_for_category(category: str) -> str: + """The owning layer (``GLOBAL``/``REPO``) for a drift/action category. + + Unknown categories default to ``GLOBAL`` (the conservative choice — a machine-wide artifact + is the safer assumption than claiming a repo declares something it may not), but every known + category is mapped explicitly above so this default is only a forward-compat guard. + """ + return _CATEGORY_LAYER.get(category, GLOBAL) diff --git a/riglib/missing_target.py b/riglib/missing_target.py new file mode 100644 index 0000000..a0b9f57 --- /dev/null +++ b/riglib/missing_target.py @@ -0,0 +1,136 @@ +"""missing-target scanner — find config that references a path/binary gone from disk. + +Runtime reach: called by ``rig status`` (and surfaced by ``rig doctor``) to PROACTIVELY catch +a dead reference before it bites at runtime. The motivating case: the harness ``settings.json`` +registers a hook whose ``command`` invokes a script that no longer exists — at runtime the +harness reports only a generic "PreToolUse error" with no hint of the cause. This scanner names +the missing file + how to regenerate/remove it, as a structured :class:`errors.MissingTargetError`. + +Scope: it inspects the harness ``settings.json`` hook blocks. It only flags an ABSOLUTE path +argument that looks like a script file (so a bare PATH-resolved binary like ``gitleaks`` is not +mistaken for a missing file). Stdlib-only. +""" + +from __future__ import annotations + +import json +import shlex +from pathlib import Path + +from . import errors + + +def _looks_like_script_path(token: str) -> bool: + """True for an absolute path that names a file we should check exists. + + Only absolute paths qualify — a bare ``gitleaks`` (PATH-resolved) is not a file reference + we own. We don't require a particular suffix (hooks can be ``.py``/``.sh``/extensionless), + just that it's an absolute path token, not a flag/option. + """ + if not token or token.startswith("-"): + return False + return token.startswith("/") or token.startswith("~/") + + +# Interpreter basenames whose FIRST path-looking argument is the script to check (so the +# interpreter token itself — even an absolute one like /usr/bin/env or /opt/homebrew/bin/python3 +# — is skipped, not mistaken for the target). env is special-cased below. +_INTERPRETERS = frozenset( + {"python", "python3", "python2", "sh", "bash", "zsh", "node", "deno", "bun", "ruby", "perl", "env"} +) + + +def _missing_paths_in_command(command: str) -> list[str]: + """Return the missing SCRIPT path of a hook command (at most one), or []. + + A hook command runs ONE script. It may be invoked directly (``/abs/hook.py``) or through an + interpreter — and that interpreter may itself be an ABSOLUTE path on macOS + (``/usr/bin/env python3 /abs/hook.py``, ``/opt/homebrew/bin/python3 /abs/hook.py``). We must + check the SCRIPT, not the interpreter: skip a leading interpreter token (bare or absolute) + plus its ``env VAR=…`` / module / ``-c`` style args, then take the first remaining + script-looking token as the target. We check only THAT token — a later absolute path is + almost always a runtime OUTPUT arg (``--out /var/run/x.json``) that legitimately doesn't + exist yet; flagging it would make ``rig status``/``doctor`` cry wolf on a healthy hook. + """ + try: + tokens = shlex.split(command) + except ValueError: + return [] # unparseable command (unbalanced quotes) — skip rather than guess + if not tokens: + return [] + + rest = tokens + # If the command runs through a known interpreter, drop the interpreter token. The script is + # then the first path-looking arg AFTER it; we skip the interpreter's own flags / `env` + # NAME=VALUE assignments by scanning for the first script-PATH token among the remaining args. + head = Path(tokens[0]).name if (tokens[0].startswith("/") or tokens[0].startswith("~/")) else tokens[0] + via_interpreter = head in _INTERPRETERS + if via_interpreter: + rest = tokens[1:] + + for tok in rest: + # `-c ''` as an INTERPRETER flag (i.e. before any script token) means there + # is no script FILE to verify. Only honor it before the first path — a `-c` AFTER the + # script is the script's OWN argument, not Python's, and must not suppress the check. + if via_interpreter and tok == "-c": + return [] + if _looks_like_script_path(tok): + p = Path(tok).expanduser() + return [tok] if not p.exists() else [] + return [] + + +def _iter_hook_commands(settings: dict): + """Yield every hook ``command`` string under the settings.json ``hooks`` blocks. + + Tolerant of shape drift: a missing/oddly-typed key is skipped, never raised — a malformed + settings.json should make the scan find nothing, not crash ``rig status``. + """ + hooks = settings.get("hooks") + if not isinstance(hooks, dict): + return + for blocks in hooks.values(): + if not isinstance(blocks, list): + continue + for block in blocks: + if not isinstance(block, dict): + continue + for hook in block.get("hooks", []) or []: + if isinstance(hook, dict) and isinstance(hook.get("command"), str): + yield hook["command"] + + +def scan_settings_hooks(settings_file: Path) -> list[errors.MissingTargetError]: + """Scan a harness ``settings.json`` for hook commands pointing at missing files. + + Returns one :class:`errors.MissingTargetError` per missing target (deduped by path), each + naming the gone file, where it's referenced, and how to fix it. An absent or malformed + settings.json yields an empty list (nothing to report, never an exception). + """ + if not settings_file.is_file(): + return [] + try: + data = json.loads(settings_file.read_text(encoding="utf-8")) + except (OSError, ValueError): + return [] + if not isinstance(data, dict): + return [] + + seen: set[str] = set() + findings: list[errors.MissingTargetError] = [] + for command in _iter_hook_commands(data): + for missing in _missing_paths_in_command(command): + if missing in seen: + continue + seen.add(missing) + findings.append( + errors.missing_target_error( + what_kind="hook script", + target=missing, + why=f"a hook command in {settings_file} runs `{missing}`, but that file " + f"is gone — the harness would fail with a generic PreToolUse error", + regen="re-run `rig apply` to reinstall the managed hooks, or remove the " + f"stale hook entry from {settings_file}", + ) + ) + return findings diff --git a/riglib/plan.py b/riglib/plan.py index 495fe4e..2eeb932 100644 --- a/riglib/plan.py +++ b/riglib/plan.py @@ -249,12 +249,26 @@ def _validate_item_names(config: LoadedConfig, catalog: Catalog) -> None: universal/by_type are separate groups, and by_type uses fully-qualified ``by-type//`` keys plus bare ```` bundle names in ``by_type.enable``. """ - def _check(category: str, names: set[str], known: set[str], label: str) -> None: - unknown = names - known + from . import errors + + config_path = str(config.primary_config_path) + + def _check(category: str, names: set[str], known: set[str], key_prefix: str) -> None: + """Raise a structured :class:`errors.UnknownItemError` for the first unknown name. + + ``category`` is the catalog category used for the removed-slot lookup + did-you-mean; + ``key_prefix`` is the dotted config-key path (e.g. ``mcp.items``) the bad name hangs + off, so the error names the EXACT offending key (``mcp.items.review``) + its file. + """ + unknown = sorted(names - known) if unknown: - raise PlanError( - f"unknown {label} item(s): {', '.join(sorted(unknown))} " - f"(known: {', '.join(sorted(known)) or 'none'})" + bad = unknown[0] + raise errors.unknown_item_error( + category=category, + key=f"{key_prefix}.{bad}", + bad=bad, + known=known, + config_path=config_path, ) # skills — universal group @@ -264,7 +278,7 @@ def _check(category: str, names: set[str], known: set[str], label: str) -> None: uni_known = {i.name for i in catalog.by_category("skills") if i.group == "universal"} refs = set(uni.get("enable", []) or []) | set(uni.get("disable", []) or []) refs |= set(k for k in uni.get("items", {}) if isinstance(uni.get("items"), dict)) - _check("skills", refs, uni_known, "universal skill") + _check("skills", refs, uni_known, "skills.universal") # skills — by_type group (fully-qualified item keys; bundle names checked separately) bt = sk.get("by_type", {}) if isinstance(sk, dict) else {} if isinstance(bt, dict): @@ -272,8 +286,8 @@ def _check(category: str, names: set[str], known: set[str], label: str) -> None: bt_kinds = {i.meta.get("kind", "") for i in catalog.by_category("skills") if i.group.startswith("by-type/")} bt_items = bt.get("items", {}) if isinstance(bt_items, dict): - _check("skills", set(bt_items), bt_known, "by-type skill") - _check("skills", set(bt.get("enable", []) or []), bt_kinds, "by-type bundle") + _check("skills", set(bt_items), bt_known, "skills.by_type.items") + _check("skills", set(bt.get("enable", []) or []), bt_kinds, "skills.by_type.enable") # agent_hooks + mcp — flat items/enable/disable for cat_name in ("agent_hooks", "mcp"): @@ -285,19 +299,14 @@ def _check(category: str, names: set[str], known: set[str], label: str) -> None: items = cfg.get("items", {}) if isinstance(items, dict): refs |= set(items) - _check(cat_name, refs, known, cat_name) + _check(cat_name, refs, known, f"{cat_name}.items") # git_hooks — nested sub-groups (only 'dispatcher' is shipped in v0.1). A typo like # 'dispatcherr' must fail closed, not silently build no dispatcher action. gh = config.category("git_hooks") if isinstance(gh, dict): gh_known = catalog.names("git_hooks") | {"templates"} # templates reserved for v0.2 - unknown_gh = set(gh) - gh_known - if unknown_gh: - raise PlanError( - f"unknown git_hooks key(s): {', '.join(sorted(unknown_gh))} " - f"(known: {', '.join(sorted(gh_known))})" - ) + _check("git_hooks", set(gh), gh_known, "git_hooks") def build(config: LoadedConfig, catalog: Catalog, *, project_type: str = "unknown") -> InstallPlan: @@ -404,11 +413,17 @@ def build(config: LoadedConfig, catalog: Catalog, *, project_type: str = "unknow # config enabling ship against a minimal checkout fails closed instead of dropping. known = catalog.names("ci") referenced = set(ci_items) | set(ci.get("enable", []) or []) | set(ci.get("disable", []) or []) - unknown = referenced - known + unknown = sorted(referenced - known) if unknown: - raise PlanError( - f"unknown ci item(s): {', '.join(sorted(unknown))} " - f"(known: {', '.join(sorted(known))})" + from . import errors + + bad = unknown[0] + raise errors.unknown_item_error( + category="ci", + key=f"ci.items.{bad}", + bad=bad, + known=known, + config_path=str(config.primary_config_path), ) # resolve which slots are enabled: per-item override > enable/disable > all > off. diff --git a/tests/smoke.sh b/tests/smoke.sh index 5b5d012..d110b1c 100755 --- a/tests/smoke.sh +++ b/tests/smoke.sh @@ -1,8 +1,15 @@ #!/usr/bin/env bash # smoke.sh — fast end-to-end check of the rig CLI surface, run in CI and locally. # Proves: --help, doctor, a headless setup against a sample config in a throwaway repo -# (isolated HOME so nothing on the machine is touched), idempotency, status drift, and the +# (isolated HOME so nothing on the machine is touched), idempotency, status drift, the +# error-system v2 contract (a removed slot prints the 3-part error + the right exit code; +# a clean sample exits 0; a non-git dir does NOT nag "should be committed"), and the # pytest unit suite. +# +# WHY a real smoke and not just pytest: two same-day prod failures were unit-GREEN but +# smoke-BROKEN — a stale `mcp.items.review` (a slot removed in agent-tools #32) lingered in a +# config and only the REAL `rig status`/`init` flow against the REAL catalog caught it. pytest +# uses a fake catalog; smoke runs the real CLI against the real agent-tools checkout. set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" @@ -42,6 +49,9 @@ else git config --global user.name rig-smoke ( cd "$TMP" && git init -q ) + # The CLEAN sample. NOTE: it intentionally carries NO `mcp.items.review` — that slot was + # REMOVED in agent-tools #32 (review is a CLI+skill, not an MCP). A config still naming it is + # exercised SEPARATELY below (the removed-slot leg), where it must fail with the 3-part error. cat > "$TMP/rig.yaml" </dev/null || fail "status nonzero when in sync" pass "rig status: in sync" + + # ── error-system v2: a removed slot prints the 3-part error + exit code 4 ────── + # This is the exact regression class that was unit-green but smoke-broken: a stale + # `mcp.items.review` (removed in agent-tools #32) must fail LOUDLY against the REAL catalog. + DEAD="$TMP/dead.yaml" + cat > "$DEAD" <&1)" + dead_rc=$? + set -e + echo "$dead_out" | grep -qi "removed mcp slot" || fail "removed-slot: no WHAT line" + echo "$dead_out" | grep -q "#32" || fail "removed-slot: WHY does not name the removal PR" + echo "$dead_out" | grep -qi "remove .*mcp.items.review" || fail "removed-slot: FIX does not say how to remove it" + echo "$dead_out" | grep -q "$DEAD" || fail "removed-slot: error does not name the offending config file" + [[ "$dead_rc" -eq 4 ]] || fail "removed-slot: exit code $dead_rc != 4 (unknown-item class)" + pass "rig status: removed slot → 3-part error + exit 4" + + # ── error-system v2: a CLEAN sample exits 0 (no false drift) ────────────────── + CLEAN="$TMP/clean.yaml" + cat > "$CLEAN" </dev/null 2>&1 + clean_rc=$? + set -e + [[ "$clean_rc" -eq 0 ]] || fail "clean sample: exit code $clean_rc != 0" + pass "rig status: clean sample → exit 0" + + # ── error-system v2: a NON-GIT dir does NOT nag "should be committed" ───────── + # IMPORTANT: $TMP itself was `git init`-ed above, so a dir UNDER it ($TMP/nongit) is still + # *inside* that work tree (git walks up to $TMP/.git) — env.is_git_repo would be True and this + # would not exercise the non-git path at all. Use a sibling mktemp dir outside any repo. + NONGIT="$(mktemp -d)"; trap 'rm -rf "$TMP" "$NONGIT"' EXIT # a plain dir, no .git, no repo above + set +e + nongit_out="$(HOME="$TMP/clean-home" XDG_CONFIG_HOME="$TMP/clean-xdg" $RIG status -C "$NONGIT" 2>&1)" + set -e + echo "$nongit_out" | grep -qi "should be committed" && fail "non-git: still nags 'should be committed'" + echo "$nongit_out" | grep -qi "not a git repository" || fail "non-git: does not say 'not a git repository'" + pass "rig status: non-git dir → no 'should be committed', repo layer N/A" fi # ── 4. unit suite ───────────────────────────────────────────────────────────── diff --git a/tests/test_catalog_plan.py b/tests/test_catalog_plan.py index cbb3f1b..d4c4200 100644 --- a/tests/test_catalog_plan.py +++ b/tests/test_catalog_plan.py @@ -8,7 +8,8 @@ from riglib.catalog import Catalog, CatalogError from riglib.config import LoadedConfig -from riglib.plan import PlanError, build +from riglib.errors import UnknownItemError +from riglib.plan import build def test_catalog_scan_finds_all_categories(fake_agent_tools): @@ -94,8 +95,10 @@ def test_plan_codeql_variant_selected(fake_agent_tools, tmp_path): def test_plan_unknown_universal_skill_fails_closed(fake_agent_tools, tmp_path): cat = Catalog.scan(str(fake_agent_tools)) cfg = _cfg({"skills": {"universal": {"disable": ["shell-timeout"]}}}, tmp_path) # typo - with pytest.raises(PlanError, match="universal skill"): + with pytest.raises(UnknownItemError) as exc: build(cfg, cat, project_type="unknown") + # error-system v2: did-you-mean suggests the nearest valid skill + assert "shell-timeouts" in exc.value.fix def test_plan_unknown_agent_hook_fails_closed(fake_agent_tools, tmp_path): @@ -104,15 +107,18 @@ def test_plan_unknown_agent_hook_fails_closed(fake_agent_tools, tmp_path): {"skills": {"enabled": False}, "agent_hooks": {"items": {"block-no-verifyy": {"enabled": True}}}}, tmp_path, ) - with pytest.raises(PlanError, match="agent_hooks"): + with pytest.raises(UnknownItemError) as exc: build(cfg, cat, project_type="unknown") + assert "agent_hooks" in exc.value.what + assert "block-no-verify" in exc.value.fix # nearest valid def test_plan_unknown_by_type_bundle_fails_closed(fake_agent_tools, tmp_path): cat = Catalog.scan(str(fake_agent_tools)) cfg = _cfg({"skills": {"by_type": {"enable": ["backendd"]}}}, tmp_path) # typo - with pytest.raises(PlanError, match="by-type bundle"): + with pytest.raises(UnknownItemError) as exc: build(cfg, cat, project_type="unknown") + assert "backend" in exc.value.fix # nearest valid bundle def test_plan_unknown_ci_item_fails_closed(fake_agent_tools, tmp_path): @@ -121,8 +127,9 @@ def test_plan_unknown_ci_item_fails_closed(fake_agent_tools, tmp_path): {"skills": {"enabled": False}, "ci": {"items": {"secret_scan": {"enabled": True}}}}, tmp_path, ) - with pytest.raises(PlanError, match="unknown ci item"): + with pytest.raises(UnknownItemError) as exc: build(cfg, cat, project_type="unknown") + assert "secret-scan" in exc.value.fix # nearest valid def test_plan_ci_all_true_enables_catalog(fake_agent_tools, tmp_path): @@ -141,7 +148,7 @@ def test_plan_unknown_ci_enable_name_fails_closed(fake_agent_tools, tmp_path): {"skills": {"enabled": False}, "ci": {"all": False, "enable": ["secret_scan"]}}, tmp_path, ) - with pytest.raises(PlanError, match="unknown ci item"): + with pytest.raises(UnknownItemError, match="unknown ci item"): build(cfg, cat, project_type="unknown") @@ -161,8 +168,10 @@ def test_plan_unknown_git_hooks_key_fails_closed(fake_agent_tools, tmp_path): "git_hooks": {"dispatcherr": {"enabled": True}}}, # typo tmp_path, ) - with pytest.raises(PlanError, match="git_hooks"): + with pytest.raises(UnknownItemError) as exc: build(cfg, cat, project_type="unknown") + assert "git_hooks" in exc.value.what + assert "dispatcher" in exc.value.fix def test_plan_ship_absent_from_catalog_fails_closed(fake_agent_tools, tmp_path): @@ -173,7 +182,7 @@ def test_plan_ship_absent_from_catalog_fails_closed(fake_agent_tools, tmp_path): {"skills": {"enabled": False}, "ci": {"items": {"ship": {"enabled": True}}}}, tmp_path, ) - with pytest.raises(PlanError, match="unknown ci item"): + with pytest.raises(UnknownItemError, match="unknown ci item"): build(cfg, cat, project_type="unknown") diff --git a/tests/test_cli.py b/tests/test_cli.py index 3ac2a54..b95a21c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -22,11 +22,37 @@ def test_no_command_prints_help(capsys): assert "rig — the dev-environment" in out -def test_doctor_runs(capsys): +def test_doctor_runs(tmp_path, capsys, monkeypatch): + from riglib import errors + + # isolate HOME so `_scan_missing_targets()` can't read the dev machine's real + # ~/.claude/settings.json (a dead hook there would flip the exit code to MISSING_TARGET). + monkeypatch.setenv("HOME", str(tmp_path / "home")) rc = main(["doctor"]) out = capsys.readouterr().out assert "rig doctor" in out - assert rc in (0, 1) # 1 if optional deps missing on the CI box + # 0 = all present; 1 = optional deps missing on the CI box; 127 = a REQUIRED dep absent + # (the documented missing-dependency class — honored by the exit-code contract). + assert rc in (0, 1, errors.EXIT_MISSING_DEP) + + +def test_doctor_missing_required_dep_uses_127_contract(tmp_path, capsys, monkeypatch): + """A missing REQUIRED dep exits with the documented missing-dependency class (127). + + The --help epilog promises `127 missing dependency`; doctor must honor that (the public + exit-code contract scripts branch on) instead of the generic non-zero. A pure --yes-less run + so nothing is installed; only required deps are forced absent. + """ + from riglib import doctor, errors + + monkeypatch.setenv("HOME", str(tmp_path / "home")) + # force every dependency probe to "absent" so missing_required is non-empty. + monkeypatch.setattr(doctor.shutil, "which", lambda name: None) + monkeypatch.setattr(doctor, "_python_present", lambda name: False) + rc = main(["doctor"]) # no --yes → reports + advises, installs nothing + out = capsys.readouterr().out + assert "missing dependencies above" in out + assert rc == errors.EXIT_MISSING_DEP def test_setup_dryrun_default(tmp_path, capsys, fake_agent_tools, monkeypatch): @@ -102,7 +128,11 @@ def test_setup_failclosed_leaves_no_config(tmp_path, capsys, fake_agent_tools, m encoding="utf-8", ) rc = main(["init", "-C", str(repo), "--config", str(template), "--yes"]) - assert rc == 2 + # error-system v2: an unknown catalog item exits with the unknown-item class (4), not the + # generic config class (2). The fail-closed guarantee is unchanged: no rig.yaml is written. + from riglib import errors + + assert rc == errors.EXIT_UNKNOWN_ITEM assert not (repo / "rig.yaml").exists() # fail-closed: no invalid config written @@ -147,8 +177,14 @@ def test_setup_with_external_config_persists_repo_yaml(tmp_path, capsys, fake_ag def test_status_scans_empty_ci_target_for_extras(tmp_path, capsys, fake_agent_tools, monkeypatch): + import subprocess + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) repo = tmp_path / "repo" + repo.mkdir() + # CI is the REPO layer — its extras are only scanned inside a real git repo (error-system + # v2: a non-git dir has no repo layer), so this must be an actual repository. + subprocess.run(["git", "init", "-q"], cwd=str(repo), check=True) (repo / ".github" / "workflows").mkdir(parents=True) (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8") cfg = repo / "rig.yaml" diff --git a/tests/test_errors.py b/tests/test_errors.py new file mode 100644 index 0000000..8a71006 --- /dev/null +++ b/tests/test_errors.py @@ -0,0 +1,193 @@ +"""Unit tests for the error system v2 — structured what/why/fix errors + heuristics. + +Covers: +- the RigError dataclass-style exception (what/why/fix/exit_code) + per-class stable codes, +- the consistent CLI rendering (every error prints WHAT, WHY, HOW-to-fix), +- did-you-mean (Levenshtein-nearest catalog name), +- the empty-category message ("no slots; remove the block"), +- the removed/deprecated-slot registry (mcp.items.review → names agent-tools #32 + the fix), +- missing-target (a config path/binary that's gone → names the file + how to regenerate). + +All offline; no disk, no network. +""" + +from __future__ import annotations + + +from riglib import errors + + +# ── exit-code table: stable + distinct per failure class ────────────────────────── +def test_exit_codes_are_distinct_and_stable(): + codes = { + errors.EXIT_CONFIG, + errors.EXIT_DRIFT, + errors.EXIT_UNKNOWN_ITEM, + errors.EXIT_MISSING_TARGET, + errors.EXIT_NOT_A_REPO, + errors.EXIT_MISSING_DEP, + } + # every class has its OWN code (no collisions) + assert len(codes) == 6 + # pinned values — scripts depend on these; a change here is a breaking change + assert errors.EXIT_CONFIG == 2 + assert errors.EXIT_DRIFT == 3 + assert errors.EXIT_UNKNOWN_ITEM == 4 + assert errors.EXIT_MISSING_TARGET == 5 + assert errors.EXIT_NOT_A_REPO == 6 + assert errors.EXIT_MISSING_DEP == 127 + + +def test_rig_error_carries_what_why_fix_and_code(): + e = errors.RigError( + what="something broke", + why="because of a reason", + fix="run `rig fix`", + exit_code=errors.EXIT_CONFIG, + ) + assert e.what == "something broke" + assert e.why == "because of a reason" + assert e.fix == "run `rig fix`" + assert e.exit_code == errors.EXIT_CONFIG + # str() includes the WHAT so a bare log line is still useful + assert "something broke" in str(e) + + +def test_subclasses_pin_their_exit_code(): + assert errors.ConfigError(what="x", why="y", fix="z").exit_code == errors.EXIT_CONFIG + assert errors.UnknownItemError(what="x", why="y", fix="z").exit_code == errors.EXIT_UNKNOWN_ITEM + assert errors.MissingTargetError(what="x", why="y", fix="z").exit_code == errors.EXIT_MISSING_TARGET + assert errors.NotARepoError(what="x", why="y", fix="z").exit_code == errors.EXIT_NOT_A_REPO + assert errors.MissingDepError(what="x", why="y", fix="z").exit_code == errors.EXIT_MISSING_DEP + + +# ── consistent rendering: every error shows the 3 parts ─────────────────────────── +def test_render_shows_three_parts(): + e = errors.UnknownItemError( + what="unknown mcp item: reviewr", + why="not in the mcp catalog (declared in /tmp/rig.yaml)", + fix="did you mean 'review'? edit mcp.items in /tmp/rig.yaml", + ) + text = errors.render(e, color=False) + # the three load-bearing labels are present and ordered + assert "what:" in text.lower() or "error:" in text.lower() + assert "why" in text.lower() + assert "fix" in text.lower() + # the actual content survives rendering + assert "reviewr" in text + assert "review" in text + assert "/tmp/rig.yaml" in text + + +# ── did-you-mean (Levenshtein) ──────────────────────────────────────────────────── +def test_did_you_mean_picks_nearest(): + assert errors.did_you_mean("reviewr", {"review", "naming", "shell-timeouts"}) == "review" + assert errors.did_you_mean("dispatcherr", {"dispatcher", "templates"}) == "dispatcher" + + +def test_did_you_mean_returns_none_when_too_far(): + # a wildly different token shouldn't fabricate a bogus suggestion + assert errors.did_you_mean("zzzzzzzz", {"review", "naming"}) is None + + +def test_did_you_mean_returns_none_on_empty_catalog(): + assert errors.did_you_mean("anything", set()) is None + + +# ── unknown-item error builder: did-you-mean OR empty-category message ───────────── +def test_unknown_item_suggests_nearest(): + e = errors.unknown_item_error( + category="mcp", + key="mcp.items.reviewr", + bad="reviewr", + known={"review", "context7"}, + config_path="/tmp/rig.yaml", + ) + assert isinstance(e, errors.UnknownItemError) + assert e.exit_code == errors.EXIT_UNKNOWN_ITEM + assert "review" in e.fix # the suggestion + assert "/tmp/rig.yaml" in e.fix # the offending config file path + assert "reviewr" in e.what + + +def test_unknown_item_empty_category_says_remove_the_block(): + # the category has NO slots → "remove the mcp block", NOT "known: none". + # use a NON-removed name so this exercises the empty-category branch (not removed-slot). + e = errors.unknown_item_error( + category="mcp", + key="mcp.items.context7", + bad="context7", + known=set(), + config_path="/tmp/rig.yaml", + ) + assert "no slots" in e.why.lower() or "no slots" in e.fix.lower() + assert "remove" in e.fix.lower() + assert "mcp" in e.fix.lower() + # explicitly NOT the old useless phrasing + assert "known: none" not in (e.what + e.why + e.fix).lower() + + +# ── removed/deprecated slots registry ───────────────────────────────────────────── +def test_removed_slot_registry_has_mcp_review(): + rec = errors.removed_slot("mcp", "review") + assert rec is not None + # names the PR/source and says it's a CLI+skill, not an MCP + assert "#32" in rec.reason + assert "cli" in rec.reason.lower() + + +def test_removed_slot_error_names_it_and_the_fix(): + e = errors.unknown_item_error( + category="mcp", + key="mcp.items.review", + bad="review", + known={"context7"}, # 'review' is NOT in the catalog (removed) + config_path="/home/u/.config/rig/config.yaml", + ) + blob = (e.what + " " + e.why + " " + e.fix).lower() + # removed-slot path wins over did-you-mean: it must cite the removal + the exact fix + assert "#32" in (e.what + e.why + e.fix) + assert "remove" in blob + assert "mcp.items.review" in (e.what + e.why + e.fix) + assert "/home/u/.config/rig/config.yaml" in (e.what + e.why + e.fix) + + +def test_removed_slot_unknown_returns_none(): + assert errors.removed_slot("mcp", "context7") is None + assert errors.removed_slot("skills", "naming") is None + + +# ── missing-target ──────────────────────────────────────────────────────────────── +def test_missing_target_error_names_file_and_regen(): + e = errors.missing_target_error( + what_kind="hook", + target="/home/u/.claude/hooks/block-no-verify.json", + why="settings.json references a hook descriptor that no longer exists on disk", + regen="run `rig apply` to reinstall the agent-hooks", + ) + assert isinstance(e, errors.MissingTargetError) + assert e.exit_code == errors.EXIT_MISSING_TARGET + assert "/home/u/.claude/hooks/block-no-verify.json" in e.what + assert "rig apply" in e.fix + + +def test_not_a_repo_error_code(): + e = errors.NotARepoError(what="not a git repo", why="no .git", fix="cd into a repo") + assert e.exit_code == errors.EXIT_NOT_A_REPO + + +# ── the CLI guard turns a RigError into render+exit-code ─────────────────────────── +def test_guard_renders_and_returns_exit_code(capsys): + def boom(): + raise errors.UnknownItemError(what="bad item", why="reason", fix="the fix") + + rc = errors.guard(boom) + out = capsys.readouterr().out + assert rc == errors.EXIT_UNKNOWN_ITEM + assert "bad item" in out + assert "the fix" in out + + +def test_guard_passes_through_success(): + assert errors.guard(lambda: 0) == 0 + assert errors.guard(lambda: 3) == 3 diff --git a/tests/test_missing_target.py b/tests/test_missing_target.py new file mode 100644 index 0000000..4737311 --- /dev/null +++ b/tests/test_missing_target.py @@ -0,0 +1,208 @@ +"""missing-target heuristic — a config that references a path/binary that's gone on disk. + +Motivating case (from the spec): a hook command in the harness ``settings.json`` points at a +script that no longer exists (the dead-rtk-hook case), which otherwise surfaces only as a +generic harness "PreToolUse error" with no hint of the cause. The scanner names the missing +file + how to regenerate it, and `rig status` / `rig doctor` surface it PROACTIVELY (before it +bites at runtime). + +All offline; operates on a fake settings.json under a tmp HOME. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +from riglib import errors +from riglib.missing_target import scan_settings_hooks + + +def _settings(home: Path, hooks: dict) -> Path: + cfg = home / ".claude" / "settings.json" + cfg.parent.mkdir(parents=True, exist_ok=True) + cfg.write_text(json.dumps({"hooks": hooks}), encoding="utf-8") + return cfg + + +def test_scan_flags_dead_hook_command(tmp_path): + home = tmp_path / "home" + gone = tmp_path / "gone" / "rtk-hook.py" # never created + settings = _settings( + home, + { + "PreToolUse": [ + {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]} + ] + }, + ) + findings = scan_settings_hooks(settings) + assert len(findings) == 1 + f = findings[0] + assert isinstance(f, errors.MissingTargetError) + assert f.exit_code == errors.EXIT_MISSING_TARGET + assert str(gone) in f.what # names the missing file + assert "settings.json" in f.why # says WHERE it's referenced + assert f.fix # carries a concrete regenerate/remove hint + + +def test_scan_ignores_live_hook_command(tmp_path): + home = tmp_path / "home" + live = tmp_path / "live-hook.py" + live.write_text("#!/usr/bin/env python3\n", encoding="utf-8") + settings = _settings( + home, + {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {live}"}]}]}, + ) + assert scan_settings_hooks(settings) == [] + + +def test_scan_ignores_bare_binary_on_path(tmp_path): + # a command that's a plain binary name (resolved via PATH), not an absolute script path, + # must not be flagged as a missing FILE — we only check absolute path args that look like + # script files. + home = tmp_path / "home" + settings = _settings( + home, + {"PreToolUse": [{"matcher": "*", "hooks": [{"type": "command", "command": "gitleaks protect"}]}]}, + ) + assert scan_settings_hooks(settings) == [] + + +def test_scan_ignores_nonexistent_output_arg_when_script_lives(tmp_path): + # a hook whose SCRIPT exists but which names a not-yet-created runtime output file as a + # later absolute-path arg must NOT be flagged — the output file is created at run time, not + # a dead reference. Only the invoked script is the "target" we check. + home = tmp_path / "home" + live = tmp_path / "live-hook.py" + live.write_text("#!/usr/bin/env python3\n", encoding="utf-8") + out_file = tmp_path / "run" / "out.json" # never created — a runtime output path + settings = _settings( + home, + { + "PreToolUse": [ + {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {live} --out {out_file}"}]} + ] + }, + ) + assert scan_settings_hooks(settings) == [] + + +def test_scan_flags_dead_script_even_with_trailing_args(tmp_path): + # the inverse: a DEAD script is still flagged, and trailing args don't change the verdict. + home = tmp_path / "home" + gone = tmp_path / "gone" / "hook.py" # never created — the script itself is missing + out_file = tmp_path / "run" / "out.json" + settings = _settings( + home, + { + "PreToolUse": [ + {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone} --out {out_file}"}]} + ] + }, + ) + findings = scan_settings_hooks(settings) + assert len(findings) == 1 + assert str(gone) in findings[0].what + assert str(out_file) not in findings[0].what # the output arg is not the reported target + + +def test_scan_flags_dead_script_under_absolute_interpreter(tmp_path): + # the motivating rtk-hook form on macOS: the interpreter is itself an ABSOLUTE path + # (/usr/bin/env, /opt/homebrew/bin/python3). The first absolute token is the interpreter + # (which exists) — the scanner must skip it and check the real (missing) script after it. + home = tmp_path / "home" + gone = tmp_path / "gone" / "rtk-hook.py" # never created + for cmd in (f"/usr/bin/env python3 {gone}", f"/opt/homebrew/bin/python3 {gone}"): + settings = _settings( + home, {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": cmd}]}]} + ) + findings = scan_settings_hooks(settings) + assert len(findings) == 1, cmd + assert str(gone) in findings[0].what, cmd + + +def test_scan_ignores_inline_c_code(tmp_path): + # `python3 -c ''` runs no script FILE — there is no path target to verify, so a + # nonexistent-looking path embedded in inline code must not be flagged. + home = tmp_path / "home" + settings = _settings( + home, + {"PreToolUse": [{"matcher": "*", "hooks": [{"type": "command", "command": "python3 -c 'import sys; sys.exit(0)'"}]}]}, + ) + assert scan_settings_hooks(settings) == [] + + +def test_scan_flags_dead_script_when_dash_c_is_script_arg(tmp_path): + # `-c` AFTER the script is the SCRIPT's own argument, not Python's inline-code flag — a dead + # script must still be flagged. (Guards against suppressing the check on any trailing `-c`.) + home = tmp_path / "home" + gone = tmp_path / "gone" / "hook.py" # the script itself is missing + settings = _settings( + home, + {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone} -c config.json"}]}]}, + ) + findings = scan_settings_hooks(settings) + assert len(findings) == 1 + assert str(gone) in findings[0].what + + +def test_scan_missing_settings_file_is_empty(tmp_path): + # no settings.json at all → nothing to scan, no error + assert scan_settings_hooks(tmp_path / "home" / ".claude" / "settings.json") == [] + + +def test_scan_malformed_settings_is_empty(tmp_path): + home = tmp_path / "home" + cfg = home / ".claude" / "settings.json" + cfg.parent.mkdir(parents=True, exist_ok=True) + cfg.write_text("{not json", encoding="utf-8") + assert scan_settings_hooks(cfg) == [] + + +def test_status_surfaces_dead_hook(tmp_path, capsys, fake_agent_tools, monkeypatch): + """`rig status` proactively surfaces a dead hook path in the harness settings.json.""" + import subprocess + + home = tmp_path / "home" + gone = tmp_path / "gone-hook.py" # referenced but absent + _settings( + home, + {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]}]}, + ) + monkeypatch.setenv("HOME", str(home)) + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + repo = tmp_path / "repo" + repo.mkdir() + subprocess.run(["git", "init", "-q"], cwd=str(repo), check=True) + (repo / "rig.yaml").write_text( + f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n" + "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: false}\nagents_md: {enabled: false}\n", + encoding="utf-8", + ) + main_rc = __import__("riglib.cli", fromlist=["main"]).main(["status", "-C", str(repo)]) + out = capsys.readouterr().out + assert str(gone) in out # the dead hook path is named + assert "missing" in out.lower() + # a missing-target makes status non-zero (something IS wrong), distinct from clean + assert main_rc != 0 + + +def test_doctor_surfaces_dead_hook(tmp_path, capsys, monkeypatch): + """`rig doctor` proactively surfaces a dead hook path with the missing-target exit code.""" + from riglib.cli import main + + home = tmp_path / "home" + gone = tmp_path / "gone-hook.py" + _settings( + home, + {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]}]}, + ) + monkeypatch.setenv("HOME", str(home)) + rc = main(["doctor"]) + out = capsys.readouterr().out + assert str(gone) in out + # doctor returns the missing-target class when deps are otherwise fine OR may already be 1 + # if a required dep is missing on the box; either way it's non-zero and names the dead hook. + assert rc != 0 diff --git a/tests/test_plan_errors.py b/tests/test_plan_errors.py new file mode 100644 index 0000000..e37f776 --- /dev/null +++ b/tests/test_plan_errors.py @@ -0,0 +1,77 @@ +"""Plan-builder error integration — unknown items now raise structured errors v2. + +`plan.build` (via `_validate_item_names`) is where a config that names a non-existent +catalog item is caught. These tests assert it now raises the structured +:class:`errors.UnknownItemError` (exit 4) carrying the 3-part what/why/fix — with the +removed-slot, did-you-mean, and empty-category heuristics — instead of the old thin +``PlanError("unknown … (known: none)")``. +""" + +from __future__ import annotations + +import pytest + +from riglib import errors +from riglib.catalog import Catalog +from riglib.config import LoadedConfig +from riglib.plan import build + + +def _loaded(tmp_path, data: dict) -> LoadedConfig: + cfg = tmp_path / "rig.yaml" + cfg.write_text("version: 1\n", encoding="utf-8") # a real file for the path to name + return LoadedConfig(data=data, repo_root=tmp_path, repo_path=cfg, layers=[f"repo:{cfg}"]) + + +def test_unknown_mcp_item_raises_structured_with_suggestion(tmp_path, fake_agent_tools): + catalog = Catalog.scan(str(fake_agent_tools)) + # fake catalog has mcp slot "review"; "reviewr" is a typo + loaded = _loaded(tmp_path, {"mcp": {"items": {"reviewr": {"enabled": True}}}}) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + e = exc.value + assert e.exit_code == errors.EXIT_UNKNOWN_ITEM + assert "reviewr" in e.what + assert "review" in e.fix # did-you-mean + assert str(tmp_path / "rig.yaml") in e.fix # the offending file path + + +def test_removed_mcp_review_slot_names_pr_and_fix(tmp_path, fake_agent_tools): + # a catalog WITHOUT a review mcp slot (so 'review' is genuinely gone) → removed-slot path. + # Build a catalog then drop the review mcp item to simulate the post-#32 world. + catalog = Catalog.scan(str(fake_agent_tools)) + catalog.items = [i for i in catalog.items if not (i.category == "mcp" and i.name == "review")] + loaded = _loaded(tmp_path, {"mcp": {"items": {"review": {"enabled": True}}}}) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + blob = exc.value.what + exc.value.why + exc.value.fix + assert "#32" in blob # names the removal PR + assert "mcp.items.review" in blob # the exact key + assert "remove" in blob.lower() + assert str(tmp_path / "rig.yaml") in blob + + +def test_unknown_universal_skill_suggests_nearest(tmp_path, fake_agent_tools): + catalog = Catalog.scan(str(fake_agent_tools)) + # fake catalog has "naming"; "namin" is a typo + loaded = _loaded(tmp_path, {"skills": {"universal": {"enable": ["namin"]}}}) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + assert "naming" in exc.value.fix + + +def test_unknown_ci_item_raises_structured(tmp_path, fake_agent_tools): + catalog = Catalog.scan(str(fake_agent_tools)) + loaded = _loaded(tmp_path, {"ci": {"items": {"secret-scn": {"enabled": True}}}}) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + assert exc.value.exit_code == errors.EXIT_UNKNOWN_ITEM + assert "secret-scan" in exc.value.fix # nearest valid + + +def test_unknown_git_hooks_key_raises_structured(tmp_path, fake_agent_tools): + catalog = Catalog.scan(str(fake_agent_tools)) + loaded = _loaded(tmp_path, {"git_hooks": {"dispatcherr": {"enabled": True}}}) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + assert "dispatcher" in exc.value.fix diff --git a/tests/test_status_layers.py b/tests/test_status_layers.py new file mode 100644 index 0000000..69c640d --- /dev/null +++ b/tests/test_status_layers.py @@ -0,0 +1,232 @@ +"""rig status — GLOBAL vs REPO layer separation, non-git handling, prominent no-rig.yaml. + +The live-run bugs this fixes (from the spec): +- status mixed machine-wide GLOBAL drift (skills/hooks/harness/mcp from + ~/.config/rig/config.yaml) with REPO drift (this repo's CI/symlinks from ./rig.yaml) in one + flat dump → group by LAYER, each item naming WHICH config file declares it; +- in a non-git dir (e.g. ~) status printed "no rig.yaml in this repo (should be committed)" → + it must show ONLY the global layer + "(not a git repository — repo layer N/A)"; +- a real repo with no committed rig.yaml buried the fix above a 49-line drift dump → make it + PROMINENT with "run `rig init`"; +- the "extras (on-disk, not declared) are NEVER deleted by apply" reassurance must be LOUD. + +These drive the CLI through `main(["status", ...])` and assert on captured stdout + exit code. +All offline (no agent-tools apply leg needed — drift detection runs against the fake catalog). +""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + +import pytest + +from riglib import errors +from riglib.cli import main +from riglib.layers import GLOBAL, REPO, layer_for_category + + +@pytest.fixture(autouse=True) +def _isolate_home(tmp_path, monkeypatch): + """Isolate HOME for every test in this module. + + `rig status`/`rig doctor` unconditionally call `_scan_missing_targets()`, which reads the + real `~/.claude/settings.json` via `expanduser`. Without an isolated HOME, a dev machine + whose real settings.json has a hook pointing at a now-gone script would make `dead_targets` + non-empty — flipping a "clean repo → exit 0" test into EXIT_MISSING_TARGET and a "non-git → + 0/drift" test out of its expected set. An empty tmp HOME has no settings.json, so the scan + finds nothing and the tests assert only what they mean to. Runs before each test body, so a + test that needs a populated HOME (e.g. the GLOBAL-drift case) still overrides it. + """ + monkeypatch.setenv("HOME", str(tmp_path / "home")) + + +def _git_repo(path: Path) -> Path: + """Make ``path`` a real git repo (status uses `git rev-parse`, so a bare `.git` dir is not + enough — it must be an actual repository for env.is_git_repo to be True).""" + path.mkdir(parents=True, exist_ok=True) + subprocess.run(["git", "init", "-q"], cwd=str(path), check=True) + return path + + +# ── category → layer classification ─────────────────────────────────────────────── +def test_category_layer_classification(): + # GLOBAL: machine-wide artifacts declared in ~/.config/rig/config.yaml + for cat in ("skills", "agent_hooks", "mcp", "harness", "models", "git_hooks", "tmux"): + assert layer_for_category(cat) == GLOBAL, cat + # REPO: this repo's artifacts declared in ./rig.yaml + for cat in ("ci", "agents_md", "github"): + assert layer_for_category(cat) == REPO, cat + + +# ── non-git dir: no "should be committed", only the global layer ────────────────── +def test_status_non_git_dir_no_should_be_committed(tmp_path, capsys, fake_agent_tools, monkeypatch): + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + monkeypatch.setenv("RIG_AGENT_TOOLS_SOURCE", str(fake_agent_tools)) + # a plain dir with NO .git — the motivating case is running status in ~ + plain = tmp_path / "not-a-repo" + plain.mkdir() + rc = main(["status", "-C", str(plain)]) + out = capsys.readouterr().out + assert "should be committed" not in out # the bug: never say this outside a git repo + assert "not a git repository" in out.lower() + assert "repo layer" in out.lower() # "... repo layer N/A" + # it still ran (showed the global layer), not a hard error + assert rc in (0, errors.EXIT_DRIFT) + + +# ── non-git dir, catalog UNRESOLVABLE: still reports "not a git repository" ──────── +def test_status_non_git_dir_without_resolvable_catalog(tmp_path, capsys, monkeypatch): + """The smoke-only regression: in a non-git dir where the agent-tools checkout cannot be + resolved (fresh machine / running in ~ with no source), the catalog scan fails — but that + feeds only the REPO layer, which a non-git dir doesn't have. status must NOT die with the + catalog error; it must still report "not a git repository" and exit 0. + + The other non-git test pins RIG_AGENT_TOOLS_SOURCE so the catalog resolves; this one removes + every source (env + default candidates) to drive the CatalogError fallback path explicitly. + """ + import riglib.catalog as catalog + + monkeypatch.delenv("RIG_AGENT_TOOLS_SOURCE", raising=False) + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + # point every default candidate at empties so resolve_source finds no checkout anywhere + monkeypatch.setattr(catalog, "_DEFAULT_SOURCE_CANDIDATES", (str(tmp_path / "nope"),)) + plain = tmp_path / "not-a-repo" + plain.mkdir() + rc = main(["status", "-C", str(plain)]) + out = capsys.readouterr().out + assert rc == 0 + assert "not a git repository" in out.lower() + assert "should be committed" not in out + # the catalog error must NOT have leaked through + assert "could not locate an agent-tools checkout" not in out + + +# ── real repo, no committed rig.yaml: PROMINENT fix ─────────────────────────────── +def test_status_real_repo_no_rigyaml_prominent_init_hint(tmp_path, capsys, fake_agent_tools, monkeypatch): + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + monkeypatch.setenv("RIG_AGENT_TOOLS_SOURCE", str(fake_agent_tools)) + repo = _git_repo(tmp_path / "repo") # a real git repo, but no rig.yaml + main(["status", "-C", str(repo)]) + out = capsys.readouterr().out + # prominent: names the fix command, not a one-liner buried in drift + assert "rig init" in out + assert "no committed rig.yaml" in out.lower() or "no rig.yaml" in out.lower() + + +# ── layer grouping + per-item config-file provenance ────────────────────────────── +def test_status_groups_drift_by_layer_with_provenance(tmp_path, capsys, fake_agent_tools, monkeypatch): + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + repo = _git_repo(tmp_path / "repo") + # an undeclared CI workflow on disk → a REPO-layer extra + (repo / ".github" / "workflows").mkdir(parents=True) + (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8") + cfg = repo / "rig.yaml" + cfg.write_text( + f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n" + "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: true, all: false}\n", + encoding="utf-8", + ) + rc = main(["status", "-C", str(repo)]) + out = capsys.readouterr().out + assert rc == errors.EXIT_DRIFT + # the REPO layer heading appears (the CI extra + agents_md are REPO-layer drift). GLOBAL is + # only printed when there IS global drift — HOME is isolated here, so there is none. + assert "REPO — this repository" in out + # the CI extra is reported and names the declaring REPO config file (provenance) + assert "rogue" in out + assert str(cfg) in out # provenance: which config file declares this layer + + +# ── GLOBAL-layer drift names the GLOBAL config file ─────────────────────────────── +def test_status_global_layer_drift_names_global_config(tmp_path, capsys, fake_agent_tools, monkeypatch): + # an isolated HOME with an undeclared MCP server on disk → a GLOBAL-layer extra, and a + # global config file so its provenance can be named. + import json + + home = tmp_path / "home" + (home / ".claude" / "mcp").mkdir(parents=True) + (home / ".claude" / "mcp" / "mcp.json").write_text( + json.dumps({"mcpServers": {"rogue-server": {"command": "x"}}}), encoding="utf-8" + ) + monkeypatch.setenv("HOME", str(home)) + xdg = tmp_path / "xdg" + (xdg / "rig").mkdir(parents=True) + gcfg = xdg / "rig" / "config.yaml" + gcfg.write_text(f"version: 1\nagent_tools_source: {fake_agent_tools}\n", encoding="utf-8") + monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg)) + repo = _git_repo(tmp_path / "repo") + (repo / "rig.yaml").write_text( + f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n" + "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: false}\nagents_md: {enabled: false}\n", + encoding="utf-8", + ) + rc = main(["status", "-C", str(repo)]) + out = capsys.readouterr().out + assert rc == errors.EXIT_DRIFT + assert "GLOBAL — machine-wide" in out + assert "rogue-server" in out + assert str(gcfg) in out # the GLOBAL config file is named as the declaring layer + + +# ── reassurance is LOUD ─────────────────────────────────────────────────────────── +def test_status_extras_never_deleted_reassurance_is_loud(tmp_path, capsys, fake_agent_tools, monkeypatch): + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + repo = _git_repo(tmp_path / "repo") + (repo / ".github" / "workflows").mkdir(parents=True) + (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8") + cfg = repo / "rig.yaml" + cfg.write_text( + f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n" + "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: true, all: false}\n", + encoding="utf-8", + ) + main(["status", "-C", str(repo)]) + out = capsys.readouterr().out.lower() + # the guarantee must be explicit: apply NEVER deletes on-disk-not-declared items + assert "never" in out and "delete" in out + assert "extra" in out + + +# ── reassurance also LOUD in `rig apply --help` ─────────────────────────────────── +def test_apply_help_says_apply_never_deletes(capsys): + import pytest + + with pytest.raises(SystemExit): + main(["apply", "--help"]) + out = capsys.readouterr().out.lower() + assert "never" in out and "delete" in out # the apply-won't-nuke reassurance + + +# ── exit codes documented in --help (structured-exit-codes skill) ───────────────── +def test_help_documents_exit_codes(capsys): + import pytest + + with pytest.raises(SystemExit): + main(["--help"]) + out = capsys.readouterr().out + assert "exit codes:" in out + assert "4" in out and "unknown item" in out + assert "6" in out and "not a git repository" in out + + +# ── a clean in-sync repo still exits 0 ──────────────────────────────────────────── +def test_status_clean_repo_exits_zero(tmp_path, capsys, fake_agent_tools, monkeypatch): + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + repo = _git_repo(tmp_path / "repo") + cfg = repo / "rig.yaml" + # everything OFF, including agents_md (on by default → would otherwise drift the symlink) + cfg.write_text( + f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n" + "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: false}\n" + "agents_md: {enabled: false}\n", + encoding="utf-8", + ) + rc = main(["status", "-C", str(repo)]) + out = capsys.readouterr().out + assert rc == 0 + assert "in sync" in out.lower() From fe2a6a564a8ef1f23f2822503c5555cda6af54ca Mon Sep 17 00:00:00 2001 From: Alex Ultra Date: Wed, 17 Jun 2026 06:49:07 +0200 Subject: [PATCH 2/2] fix(errors): address codex P2 review findings on error-system-v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three deferred P2 findings from the PR #27 codex review, now fixed properly: 1. config.py — per-key LAYER PROVENANCE for unknown items. The loader now tracks which config FILE last declared each top-level key (key_sources, built in cascade order). An unknown item that came SOLELY from the global config is reported against ~/.config/rig/config.yaml, not the repo rig.yaml. plan.py's unknown-item errors resolve the source via config.source_for_key(). 2. missing_target.py — SKIP `-m module` invocations. `python3 -m my_hook --out x` runs a module by import name, not a script FILE; the scanner now treats `-m` like `-c` (interpreter mode, no path to verify), so a healthy module-based hook with an absolute --out arg no longer false-flags as a missing target. 3. cli.py — EXIT-CODE PRECEDENCE. A dead hook/target now surfaces its exit code (EXIT_MISSING_TARGET=5) even alongside config<->disk drift, instead of being masked by the drift exit (3). Both findings are still printed; missing-target outranks drift (it fails at runtime). Precedence documented in `rig --help`. Also makes tests/smoke.sh run pytest via `uv run --with pytest` when uv is available (the documented command), so the suite runs on a machine whose bare python3 lacks pytest. Tests: RED-first for each finding; full suite 504 -> 513 passing; smoke green. Co-Authored-By: Claude Opus 4.8 --- riglib/cli.py | 42 +++++++++++++------ riglib/config.py | 50 ++++++++++++++++++++--- riglib/missing_target.py | 16 +++++--- riglib/plan.py | 16 ++++---- tests/smoke.sh | 9 +++- tests/test_config.py | 25 ++++++++++++ tests/test_missing_target.py | 44 ++++++++++++++++++++ tests/test_plan_errors.py | 79 +++++++++++++++++++++++++++++++++++- tests/test_status_layers.py | 45 +++++++++++++++++++- 9 files changed, 291 insertions(+), 35 deletions(-) diff --git a/riglib/cli.py b/riglib/cli.py index f19ec0a..65ed84e 100644 --- a/riglib/cli.py +++ b/riglib/cli.py @@ -71,6 +71,10 @@ def build_parser() -> argparse.ArgumentParser: " 5 missing target (config references a path/binary that's gone on disk)\n" " 6 not a git repository (a repo-scoped command run outside a repo)\n" " 127 missing dependency (a required external tool isn't installed)\n" + "\n" + " precedence: when `rig status` finds BOTH a missing target and config↔disk drift,\n" + " it prints both but exits 5 (missing-target outranks drift — the dead reference\n" + " fails at runtime, so it's the more urgent class).\n" ), ) p.add_argument("--version", action="version", version=f"rig {__version__}") @@ -537,19 +541,33 @@ def cmd_status(args: argparse.Namespace) -> int: print(_ok("\n in sync — config and disk agree")) return 0 - if dead_targets and report.in_sync: - # no config↔disk drift, but a dead hook reference IS a problem worth a non-zero exit. - print(_dim("\n (no config↔disk drift, but a missing target above needs attention)")) + if not report.in_sync: + # render the full drift report whenever there IS drift — the user must SEE every problem, + # regardless of which exit code we ultimately surface below. + _render_drift_by_layer(report, loaded, env) + # LOUD reassurance: apply NEVER deletes on-disk-not-declared items. A user must not fear + # `rig apply` will nuke a hand-added skill (it won't — extras are surfaced, never removed). + print() + print(_ok(" ✔ safe: `rig apply` NEVER deletes on-disk extras — items present on disk but")) + print(_ok(" not declared in any layer are left for you to decide. apply only ADDS/UPDATES.")) + print(_dim("\n run `rig apply` to converge config→disk (extras above are left as-is)")) + + # EXIT-CODE PRECEDENCE — a dead target OUTRANKS ordinary drift. A missing hook script will + # FAIL at runtime (a generic "PreToolUse error"); drift is merely "config and disk disagree". + # The more-actionable, higher-severity class wins so a script following the stable exit-code + # contract sees EXIT_MISSING_TARGET (5) even when drift is also present — the drift exit (3) + # must never MASK it. Both are printed above; only the exit code is single-valued. + if dead_targets: + if not report.in_sync: + print(_dim( + f"\n exit: missing-target ({errors.EXIT_MISSING_TARGET}) takes precedence over " + f"config↔disk drift ({errors.EXIT_DRIFT}) — the dead reference fails at runtime; " + "fix it first, then re-run." + )) + else: + print(_dim("\n (no config↔disk drift, but a missing target above needs attention)")) return errors.EXIT_MISSING_TARGET - - _render_drift_by_layer(report, loaded, env) - # LOUD reassurance: apply NEVER deletes on-disk-not-declared items. A user must not fear - # `rig apply` will nuke a hand-added skill (it won't — extras are surfaced, never removed). - print() - print(_ok(" ✔ safe: `rig apply` NEVER deletes on-disk extras — items present on disk but")) - print(_ok(" not declared in any layer are left for you to decide. apply only ADDS/UPDATES.")) - print(_dim("\n run `rig apply` to converge config→disk (extras above are left as-is)")) - return 3 + return errors.EXIT_DRIFT def _declaring_config(category: str, loaded) -> str: diff --git a/riglib/config.py b/riglib/config.py index 7a1ec5d..fcb7816 100644 --- a/riglib/config.py +++ b/riglib/config.py @@ -105,6 +105,11 @@ class LoadedConfig: global_path: Path | None = None repo_path: Path | None = None layers: list[str] = field(default_factory=list) + # Provenance: which config FILE last declared each top-level key. Built during the cascade + # (global first, then repo/explicit overwrites), so a key present only in the global config + # maps to the global path even when a repo rig.yaml is also loaded. Used to name the CORRECT + # source file in an unknown-item error instead of always blaming the repo file. + key_sources: dict[str, Path] = field(default_factory=dict) @property def agent_tools_source(self) -> str | None: @@ -134,6 +139,22 @@ def primary_config_path(self) -> Path: return self.global_path return repo_config_path(self.repo_root) + def source_for_key(self, dotted_key: str) -> Path: + """The config FILE that declared ``dotted_key`` (e.g. ``mcp.items.review``). + + Resolves provenance by the TOP-LEVEL key (``mcp``): a key that came solely from the + global ``~/.config/rig/config.yaml`` names the global file, while a key the repo + ``rig.yaml`` set (or overrode) names the repo file. This is what lets an unknown-item + error point at the file actually carrying the stale entry — e.g. a removed MCP slot + left in the global config is reported against the global file, not the repo's. + + Falls back to :attr:`primary_config_path` when the key has no tracked provenance (a + config built directly in tests, or a key that is not top-level). + """ + top = dotted_key.split(".", 1)[0] + src = self.key_sources.get(top) + return src if src is not None else self.primary_config_path + def load( repo_root: Path, @@ -152,34 +173,51 @@ def load( layers: list[str] = [] gpath: Path | None = None rpath: Path | None = None + # Track which file last declared each top-level key, in cascade order (global, then + # repo/explicit). A key the repo layer sets overwrites the global provenance — so the final + # mapping names the file the user must edit to remove the offending key. + key_sources: dict[str, Path] = {} + + def _merge_layer(path: Path, label: str) -> None: + """Merge one config file into the accumulator and record its key provenance + layer. + + Single seam so a new layer can't forget to update ``key_sources`` (the bug the + provenance feature exists to avoid). ``merged`` is rebound via ``nonlocal`` because + ``_deep_merge`` returns a fresh dict rather than mutating in place. + """ + nonlocal merged + data = _load_yaml(path) + merged = _deep_merge(merged, data) + for k in data: + key_sources[k] = path + layers.append(f"{label}:{path}") if include_global: gpath = global_config_path() if gpath.is_file(): - merged = _deep_merge(merged, _load_yaml(gpath)) - layers.append(f"global:{gpath}") + _merge_layer(gpath, "global") if explicit_config is not None: rpath = explicit_config.resolve() if not rpath.is_file(): raise ConfigError(f"--config file not found: {rpath}") - merged = _deep_merge(merged, _load_yaml(rpath)) - layers.append(f"config:{rpath}") + _merge_layer(rpath, "config") else: rpath = repo_config_path(repo_root) if rpath.is_file(): - merged = _deep_merge(merged, _load_yaml(rpath)) - layers.append(f"repo:{rpath}") + _merge_layer(rpath, "repo") validate(merged) merged.pop("scope", None) # `scope` is a removed legacy key — drop it so it never # lingers in loaded.data, gets re-serialized, or is mistaken for a live setting. + key_sources.pop("scope", None) return LoadedConfig( data=merged, repo_root=repo_root, global_path=gpath if gpath and gpath.is_file() else None, repo_path=rpath if rpath and rpath.is_file() else None, layers=layers, + key_sources=key_sources, ) diff --git a/riglib/missing_target.py b/riglib/missing_target.py index a0b9f57..1a30f76 100644 --- a/riglib/missing_target.py +++ b/riglib/missing_target.py @@ -47,8 +47,9 @@ def _missing_paths_in_command(command: str) -> list[str]: interpreter — and that interpreter may itself be an ABSOLUTE path on macOS (``/usr/bin/env python3 /abs/hook.py``, ``/opt/homebrew/bin/python3 /abs/hook.py``). We must check the SCRIPT, not the interpreter: skip a leading interpreter token (bare or absolute) - plus its ``env VAR=…`` / module / ``-c`` style args, then take the first remaining - script-looking token as the target. We check only THAT token — a later absolute path is + plus its ``env VAR=…`` / ``-c`` inline-code / ``-m `` style args (both ``-c`` and + ``-m`` mean there is no script FILE to verify), then take the first remaining script-looking + token as the target. We check only THAT token — a later absolute path is almost always a runtime OUTPUT arg (``--out /var/run/x.json``) that legitimately doesn't exist yet; flagging it would make ``rig status``/``doctor`` cry wolf on a healthy hook. """ @@ -69,10 +70,13 @@ def _missing_paths_in_command(command: str) -> list[str]: rest = tokens[1:] for tok in rest: - # `-c ''` as an INTERPRETER flag (i.e. before any script token) means there - # is no script FILE to verify. Only honor it before the first path — a `-c` AFTER the - # script is the script's OWN argument, not Python's, and must not suppress the check. - if via_interpreter and tok == "-c": + # `-c ''` and `-m ` as INTERPRETER flags (i.e. before any script + # token) both mean there is NO script FILE to verify: `-c` runs inline code, `-m` runs an + # installed module by import name (`python3 -m my_hook --out x` has no path to check, and + # a later absolute arg like `--out /tmp/x.json` is a runtime output, not a hook script). + # Only honor them before the first path — a `-c`/`-m` AFTER the script is the script's OWN + # argument, not the interpreter's, and must not suppress the check. + if via_interpreter and tok in ("-c", "-m"): return [] if _looks_like_script_path(tok): p = Path(tok).expanduser() diff --git a/riglib/plan.py b/riglib/plan.py index 2eeb932..62a1184 100644 --- a/riglib/plan.py +++ b/riglib/plan.py @@ -251,24 +251,25 @@ def _validate_item_names(config: LoadedConfig, catalog: Catalog) -> None: """ from . import errors - config_path = str(config.primary_config_path) - def _check(category: str, names: set[str], known: set[str], key_prefix: str) -> None: """Raise a structured :class:`errors.UnknownItemError` for the first unknown name. ``category`` is the catalog category used for the removed-slot lookup + did-you-mean; ``key_prefix`` is the dotted config-key path (e.g. ``mcp.items``) the bad name hangs - off, so the error names the EXACT offending key (``mcp.items.review``) + its file. + off, so the error names the EXACT offending key (``mcp.items.review``) + its file. The + file is resolved by the key's PROVENANCE (``source_for_key``): a stale entry that came + solely from the global config is reported against the global file, not the repo's. """ unknown = sorted(names - known) if unknown: bad = unknown[0] + key = f"{key_prefix}.{bad}" raise errors.unknown_item_error( category=category, - key=f"{key_prefix}.{bad}", + key=key, bad=bad, known=known, - config_path=config_path, + config_path=str(config.source_for_key(key)), ) # skills — universal group @@ -418,12 +419,13 @@ def build(config: LoadedConfig, catalog: Catalog, *, project_type: str = "unknow from . import errors bad = unknown[0] + key = f"ci.items.{bad}" raise errors.unknown_item_error( category="ci", - key=f"ci.items.{bad}", + key=key, bad=bad, known=known, - config_path=str(config.primary_config_path), + config_path=str(config.source_for_key(key)), ) # resolve which slots are enabled: per-item override > enable/disable > all > off. diff --git a/tests/smoke.sh b/tests/smoke.sh index d110b1c..442229c 100755 --- a/tests/smoke.sh +++ b/tests/smoke.sh @@ -182,8 +182,15 @@ YAML fi # ── 4. unit suite ───────────────────────────────────────────────────────────── +# Run pytest the way the repo actually runs it: prefer `uv run --with pytest` (the documented +# command — README/AGENTS.md), so a machine whose bare `python3` is a clean interpreter without +# pytest installed still runs the suite. Fall back to `python3 -m pytest` when uv is absent. echo "running pytest…" -python3 -m pytest -q "$ROOT/tests" || fail "pytest" +if command -v uv >/dev/null 2>&1; then + uv run --with pytest python -m pytest -q "$ROOT/tests" || fail "pytest" +else + python3 -m pytest -q "$ROOT/tests" || fail "pytest" +fi pass "pytest" echo "smoke OK" diff --git a/tests/test_config.py b/tests/test_config.py index 6f5639a..16db1f4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -75,6 +75,31 @@ def test_load_drops_legacy_scope(tmp_path, monkeypatch): assert "scope" not in loaded.data +def test_key_sources_track_layer_provenance(tmp_path, monkeypatch): + # a key set only in the GLOBAL config maps to the global path; a key the repo sets maps to + # the repo path. This is what source_for_key() uses to name the right file in an error. + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / ".config")) + _w(config.global_config_path(), "version: 1\nmcp: {enabled: true}\nskills: {enabled: true}\n") + repo = tmp_path / "repo" + _w(repo / "rig.yaml", "version: 1\nci: {enabled: false}\nskills: {enabled: false}\n") + loaded = config.load(repo) + assert loaded.key_sources["mcp"] == config.global_config_path() # global-only key + assert loaded.key_sources["ci"] == repo / "rig.yaml" # repo-only key + assert loaded.key_sources["skills"] == repo / "rig.yaml" # repo OVERRIDES global + assert loaded.source_for_key("mcp.items.x") == config.global_config_path() + + +def test_load_strips_scope_from_key_sources(tmp_path, monkeypatch): + # the removed legacy `scope` key is dropped from provenance too (not just from data), so a + # stray `scope:` never lingers as a tracked source. source_for_key falls back when untracked. + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + repo = tmp_path / "repo" + _w(repo / "rig.yaml", "version: 1\nscope: both\nskills: {enabled: false}\n") + loaded = config.load(repo) + assert "scope" not in loaded.key_sources + assert loaded.source_for_key("scope") == loaded.primary_config_path # fallback, not KeyError + + def test_validate_rejects_bad_on_conflict(): with pytest.raises(config.ConfigError, match="on_conflict"): config.validate({"version": 1, "defaults": {"on_conflict": "nuke"}}) diff --git a/tests/test_missing_target.py b/tests/test_missing_target.py index 4737311..9a6c6b8 100644 --- a/tests/test_missing_target.py +++ b/tests/test_missing_target.py @@ -147,6 +147,50 @@ def test_scan_flags_dead_script_when_dash_c_is_script_arg(tmp_path): assert str(gone) in findings[0].what +def test_scan_ignores_module_invocation(tmp_path): + # `python3 -m my_hook --out /tmp/result.json` runs a MODULE, not a script FILE — there is no + # script path to verify. The later absolute `--out` arg is a runtime output path, not a hook + # script, so a module-based hook must NOT be flagged (it would cry wolf on a healthy hook). + home = tmp_path / "home" + out_file = tmp_path / "run" / "result.json" # never created — a runtime output path + settings = _settings( + home, + {"PreToolUse": [{"matcher": "*", "hooks": [ + {"type": "command", "command": f"python3 -m my_hook --out {out_file}"} + ]}]}, + ) + assert scan_settings_hooks(settings) == [] + + +def test_scan_ignores_module_invocation_under_absolute_interpreter(tmp_path): + # same, but the interpreter is an absolute path (the macOS form) — still no script to verify. + home = tmp_path / "home" + gone = tmp_path / "gone" / "x.py" # an absolute path that doesn't exist, AFTER -m + settings = _settings( + home, + {"PreToolUse": [{"matcher": "*", "hooks": [ + {"type": "command", "command": f"/opt/homebrew/bin/python3 -m pkg.hook {gone}"} + ]}]}, + ) + assert scan_settings_hooks(settings) == [] + + +def test_scan_flags_dead_script_when_dash_m_is_script_arg(tmp_path): + # `-m` AFTER the script is the SCRIPT's own argument, not Python's module flag — a dead + # script must still be flagged (mirror of the `-c`-as-script-arg guard). + home = tmp_path / "home" + gone = tmp_path / "gone" / "hook.py" # the script itself is missing + settings = _settings( + home, + {"PreToolUse": [{"matcher": "Bash", "hooks": [ + {"type": "command", "command": f"python3 {gone} -m somearg"} + ]}]}, + ) + findings = scan_settings_hooks(settings) + assert len(findings) == 1 + assert str(gone) in findings[0].what + + def test_scan_missing_settings_file_is_empty(tmp_path): # no settings.json at all → nothing to scan, no error assert scan_settings_hooks(tmp_path / "home" / ".claude" / "settings.json") == [] diff --git a/tests/test_plan_errors.py b/tests/test_plan_errors.py index e37f776..c5fbbc4 100644 --- a/tests/test_plan_errors.py +++ b/tests/test_plan_errors.py @@ -9,9 +9,11 @@ from __future__ import annotations +from pathlib import Path + import pytest -from riglib import errors +from riglib import config, errors from riglib.catalog import Catalog from riglib.config import LoadedConfig from riglib.plan import build @@ -23,6 +25,11 @@ def _loaded(tmp_path, data: dict) -> LoadedConfig: return LoadedConfig(data=data, repo_root=tmp_path, repo_path=cfg, layers=[f"repo:{cfg}"]) +def _write_yaml(p: Path, s: str) -> None: + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(s, encoding="utf-8") + + def test_unknown_mcp_item_raises_structured_with_suggestion(tmp_path, fake_agent_tools): catalog = Catalog.scan(str(fake_agent_tools)) # fake catalog has mcp slot "review"; "reviewr" is a typo @@ -75,3 +82,73 @@ def test_unknown_git_hooks_key_raises_structured(tmp_path, fake_agent_tools): with pytest.raises(errors.UnknownItemError) as exc: build(loaded, catalog) assert "dispatcher" in exc.value.fix + + +def test_unknown_item_from_global_layer_names_global_file(tmp_path, fake_agent_tools, monkeypatch): + """An unknown item that came SOLELY from the global config is reported against the GLOBAL + file, not the repo's rig.yaml — the loader tracks per-key layer provenance. + + Motivating case: a stale/removed MCP slot lingers in ``~/.config/rig/config.yaml`` while the + repo's rig.yaml has its own (valid) blocks. The fix must tell the user to edit the GLOBAL + file that actually carries the bad key, not the repo file that doesn't mention it. + """ + xdg = tmp_path / "xdg" + gcfg = xdg / "rig" / "config.yaml" + _write_yaml(gcfg, f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "mcp: {items: {reviewr: {enabled: true}}}\n") + monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg)) + repo = tmp_path / "repo" + rcfg = repo / "rig.yaml" + # repo declares OTHER (valid) blocks — it must NOT be blamed for the global's bad mcp key. + _write_yaml(rcfg, "version: 1\nskills: {enabled: false}\n") + + loaded = config.load(repo) + catalog = Catalog.scan(str(fake_agent_tools)) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + e = exc.value + assert "reviewr" in e.what + assert str(gcfg) in e.why # provenance: names the GLOBAL file that carries the bad key + assert str(gcfg) in e.fix + assert str(rcfg) not in (e.why + e.fix) # the repo file is NOT blamed + + +def test_unknown_item_overridden_in_repo_names_repo_file(tmp_path, fake_agent_tools, monkeypatch): + """When the repo layer ALSO sets the offending top-level key, the repo file wins provenance + (the repo overrides the global layer) — so the error names the repo file the user edits.""" + xdg = tmp_path / "xdg" + gcfg = xdg / "rig" / "config.yaml" + _write_yaml(gcfg, f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "mcp: {items: {review: {enabled: true}}}\n") + monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg)) + repo = tmp_path / "repo" + rcfg = repo / "rig.yaml" + _write_yaml(rcfg, "version: 1\nmcp: {items: {reviewr: {enabled: true}}}\n") # repo's own bad mcp key + + loaded = config.load(repo) + catalog = Catalog.scan(str(fake_agent_tools)) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + e = exc.value + assert "reviewr" in e.what + assert str(rcfg) in e.fix # the repo set mcp → repo file is the source to edit + + +def test_unknown_item_from_explicit_config_layer_names_that_file(tmp_path, fake_agent_tools, monkeypatch): + """A `--config P` (explicit) layer carries provenance too: an unknown key in P is reported + against P, not the repo's rig.yaml. Locks in the explicit branch of the cascade so a new + layer can't silently skip key-source tracking.""" + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + repo = tmp_path / "repo" + _write_yaml(repo / "rig.yaml", "version: 1\nskills: {enabled: false}\n") # ignored: --config replaces it + explicit = tmp_path / "explicit.yaml" + _write_yaml(explicit, f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "mcp: {items: {reviewr: {enabled: true}}}\n") + + loaded = config.load(repo, explicit_config=explicit) + catalog = Catalog.scan(str(fake_agent_tools)) + with pytest.raises(errors.UnknownItemError) as exc: + build(loaded, catalog) + e = exc.value + assert "reviewr" in e.what + assert str(explicit) in e.fix # the explicit --config file is the source to edit diff --git a/tests/test_status_layers.py b/tests/test_status_layers.py index 69c640d..e8426a3 100644 --- a/tests/test_status_layers.py +++ b/tests/test_status_layers.py @@ -16,6 +16,7 @@ from __future__ import annotations +import json import subprocess from pathlib import Path @@ -143,8 +144,6 @@ def test_status_groups_drift_by_layer_with_provenance(tmp_path, capsys, fake_age def test_status_global_layer_drift_names_global_config(tmp_path, capsys, fake_agent_tools, monkeypatch): # an isolated HOME with an undeclared MCP server on disk → a GLOBAL-layer extra, and a # global config file so its provenance can be named. - import json - home = tmp_path / "home" (home / ".claude" / "mcp").mkdir(parents=True) (home / ".claude" / "mcp" / "mcp.json").write_text( @@ -213,6 +212,48 @@ def test_help_documents_exit_codes(capsys): assert "6" in out and "not a git repository" in out +# ── exit-code precedence: a dead target outranks drift ──────────────────────────── +def test_status_missing_target_outranks_drift(tmp_path, capsys, fake_agent_tools, monkeypatch): + """When `rig status` finds BOTH a dead hook target AND config↔disk drift, it must exit with + EXIT_MISSING_TARGET (5), not EXIT_DRIFT (3) — the missing target must not be MASKED by drift. + + A dead hook script fails at runtime (a generic "PreToolUse error"), so it's the more urgent, + more actionable class. Both findings are still PRINTED; only the single-valued exit code + reflects the higher-severity class so a CI script keying on the stable contract sees it. + """ + home = tmp_path / "home" + gone = tmp_path / "gone-hook.py" # referenced by a hook but never created + settings = home / ".claude" / "settings.json" + settings.parent.mkdir(parents=True, exist_ok=True) + settings.write_text( + json.dumps({"hooks": {"PreToolUse": [ + {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]} + ]}}), + encoding="utf-8", + ) + monkeypatch.setenv("HOME", str(home)) + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global")) + repo = _git_repo(tmp_path / "repo") + # REPO-layer drift: an undeclared CI workflow on disk while ci is enabled-but-empty. + (repo / ".github" / "workflows").mkdir(parents=True) + (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8") + (repo / "rig.yaml").write_text( + f"version: 1\nagent_tools_source: {fake_agent_tools}\n" + "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n" + "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: true, all: false}\n" + "agents_md: {enabled: false}\n", + encoding="utf-8", + ) + rc = main(["status", "-C", str(repo)]) + out = capsys.readouterr().out + # the missing-target class wins the exit code even though drift is ALSO present + assert rc == errors.EXIT_MISSING_TARGET + # both problems are surfaced to the user + assert str(gone) in out # the dead hook target is printed + assert "rogue" in out # the drift is printed too + assert "precedence" in out.lower() # the exit-code precedence is explained + + # ── a clean in-sync repo still exits 0 ──────────────────────────────────────────── def test_status_clean_repo_exits_zero(tmp_path, capsys, fake_agent_tools, monkeypatch): monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))