From 8179a6893ce46cbdd7d8499b85a606596047db46 Mon Sep 17 00:00:00 2001
From: Alex Ultra <invntrm@icloud.com>
Date: Tue, 16 Jun 2026 23:34:51 +0200
Subject: [PATCH 1/2] feat(errors): structured what/why/fix errors + stable
 exit codes (error-system v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Born from two same-day prod failures whose errors were thin and undiagnosable:
`unknown mcp item(s): review (known: none)` (no hint it was a REMOVED slot) and a
dead hook path surfacing only as a generic harness "PreToolUse error". This makes
every rig error a consistent WHAT / WHY / HOW-to-fix block with a stable per-class
exit code (a PUBLIC CONTRACT documented in `rig --help`).

What's in:
- riglib/errors.py: RigError(what/why/fix/exit_code) + per-class codes (2 config,
  3 drift, 4 unknown-item, 5 missing-target, 6 not-a-repo, 127 missing-dep), a
  single top-level `guard()` renderer, did-you-mean (Levenshtein), and a
  removed-slot registry (mcp.items.review -> names agent-tools #32 + the fix).
- riglib/layers.py: GLOBAL vs REPO classification — single source of truth so
  `rig status` groups drift by layer and names WHICH config file declares each item.
- riglib/missing_target.py: proactively scans the harness settings.json for hook
  commands pointing at a script gone from disk (the dead-rtk-hook case). Resolves the
  invoked SCRIPT (skips bare/absolute interpreters like /usr/bin/env, honors `-c` only
  as an interpreter flag) and checks only that token, so a runtime output arg never
  false-flags.
- cli.py: `rig status` renders the 3-part errors + layer grouping; a non-git dir shows
  "not a git repository — repo layer N/A" (never the "should be committed" nag), and
  degrades gracefully when the agent-tools catalog can't be resolved. plan.py raises
  the structured UnknownItemError. config.py adds primary_config_path for provenance.
  `rig doctor` honors the documented 127 missing-dependency exit code.

Tests: 504 pytest passing; tests/smoke.sh fully green (incl. the non-git + dead-hook
+ removed-slot legs). New: test_errors, test_missing_target, test_plan_errors,
test_status_layers; smoke gains non-git / clean-sample / removed-slot assertions.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 riglib/cli.py                | 256 +++++++++++++++++++++++++++++---
 riglib/config.py             |  14 ++
 riglib/errors.py             | 273 +++++++++++++++++++++++++++++++++++
 riglib/layers.py             |  52 +++++++
 riglib/missing_target.py     | 136 +++++++++++++++++
 riglib/plan.py               |  53 ++++---
 tests/smoke.sh               |  75 +++++++++-
 tests/test_catalog_plan.py   |  25 +++-
 tests/test_cli.py            |  42 +++++-
 tests/test_errors.py         | 193 +++++++++++++++++++++++++
 tests/test_missing_target.py | 208 ++++++++++++++++++++++++++
 tests/test_plan_errors.py    |  77 ++++++++++
 tests/test_status_layers.py  | 232 +++++++++++++++++++++++++++++
 13 files changed, 1582 insertions(+), 54 deletions(-)
 create mode 100644 riglib/errors.py
 create mode 100644 riglib/layers.py
 create mode 100644 riglib/missing_target.py
 create mode 100644 tests/test_errors.py
 create mode 100644 tests/test_missing_target.py
 create mode 100644 tests/test_plan_errors.py
 create mode 100644 tests/test_status_layers.py

diff --git a/riglib/cli.py b/riglib/cli.py
index 0d07961..f19ec0a 100644
--- a/riglib/cli.py
+++ b/riglib/cli.py
@@ -56,8 +56,22 @@ def _bold(s: str) -> str:
 def build_parser() -> argparse.ArgumentParser:
     p = argparse.ArgumentParser(
         prog="rig",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
         description="rig — the dev-environment umbrella driver. Set up a repo from a "
         "committed rig.yaml by applying agent-tools content (skills, hooks, CI gates, MCP).",
+        # Exit codes are a PUBLIC CONTRACT (scripts/CI branch on them). Documented here per the
+        # structured-exit-codes skill; the constants live in riglib/errors.py.
+        epilog=(
+            "exit codes:\n"
+            "  0    success\n"
+            "  1    internal error (an unexpected failure / bug)\n"
+            "  2    invalid config (a malformed value, type, or unknown key)\n"
+            "  3    drift (rig status found config↔disk drift)\n"
+            "  4    unknown item (config names a catalog item that doesn't exist / was removed)\n"
+            "  5    missing target (config references a path/binary that's gone on disk)\n"
+            "  6    not a git repository (a repo-scoped command run outside a repo)\n"
+            "  127  missing dependency (a required external tool isn't installed)\n"
+        ),
     )
     p.add_argument("--version", action="version", version=f"rig {__version__}")
     sub = p.add_subparsers(dest="command", metavar="<command>")
@@ -75,7 +89,14 @@ def _add_setup_args(parser: argparse.ArgumentParser) -> None:
     ip = sub.add_parser("init", help="first-run onboarding: scaffold rig.yaml + wire the catalog in (the front door)")
     _add_setup_args(ip)
 
-    ap = sub.add_parser("apply", help="reconcile the repo to rig.yaml (idempotent)")
+    ap = sub.add_parser(
+        "apply",
+        help="reconcile the repo to rig.yaml (idempotent)",
+        description="Reconcile disk to rig.yaml (idempotent). apply only ADDS/UPDATES the "
+        "items your config declares — it NEVER deletes on-disk extras. Items present on disk "
+        "but not declared in any layer (e.g. a hand-added skill) are reported by `rig status` "
+        "and left untouched; apply will not nuke them.",
+    )
     ap.add_argument("-C", "--cwd", default=".", help="repo root (default: cwd)")
     ap.add_argument("--config", help="config file to apply (default: ./rig.yaml + global)")
     ap.add_argument("--dry-run", action="store_true", help="print the resolved plan, write nothing")
@@ -155,7 +176,12 @@ def main(argv: list[str] | None = None) -> int:
         "install-skill": cmd_install_skill,
         "stats": cmd_stats,
     }
-    return handlers[args.command](args)
+    # The single top-level error handler (error-system v2): any structured RigError a command
+    # raises is rendered as the consistent what/why/fix block + its stable per-class exit code.
+    # A non-RigError (a real bug) propagates so its traceback is visible.
+    from . import errors
+
+    return errors.guard(lambda: handlers[args.command](args))
 
 
 # ── shared helpers ────────────────────────────────────────────────────────────────
@@ -352,7 +378,61 @@ def cmd_apply(args: argparse.Namespace) -> int:
     return 1 if report.errors else 0
 
 
+def _scan_missing_targets() -> list:
+    """Scan the harness settings.json for hook commands pointing at files that are gone.
+
+    Resolves the claude-code harness settings file under HOME (``~/.claude/settings.json``) and
+    returns the missing-target findings. Kept a thin wrapper so cmd_status reads cleanly and a
+    future multi-harness expansion has one place to add settings paths.
+    """
+    from .missing_target import scan_settings_hooks
+
+    settings = Path(_os.path.expanduser("~/.claude/settings.json"))
+    return scan_settings_hooks(settings)
+
+
+def _print_non_git_note() -> None:
+    """The one-line 'this is not a git repository' note shown under the status header.
+
+    Single source for the wording so the normal status path and the catalog-failed fallback
+    (``_print_non_git_status``) can never drift apart — the smoke greps for "not a git
+    repository", and only one of two phrasings would be a regression waiting to happen.
+    """
+    print(_dim("  (not a git repository — repo layer / rig.yaml N/A here)"))
+
+
+def _print_non_git_status(env, config: str | None = None) -> int:
+    """Render a minimal `rig status` for a non-git dir when the catalog couldn't be resolved.
+
+    The catalog scan feeds the REPO layer (what this repo would install); a non-git dir has no
+    repo layer, so its failure is irrelevant here. We still owe the user the header + the
+    explicit "not a git repository" note (never the "should be committed" nag). Returns 0 — a
+    non-git dir is a valid place to ask "what's my status?", not an error.
+
+    Config loading precedes the (failed) catalog scan, so the layer provenance is still
+    reportable; we re-load it here rather than thread `loaded` through the exception path.
+    """
+    from .config import ConfigError, load
+
+    print(_bold("rig status"))
+    print(f"  repo: {env.repo_root}")
+    print(f"  stack: {env.stack}  type: {env.project_type}")
+    layers = "(none — built-in defaults)"
+    try:
+        explicit = None
+        if config:
+            cp = Path(config)
+            explicit = (cp if cp.is_absolute() else env.repo_root / cp).resolve()
+        layers = ", ".join(load(env.repo_root, explicit_config=explicit).layers) or layers
+    except ConfigError:
+        pass  # a malformed config still shouldn't stop us reporting "not a git repository"
+    print(f"  config layers: {layers}")
+    _print_non_git_note()
+    return 0
+
+
 def cmd_status(args: argparse.Namespace) -> int:
+    from . import errors
     from .catalog import CatalogError
     from .config import ConfigError
     from .drift import detect
@@ -360,7 +440,20 @@ def cmd_status(args: argparse.Namespace) -> int:
 
     try:
         plan, loaded, env = _load_plan(args.cwd, args.config)
-    except (ConfigError, CatalogError, PlanError) as exc:
+    except CatalogError as exc:
+        # The catalog only matters for the REPO layer (what this repo would install). A non-git
+        # dir (e.g. ~) has no repo layer at all, so an unresolved agent-tools checkout must NOT
+        # mask the one thing status owes that user: "you're not in a git repository". Detecting
+        # the env is independent of (and cheap relative to) the catalog scan that just failed.
+        from .detect import detect_environment
+
+        env = detect_environment(Path(args.cwd).resolve())
+        if not env.is_git_repo:
+            return _print_non_git_status(env, args.config)
+        # A real repo with a broken/missing agent-tools source IS a genuine config failure.
+        print(_err(f"error: {exc}"))
+        return 2
+    except (ConfigError, PlanError) as exc:
         print(_err(f"error: {exc}"))
         return 2
 
@@ -369,8 +462,19 @@ def cmd_status(args: argparse.Namespace) -> int:
     print(f"  stack: {env.stack}  type: {env.project_type}")
     cfg_src = ", ".join(loaded.layers) or "(none — built-in defaults)"
     print(f"  config layers: {cfg_src}")
-    if loaded.repo_path is None:
-        print(_warn("  warning: no rig.yaml in this repo (it should be committed)"))
+
+    # The REPO layer only exists inside a git repository. In a non-git dir (e.g. ~) there is no
+    # repo at all — do NOT nag "no rig.yaml, should be committed" (that advice applies only in a
+    # real repo). Show that the repo layer is N/A and report the GLOBAL layer only.
+    if not env.is_git_repo:
+        _print_non_git_note()
+    elif loaded.repo_path is None:
+        # A REAL repo with no committed rig.yaml: make the fix PROMINENT, not a one-liner
+        # buried above a long drift dump.
+        print()
+        print(_warn(_bold("  ▸ no committed rig.yaml in this repository")))
+        print(_warn("    rig.yaml is the committed source of truth for this repo's setup."))
+        print(_ok("    fix: run `rig init` to create one"))
 
     # scan the configured target dirs for extras even if no action targets them.
     # CI + MCP targets are REPO-LOCAL with clear ownership → scan even when the category is
@@ -389,10 +493,13 @@ def cmd_status(args: argparse.Namespace) -> int:
         h = resolve_category_target(loaded, "agent_hooks")
         if h and loaded.category("agent_hooks").get("enabled") is not False:
             scan_hook_dirs.append(h)
-    d = resolve_category_target(loaded, "ci")  # CI: scan unconditionally (repo-local)
-    if d:
-        scan_ci_dirs.append(d)
-    d = resolve_category_target(loaded, "mcp")  # MCP: scan unconditionally
+    # CI + MCP are REPO-LOCAL — only scan them when this IS a repo (a non-git dir has no repo
+    # layer; scanning a cwd-relative .github there would be meaningless).
+    if env.is_git_repo:
+        d = resolve_category_target(loaded, "ci")  # CI: scan unconditionally (repo-local)
+        if d:
+            scan_ci_dirs.append(d)
+    d = resolve_category_target(loaded, "mcp")  # MCP: scan unconditionally (global)
     if d:
         scan_mcp_files.append(d if d.suffix == ".json" else d / "mcp.json")
     report = detect(
@@ -414,22 +521,98 @@ def cmd_status(args: argparse.Namespace) -> int:
     # so `rig status` answers "is the daily checker cron there?" at a glance.
     _print_schedule_status(plan, report)
 
-    if report.in_sync:
+    # missing-target: a hook command in the harness settings.json that points at a file gone
+    # from disk (the dead-rtk-hook case) surfaces PROACTIVELY here, before it bites at runtime
+    # as a generic "PreToolUse error". This is independent of config↔disk drift.
+    dead_targets = _scan_missing_targets()
+    if dead_targets:
+        print()
+        print(_err(_bold(f"  ▸ missing targets ({len(dead_targets)}) — config points at files that are gone:")))
+        for f in dead_targets:
+            print(f"    {_err('✗')} {f.what}")
+            print(f"      {_dim('why:')} {f.why}")
+            print(f"      {_ok('fix:')} {f.fix}")
+
+    if report.in_sync and not dead_targets:
         print(_ok("\n  in sync — config and disk agree"))
         return 0
 
+    if dead_targets and report.in_sync:
+        # no config↔disk drift, but a dead hook reference IS a problem worth a non-zero exit.
+        print(_dim("\n  (no config↔disk drift, but a missing target above needs attention)"))
+        return errors.EXIT_MISSING_TARGET
+
+    _render_drift_by_layer(report, loaded, env)
+    # LOUD reassurance: apply NEVER deletes on-disk-not-declared items. A user must not fear
+    # `rig apply` will nuke a hand-added skill (it won't — extras are surfaced, never removed).
+    print()
+    print(_ok("  ✔ safe: `rig apply` NEVER deletes on-disk extras — items present on disk but"))
+    print(_ok("    not declared in any layer are left for you to decide. apply only ADDS/UPDATES."))
+    print(_dim("\n  run `rig apply` to converge config→disk (extras above are left as-is)"))
+    return 3
+
+
+def _declaring_config(category: str, loaded) -> str:
+    """Name the config FILE that declares a drift item's layer (or 'not declared in any layer').
+
+    GLOBAL categories are declared in ``~/.config/rig/config.yaml``; REPO categories in the
+    repo's ``./rig.yaml``. If that layer file isn't loaded (the category drifted from a default
+    or an orphaned on-disk extra with no backing config), say so plainly instead of pointing at
+    a file that doesn't declare it.
+    """
+    from .layers import GLOBAL, layer_for_category
+
+    layer = layer_for_category(category)
+    path = loaded.global_path if layer == GLOBAL else loaded.repo_path
+    return str(path) if path is not None else "not declared in any layer"
+
+
+def _render_drift_by_layer(report, loaded, env) -> None:
+    """Render drift GROUPED by GLOBAL vs REPO, each item naming its declaring config file.
+
+    The pre-v2 status flattened machine-wide GLOBAL drift (skills/hooks/harness/mcp) together
+    with this repo's REPO drift (CI/symlinks) into one undifferentiated dump — so a global
+    skills drift looked like the repo's problem. Grouping by layer + naming the source file
+    makes each item's ownership unambiguous.
+    """
+    from .layers import GLOBAL, REPO, layer_for_category
+
+    # bucket every drift item by its owning layer, preserving missing-before-extra ordering.
     missing = report.by_direction("missing") + report.by_direction("modified")
     extra = report.by_direction("extra")
-    if missing:
-        print(_warn(f"\n  config→disk drift ({len(missing)}) — declared but missing/modified:"))
-        for d in missing:
-            print(f"    {_warn('▸')} {d.category}/{d.item}: {d.detail}")
-    if extra:
-        print(_warn(f"\n  disk→config drift ({len(extra)}) — on disk, not declared:"))
-        for d in extra:
-            print(f"    {_warn('▸')} {d.category}/{d.item}: {d.detail}  [{d.target}]")
-    print(_dim("\n  run `rig apply` to converge config→disk (extras are left for you to decide)"))
-    return 3
+    buckets: dict[str, dict[str, list]] = {
+        GLOBAL: {"missing": [], "extra": []},
+        REPO: {"missing": [], "extra": []},
+    }
+    for d in missing:
+        buckets[layer_for_category(d.category)]["missing"].append(d)
+    for d in extra:
+        buckets[layer_for_category(d.category)]["extra"].append(d)
+
+    headers = {
+        GLOBAL: "GLOBAL — machine-wide (from ~/.config/rig/config.yaml)",
+        REPO: "REPO — this repository (from ./rig.yaml)",
+    }
+    for layer in (GLOBAL, REPO):
+        miss = buckets[layer]["missing"]
+        ext = buckets[layer]["extra"]
+        if not miss and not ext:
+            continue
+        # the REPO layer is meaningless outside a git repo — never print it there.
+        if layer == REPO and not env.is_git_repo:
+            continue
+        print()
+        print(_bold(f"  {headers[layer]}"))
+        if miss:
+            print(_warn(f"    config→disk drift ({len(miss)}) — declared but missing/modified:"))
+            for d in miss:
+                src = _declaring_config(d.category, loaded)
+                print(f"      {_warn('▸')} {d.category}/{d.item}: {d.detail}  {_dim('[declared in ' + src + ']')}")
+        if ext:
+            print(_warn(f"    disk→config drift ({len(ext)}) — on disk, not declared:"))
+            for d in ext:
+                src = _declaring_config(d.category, loaded)
+                print(f"      {_warn('▸')} {d.category}/{d.item}: {d.detail}  {_dim('[' + str(d.target) + '; ' + src + ']')}")
 
 
 def _print_schedule_status(plan, report) -> None:
@@ -468,22 +651,49 @@ def cmd_doctor(args: argparse.Namespace) -> int:
             else:
                 print(f"      {_dim('install: (no package mapping for this OS — install manually)')}")
 
+    # missing-target: proactively flag a dead hook reference in the harness settings.json (the
+    # rtk-hook case) so it's caught here, not at runtime as a generic harness error.
+    dead_targets = _scan_missing_targets()
+    if dead_targets:
+        print(_err(_bold(f"\n  ▸ missing targets ({len(dead_targets)}) — config points at files that are gone:")))
+        for f in dead_targets:
+            print(f"    {_err('✗')} {f.what}")
+            print(f"      {_ok('fix:')} {f.fix}")
+
     missing_req = report.missing_required
-    if not missing_req and not (args.optional and report.missing_optional):
+    if not missing_req and not (args.optional and report.missing_optional) and not dead_targets:
         print(_ok("\n  all required dependencies present"))
         return 0
+    # a dead target (but no missing dep) is still a problem worth a non-zero exit.
+    if not missing_req and not (args.optional and report.missing_optional) and dead_targets:
+        from . import errors
+
+        print(_warn("\n  a missing target above needs attention (re-run `rig apply` or remove the stale entry)"))
+        return errors.EXIT_MISSING_TARGET
 
     if not args.yes:
+        from . import errors
+
         print(_warn("\n  missing dependencies above. Re-run with --yes to install them"))
         print(_dim("  (add --optional to also install optional deps)"))
-        return 1
+        # honor the documented exit-code contract: a missing REQUIRED dep is the 127 class
+        # (shell convention). A shortfall of only OPTIONAL deps (under --optional) is advisory,
+        # not a hard "required tool absent", so it stays the generic non-zero.
+        return errors.EXIT_MISSING_DEP if missing_req else 1
 
     print(_bold("\n  installing missing dependencies..."))
     results = bootstrap(report, assume_yes=True, include_optional=args.optional)
     failed = [name for name, rc in results if rc not in (0,)]
     for name, rc in results:
         print(f"    {_ok('✔') if rc == 0 else _err('✗')} {name} (rc={rc})")
-    return 1 if failed else 0
+    if failed:
+        from . import errors
+
+        # an install that left a REQUIRED dep absent is still the missing-dependency class;
+        # a failed optional install is advisory (generic non-zero).
+        req_names = {st.dep.name for st in report.statuses if st.dep.required and not st.present}
+        return errors.EXIT_MISSING_DEP if (set(failed) & req_names) else 1
+    return 0
 
 
 def cmd_export(args: argparse.Namespace) -> int:
diff --git a/riglib/config.py b/riglib/config.py
index cd91785..7a1ec5d 100644
--- a/riglib/config.py
+++ b/riglib/config.py
@@ -120,6 +120,20 @@ def defaults(self) -> dict[str, Any]:
         d = self.data.get("defaults")
         return d if isinstance(d, dict) else {}
 
+    @property
+    def primary_config_path(self) -> Path:
+        """The most-specific config file backing this config, for naming in error messages.
+
+        The repo ``rig.yaml`` OVERRIDES the global config, so an invalid key is most likely
+        the repo one — name it first; fall back to the global path, then to a notional repo
+        path so an error always points somewhere concrete to edit.
+        """
+        if self.repo_path is not None:
+            return self.repo_path
+        if self.global_path is not None:
+            return self.global_path
+        return repo_config_path(self.repo_root)
+
 
 def load(
     repo_root: Path,
diff --git a/riglib/errors.py b/riglib/errors.py
new file mode 100644
index 0000000..264ce34
--- /dev/null
+++ b/riglib/errors.py
@@ -0,0 +1,273 @@
+"""Error system v2 — every rig error is WHAT / WHY / HOW-to-fix, with a stable exit code.
+
+Runtime reach: raised by config/catalog/plan/status code paths; rendered by the top-level
+CLI handler (:func:`guard`, wired into ``cli.main``). Centralizing the shape here means a
+human staring at a failure always gets the same three things — what happened, why (the root
+cause, the offending config FILE PATH + key), and a concrete command/edit to fix it — and a
+script always gets a meaningful, *stable* exit code per failure class.
+
+Invariants:
+- The exit-code constants below are a PUBLIC CONTRACT. Scripts/CI branch on them; changing a
+  value is a breaking change. They are documented in ``rig --help`` and ``docs/config-schema.md``.
+- They follow the ``structured-exit-codes`` skill: 0 success, 2 invalid-config/usage,
+  127 missing-dependency (shell convention), plus rig-specific classes (drift/unknown-item/
+  missing-target/not-a-repo) that a caller wants to distinguish.
+- Stdlib-only at import time (AGENTS.md hard rule) — no third-party imports here.
+
+History: born from two same-day prod failures whose errors were thin and undiagnosable —
+``unknown mcp item(s): review (known: none)`` (no hint it was a REMOVED slot or how to remove
+it) and a dead hook path surfacing only as a generic harness "PreToolUse error". This module
+exists so neither recurs: the removed-slot registry names ``mcp.items.review`` and prints the
+exact removal PR + fix, and missing-target names the gone file + how to regenerate it.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+# ── stable, per-class exit codes (PUBLIC CONTRACT — do not renumber) ───────────────
+EXIT_OK = 0
+# 1 is reserved for an UNEXPECTED/internal failure (an unhandled exception): a caller can
+# tell "rig itself crashed" (1) from "rig diagnosed a known problem" (>=2).
+EXIT_INTERNAL = 1
+EXIT_CONFIG = 2  # malformed / invalid config value (usage class, per the skill)
+EXIT_DRIFT = 3  # `rig status` found drift (config and disk disagree)
+EXIT_UNKNOWN_ITEM = 4  # config names a catalog item that doesn't exist (typo / removed slot)
+EXIT_MISSING_TARGET = 5  # config references a path/binary that's gone on disk
+EXIT_NOT_A_REPO = 6  # a repo-scoped command run outside a git repository
+EXIT_MISSING_DEP = 127  # a required external tool/binary isn't installed (shell convention)
+
+
+@dataclass
+class RigError(Exception):
+    """A structured, renderable error: WHAT happened / WHY / HOW to fix + an exit code.
+
+    ``what`` — the symptom, one line ("unknown mcp item: reviewr").
+    ``why``  — the root cause + context: the offending CONFIG FILE PATH + key when relevant.
+    ``fix``  — a concrete command or edit the user can run/make right now.
+    ``exit_code`` — the failure class (one of the EXIT_* constants); subclasses pin it.
+    """
+
+    what: str
+    why: str = ""
+    fix: str = ""
+    exit_code: int = EXIT_INTERNAL
+
+    def __post_init__(self) -> None:
+        # Exception's own machinery wants args set; keep str(e) == the WHAT line.
+        super().__init__(self.what)
+
+    def __str__(self) -> str:
+        return self.what
+
+
+@dataclass
+class ConfigError(RigError):
+    """Malformed/invalid config — a bad value, type, or unknown key. Fail-closed (exit 2)."""
+
+    exit_code: int = EXIT_CONFIG
+
+
+@dataclass
+class UnknownItemError(RigError):
+    """Config names a catalog item that doesn't exist (typo or a removed slot). Exit 4."""
+
+    exit_code: int = EXIT_UNKNOWN_ITEM
+
+
+@dataclass
+class MissingTargetError(RigError):
+    """Config references a path/binary that's gone on disk (a dead hook path, …). Exit 5."""
+
+    exit_code: int = EXIT_MISSING_TARGET
+
+
+@dataclass
+class NotARepoError(RigError):
+    """A repo-scoped command was run outside a git repository. Exit 6."""
+
+    exit_code: int = EXIT_NOT_A_REPO
+
+
+@dataclass
+class MissingDepError(RigError):
+    """A required external tool/binary isn't installed. Exit 127 (shell convention)."""
+
+    exit_code: int = EXIT_MISSING_DEP
+
+
+# ── rendering ──────────────────────────────────────────────────────────────────────
+def _c(code: str, s: str, color: bool) -> str:
+    return f"\033[{code}m{s}\033[0m" if color else s
+
+
+def render(err: RigError, *, color: bool = True) -> str:
+    """Render a :class:`RigError` as the consistent 3-part block (what / why / fix).
+
+    Always shows the WHAT (prefixed ``error:``); shows WHY and FIX only when populated, so a
+    terse error doesn't print empty labels. The label words ``why``/``fix`` always appear when
+    their field is set — the contract the CLI handler and tests rely on.
+    """
+    lines = [_c("31", f"error: {err.what}", color)]
+    if err.why:
+        lines.append(_c("2", "  why: ", color) + err.why)
+    if err.fix:
+        lines.append(_c("32", "  fix: ", color) + err.fix)
+    return "\n".join(lines)
+
+
+def guard(fn) -> int:
+    """Run ``fn`` and translate any :class:`RigError` into render() + its exit code.
+
+    The single top-level CLI handler: a command body raises a structured error and this turns
+    it into a consistent printed block + the stable per-class exit code. A non-RigError
+    (a real bug) is NOT swallowed — it propagates so the stack trace is visible.
+    """
+    try:
+        return fn()
+    except RigError as exc:
+        import sys
+
+        print(render(exc, color=sys.stdout.isatty()))
+        return exc.exit_code
+
+
+# ── did-you-mean (Levenshtein) ──────────────────────────────────────────────────────
+def _levenshtein(a: str, b: str) -> int:
+    """Classic edit distance, stdlib-only (small strings — item names — so O(n*m) is fine)."""
+    if a == b:
+        return 0
+    if not a:
+        return len(b)
+    if not b:
+        return len(a)
+    prev = list(range(len(b) + 1))
+    for i, ca in enumerate(a, 1):
+        cur = [i]
+        for j, cb in enumerate(b, 1):
+            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))
+        prev = cur
+    return prev[-1]
+
+
+def did_you_mean(bad: str, candidates: set[str]) -> str | None:
+    """The nearest catalog name to ``bad`` within a sensible edit-distance threshold.
+
+    Returns ``None`` when the catalog is empty or nothing is close enough — so we never
+    fabricate a bogus suggestion for a wildly different token. The threshold scales with the
+    typo length (a longer name tolerates more edits) but is capped so "zzzzzzzz" → "review"
+    is rejected.
+    """
+    if not candidates:
+        return None
+    best = min(candidates, key=lambda c: (_levenshtein(bad, c), c))
+    dist = _levenshtein(bad, best)
+    # allow up to ~40% of the longer string's length, capped at 3 — enough for a real typo,
+    # tight enough to reject an unrelated word.
+    threshold = min(3, max(1, round(0.4 * max(len(bad), len(best)))))
+    return best if dist <= threshold else None
+
+
+# ── removed / deprecated slot registry ──────────────────────────────────────────────
+@dataclass(frozen=True)
+class RemovedSlot:
+    """A catalog slot that USED to exist and was removed — so its config key is now invalid.
+
+    ``reason`` names WHY/WHEN it went (the PR + the rationale); the error builder turns it
+    into a precise "remove ``<key>`` from ``<config path>``" fix instead of a useless
+    "unknown item (known: none)".
+    """
+
+    category: str
+    name: str
+    reason: str
+
+
+# Seeded with the motivating case. Keyed (category, name). Add an entry whenever a catalog
+# slot is removed so a lingering config reference explains itself instead of looking like a typo.
+_REMOVED_SLOTS: dict[tuple[str, str], RemovedSlot] = {
+    ("mcp", "review"): RemovedSlot(
+        category="mcp",
+        name="review",
+        reason="removed in agent-tools #32: review is a CLI + skill, not an MCP server "
+        "(the `review --mcp` entrypoint was dropped in the subcommand refactor)",
+    ),
+}
+
+
+def removed_slot(category: str, name: str) -> RemovedSlot | None:
+    """Look up a (category, name) in the removed-slot registry; ``None`` if it was never a slot."""
+    return _REMOVED_SLOTS.get((category, name))
+
+
+# ── error builders (the heuristics, assembled into structured errors) ───────────────
+def unknown_item_error(
+    *,
+    category: str,
+    key: str,
+    bad: str,
+    known: set[str],
+    config_path: str,
+) -> UnknownItemError:
+    """Build the error for a config that names a non-existent catalog item.
+
+    Priority of explanation (most specific first):
+      1. **removed slot** — the name was a real slot that got removed: cite the PR + tell the
+         user to remove ``<key>`` from ``<config path>``.
+      2. **empty category** — the catalog has NO slots in this category: tell them to remove the
+         whole ``<category>`` block, NOT "known: none".
+      3. **did-you-mean** — the catalog has slots and one is close: suggest it.
+      4. **fallthrough** — list the known names so they can pick a valid one.
+
+    ``key`` is the dotted config key (``mcp.items.review``), ``bad`` the bare item name
+    (``review``), ``config_path`` the offending file — all three appear in the output.
+    """
+    removed = removed_slot(category, bad)
+    if removed is not None:
+        return UnknownItemError(
+            what=f"removed {category} slot: {bad}",
+            why=f"`{key}` ({removed.reason}); declared in {config_path}",
+            fix=f"remove `{key}` from {config_path}",
+        )
+
+    if not known:
+        return UnknownItemError(
+            what=f"unknown {category} item: {bad}",
+            why=f"the {category} catalog has no slots (declared in {config_path})",
+            fix=f"remove the `{category}` block from {config_path} — there are no {category} "
+            f"slots to enable",
+        )
+
+    suggestion = did_you_mean(bad, known)
+    if suggestion is not None:
+        return UnknownItemError(
+            what=f"unknown {category} item: {bad}",
+            why=f"`{key}` names an item not in the {category} catalog (declared in {config_path})",
+            fix=f"did you mean `{suggestion}`? fix `{key}` in {config_path}",
+        )
+
+    known_list = ", ".join(sorted(known))
+    return UnknownItemError(
+        what=f"unknown {category} item: {bad}",
+        why=f"`{key}` names an item not in the {category} catalog (declared in {config_path})",
+        fix=f"use one of: {known_list} — or remove `{key}` from {config_path}",
+    )
+
+
+def missing_target_error(
+    *,
+    what_kind: str,
+    target: str,
+    why: str,
+    regen: str,
+) -> MissingTargetError:
+    """Build the error for a config that points at a path/binary that's gone on disk.
+
+    ``what_kind`` is a short noun ("hook", "binary", "skill"); ``target`` the missing path;
+    ``why`` the root cause; ``regen`` how to recreate it (a concrete command).
+    """
+    return MissingTargetError(
+        what=f"missing {what_kind}: {target}",
+        why=why,
+        fix=regen,
+    )
diff --git a/riglib/layers.py b/riglib/layers.py
new file mode 100644
index 0000000..03e21eb
--- /dev/null
+++ b/riglib/layers.py
@@ -0,0 +1,52 @@
+"""Config-layer classification — which drift category belongs to which layer.
+
+`rig status` mixes two kinds of state:
+
+- **GLOBAL** — machine-wide artifacts a developer carries across every repo, declared in
+  ``~/.config/rig/config.yaml``: the installed skills (``~/.agents/skills`` + the harness
+  discovery symlinks), agent-hooks, the global git-hook dispatcher, the harness auto/permission
+  mode, the model-freshness schedule, and rig-managed tmux config.
+- **REPO** — this specific repository's artifacts, declared in its committed ``./rig.yaml``:
+  the CI workflows under ``.github/``, the AGENTS.md/CLAUDE.md symlinks, and the GitHub ruleset.
+
+Grouping drift by layer (and naming WHICH config file declares each item) is what makes
+``rig status`` legible: a global-skills drift is not the repo's problem, and a non-git dir has
+no repo layer at all. This module is the single source of truth for that classification, so the
+status renderer and any future consumer agree.
+
+Stdlib-only; no imports beyond the standard library.
+"""
+
+from __future__ import annotations
+
+GLOBAL = "GLOBAL"
+REPO = "REPO"
+
+# Each drift/action category → the config layer that OWNS it. Keep this exhaustive: a category
+# missing here would silently render under the wrong/no heading. The tmux agent owns the tmux
+# rows in drift.py; the classification of "tmux" as GLOBAL lives here (a single dict entry) so
+# this file does not collide with the tmux provisioning work.
+_CATEGORY_LAYER = {
+    # GLOBAL — machine-wide, from ~/.config/rig/config.yaml
+    "skills": GLOBAL,
+    "agent_hooks": GLOBAL,
+    "mcp": GLOBAL,
+    "harness": GLOBAL,
+    "models": GLOBAL,
+    "git_hooks": GLOBAL,
+    "tmux": GLOBAL,
+    # REPO — this repo, from ./rig.yaml
+    "ci": REPO,
+    "agents_md": REPO,
+    "github": REPO,
+}
+
+
+def layer_for_category(category: str) -> str:
+    """The owning layer (``GLOBAL``/``REPO``) for a drift/action category.
+
+    Unknown categories default to ``GLOBAL`` (the conservative choice — a machine-wide artifact
+    is the safer assumption than claiming a repo declares something it may not), but every known
+    category is mapped explicitly above so this default is only a forward-compat guard.
+    """
+    return _CATEGORY_LAYER.get(category, GLOBAL)
diff --git a/riglib/missing_target.py b/riglib/missing_target.py
new file mode 100644
index 0000000..a0b9f57
--- /dev/null
+++ b/riglib/missing_target.py
@@ -0,0 +1,136 @@
+"""missing-target scanner — find config that references a path/binary gone from disk.
+
+Runtime reach: called by ``rig status`` (and surfaced by ``rig doctor``) to PROACTIVELY catch
+a dead reference before it bites at runtime. The motivating case: the harness ``settings.json``
+registers a hook whose ``command`` invokes a script that no longer exists — at runtime the
+harness reports only a generic "PreToolUse error" with no hint of the cause. This scanner names
+the missing file + how to regenerate/remove it, as a structured :class:`errors.MissingTargetError`.
+
+Scope: it inspects the harness ``settings.json`` hook blocks. It only flags an ABSOLUTE path
+argument that looks like a script file (so a bare PATH-resolved binary like ``gitleaks`` is not
+mistaken for a missing file). Stdlib-only.
+"""
+
+from __future__ import annotations
+
+import json
+import shlex
+from pathlib import Path
+
+from . import errors
+
+
+def _looks_like_script_path(token: str) -> bool:
+    """True for an absolute path that names a file we should check exists.
+
+    Only absolute paths qualify — a bare ``gitleaks`` (PATH-resolved) is not a file reference
+    we own. We don't require a particular suffix (hooks can be ``.py``/``.sh``/extensionless),
+    just that it's an absolute path token, not a flag/option.
+    """
+    if not token or token.startswith("-"):
+        return False
+    return token.startswith("/") or token.startswith("~/")
+
+
+# Interpreter basenames whose FIRST path-looking argument is the script to check (so the
+# interpreter token itself — even an absolute one like /usr/bin/env or /opt/homebrew/bin/python3
+# — is skipped, not mistaken for the target). env is special-cased below.
+_INTERPRETERS = frozenset(
+    {"python", "python3", "python2", "sh", "bash", "zsh", "node", "deno", "bun", "ruby", "perl", "env"}
+)
+
+
+def _missing_paths_in_command(command: str) -> list[str]:
+    """Return the missing SCRIPT path of a hook command (at most one), or [].
+
+    A hook command runs ONE script. It may be invoked directly (``/abs/hook.py``) or through an
+    interpreter — and that interpreter may itself be an ABSOLUTE path on macOS
+    (``/usr/bin/env python3 /abs/hook.py``, ``/opt/homebrew/bin/python3 /abs/hook.py``). We must
+    check the SCRIPT, not the interpreter: skip a leading interpreter token (bare or absolute)
+    plus its ``env VAR=…`` / module / ``-c`` style args, then take the first remaining
+    script-looking token as the target. We check only THAT token — a later absolute path is
+    almost always a runtime OUTPUT arg (``--out /var/run/x.json``) that legitimately doesn't
+    exist yet; flagging it would make ``rig status``/``doctor`` cry wolf on a healthy hook.
+    """
+    try:
+        tokens = shlex.split(command)
+    except ValueError:
+        return []  # unparseable command (unbalanced quotes) — skip rather than guess
+    if not tokens:
+        return []
+
+    rest = tokens
+    # If the command runs through a known interpreter, drop the interpreter token. The script is
+    # then the first path-looking arg AFTER it; we skip the interpreter's own flags / `env`
+    # NAME=VALUE assignments by scanning for the first script-PATH token among the remaining args.
+    head = Path(tokens[0]).name if (tokens[0].startswith("/") or tokens[0].startswith("~/")) else tokens[0]
+    via_interpreter = head in _INTERPRETERS
+    if via_interpreter:
+        rest = tokens[1:]
+
+    for tok in rest:
+        # `-c '<inline code>'` as an INTERPRETER flag (i.e. before any script token) means there
+        # is no script FILE to verify. Only honor it before the first path — a `-c` AFTER the
+        # script is the script's OWN argument, not Python's, and must not suppress the check.
+        if via_interpreter and tok == "-c":
+            return []
+        if _looks_like_script_path(tok):
+            p = Path(tok).expanduser()
+            return [tok] if not p.exists() else []
+    return []
+
+
+def _iter_hook_commands(settings: dict):
+    """Yield every hook ``command`` string under the settings.json ``hooks`` blocks.
+
+    Tolerant of shape drift: a missing/oddly-typed key is skipped, never raised — a malformed
+    settings.json should make the scan find nothing, not crash ``rig status``.
+    """
+    hooks = settings.get("hooks")
+    if not isinstance(hooks, dict):
+        return
+    for blocks in hooks.values():
+        if not isinstance(blocks, list):
+            continue
+        for block in blocks:
+            if not isinstance(block, dict):
+                continue
+            for hook in block.get("hooks", []) or []:
+                if isinstance(hook, dict) and isinstance(hook.get("command"), str):
+                    yield hook["command"]
+
+
+def scan_settings_hooks(settings_file: Path) -> list[errors.MissingTargetError]:
+    """Scan a harness ``settings.json`` for hook commands pointing at missing files.
+
+    Returns one :class:`errors.MissingTargetError` per missing target (deduped by path), each
+    naming the gone file, where it's referenced, and how to fix it. An absent or malformed
+    settings.json yields an empty list (nothing to report, never an exception).
+    """
+    if not settings_file.is_file():
+        return []
+    try:
+        data = json.loads(settings_file.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        return []
+    if not isinstance(data, dict):
+        return []
+
+    seen: set[str] = set()
+    findings: list[errors.MissingTargetError] = []
+    for command in _iter_hook_commands(data):
+        for missing in _missing_paths_in_command(command):
+            if missing in seen:
+                continue
+            seen.add(missing)
+            findings.append(
+                errors.missing_target_error(
+                    what_kind="hook script",
+                    target=missing,
+                    why=f"a hook command in {settings_file} runs `{missing}`, but that file "
+                    f"is gone — the harness would fail with a generic PreToolUse error",
+                    regen="re-run `rig apply` to reinstall the managed hooks, or remove the "
+                    f"stale hook entry from {settings_file}",
+                )
+            )
+    return findings
diff --git a/riglib/plan.py b/riglib/plan.py
index 495fe4e..2eeb932 100644
--- a/riglib/plan.py
+++ b/riglib/plan.py
@@ -249,12 +249,26 @@ def _validate_item_names(config: LoadedConfig, catalog: Catalog) -> None:
     universal/by_type are separate groups, and by_type uses fully-qualified
     ``by-type/<kind>/<name>`` keys plus bare ``<kind>`` bundle names in ``by_type.enable``.
     """
-    def _check(category: str, names: set[str], known: set[str], label: str) -> None:
-        unknown = names - known
+    from . import errors
+
+    config_path = str(config.primary_config_path)
+
+    def _check(category: str, names: set[str], known: set[str], key_prefix: str) -> None:
+        """Raise a structured :class:`errors.UnknownItemError` for the first unknown name.
+
+        ``category`` is the catalog category used for the removed-slot lookup + did-you-mean;
+        ``key_prefix`` is the dotted config-key path (e.g. ``mcp.items``) the bad name hangs
+        off, so the error names the EXACT offending key (``mcp.items.review``) + its file.
+        """
+        unknown = sorted(names - known)
         if unknown:
-            raise PlanError(
-                f"unknown {label} item(s): {', '.join(sorted(unknown))} "
-                f"(known: {', '.join(sorted(known)) or 'none'})"
+            bad = unknown[0]
+            raise errors.unknown_item_error(
+                category=category,
+                key=f"{key_prefix}.{bad}",
+                bad=bad,
+                known=known,
+                config_path=config_path,
             )
 
     # skills — universal group
@@ -264,7 +278,7 @@ def _check(category: str, names: set[str], known: set[str], label: str) -> None:
         uni_known = {i.name for i in catalog.by_category("skills") if i.group == "universal"}
         refs = set(uni.get("enable", []) or []) | set(uni.get("disable", []) or [])
         refs |= set(k for k in uni.get("items", {}) if isinstance(uni.get("items"), dict))
-        _check("skills", refs, uni_known, "universal skill")
+        _check("skills", refs, uni_known, "skills.universal")
     # skills — by_type group (fully-qualified item keys; bundle names checked separately)
     bt = sk.get("by_type", {}) if isinstance(sk, dict) else {}
     if isinstance(bt, dict):
@@ -272,8 +286,8 @@ def _check(category: str, names: set[str], known: set[str], label: str) -> None:
         bt_kinds = {i.meta.get("kind", "") for i in catalog.by_category("skills") if i.group.startswith("by-type/")}
         bt_items = bt.get("items", {})
         if isinstance(bt_items, dict):
-            _check("skills", set(bt_items), bt_known, "by-type skill")
-        _check("skills", set(bt.get("enable", []) or []), bt_kinds, "by-type bundle")
+            _check("skills", set(bt_items), bt_known, "skills.by_type.items")
+        _check("skills", set(bt.get("enable", []) or []), bt_kinds, "skills.by_type.enable")
 
     # agent_hooks + mcp — flat items/enable/disable
     for cat_name in ("agent_hooks", "mcp"):
@@ -285,19 +299,14 @@ def _check(category: str, names: set[str], known: set[str], label: str) -> None:
         items = cfg.get("items", {})
         if isinstance(items, dict):
             refs |= set(items)
-        _check(cat_name, refs, known, cat_name)
+        _check(cat_name, refs, known, f"{cat_name}.items")
 
     # git_hooks — nested sub-groups (only 'dispatcher' is shipped in v0.1). A typo like
     # 'dispatcherr' must fail closed, not silently build no dispatcher action.
     gh = config.category("git_hooks")
     if isinstance(gh, dict):
         gh_known = catalog.names("git_hooks") | {"templates"}  # templates reserved for v0.2
-        unknown_gh = set(gh) - gh_known
-        if unknown_gh:
-            raise PlanError(
-                f"unknown git_hooks key(s): {', '.join(sorted(unknown_gh))} "
-                f"(known: {', '.join(sorted(gh_known))})"
-            )
+        _check("git_hooks", set(gh), gh_known, "git_hooks")
 
 
 def build(config: LoadedConfig, catalog: Catalog, *, project_type: str = "unknown") -> InstallPlan:
@@ -404,11 +413,17 @@ def build(config: LoadedConfig, catalog: Catalog, *, project_type: str = "unknow
         # config enabling ship against a minimal checkout fails closed instead of dropping.
         known = catalog.names("ci")
         referenced = set(ci_items) | set(ci.get("enable", []) or []) | set(ci.get("disable", []) or [])
-        unknown = referenced - known
+        unknown = sorted(referenced - known)
         if unknown:
-            raise PlanError(
-                f"unknown ci item(s): {', '.join(sorted(unknown))} "
-                f"(known: {', '.join(sorted(known))})"
+            from . import errors
+
+            bad = unknown[0]
+            raise errors.unknown_item_error(
+                category="ci",
+                key=f"ci.items.{bad}",
+                bad=bad,
+                known=known,
+                config_path=str(config.primary_config_path),
             )
 
         # resolve which slots are enabled: per-item override > enable/disable > all > off.
diff --git a/tests/smoke.sh b/tests/smoke.sh
index 5b5d012..d110b1c 100755
--- a/tests/smoke.sh
+++ b/tests/smoke.sh
@@ -1,8 +1,15 @@
 #!/usr/bin/env bash
 # smoke.sh — fast end-to-end check of the rig CLI surface, run in CI and locally.
 # Proves: --help, doctor, a headless setup against a sample config in a throwaway repo
-# (isolated HOME so nothing on the machine is touched), idempotency, status drift, and the
+# (isolated HOME so nothing on the machine is touched), idempotency, status drift, the
+# error-system v2 contract (a removed slot prints the 3-part error + the right exit code;
+# a clean sample exits 0; a non-git dir does NOT nag "should be committed"), and the
 # pytest unit suite.
+#
+# WHY a real smoke and not just pytest: two same-day prod failures were unit-GREEN but
+# smoke-BROKEN — a stale `mcp.items.review` (a slot removed in agent-tools #32) lingered in a
+# config and only the REAL `rig status`/`init` flow against the REAL catalog caught it. pytest
+# uses a fake catalog; smoke runs the real CLI against the real agent-tools checkout.
 set -euo pipefail
 
 ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
@@ -42,6 +49,9 @@ else
   git config --global user.name  rig-smoke
   ( cd "$TMP" && git init -q )
 
+  # The CLEAN sample. NOTE: it intentionally carries NO `mcp.items.review` — that slot was
+  # REMOVED in agent-tools #32 (review is a CLI+skill, not an MCP). A config still naming it is
+  # exercised SEPARATELY below (the removed-slot leg), where it must fail with the 3-part error.
   cat > "$TMP/rig.yaml" <<YAML
 version: 1
 agent_tools_source: $SRC
@@ -106,6 +116,69 @@ YAML
   # status reports in sync
   $RIG status -C "$TMP" --config "$TMP/rig.yaml" >/dev/null || fail "status nonzero when in sync"
   pass "rig status: in sync"
+
+  # ── error-system v2: a removed slot prints the 3-part error + exit code 4 ──────
+  # This is the exact regression class that was unit-green but smoke-broken: a stale
+  # `mcp.items.review` (removed in agent-tools #32) must fail LOUDLY against the REAL catalog.
+  DEAD="$TMP/dead.yaml"
+  cat > "$DEAD" <<YAML
+version: 1
+agent_tools_source: $SRC
+skills: { enabled: false }
+agent_hooks: { enabled: false }
+ci: { enabled: false }
+git_hooks: { dispatcher: { enabled: false } }
+mcp:
+  items:
+    review: { enabled: true, command: "review --mcp" }
+YAML
+  # capture both the rendered error and the exit code (don't let `set -e` mask the code).
+  set +e
+  dead_out="$($RIG status -C "$TMP" --config "$DEAD" 2>&1)"
+  dead_rc=$?
+  set -e
+  echo "$dead_out" | grep -qi "removed mcp slot"          || fail "removed-slot: no WHAT line"
+  echo "$dead_out" | grep -q  "#32"                       || fail "removed-slot: WHY does not name the removal PR"
+  echo "$dead_out" | grep -qi "remove .*mcp.items.review" || fail "removed-slot: FIX does not say how to remove it"
+  echo "$dead_out" | grep -q  "$DEAD"                     || fail "removed-slot: error does not name the offending config file"
+  [[ "$dead_rc" -eq 4 ]]                                  || fail "removed-slot: exit code $dead_rc != 4 (unknown-item class)"
+  pass "rig status: removed slot → 3-part error + exit 4"
+
+  # ── error-system v2: a CLEAN sample exits 0 (no false drift) ──────────────────
+  CLEAN="$TMP/clean.yaml"
+  cat > "$CLEAN" <<YAML
+version: 1
+agent_tools_source: $SRC
+skills: { enabled: false }
+agent_hooks: { enabled: false }
+ci: { enabled: false }
+git_hooks: { dispatcher: { enabled: false } }
+mcp: { enabled: false }
+agents_md: { enabled: false }
+YAML
+  CLEANREPO="$TMP/clean-repo"; mkdir -p "$CLEANREPO"; ( cd "$CLEANREPO" && git init -q )
+  cp "$CLEAN" "$CLEANREPO/rig.yaml"
+  # a PRISTINE HOME/XDG: the earlier `rig init --yes` populated $TMP/home with skills+hooks, so
+  # reusing it would show them as drift against this all-disabled config. A clean sample must be
+  # judged against a clean machine.
+  set +e
+  HOME="$TMP/clean-home" XDG_CONFIG_HOME="$TMP/clean-xdg" $RIG status -C "$CLEANREPO" >/dev/null 2>&1
+  clean_rc=$?
+  set -e
+  [[ "$clean_rc" -eq 0 ]] || fail "clean sample: exit code $clean_rc != 0"
+  pass "rig status: clean sample → exit 0"
+
+  # ── error-system v2: a NON-GIT dir does NOT nag "should be committed" ─────────
+  # IMPORTANT: $TMP itself was `git init`-ed above, so a dir UNDER it ($TMP/nongit) is still
+  # *inside* that work tree (git walks up to $TMP/.git) — env.is_git_repo would be True and this
+  # would not exercise the non-git path at all. Use a sibling mktemp dir outside any repo.
+  NONGIT="$(mktemp -d)"; trap 'rm -rf "$TMP" "$NONGIT"' EXIT   # a plain dir, no .git, no repo above
+  set +e
+  nongit_out="$(HOME="$TMP/clean-home" XDG_CONFIG_HOME="$TMP/clean-xdg" $RIG status -C "$NONGIT" 2>&1)"
+  set -e
+  echo "$nongit_out" | grep -qi "should be committed" && fail "non-git: still nags 'should be committed'"
+  echo "$nongit_out" | grep -qi "not a git repository" || fail "non-git: does not say 'not a git repository'"
+  pass "rig status: non-git dir → no 'should be committed', repo layer N/A"
 fi
 
 # ── 4. unit suite ─────────────────────────────────────────────────────────────
diff --git a/tests/test_catalog_plan.py b/tests/test_catalog_plan.py
index cbb3f1b..d4c4200 100644
--- a/tests/test_catalog_plan.py
+++ b/tests/test_catalog_plan.py
@@ -8,7 +8,8 @@
 
 from riglib.catalog import Catalog, CatalogError
 from riglib.config import LoadedConfig
-from riglib.plan import PlanError, build
+from riglib.errors import UnknownItemError
+from riglib.plan import build
 
 
 def test_catalog_scan_finds_all_categories(fake_agent_tools):
@@ -94,8 +95,10 @@ def test_plan_codeql_variant_selected(fake_agent_tools, tmp_path):
 def test_plan_unknown_universal_skill_fails_closed(fake_agent_tools, tmp_path):
     cat = Catalog.scan(str(fake_agent_tools))
     cfg = _cfg({"skills": {"universal": {"disable": ["shell-timeout"]}}}, tmp_path)  # typo
-    with pytest.raises(PlanError, match="universal skill"):
+    with pytest.raises(UnknownItemError) as exc:
         build(cfg, cat, project_type="unknown")
+    # error-system v2: did-you-mean suggests the nearest valid skill
+    assert "shell-timeouts" in exc.value.fix
 
 
 def test_plan_unknown_agent_hook_fails_closed(fake_agent_tools, tmp_path):
@@ -104,15 +107,18 @@ def test_plan_unknown_agent_hook_fails_closed(fake_agent_tools, tmp_path):
         {"skills": {"enabled": False}, "agent_hooks": {"items": {"block-no-verifyy": {"enabled": True}}}},
         tmp_path,
     )
-    with pytest.raises(PlanError, match="agent_hooks"):
+    with pytest.raises(UnknownItemError) as exc:
         build(cfg, cat, project_type="unknown")
+    assert "agent_hooks" in exc.value.what
+    assert "block-no-verify" in exc.value.fix  # nearest valid
 
 
 def test_plan_unknown_by_type_bundle_fails_closed(fake_agent_tools, tmp_path):
     cat = Catalog.scan(str(fake_agent_tools))
     cfg = _cfg({"skills": {"by_type": {"enable": ["backendd"]}}}, tmp_path)  # typo
-    with pytest.raises(PlanError, match="by-type bundle"):
+    with pytest.raises(UnknownItemError) as exc:
         build(cfg, cat, project_type="unknown")
+    assert "backend" in exc.value.fix  # nearest valid bundle
 
 
 def test_plan_unknown_ci_item_fails_closed(fake_agent_tools, tmp_path):
@@ -121,8 +127,9 @@ def test_plan_unknown_ci_item_fails_closed(fake_agent_tools, tmp_path):
         {"skills": {"enabled": False}, "ci": {"items": {"secret_scan": {"enabled": True}}}},
         tmp_path,
     )
-    with pytest.raises(PlanError, match="unknown ci item"):
+    with pytest.raises(UnknownItemError) as exc:
         build(cfg, cat, project_type="unknown")
+    assert "secret-scan" in exc.value.fix  # nearest valid
 
 
 def test_plan_ci_all_true_enables_catalog(fake_agent_tools, tmp_path):
@@ -141,7 +148,7 @@ def test_plan_unknown_ci_enable_name_fails_closed(fake_agent_tools, tmp_path):
         {"skills": {"enabled": False}, "ci": {"all": False, "enable": ["secret_scan"]}},
         tmp_path,
     )
-    with pytest.raises(PlanError, match="unknown ci item"):
+    with pytest.raises(UnknownItemError, match="unknown ci item"):
         build(cfg, cat, project_type="unknown")
 
 
@@ -161,8 +168,10 @@ def test_plan_unknown_git_hooks_key_fails_closed(fake_agent_tools, tmp_path):
          "git_hooks": {"dispatcherr": {"enabled": True}}},  # typo
         tmp_path,
     )
-    with pytest.raises(PlanError, match="git_hooks"):
+    with pytest.raises(UnknownItemError) as exc:
         build(cfg, cat, project_type="unknown")
+    assert "git_hooks" in exc.value.what
+    assert "dispatcher" in exc.value.fix
 
 
 def test_plan_ship_absent_from_catalog_fails_closed(fake_agent_tools, tmp_path):
@@ -173,7 +182,7 @@ def test_plan_ship_absent_from_catalog_fails_closed(fake_agent_tools, tmp_path):
         {"skills": {"enabled": False}, "ci": {"items": {"ship": {"enabled": True}}}},
         tmp_path,
     )
-    with pytest.raises(PlanError, match="unknown ci item"):
+    with pytest.raises(UnknownItemError, match="unknown ci item"):
         build(cfg, cat, project_type="unknown")
 
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 3ac2a54..b95a21c 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -22,11 +22,37 @@ def test_no_command_prints_help(capsys):
     assert "rig — the dev-environment" in out
 
 
-def test_doctor_runs(capsys):
+def test_doctor_runs(tmp_path, capsys, monkeypatch):
+    from riglib import errors
+
+    # isolate HOME so `_scan_missing_targets()` can't read the dev machine's real
+    # ~/.claude/settings.json (a dead hook there would flip the exit code to MISSING_TARGET).
+    monkeypatch.setenv("HOME", str(tmp_path / "home"))
     rc = main(["doctor"])
     out = capsys.readouterr().out
     assert "rig doctor" in out
-    assert rc in (0, 1)  # 1 if optional deps missing on the CI box
+    # 0 = all present; 1 = optional deps missing on the CI box; 127 = a REQUIRED dep absent
+    # (the documented missing-dependency class — honored by the exit-code contract).
+    assert rc in (0, 1, errors.EXIT_MISSING_DEP)
+
+
+def test_doctor_missing_required_dep_uses_127_contract(tmp_path, capsys, monkeypatch):
+    """A missing REQUIRED dep exits with the documented missing-dependency class (127).
+
+    The --help epilog promises `127  missing dependency`; doctor must honor that (the public
+    exit-code contract scripts branch on) instead of the generic non-zero. A pure --yes-less run
+    so nothing is installed; only required deps are forced absent.
+    """
+    from riglib import doctor, errors
+
+    monkeypatch.setenv("HOME", str(tmp_path / "home"))
+    # force every dependency probe to "absent" so missing_required is non-empty.
+    monkeypatch.setattr(doctor.shutil, "which", lambda name: None)
+    monkeypatch.setattr(doctor, "_python_present", lambda name: False)
+    rc = main(["doctor"])  # no --yes → reports + advises, installs nothing
+    out = capsys.readouterr().out
+    assert "missing dependencies above" in out
+    assert rc == errors.EXIT_MISSING_DEP
 
 
 def test_setup_dryrun_default(tmp_path, capsys, fake_agent_tools, monkeypatch):
@@ -102,7 +128,11 @@ def test_setup_failclosed_leaves_no_config(tmp_path, capsys, fake_agent_tools, m
         encoding="utf-8",
     )
     rc = main(["init", "-C", str(repo), "--config", str(template), "--yes"])
-    assert rc == 2
+    # error-system v2: an unknown catalog item exits with the unknown-item class (4), not the
+    # generic config class (2). The fail-closed guarantee is unchanged: no rig.yaml is written.
+    from riglib import errors
+
+    assert rc == errors.EXIT_UNKNOWN_ITEM
     assert not (repo / "rig.yaml").exists()  # fail-closed: no invalid config written
 
 
@@ -147,8 +177,14 @@ def test_setup_with_external_config_persists_repo_yaml(tmp_path, capsys, fake_ag
 
 
 def test_status_scans_empty_ci_target_for_extras(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    import subprocess
+
     monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
     repo = tmp_path / "repo"
+    repo.mkdir()
+    # CI is the REPO layer — its extras are only scanned inside a real git repo (error-system
+    # v2: a non-git dir has no repo layer), so this must be an actual repository.
+    subprocess.run(["git", "init", "-q"], cwd=str(repo), check=True)
     (repo / ".github" / "workflows").mkdir(parents=True)
     (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8")
     cfg = repo / "rig.yaml"
diff --git a/tests/test_errors.py b/tests/test_errors.py
new file mode 100644
index 0000000..8a71006
--- /dev/null
+++ b/tests/test_errors.py
@@ -0,0 +1,193 @@
+"""Unit tests for the error system v2 — structured what/why/fix errors + heuristics.
+
+Covers:
+- the RigError dataclass-style exception (what/why/fix/exit_code) + per-class stable codes,
+- the consistent CLI rendering (every error prints WHAT, WHY, HOW-to-fix),
+- did-you-mean (Levenshtein-nearest catalog name),
+- the empty-category message ("no slots; remove the block"),
+- the removed/deprecated-slot registry (mcp.items.review → names agent-tools #32 + the fix),
+- missing-target (a config path/binary that's gone → names the file + how to regenerate).
+
+All offline; no disk, no network.
+"""
+
+from __future__ import annotations
+
+
+from riglib import errors
+
+
+# ── exit-code table: stable + distinct per failure class ──────────────────────────
+def test_exit_codes_are_distinct_and_stable():
+    codes = {
+        errors.EXIT_CONFIG,
+        errors.EXIT_DRIFT,
+        errors.EXIT_UNKNOWN_ITEM,
+        errors.EXIT_MISSING_TARGET,
+        errors.EXIT_NOT_A_REPO,
+        errors.EXIT_MISSING_DEP,
+    }
+    # every class has its OWN code (no collisions)
+    assert len(codes) == 6
+    # pinned values — scripts depend on these; a change here is a breaking change
+    assert errors.EXIT_CONFIG == 2
+    assert errors.EXIT_DRIFT == 3
+    assert errors.EXIT_UNKNOWN_ITEM == 4
+    assert errors.EXIT_MISSING_TARGET == 5
+    assert errors.EXIT_NOT_A_REPO == 6
+    assert errors.EXIT_MISSING_DEP == 127
+
+
+def test_rig_error_carries_what_why_fix_and_code():
+    e = errors.RigError(
+        what="something broke",
+        why="because of a reason",
+        fix="run `rig fix`",
+        exit_code=errors.EXIT_CONFIG,
+    )
+    assert e.what == "something broke"
+    assert e.why == "because of a reason"
+    assert e.fix == "run `rig fix`"
+    assert e.exit_code == errors.EXIT_CONFIG
+    # str() includes the WHAT so a bare log line is still useful
+    assert "something broke" in str(e)
+
+
+def test_subclasses_pin_their_exit_code():
+    assert errors.ConfigError(what="x", why="y", fix="z").exit_code == errors.EXIT_CONFIG
+    assert errors.UnknownItemError(what="x", why="y", fix="z").exit_code == errors.EXIT_UNKNOWN_ITEM
+    assert errors.MissingTargetError(what="x", why="y", fix="z").exit_code == errors.EXIT_MISSING_TARGET
+    assert errors.NotARepoError(what="x", why="y", fix="z").exit_code == errors.EXIT_NOT_A_REPO
+    assert errors.MissingDepError(what="x", why="y", fix="z").exit_code == errors.EXIT_MISSING_DEP
+
+
+# ── consistent rendering: every error shows the 3 parts ───────────────────────────
+def test_render_shows_three_parts():
+    e = errors.UnknownItemError(
+        what="unknown mcp item: reviewr",
+        why="not in the mcp catalog (declared in /tmp/rig.yaml)",
+        fix="did you mean 'review'? edit mcp.items in /tmp/rig.yaml",
+    )
+    text = errors.render(e, color=False)
+    # the three load-bearing labels are present and ordered
+    assert "what:" in text.lower() or "error:" in text.lower()
+    assert "why" in text.lower()
+    assert "fix" in text.lower()
+    # the actual content survives rendering
+    assert "reviewr" in text
+    assert "review" in text
+    assert "/tmp/rig.yaml" in text
+
+
+# ── did-you-mean (Levenshtein) ────────────────────────────────────────────────────
+def test_did_you_mean_picks_nearest():
+    assert errors.did_you_mean("reviewr", {"review", "naming", "shell-timeouts"}) == "review"
+    assert errors.did_you_mean("dispatcherr", {"dispatcher", "templates"}) == "dispatcher"
+
+
+def test_did_you_mean_returns_none_when_too_far():
+    # a wildly different token shouldn't fabricate a bogus suggestion
+    assert errors.did_you_mean("zzzzzzzz", {"review", "naming"}) is None
+
+
+def test_did_you_mean_returns_none_on_empty_catalog():
+    assert errors.did_you_mean("anything", set()) is None
+
+
+# ── unknown-item error builder: did-you-mean OR empty-category message ─────────────
+def test_unknown_item_suggests_nearest():
+    e = errors.unknown_item_error(
+        category="mcp",
+        key="mcp.items.reviewr",
+        bad="reviewr",
+        known={"review", "context7"},
+        config_path="/tmp/rig.yaml",
+    )
+    assert isinstance(e, errors.UnknownItemError)
+    assert e.exit_code == errors.EXIT_UNKNOWN_ITEM
+    assert "review" in e.fix  # the suggestion
+    assert "/tmp/rig.yaml" in e.fix  # the offending config file path
+    assert "reviewr" in e.what
+
+
+def test_unknown_item_empty_category_says_remove_the_block():
+    # the category has NO slots → "remove the mcp block", NOT "known: none".
+    # use a NON-removed name so this exercises the empty-category branch (not removed-slot).
+    e = errors.unknown_item_error(
+        category="mcp",
+        key="mcp.items.context7",
+        bad="context7",
+        known=set(),
+        config_path="/tmp/rig.yaml",
+    )
+    assert "no slots" in e.why.lower() or "no slots" in e.fix.lower()
+    assert "remove" in e.fix.lower()
+    assert "mcp" in e.fix.lower()
+    # explicitly NOT the old useless phrasing
+    assert "known: none" not in (e.what + e.why + e.fix).lower()
+
+
+# ── removed/deprecated slots registry ─────────────────────────────────────────────
+def test_removed_slot_registry_has_mcp_review():
+    rec = errors.removed_slot("mcp", "review")
+    assert rec is not None
+    # names the PR/source and says it's a CLI+skill, not an MCP
+    assert "#32" in rec.reason
+    assert "cli" in rec.reason.lower()
+
+
+def test_removed_slot_error_names_it_and_the_fix():
+    e = errors.unknown_item_error(
+        category="mcp",
+        key="mcp.items.review",
+        bad="review",
+        known={"context7"},  # 'review' is NOT in the catalog (removed)
+        config_path="/home/u/.config/rig/config.yaml",
+    )
+    blob = (e.what + " " + e.why + " " + e.fix).lower()
+    # removed-slot path wins over did-you-mean: it must cite the removal + the exact fix
+    assert "#32" in (e.what + e.why + e.fix)
+    assert "remove" in blob
+    assert "mcp.items.review" in (e.what + e.why + e.fix)
+    assert "/home/u/.config/rig/config.yaml" in (e.what + e.why + e.fix)
+
+
+def test_removed_slot_unknown_returns_none():
+    assert errors.removed_slot("mcp", "context7") is None
+    assert errors.removed_slot("skills", "naming") is None
+
+
+# ── missing-target ────────────────────────────────────────────────────────────────
+def test_missing_target_error_names_file_and_regen():
+    e = errors.missing_target_error(
+        what_kind="hook",
+        target="/home/u/.claude/hooks/block-no-verify.json",
+        why="settings.json references a hook descriptor that no longer exists on disk",
+        regen="run `rig apply` to reinstall the agent-hooks",
+    )
+    assert isinstance(e, errors.MissingTargetError)
+    assert e.exit_code == errors.EXIT_MISSING_TARGET
+    assert "/home/u/.claude/hooks/block-no-verify.json" in e.what
+    assert "rig apply" in e.fix
+
+
+def test_not_a_repo_error_code():
+    e = errors.NotARepoError(what="not a git repo", why="no .git", fix="cd into a repo")
+    assert e.exit_code == errors.EXIT_NOT_A_REPO
+
+
+# ── the CLI guard turns a RigError into render+exit-code ───────────────────────────
+def test_guard_renders_and_returns_exit_code(capsys):
+    def boom():
+        raise errors.UnknownItemError(what="bad item", why="reason", fix="the fix")
+
+    rc = errors.guard(boom)
+    out = capsys.readouterr().out
+    assert rc == errors.EXIT_UNKNOWN_ITEM
+    assert "bad item" in out
+    assert "the fix" in out
+
+
+def test_guard_passes_through_success():
+    assert errors.guard(lambda: 0) == 0
+    assert errors.guard(lambda: 3) == 3
diff --git a/tests/test_missing_target.py b/tests/test_missing_target.py
new file mode 100644
index 0000000..4737311
--- /dev/null
+++ b/tests/test_missing_target.py
@@ -0,0 +1,208 @@
+"""missing-target heuristic — a config that references a path/binary that's gone on disk.
+
+Motivating case (from the spec): a hook command in the harness ``settings.json`` points at a
+script that no longer exists (the dead-rtk-hook case), which otherwise surfaces only as a
+generic harness "PreToolUse error" with no hint of the cause. The scanner names the missing
+file + how to regenerate it, and `rig status` / `rig doctor` surface it PROACTIVELY (before it
+bites at runtime).
+
+All offline; operates on a fake settings.json under a tmp HOME.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from riglib import errors
+from riglib.missing_target import scan_settings_hooks
+
+
+def _settings(home: Path, hooks: dict) -> Path:
+    cfg = home / ".claude" / "settings.json"
+    cfg.parent.mkdir(parents=True, exist_ok=True)
+    cfg.write_text(json.dumps({"hooks": hooks}), encoding="utf-8")
+    return cfg
+
+
+def test_scan_flags_dead_hook_command(tmp_path):
+    home = tmp_path / "home"
+    gone = tmp_path / "gone" / "rtk-hook.py"  # never created
+    settings = _settings(
+        home,
+        {
+            "PreToolUse": [
+                {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]}
+            ]
+        },
+    )
+    findings = scan_settings_hooks(settings)
+    assert len(findings) == 1
+    f = findings[0]
+    assert isinstance(f, errors.MissingTargetError)
+    assert f.exit_code == errors.EXIT_MISSING_TARGET
+    assert str(gone) in f.what  # names the missing file
+    assert "settings.json" in f.why  # says WHERE it's referenced
+    assert f.fix  # carries a concrete regenerate/remove hint
+
+
+def test_scan_ignores_live_hook_command(tmp_path):
+    home = tmp_path / "home"
+    live = tmp_path / "live-hook.py"
+    live.write_text("#!/usr/bin/env python3\n", encoding="utf-8")
+    settings = _settings(
+        home,
+        {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {live}"}]}]},
+    )
+    assert scan_settings_hooks(settings) == []
+
+
+def test_scan_ignores_bare_binary_on_path(tmp_path):
+    # a command that's a plain binary name (resolved via PATH), not an absolute script path,
+    # must not be flagged as a missing FILE — we only check absolute path args that look like
+    # script files.
+    home = tmp_path / "home"
+    settings = _settings(
+        home,
+        {"PreToolUse": [{"matcher": "*", "hooks": [{"type": "command", "command": "gitleaks protect"}]}]},
+    )
+    assert scan_settings_hooks(settings) == []
+
+
+def test_scan_ignores_nonexistent_output_arg_when_script_lives(tmp_path):
+    # a hook whose SCRIPT exists but which names a not-yet-created runtime output file as a
+    # later absolute-path arg must NOT be flagged — the output file is created at run time, not
+    # a dead reference. Only the invoked script is the "target" we check.
+    home = tmp_path / "home"
+    live = tmp_path / "live-hook.py"
+    live.write_text("#!/usr/bin/env python3\n", encoding="utf-8")
+    out_file = tmp_path / "run" / "out.json"  # never created — a runtime output path
+    settings = _settings(
+        home,
+        {
+            "PreToolUse": [
+                {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {live} --out {out_file}"}]}
+            ]
+        },
+    )
+    assert scan_settings_hooks(settings) == []
+
+
+def test_scan_flags_dead_script_even_with_trailing_args(tmp_path):
+    # the inverse: a DEAD script is still flagged, and trailing args don't change the verdict.
+    home = tmp_path / "home"
+    gone = tmp_path / "gone" / "hook.py"  # never created — the script itself is missing
+    out_file = tmp_path / "run" / "out.json"
+    settings = _settings(
+        home,
+        {
+            "PreToolUse": [
+                {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone} --out {out_file}"}]}
+            ]
+        },
+    )
+    findings = scan_settings_hooks(settings)
+    assert len(findings) == 1
+    assert str(gone) in findings[0].what
+    assert str(out_file) not in findings[0].what  # the output arg is not the reported target
+
+
+def test_scan_flags_dead_script_under_absolute_interpreter(tmp_path):
+    # the motivating rtk-hook form on macOS: the interpreter is itself an ABSOLUTE path
+    # (/usr/bin/env, /opt/homebrew/bin/python3). The first absolute token is the interpreter
+    # (which exists) — the scanner must skip it and check the real (missing) script after it.
+    home = tmp_path / "home"
+    gone = tmp_path / "gone" / "rtk-hook.py"  # never created
+    for cmd in (f"/usr/bin/env python3 {gone}", f"/opt/homebrew/bin/python3 {gone}"):
+        settings = _settings(
+            home, {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": cmd}]}]}
+        )
+        findings = scan_settings_hooks(settings)
+        assert len(findings) == 1, cmd
+        assert str(gone) in findings[0].what, cmd
+
+
+def test_scan_ignores_inline_c_code(tmp_path):
+    # `python3 -c '<code>'` runs no script FILE — there is no path target to verify, so a
+    # nonexistent-looking path embedded in inline code must not be flagged.
+    home = tmp_path / "home"
+    settings = _settings(
+        home,
+        {"PreToolUse": [{"matcher": "*", "hooks": [{"type": "command", "command": "python3 -c 'import sys; sys.exit(0)'"}]}]},
+    )
+    assert scan_settings_hooks(settings) == []
+
+
+def test_scan_flags_dead_script_when_dash_c_is_script_arg(tmp_path):
+    # `-c` AFTER the script is the SCRIPT's own argument, not Python's inline-code flag — a dead
+    # script must still be flagged. (Guards against suppressing the check on any trailing `-c`.)
+    home = tmp_path / "home"
+    gone = tmp_path / "gone" / "hook.py"  # the script itself is missing
+    settings = _settings(
+        home,
+        {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone} -c config.json"}]}]},
+    )
+    findings = scan_settings_hooks(settings)
+    assert len(findings) == 1
+    assert str(gone) in findings[0].what
+
+
+def test_scan_missing_settings_file_is_empty(tmp_path):
+    # no settings.json at all → nothing to scan, no error
+    assert scan_settings_hooks(tmp_path / "home" / ".claude" / "settings.json") == []
+
+
+def test_scan_malformed_settings_is_empty(tmp_path):
+    home = tmp_path / "home"
+    cfg = home / ".claude" / "settings.json"
+    cfg.parent.mkdir(parents=True, exist_ok=True)
+    cfg.write_text("{not json", encoding="utf-8")
+    assert scan_settings_hooks(cfg) == []
+
+
+def test_status_surfaces_dead_hook(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    """`rig status` proactively surfaces a dead hook path in the harness settings.json."""
+    import subprocess
+
+    home = tmp_path / "home"
+    gone = tmp_path / "gone-hook.py"  # referenced but absent
+    _settings(
+        home,
+        {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]}]},
+    )
+    monkeypatch.setenv("HOME", str(home))
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    subprocess.run(["git", "init", "-q"], cwd=str(repo), check=True)
+    (repo / "rig.yaml").write_text(
+        f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+        "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n"
+        "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: false}\nagents_md: {enabled: false}\n",
+        encoding="utf-8",
+    )
+    main_rc = __import__("riglib.cli", fromlist=["main"]).main(["status", "-C", str(repo)])
+    out = capsys.readouterr().out
+    assert str(gone) in out  # the dead hook path is named
+    assert "missing" in out.lower()
+    # a missing-target makes status non-zero (something IS wrong), distinct from clean
+    assert main_rc != 0
+
+
+def test_doctor_surfaces_dead_hook(tmp_path, capsys, monkeypatch):
+    """`rig doctor` proactively surfaces a dead hook path with the missing-target exit code."""
+    from riglib.cli import main
+
+    home = tmp_path / "home"
+    gone = tmp_path / "gone-hook.py"
+    _settings(
+        home,
+        {"PreToolUse": [{"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]}]},
+    )
+    monkeypatch.setenv("HOME", str(home))
+    rc = main(["doctor"])
+    out = capsys.readouterr().out
+    assert str(gone) in out
+    # doctor returns the missing-target class when deps are otherwise fine OR may already be 1
+    # if a required dep is missing on the box; either way it's non-zero and names the dead hook.
+    assert rc != 0
diff --git a/tests/test_plan_errors.py b/tests/test_plan_errors.py
new file mode 100644
index 0000000..e37f776
--- /dev/null
+++ b/tests/test_plan_errors.py
@@ -0,0 +1,77 @@
+"""Plan-builder error integration — unknown items now raise structured errors v2.
+
+`plan.build` (via `_validate_item_names`) is where a config that names a non-existent
+catalog item is caught. These tests assert it now raises the structured
+:class:`errors.UnknownItemError` (exit 4) carrying the 3-part what/why/fix — with the
+removed-slot, did-you-mean, and empty-category heuristics — instead of the old thin
+``PlanError("unknown … (known: none)")``.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from riglib import errors
+from riglib.catalog import Catalog
+from riglib.config import LoadedConfig
+from riglib.plan import build
+
+
+def _loaded(tmp_path, data: dict) -> LoadedConfig:
+    cfg = tmp_path / "rig.yaml"
+    cfg.write_text("version: 1\n", encoding="utf-8")  # a real file for the path to name
+    return LoadedConfig(data=data, repo_root=tmp_path, repo_path=cfg, layers=[f"repo:{cfg}"])
+
+
+def test_unknown_mcp_item_raises_structured_with_suggestion(tmp_path, fake_agent_tools):
+    catalog = Catalog.scan(str(fake_agent_tools))
+    # fake catalog has mcp slot "review"; "reviewr" is a typo
+    loaded = _loaded(tmp_path, {"mcp": {"items": {"reviewr": {"enabled": True}}}})
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    e = exc.value
+    assert e.exit_code == errors.EXIT_UNKNOWN_ITEM
+    assert "reviewr" in e.what
+    assert "review" in e.fix  # did-you-mean
+    assert str(tmp_path / "rig.yaml") in e.fix  # the offending file path
+
+
+def test_removed_mcp_review_slot_names_pr_and_fix(tmp_path, fake_agent_tools):
+    # a catalog WITHOUT a review mcp slot (so 'review' is genuinely gone) → removed-slot path.
+    # Build a catalog then drop the review mcp item to simulate the post-#32 world.
+    catalog = Catalog.scan(str(fake_agent_tools))
+    catalog.items = [i for i in catalog.items if not (i.category == "mcp" and i.name == "review")]
+    loaded = _loaded(tmp_path, {"mcp": {"items": {"review": {"enabled": True}}}})
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    blob = exc.value.what + exc.value.why + exc.value.fix
+    assert "#32" in blob  # names the removal PR
+    assert "mcp.items.review" in blob  # the exact key
+    assert "remove" in blob.lower()
+    assert str(tmp_path / "rig.yaml") in blob
+
+
+def test_unknown_universal_skill_suggests_nearest(tmp_path, fake_agent_tools):
+    catalog = Catalog.scan(str(fake_agent_tools))
+    # fake catalog has "naming"; "namin" is a typo
+    loaded = _loaded(tmp_path, {"skills": {"universal": {"enable": ["namin"]}}})
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    assert "naming" in exc.value.fix
+
+
+def test_unknown_ci_item_raises_structured(tmp_path, fake_agent_tools):
+    catalog = Catalog.scan(str(fake_agent_tools))
+    loaded = _loaded(tmp_path, {"ci": {"items": {"secret-scn": {"enabled": True}}}})
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    assert exc.value.exit_code == errors.EXIT_UNKNOWN_ITEM
+    assert "secret-scan" in exc.value.fix  # nearest valid
+
+
+def test_unknown_git_hooks_key_raises_structured(tmp_path, fake_agent_tools):
+    catalog = Catalog.scan(str(fake_agent_tools))
+    loaded = _loaded(tmp_path, {"git_hooks": {"dispatcherr": {"enabled": True}}})
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    assert "dispatcher" in exc.value.fix
diff --git a/tests/test_status_layers.py b/tests/test_status_layers.py
new file mode 100644
index 0000000..69c640d
--- /dev/null
+++ b/tests/test_status_layers.py
@@ -0,0 +1,232 @@
+"""rig status — GLOBAL vs REPO layer separation, non-git handling, prominent no-rig.yaml.
+
+The live-run bugs this fixes (from the spec):
+- status mixed machine-wide GLOBAL drift (skills/hooks/harness/mcp from
+  ~/.config/rig/config.yaml) with REPO drift (this repo's CI/symlinks from ./rig.yaml) in one
+  flat dump → group by LAYER, each item naming WHICH config file declares it;
+- in a non-git dir (e.g. ~) status printed "no rig.yaml in this repo (should be committed)" →
+  it must show ONLY the global layer + "(not a git repository — repo layer N/A)";
+- a real repo with no committed rig.yaml buried the fix above a 49-line drift dump → make it
+  PROMINENT with "run `rig init`";
+- the "extras (on-disk, not declared) are NEVER deleted by apply" reassurance must be LOUD.
+
+These drive the CLI through `main(["status", ...])` and assert on captured stdout + exit code.
+All offline (no agent-tools apply leg needed — drift detection runs against the fake catalog).
+"""
+
+from __future__ import annotations
+
+import subprocess
+from pathlib import Path
+
+import pytest
+
+from riglib import errors
+from riglib.cli import main
+from riglib.layers import GLOBAL, REPO, layer_for_category
+
+
+@pytest.fixture(autouse=True)
+def _isolate_home(tmp_path, monkeypatch):
+    """Isolate HOME for every test in this module.
+
+    `rig status`/`rig doctor` unconditionally call `_scan_missing_targets()`, which reads the
+    real `~/.claude/settings.json` via `expanduser`. Without an isolated HOME, a dev machine
+    whose real settings.json has a hook pointing at a now-gone script would make `dead_targets`
+    non-empty — flipping a "clean repo → exit 0" test into EXIT_MISSING_TARGET and a "non-git →
+    0/drift" test out of its expected set. An empty tmp HOME has no settings.json, so the scan
+    finds nothing and the tests assert only what they mean to. Runs before each test body, so a
+    test that needs a populated HOME (e.g. the GLOBAL-drift case) still overrides it.
+    """
+    monkeypatch.setenv("HOME", str(tmp_path / "home"))
+
+
+def _git_repo(path: Path) -> Path:
+    """Make ``path`` a real git repo (status uses `git rev-parse`, so a bare `.git` dir is not
+    enough — it must be an actual repository for env.is_git_repo to be True)."""
+    path.mkdir(parents=True, exist_ok=True)
+    subprocess.run(["git", "init", "-q"], cwd=str(path), check=True)
+    return path
+
+
+# ── category → layer classification ───────────────────────────────────────────────
+def test_category_layer_classification():
+    # GLOBAL: machine-wide artifacts declared in ~/.config/rig/config.yaml
+    for cat in ("skills", "agent_hooks", "mcp", "harness", "models", "git_hooks", "tmux"):
+        assert layer_for_category(cat) == GLOBAL, cat
+    # REPO: this repo's artifacts declared in ./rig.yaml
+    for cat in ("ci", "agents_md", "github"):
+        assert layer_for_category(cat) == REPO, cat
+
+
+# ── non-git dir: no "should be committed", only the global layer ──────────────────
+def test_status_non_git_dir_no_should_be_committed(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    monkeypatch.setenv("RIG_AGENT_TOOLS_SOURCE", str(fake_agent_tools))
+    # a plain dir with NO .git — the motivating case is running status in ~
+    plain = tmp_path / "not-a-repo"
+    plain.mkdir()
+    rc = main(["status", "-C", str(plain)])
+    out = capsys.readouterr().out
+    assert "should be committed" not in out  # the bug: never say this outside a git repo
+    assert "not a git repository" in out.lower()
+    assert "repo layer" in out.lower()  # "... repo layer N/A"
+    # it still ran (showed the global layer), not a hard error
+    assert rc in (0, errors.EXIT_DRIFT)
+
+
+# ── non-git dir, catalog UNRESOLVABLE: still reports "not a git repository" ────────
+def test_status_non_git_dir_without_resolvable_catalog(tmp_path, capsys, monkeypatch):
+    """The smoke-only regression: in a non-git dir where the agent-tools checkout cannot be
+    resolved (fresh machine / running in ~ with no source), the catalog scan fails — but that
+    feeds only the REPO layer, which a non-git dir doesn't have. status must NOT die with the
+    catalog error; it must still report "not a git repository" and exit 0.
+
+    The other non-git test pins RIG_AGENT_TOOLS_SOURCE so the catalog resolves; this one removes
+    every source (env + default candidates) to drive the CatalogError fallback path explicitly.
+    """
+    import riglib.catalog as catalog
+
+    monkeypatch.delenv("RIG_AGENT_TOOLS_SOURCE", raising=False)
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    # point every default candidate at empties so resolve_source finds no checkout anywhere
+    monkeypatch.setattr(catalog, "_DEFAULT_SOURCE_CANDIDATES", (str(tmp_path / "nope"),))
+    plain = tmp_path / "not-a-repo"
+    plain.mkdir()
+    rc = main(["status", "-C", str(plain)])
+    out = capsys.readouterr().out
+    assert rc == 0
+    assert "not a git repository" in out.lower()
+    assert "should be committed" not in out
+    # the catalog error must NOT have leaked through
+    assert "could not locate an agent-tools checkout" not in out
+
+
+# ── real repo, no committed rig.yaml: PROMINENT fix ───────────────────────────────
+def test_status_real_repo_no_rigyaml_prominent_init_hint(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    monkeypatch.setenv("RIG_AGENT_TOOLS_SOURCE", str(fake_agent_tools))
+    repo = _git_repo(tmp_path / "repo")  # a real git repo, but no rig.yaml
+    main(["status", "-C", str(repo)])
+    out = capsys.readouterr().out
+    # prominent: names the fix command, not a one-liner buried in drift
+    assert "rig init" in out
+    assert "no committed rig.yaml" in out.lower() or "no rig.yaml" in out.lower()
+
+
+# ── layer grouping + per-item config-file provenance ──────────────────────────────
+def test_status_groups_drift_by_layer_with_provenance(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    repo = _git_repo(tmp_path / "repo")
+    # an undeclared CI workflow on disk → a REPO-layer extra
+    (repo / ".github" / "workflows").mkdir(parents=True)
+    (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8")
+    cfg = repo / "rig.yaml"
+    cfg.write_text(
+        f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+        "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n"
+        "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: true, all: false}\n",
+        encoding="utf-8",
+    )
+    rc = main(["status", "-C", str(repo)])
+    out = capsys.readouterr().out
+    assert rc == errors.EXIT_DRIFT
+    # the REPO layer heading appears (the CI extra + agents_md are REPO-layer drift). GLOBAL is
+    # only printed when there IS global drift — HOME is isolated here, so there is none.
+    assert "REPO — this repository" in out
+    # the CI extra is reported and names the declaring REPO config file (provenance)
+    assert "rogue" in out
+    assert str(cfg) in out  # provenance: which config file declares this layer
+
+
+# ── GLOBAL-layer drift names the GLOBAL config file ───────────────────────────────
+def test_status_global_layer_drift_names_global_config(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    # an isolated HOME with an undeclared MCP server on disk → a GLOBAL-layer extra, and a
+    # global config file so its provenance can be named.
+    import json
+
+    home = tmp_path / "home"
+    (home / ".claude" / "mcp").mkdir(parents=True)
+    (home / ".claude" / "mcp" / "mcp.json").write_text(
+        json.dumps({"mcpServers": {"rogue-server": {"command": "x"}}}), encoding="utf-8"
+    )
+    monkeypatch.setenv("HOME", str(home))
+    xdg = tmp_path / "xdg"
+    (xdg / "rig").mkdir(parents=True)
+    gcfg = xdg / "rig" / "config.yaml"
+    gcfg.write_text(f"version: 1\nagent_tools_source: {fake_agent_tools}\n", encoding="utf-8")
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg))
+    repo = _git_repo(tmp_path / "repo")
+    (repo / "rig.yaml").write_text(
+        f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+        "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n"
+        "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: false}\nagents_md: {enabled: false}\n",
+        encoding="utf-8",
+    )
+    rc = main(["status", "-C", str(repo)])
+    out = capsys.readouterr().out
+    assert rc == errors.EXIT_DRIFT
+    assert "GLOBAL — machine-wide" in out
+    assert "rogue-server" in out
+    assert str(gcfg) in out  # the GLOBAL config file is named as the declaring layer
+
+
+# ── reassurance is LOUD ───────────────────────────────────────────────────────────
+def test_status_extras_never_deleted_reassurance_is_loud(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    repo = _git_repo(tmp_path / "repo")
+    (repo / ".github" / "workflows").mkdir(parents=True)
+    (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8")
+    cfg = repo / "rig.yaml"
+    cfg.write_text(
+        f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+        "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n"
+        "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: true, all: false}\n",
+        encoding="utf-8",
+    )
+    main(["status", "-C", str(repo)])
+    out = capsys.readouterr().out.lower()
+    # the guarantee must be explicit: apply NEVER deletes on-disk-not-declared items
+    assert "never" in out and "delete" in out
+    assert "extra" in out
+
+
+# ── reassurance also LOUD in `rig apply --help` ───────────────────────────────────
+def test_apply_help_says_apply_never_deletes(capsys):
+    import pytest
+
+    with pytest.raises(SystemExit):
+        main(["apply", "--help"])
+    out = capsys.readouterr().out.lower()
+    assert "never" in out and "delete" in out  # the apply-won't-nuke reassurance
+
+
+# ── exit codes documented in --help (structured-exit-codes skill) ─────────────────
+def test_help_documents_exit_codes(capsys):
+    import pytest
+
+    with pytest.raises(SystemExit):
+        main(["--help"])
+    out = capsys.readouterr().out
+    assert "exit codes:" in out
+    assert "4" in out and "unknown item" in out
+    assert "6" in out and "not a git repository" in out
+
+
+# ── a clean in-sync repo still exits 0 ────────────────────────────────────────────
+def test_status_clean_repo_exits_zero(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    repo = _git_repo(tmp_path / "repo")
+    cfg = repo / "rig.yaml"
+    # everything OFF, including agents_md (on by default → would otherwise drift the symlink)
+    cfg.write_text(
+        f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+        "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n"
+        "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: false}\n"
+        "agents_md: {enabled: false}\n",
+        encoding="utf-8",
+    )
+    rc = main(["status", "-C", str(repo)])
+    out = capsys.readouterr().out
+    assert rc == 0
+    assert "in sync" in out.lower()

From fe2a6a564a8ef1f23f2822503c5555cda6af54ca Mon Sep 17 00:00:00 2001
From: Alex Ultra <invntrm@icloud.com>
Date: Wed, 17 Jun 2026 06:49:07 +0200
Subject: [PATCH 2/2] fix(errors): address codex P2 review findings on
 error-system-v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three deferred P2 findings from the PR #27 codex review, now fixed properly:

1. config.py — per-key LAYER PROVENANCE for unknown items. The loader now tracks
   which config FILE last declared each top-level key (key_sources, built in
   cascade order). An unknown item that came SOLELY from the global config is
   reported against ~/.config/rig/config.yaml, not the repo rig.yaml. plan.py's
   unknown-item errors resolve the source via config.source_for_key().

2. missing_target.py — SKIP `-m module` invocations. `python3 -m my_hook --out x`
   runs a module by import name, not a script FILE; the scanner now treats `-m`
   like `-c` (interpreter mode, no path to verify), so a healthy module-based hook
   with an absolute --out arg no longer false-flags as a missing target.

3. cli.py — EXIT-CODE PRECEDENCE. A dead hook/target now surfaces its exit code
   (EXIT_MISSING_TARGET=5) even alongside config<->disk drift, instead of being
   masked by the drift exit (3). Both findings are still printed; missing-target
   outranks drift (it fails at runtime). Precedence documented in `rig --help`.

Also makes tests/smoke.sh run pytest via `uv run --with pytest` when uv is
available (the documented command), so the suite runs on a machine whose bare
python3 lacks pytest.

Tests: RED-first for each finding; full suite 504 -> 513 passing; smoke green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 riglib/cli.py                | 42 +++++++++++++------
 riglib/config.py             | 50 ++++++++++++++++++++---
 riglib/missing_target.py     | 16 +++++---
 riglib/plan.py               | 16 ++++----
 tests/smoke.sh               |  9 +++-
 tests/test_config.py         | 25 ++++++++++++
 tests/test_missing_target.py | 44 ++++++++++++++++++++
 tests/test_plan_errors.py    | 79 +++++++++++++++++++++++++++++++++++-
 tests/test_status_layers.py  | 45 +++++++++++++++++++-
 9 files changed, 291 insertions(+), 35 deletions(-)

diff --git a/riglib/cli.py b/riglib/cli.py
index f19ec0a..65ed84e 100644
--- a/riglib/cli.py
+++ b/riglib/cli.py
@@ -71,6 +71,10 @@ def build_parser() -> argparse.ArgumentParser:
             "  5    missing target (config references a path/binary that's gone on disk)\n"
             "  6    not a git repository (a repo-scoped command run outside a repo)\n"
             "  127  missing dependency (a required external tool isn't installed)\n"
+            "\n"
+            "  precedence: when `rig status` finds BOTH a missing target and config↔disk drift,\n"
+            "  it prints both but exits 5 (missing-target outranks drift — the dead reference\n"
+            "  fails at runtime, so it's the more urgent class).\n"
         ),
     )
     p.add_argument("--version", action="version", version=f"rig {__version__}")
@@ -537,19 +541,33 @@ def cmd_status(args: argparse.Namespace) -> int:
         print(_ok("\n  in sync — config and disk agree"))
         return 0
 
-    if dead_targets and report.in_sync:
-        # no config↔disk drift, but a dead hook reference IS a problem worth a non-zero exit.
-        print(_dim("\n  (no config↔disk drift, but a missing target above needs attention)"))
+    if not report.in_sync:
+        # render the full drift report whenever there IS drift — the user must SEE every problem,
+        # regardless of which exit code we ultimately surface below.
+        _render_drift_by_layer(report, loaded, env)
+        # LOUD reassurance: apply NEVER deletes on-disk-not-declared items. A user must not fear
+        # `rig apply` will nuke a hand-added skill (it won't — extras are surfaced, never removed).
+        print()
+        print(_ok("  ✔ safe: `rig apply` NEVER deletes on-disk extras — items present on disk but"))
+        print(_ok("    not declared in any layer are left for you to decide. apply only ADDS/UPDATES."))
+        print(_dim("\n  run `rig apply` to converge config→disk (extras above are left as-is)"))
+
+    # EXIT-CODE PRECEDENCE — a dead target OUTRANKS ordinary drift. A missing hook script will
+    # FAIL at runtime (a generic "PreToolUse error"); drift is merely "config and disk disagree".
+    # The more-actionable, higher-severity class wins so a script following the stable exit-code
+    # contract sees EXIT_MISSING_TARGET (5) even when drift is also present — the drift exit (3)
+    # must never MASK it. Both are printed above; only the exit code is single-valued.
+    if dead_targets:
+        if not report.in_sync:
+            print(_dim(
+                f"\n  exit: missing-target ({errors.EXIT_MISSING_TARGET}) takes precedence over "
+                f"config↔disk drift ({errors.EXIT_DRIFT}) — the dead reference fails at runtime; "
+                "fix it first, then re-run."
+            ))
+        else:
+            print(_dim("\n  (no config↔disk drift, but a missing target above needs attention)"))
         return errors.EXIT_MISSING_TARGET
-
-    _render_drift_by_layer(report, loaded, env)
-    # LOUD reassurance: apply NEVER deletes on-disk-not-declared items. A user must not fear
-    # `rig apply` will nuke a hand-added skill (it won't — extras are surfaced, never removed).
-    print()
-    print(_ok("  ✔ safe: `rig apply` NEVER deletes on-disk extras — items present on disk but"))
-    print(_ok("    not declared in any layer are left for you to decide. apply only ADDS/UPDATES."))
-    print(_dim("\n  run `rig apply` to converge config→disk (extras above are left as-is)"))
-    return 3
+    return errors.EXIT_DRIFT
 
 
 def _declaring_config(category: str, loaded) -> str:
diff --git a/riglib/config.py b/riglib/config.py
index 7a1ec5d..fcb7816 100644
--- a/riglib/config.py
+++ b/riglib/config.py
@@ -105,6 +105,11 @@ class LoadedConfig:
     global_path: Path | None = None
     repo_path: Path | None = None
     layers: list[str] = field(default_factory=list)
+    # Provenance: which config FILE last declared each top-level key. Built during the cascade
+    # (global first, then repo/explicit overwrites), so a key present only in the global config
+    # maps to the global path even when a repo rig.yaml is also loaded. Used to name the CORRECT
+    # source file in an unknown-item error instead of always blaming the repo file.
+    key_sources: dict[str, Path] = field(default_factory=dict)
 
     @property
     def agent_tools_source(self) -> str | None:
@@ -134,6 +139,22 @@ def primary_config_path(self) -> Path:
             return self.global_path
         return repo_config_path(self.repo_root)
 
+    def source_for_key(self, dotted_key: str) -> Path:
+        """The config FILE that declared ``dotted_key`` (e.g. ``mcp.items.review``).
+
+        Resolves provenance by the TOP-LEVEL key (``mcp``): a key that came solely from the
+        global ``~/.config/rig/config.yaml`` names the global file, while a key the repo
+        ``rig.yaml`` set (or overrode) names the repo file. This is what lets an unknown-item
+        error point at the file actually carrying the stale entry — e.g. a removed MCP slot
+        left in the global config is reported against the global file, not the repo's.
+
+        Falls back to :attr:`primary_config_path` when the key has no tracked provenance (a
+        config built directly in tests, or a key that is not top-level).
+        """
+        top = dotted_key.split(".", 1)[0]
+        src = self.key_sources.get(top)
+        return src if src is not None else self.primary_config_path
+
 
 def load(
     repo_root: Path,
@@ -152,34 +173,51 @@ def load(
     layers: list[str] = []
     gpath: Path | None = None
     rpath: Path | None = None
+    # Track which file last declared each top-level key, in cascade order (global, then
+    # repo/explicit). A key the repo layer sets overwrites the global provenance — so the final
+    # mapping names the file the user must edit to remove the offending key.
+    key_sources: dict[str, Path] = {}
+
+    def _merge_layer(path: Path, label: str) -> None:
+        """Merge one config file into the accumulator and record its key provenance + layer.
+
+        Single seam so a new layer can't forget to update ``key_sources`` (the bug the
+        provenance feature exists to avoid). ``merged`` is rebound via ``nonlocal`` because
+        ``_deep_merge`` returns a fresh dict rather than mutating in place.
+        """
+        nonlocal merged
+        data = _load_yaml(path)
+        merged = _deep_merge(merged, data)
+        for k in data:
+            key_sources[k] = path
+        layers.append(f"{label}:{path}")
 
     if include_global:
         gpath = global_config_path()
         if gpath.is_file():
-            merged = _deep_merge(merged, _load_yaml(gpath))
-            layers.append(f"global:{gpath}")
+            _merge_layer(gpath, "global")
 
     if explicit_config is not None:
         rpath = explicit_config.resolve()
         if not rpath.is_file():
             raise ConfigError(f"--config file not found: {rpath}")
-        merged = _deep_merge(merged, _load_yaml(rpath))
-        layers.append(f"config:{rpath}")
+        _merge_layer(rpath, "config")
     else:
         rpath = repo_config_path(repo_root)
         if rpath.is_file():
-            merged = _deep_merge(merged, _load_yaml(rpath))
-            layers.append(f"repo:{rpath}")
+            _merge_layer(rpath, "repo")
 
     validate(merged)
     merged.pop("scope", None)  # `scope` is a removed legacy key — drop it so it never
     # lingers in loaded.data, gets re-serialized, or is mistaken for a live setting.
+    key_sources.pop("scope", None)
     return LoadedConfig(
         data=merged,
         repo_root=repo_root,
         global_path=gpath if gpath and gpath.is_file() else None,
         repo_path=rpath if rpath and rpath.is_file() else None,
         layers=layers,
+        key_sources=key_sources,
     )
 
 
diff --git a/riglib/missing_target.py b/riglib/missing_target.py
index a0b9f57..1a30f76 100644
--- a/riglib/missing_target.py
+++ b/riglib/missing_target.py
@@ -47,8 +47,9 @@ def _missing_paths_in_command(command: str) -> list[str]:
     interpreter — and that interpreter may itself be an ABSOLUTE path on macOS
     (``/usr/bin/env python3 /abs/hook.py``, ``/opt/homebrew/bin/python3 /abs/hook.py``). We must
     check the SCRIPT, not the interpreter: skip a leading interpreter token (bare or absolute)
-    plus its ``env VAR=…`` / module / ``-c`` style args, then take the first remaining
-    script-looking token as the target. We check only THAT token — a later absolute path is
+    plus its ``env VAR=…`` / ``-c`` inline-code / ``-m <module>`` style args (both ``-c`` and
+    ``-m`` mean there is no script FILE to verify), then take the first remaining script-looking
+    token as the target. We check only THAT token — a later absolute path is
     almost always a runtime OUTPUT arg (``--out /var/run/x.json``) that legitimately doesn't
     exist yet; flagging it would make ``rig status``/``doctor`` cry wolf on a healthy hook.
     """
@@ -69,10 +70,13 @@ def _missing_paths_in_command(command: str) -> list[str]:
         rest = tokens[1:]
 
     for tok in rest:
-        # `-c '<inline code>'` as an INTERPRETER flag (i.e. before any script token) means there
-        # is no script FILE to verify. Only honor it before the first path — a `-c` AFTER the
-        # script is the script's OWN argument, not Python's, and must not suppress the check.
-        if via_interpreter and tok == "-c":
+        # `-c '<inline code>'` and `-m <module>` as INTERPRETER flags (i.e. before any script
+        # token) both mean there is NO script FILE to verify: `-c` runs inline code, `-m` runs an
+        # installed module by import name (`python3 -m my_hook --out x` has no path to check, and
+        # a later absolute arg like `--out /tmp/x.json` is a runtime output, not a hook script).
+        # Only honor them before the first path — a `-c`/`-m` AFTER the script is the script's OWN
+        # argument, not the interpreter's, and must not suppress the check.
+        if via_interpreter and tok in ("-c", "-m"):
             return []
         if _looks_like_script_path(tok):
             p = Path(tok).expanduser()
diff --git a/riglib/plan.py b/riglib/plan.py
index 2eeb932..62a1184 100644
--- a/riglib/plan.py
+++ b/riglib/plan.py
@@ -251,24 +251,25 @@ def _validate_item_names(config: LoadedConfig, catalog: Catalog) -> None:
     """
     from . import errors
 
-    config_path = str(config.primary_config_path)
-
     def _check(category: str, names: set[str], known: set[str], key_prefix: str) -> None:
         """Raise a structured :class:`errors.UnknownItemError` for the first unknown name.
 
         ``category`` is the catalog category used for the removed-slot lookup + did-you-mean;
         ``key_prefix`` is the dotted config-key path (e.g. ``mcp.items``) the bad name hangs
-        off, so the error names the EXACT offending key (``mcp.items.review``) + its file.
+        off, so the error names the EXACT offending key (``mcp.items.review``) + its file. The
+        file is resolved by the key's PROVENANCE (``source_for_key``): a stale entry that came
+        solely from the global config is reported against the global file, not the repo's.
         """
         unknown = sorted(names - known)
         if unknown:
             bad = unknown[0]
+            key = f"{key_prefix}.{bad}"
             raise errors.unknown_item_error(
                 category=category,
-                key=f"{key_prefix}.{bad}",
+                key=key,
                 bad=bad,
                 known=known,
-                config_path=config_path,
+                config_path=str(config.source_for_key(key)),
             )
 
     # skills — universal group
@@ -418,12 +419,13 @@ def build(config: LoadedConfig, catalog: Catalog, *, project_type: str = "unknow
             from . import errors
 
             bad = unknown[0]
+            key = f"ci.items.{bad}"
             raise errors.unknown_item_error(
                 category="ci",
-                key=f"ci.items.{bad}",
+                key=key,
                 bad=bad,
                 known=known,
-                config_path=str(config.primary_config_path),
+                config_path=str(config.source_for_key(key)),
             )
 
         # resolve which slots are enabled: per-item override > enable/disable > all > off.
diff --git a/tests/smoke.sh b/tests/smoke.sh
index d110b1c..442229c 100755
--- a/tests/smoke.sh
+++ b/tests/smoke.sh
@@ -182,8 +182,15 @@ YAML
 fi
 
 # ── 4. unit suite ─────────────────────────────────────────────────────────────
+# Run pytest the way the repo actually runs it: prefer `uv run --with pytest` (the documented
+# command — README/AGENTS.md), so a machine whose bare `python3` is a clean interpreter without
+# pytest installed still runs the suite. Fall back to `python3 -m pytest` when uv is absent.
 echo "running pytest…"
-python3 -m pytest -q "$ROOT/tests" || fail "pytest"
+if command -v uv >/dev/null 2>&1; then
+  uv run --with pytest python -m pytest -q "$ROOT/tests" || fail "pytest"
+else
+  python3 -m pytest -q "$ROOT/tests" || fail "pytest"
+fi
 pass "pytest"
 
 echo "smoke OK"
diff --git a/tests/test_config.py b/tests/test_config.py
index 6f5639a..16db1f4 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -75,6 +75,31 @@ def test_load_drops_legacy_scope(tmp_path, monkeypatch):
     assert "scope" not in loaded.data
 
 
+def test_key_sources_track_layer_provenance(tmp_path, monkeypatch):
+    # a key set only in the GLOBAL config maps to the global path; a key the repo sets maps to
+    # the repo path. This is what source_for_key() uses to name the right file in an error.
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / ".config"))
+    _w(config.global_config_path(), "version: 1\nmcp: {enabled: true}\nskills: {enabled: true}\n")
+    repo = tmp_path / "repo"
+    _w(repo / "rig.yaml", "version: 1\nci: {enabled: false}\nskills: {enabled: false}\n")
+    loaded = config.load(repo)
+    assert loaded.key_sources["mcp"] == config.global_config_path()  # global-only key
+    assert loaded.key_sources["ci"] == repo / "rig.yaml"  # repo-only key
+    assert loaded.key_sources["skills"] == repo / "rig.yaml"  # repo OVERRIDES global
+    assert loaded.source_for_key("mcp.items.x") == config.global_config_path()
+
+
+def test_load_strips_scope_from_key_sources(tmp_path, monkeypatch):
+    # the removed legacy `scope` key is dropped from provenance too (not just from data), so a
+    # stray `scope:` never lingers as a tracked source. source_for_key falls back when untracked.
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    repo = tmp_path / "repo"
+    _w(repo / "rig.yaml", "version: 1\nscope: both\nskills: {enabled: false}\n")
+    loaded = config.load(repo)
+    assert "scope" not in loaded.key_sources
+    assert loaded.source_for_key("scope") == loaded.primary_config_path  # fallback, not KeyError
+
+
 def test_validate_rejects_bad_on_conflict():
     with pytest.raises(config.ConfigError, match="on_conflict"):
         config.validate({"version": 1, "defaults": {"on_conflict": "nuke"}})
diff --git a/tests/test_missing_target.py b/tests/test_missing_target.py
index 4737311..9a6c6b8 100644
--- a/tests/test_missing_target.py
+++ b/tests/test_missing_target.py
@@ -147,6 +147,50 @@ def test_scan_flags_dead_script_when_dash_c_is_script_arg(tmp_path):
     assert str(gone) in findings[0].what
 
 
+def test_scan_ignores_module_invocation(tmp_path):
+    # `python3 -m my_hook --out /tmp/result.json` runs a MODULE, not a script FILE — there is no
+    # script path to verify. The later absolute `--out` arg is a runtime output path, not a hook
+    # script, so a module-based hook must NOT be flagged (it would cry wolf on a healthy hook).
+    home = tmp_path / "home"
+    out_file = tmp_path / "run" / "result.json"  # never created — a runtime output path
+    settings = _settings(
+        home,
+        {"PreToolUse": [{"matcher": "*", "hooks": [
+            {"type": "command", "command": f"python3 -m my_hook --out {out_file}"}
+        ]}]},
+    )
+    assert scan_settings_hooks(settings) == []
+
+
+def test_scan_ignores_module_invocation_under_absolute_interpreter(tmp_path):
+    # same, but the interpreter is an absolute path (the macOS form) — still no script to verify.
+    home = tmp_path / "home"
+    gone = tmp_path / "gone" / "x.py"  # an absolute path that doesn't exist, AFTER -m <module>
+    settings = _settings(
+        home,
+        {"PreToolUse": [{"matcher": "*", "hooks": [
+            {"type": "command", "command": f"/opt/homebrew/bin/python3 -m pkg.hook {gone}"}
+        ]}]},
+    )
+    assert scan_settings_hooks(settings) == []
+
+
+def test_scan_flags_dead_script_when_dash_m_is_script_arg(tmp_path):
+    # `-m` AFTER the script is the SCRIPT's own argument, not Python's module flag — a dead
+    # script must still be flagged (mirror of the `-c`-as-script-arg guard).
+    home = tmp_path / "home"
+    gone = tmp_path / "gone" / "hook.py"  # the script itself is missing
+    settings = _settings(
+        home,
+        {"PreToolUse": [{"matcher": "Bash", "hooks": [
+            {"type": "command", "command": f"python3 {gone} -m somearg"}
+        ]}]},
+    )
+    findings = scan_settings_hooks(settings)
+    assert len(findings) == 1
+    assert str(gone) in findings[0].what
+
+
 def test_scan_missing_settings_file_is_empty(tmp_path):
     # no settings.json at all → nothing to scan, no error
     assert scan_settings_hooks(tmp_path / "home" / ".claude" / "settings.json") == []
diff --git a/tests/test_plan_errors.py b/tests/test_plan_errors.py
index e37f776..c5fbbc4 100644
--- a/tests/test_plan_errors.py
+++ b/tests/test_plan_errors.py
@@ -9,9 +9,11 @@
 
 from __future__ import annotations
 
+from pathlib import Path
+
 import pytest
 
-from riglib import errors
+from riglib import config, errors
 from riglib.catalog import Catalog
 from riglib.config import LoadedConfig
 from riglib.plan import build
@@ -23,6 +25,11 @@ def _loaded(tmp_path, data: dict) -> LoadedConfig:
     return LoadedConfig(data=data, repo_root=tmp_path, repo_path=cfg, layers=[f"repo:{cfg}"])
 
 
+def _write_yaml(p: Path, s: str) -> None:
+    p.parent.mkdir(parents=True, exist_ok=True)
+    p.write_text(s, encoding="utf-8")
+
+
 def test_unknown_mcp_item_raises_structured_with_suggestion(tmp_path, fake_agent_tools):
     catalog = Catalog.scan(str(fake_agent_tools))
     # fake catalog has mcp slot "review"; "reviewr" is a typo
@@ -75,3 +82,73 @@ def test_unknown_git_hooks_key_raises_structured(tmp_path, fake_agent_tools):
     with pytest.raises(errors.UnknownItemError) as exc:
         build(loaded, catalog)
     assert "dispatcher" in exc.value.fix
+
+
+def test_unknown_item_from_global_layer_names_global_file(tmp_path, fake_agent_tools, monkeypatch):
+    """An unknown item that came SOLELY from the global config is reported against the GLOBAL
+    file, not the repo's rig.yaml — the loader tracks per-key layer provenance.
+
+    Motivating case: a stale/removed MCP slot lingers in ``~/.config/rig/config.yaml`` while the
+    repo's rig.yaml has its own (valid) blocks. The fix must tell the user to edit the GLOBAL
+    file that actually carries the bad key, not the repo file that doesn't mention it.
+    """
+    xdg = tmp_path / "xdg"
+    gcfg = xdg / "rig" / "config.yaml"
+    _write_yaml(gcfg, f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+             "mcp: {items: {reviewr: {enabled: true}}}\n")
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg))
+    repo = tmp_path / "repo"
+    rcfg = repo / "rig.yaml"
+    # repo declares OTHER (valid) blocks — it must NOT be blamed for the global's bad mcp key.
+    _write_yaml(rcfg, "version: 1\nskills: {enabled: false}\n")
+
+    loaded = config.load(repo)
+    catalog = Catalog.scan(str(fake_agent_tools))
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    e = exc.value
+    assert "reviewr" in e.what
+    assert str(gcfg) in e.why  # provenance: names the GLOBAL file that carries the bad key
+    assert str(gcfg) in e.fix
+    assert str(rcfg) not in (e.why + e.fix)  # the repo file is NOT blamed
+
+
+def test_unknown_item_overridden_in_repo_names_repo_file(tmp_path, fake_agent_tools, monkeypatch):
+    """When the repo layer ALSO sets the offending top-level key, the repo file wins provenance
+    (the repo overrides the global layer) — so the error names the repo file the user edits."""
+    xdg = tmp_path / "xdg"
+    gcfg = xdg / "rig" / "config.yaml"
+    _write_yaml(gcfg, f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+             "mcp: {items: {review: {enabled: true}}}\n")
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg))
+    repo = tmp_path / "repo"
+    rcfg = repo / "rig.yaml"
+    _write_yaml(rcfg, "version: 1\nmcp: {items: {reviewr: {enabled: true}}}\n")  # repo's own bad mcp key
+
+    loaded = config.load(repo)
+    catalog = Catalog.scan(str(fake_agent_tools))
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    e = exc.value
+    assert "reviewr" in e.what
+    assert str(rcfg) in e.fix  # the repo set mcp → repo file is the source to edit
+
+
+def test_unknown_item_from_explicit_config_layer_names_that_file(tmp_path, fake_agent_tools, monkeypatch):
+    """A `--config P` (explicit) layer carries provenance too: an unknown key in P is reported
+    against P, not the repo's rig.yaml. Locks in the explicit branch of the cascade so a new
+    layer can't silently skip key-source tracking."""
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    repo = tmp_path / "repo"
+    _write_yaml(repo / "rig.yaml", "version: 1\nskills: {enabled: false}\n")  # ignored: --config replaces it
+    explicit = tmp_path / "explicit.yaml"
+    _write_yaml(explicit, f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+                "mcp: {items: {reviewr: {enabled: true}}}\n")
+
+    loaded = config.load(repo, explicit_config=explicit)
+    catalog = Catalog.scan(str(fake_agent_tools))
+    with pytest.raises(errors.UnknownItemError) as exc:
+        build(loaded, catalog)
+    e = exc.value
+    assert "reviewr" in e.what
+    assert str(explicit) in e.fix  # the explicit --config file is the source to edit
diff --git a/tests/test_status_layers.py b/tests/test_status_layers.py
index 69c640d..e8426a3 100644
--- a/tests/test_status_layers.py
+++ b/tests/test_status_layers.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import json
 import subprocess
 from pathlib import Path
 
@@ -143,8 +144,6 @@ def test_status_groups_drift_by_layer_with_provenance(tmp_path, capsys, fake_age
 def test_status_global_layer_drift_names_global_config(tmp_path, capsys, fake_agent_tools, monkeypatch):
     # an isolated HOME with an undeclared MCP server on disk → a GLOBAL-layer extra, and a
     # global config file so its provenance can be named.
-    import json
-
     home = tmp_path / "home"
     (home / ".claude" / "mcp").mkdir(parents=True)
     (home / ".claude" / "mcp" / "mcp.json").write_text(
@@ -213,6 +212,48 @@ def test_help_documents_exit_codes(capsys):
     assert "6" in out and "not a git repository" in out
 
 
+# ── exit-code precedence: a dead target outranks drift ────────────────────────────
+def test_status_missing_target_outranks_drift(tmp_path, capsys, fake_agent_tools, monkeypatch):
+    """When `rig status` finds BOTH a dead hook target AND config↔disk drift, it must exit with
+    EXIT_MISSING_TARGET (5), not EXIT_DRIFT (3) — the missing target must not be MASKED by drift.
+
+    A dead hook script fails at runtime (a generic "PreToolUse error"), so it's the more urgent,
+    more actionable class. Both findings are still PRINTED; only the single-valued exit code
+    reflects the higher-severity class so a CI script keying on the stable contract sees it.
+    """
+    home = tmp_path / "home"
+    gone = tmp_path / "gone-hook.py"  # referenced by a hook but never created
+    settings = home / ".claude" / "settings.json"
+    settings.parent.mkdir(parents=True, exist_ok=True)
+    settings.write_text(
+        json.dumps({"hooks": {"PreToolUse": [
+            {"matcher": "Bash", "hooks": [{"type": "command", "command": f"python3 {gone}"}]}
+        ]}}),
+        encoding="utf-8",
+    )
+    monkeypatch.setenv("HOME", str(home))
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))
+    repo = _git_repo(tmp_path / "repo")
+    # REPO-layer drift: an undeclared CI workflow on disk while ci is enabled-but-empty.
+    (repo / ".github" / "workflows").mkdir(parents=True)
+    (repo / ".github" / "workflows" / "rogue.yml").write_text("name: rogue\n", encoding="utf-8")
+    (repo / "rig.yaml").write_text(
+        f"version: 1\nagent_tools_source: {fake_agent_tools}\n"
+        "skills: {enabled: false}\nagent_hooks: {enabled: false}\nmcp: {enabled: false}\n"
+        "git_hooks: {dispatcher: {enabled: false}}\nci: {enabled: true, all: false}\n"
+        "agents_md: {enabled: false}\n",
+        encoding="utf-8",
+    )
+    rc = main(["status", "-C", str(repo)])
+    out = capsys.readouterr().out
+    # the missing-target class wins the exit code even though drift is ALSO present
+    assert rc == errors.EXIT_MISSING_TARGET
+    # both problems are surfaced to the user
+    assert str(gone) in out  # the dead hook target is printed
+    assert "rogue" in out  # the drift is printed too
+    assert "precedence" in out.lower()  # the exit-code precedence is explained
+
+
 # ── a clean in-sync repo still exits 0 ────────────────────────────────────────────
 def test_status_clean_repo_exits_zero(tmp_path, capsys, fake_agent_tools, monkeypatch):
     monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "no-global"))