From 2d60131e7a106c0e7f84a446e1732b95795064da Mon Sep 17 00:00:00 2001 From: "CANFIELD.BRANDIN.W.1078680373.E" <12603953+bcanfield@users.noreply.github.com> Date: Sun, 7 Jun 2026 14:13:29 -0400 Subject: [PATCH] Add debt-ops (bcanfield/agentic-tech-debt) Co-Authored-By: Claude Opus 4.8 (1M context) --- .agents/plugins/marketplace.json | 15 + README.md | 1 + plugins.json | 12 +- .../.codex-plugin/plugin.json | 30 ++ plugins/bcanfield/agentic-tech-debt/LICENSE | 21 + plugins/bcanfield/agentic-tech-debt/README.md | 40 ++ .../agentic-tech-debt/assets/icon.svg | 9 + .../bcanfield/agentic-tech-debt/hooks/drop.py | 202 +++++++++ .../agentic-tech-debt/hooks/feedback.py | 382 ++++++++++++++++ .../agentic-tech-debt/hooks/hooks.json | 48 ++ .../agentic-tech-debt/hooks/session-start.py | 374 ++++++++++++++++ .../bcanfield/agentic-tech-debt/hooks/stop.py | 423 ++++++++++++++++++ .../agentic-tech-debt/skills/add/SKILL.md | 53 +++ .../skills/add/scripts/register.py | 191 ++++++++ .../agentic-tech-debt/skills/init/SKILL.md | 80 ++++ .../skills/init/agents/openai.yaml | 3 + .../agentic-tech-debt/skills/metrics/SKILL.md | 80 ++++ .../agentic-tech-debt/skills/review/SKILL.md | 57 +++ .../skills/review/scripts/review.py | 358 +++++++++++++++ scripts/generate_plugins_json.py | 3 + 20 files changed, 2381 insertions(+), 1 deletion(-) create mode 100644 plugins/bcanfield/agentic-tech-debt/.codex-plugin/plugin.json create mode 100644 plugins/bcanfield/agentic-tech-debt/LICENSE create mode 100644 plugins/bcanfield/agentic-tech-debt/README.md create mode 100644 plugins/bcanfield/agentic-tech-debt/assets/icon.svg create mode 100644 plugins/bcanfield/agentic-tech-debt/hooks/drop.py create mode 100644 plugins/bcanfield/agentic-tech-debt/hooks/feedback.py create mode 100644 plugins/bcanfield/agentic-tech-debt/hooks/hooks.json create mode 100644 plugins/bcanfield/agentic-tech-debt/hooks/session-start.py create mode 100644 plugins/bcanfield/agentic-tech-debt/hooks/stop.py create mode 100644 
plugins/bcanfield/agentic-tech-debt/skills/add/SKILL.md create mode 100644 plugins/bcanfield/agentic-tech-debt/skills/add/scripts/register.py create mode 100644 plugins/bcanfield/agentic-tech-debt/skills/init/SKILL.md create mode 100644 plugins/bcanfield/agentic-tech-debt/skills/init/agents/openai.yaml create mode 100644 plugins/bcanfield/agentic-tech-debt/skills/metrics/SKILL.md create mode 100644 plugins/bcanfield/agentic-tech-debt/skills/review/SKILL.md create mode 100644 plugins/bcanfield/agentic-tech-debt/skills/review/scripts/review.py diff --git a/.agents/plugins/marketplace.json b/.agents/plugins/marketplace.json index 567f953b..9378f14d 100644 --- a/.agents/plugins/marketplace.json +++ b/.agents/plugins/marketplace.json @@ -297,6 +297,21 @@ "category": "Development & Workflow", "description": "Budgeted `rg`/`grep` replacement for Codex that narrows broad searches before they waste model context." }, + { + "name": "debt-ops", + "displayName": "debt-ops", + "source": { + "source": "local", + "path": "./plugins/bcanfield/agentic-tech-debt" + }, + "policy": { + "installation": "AVAILABLE", + "authentication": "ON_INSTALL" + }, + "category": "Development & Workflow", + "description": "Catches AI-introduced tech debt at write-time: hooks log every deferral to a registry in your repo and a review skill ranks paydown by file churn.", + "icon": "./plugins/bcanfield/agentic-tech-debt/assets/icon.svg" + }, { "name": "dev-skills", "displayName": "Dev Skills", diff --git a/README.md b/README.md index eda58e0c..d3478d1a 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,7 @@ Third-party plugins built by the community. [PRs welcome](#contributing)! - [Codex Reviewer](https://github.com/schuettc/codex-reviewer) - Second-pass review of Claude-driven plans and implementations. - [Codex rg Guard](https://github.com/Rycen7822/codex-rg-guard) - Budgeted `rg`/`grep` replacement for Codex that narrows broad searches before they waste model context. 
- [Commit Narrator](./plugins/mturac/commit-narrator) - Generate semantic commit message from staged diff, including the _why_. +- [debt-ops](https://github.com/bcanfield/agentic-tech-debt) - Catches AI-introduced tech debt at write-time: hooks log every deferral to a registry in your repo and a review skill ranks paydown by file churn. - [Deps Doctor](./plugins/mturac/deps-doctor) - Multi-ecosystem dependency audit (npm, pip, cargo, go) in one report. - [Dev Skills](https://github.com/Jason-chen-coder/dev-skills) - Team workflow skills for specs, plans, TDD, debugging, verification, review, branch finishing, and design context. - [Development Skills](https://github.com/reidemeister94/development-skills) - Three-tier triage (PASS_THROUGH / LIGHT / FULL 4-phase) development workflow for Codex and Claude Code with language auto-detection (Python, Java, TypeScript, Swift, frontend) and a staff-reviewer subagent for fresh-eyes review on every change. diff --git a/plugins.json b/plugins.json index 4de31e97..bf062285 100644 --- a/plugins.json +++ b/plugins.json @@ -3,7 +3,7 @@ "name": "awesome-codex-plugins", "version": "1.0.0", "last_updated": "2026-06-07", - "total": 102, + "total": 103, "categories": [ "Development & Workflow", "Tools & Integrations" @@ -209,6 +209,16 @@ "source": "awesome-codex-plugins", "install_url": "https://raw.githubusercontent.com/Rycen7822/codex-rg-guard/HEAD/.codex-plugin/plugin.json" }, + { + "name": "debt-ops", + "url": "https://github.com/bcanfield/agentic-tech-debt", + "owner": "bcanfield", + "repo": "agentic-tech-debt", + "description": "Catches AI-introduced tech debt at write-time: hooks log every deferral to a registry in your repo and a review skill ranks paydown by file churn.", + "category": "Development & Workflow", + "source": "awesome-codex-plugins", + "install_url": "https://raw.githubusercontent.com/bcanfield/agentic-tech-debt/HEAD/codex/.codex-plugin/plugin.json" + }, { "name": "Dev Skills", "url": 
"https://github.com/Jason-chen-coder/dev-skills", diff --git a/plugins/bcanfield/agentic-tech-debt/.codex-plugin/plugin.json b/plugins/bcanfield/agentic-tech-debt/.codex-plugin/plugin.json new file mode 100644 index 00000000..5d7f99cc --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/.codex-plugin/plugin.json @@ -0,0 +1,30 @@ +{ + "name": "debt-ops", + "version": "0.9.1", + "description": "Catches AI-introduced tech debt at write-time. Every \"I'll fix this later,\" every shortcut, every punt your AI agent writes gets caught.", + "author": { + "name": "Brandin Canfield", + "email": "brandincanfield@gmail.com" + }, + "homepage": "https://github.com/bcanfield/agentic-tech-debt", + "repository": "https://github.com/bcanfield/agentic-tech-debt", + "license": "MIT", + "keywords": [ + "tech-debt", + "code-health", + "hotspots", + "adr", + "productivity" + ], + "skills": "./skills/", + "hooks": "./hooks/hooks.json", + "interface": { + "displayName": "debt-ops", + "shortDescription": "Catches AI-introduced tech debt at write-time", + "longDescription": "Every \"I'll fix this later,\" shortcut, and punt your AI agent writes lands in a folder in your repo. Architectural calls get a short ADR. When you're ready to clean up, a review skill ranks the backlog by how often each file changes, so you pay down the debt that actually hurts first. 
Plain Python, fully local, no network calls.", + "developerName": "Brandin Canfield", + "category": "Coding", + "composerIcon": "./assets/icon.svg", + "brandColor": "#d97757" + } +} diff --git a/plugins/bcanfield/agentic-tech-debt/LICENSE b/plugins/bcanfield/agentic-tech-debt/LICENSE new file mode 100644 index 00000000..f4a19e27 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Brandin Canfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/plugins/bcanfield/agentic-tech-debt/README.md b/plugins/bcanfield/agentic-tech-debt/README.md new file mode 100644 index 00000000..fda34b8e --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/README.md @@ -0,0 +1,40 @@ +# debt-ops — Codex adapter + +The [debt-ops](../README.md) disciplines, packaged as a [Codex](https://developers.openai.com/codex) plugin. Behavior matches the [Claude Code adapter](../claude-code); this README covers only what's Codex-specific. 
+ +## Install + +Register the marketplace from your shell, then install from the plugin browser: + +```bash +codex plugin marketplace add bcanfield/agentic-tech-debt +``` + +Open `/plugins` inside Codex and install **debt-ops**. Working *inside* this repo, Codex auto-discovers the bundled marketplace at `.agents/plugins/marketplace.json` once the project is trusted — no add step needed. Requires a git repo and Python 3.10+ (stdlib only). + +## What's wired + +Layout follows Codex's plugin conventions: hooks bundle their scripts under `hooks/` (referenced via `${PLUGIN_ROOT}/hooks/…`), and each skill bundles its helper under its own `scripts/` (referenced by a relative path, the documented skill convention). + +| Codex primitive | File | Role | +| --- | --- | --- | +| `SessionStart` hook | `hooks/session-start.py` | Injects the disciplines + detects/caches quality commands and ADR/registry dirs | +| `PostToolUse` hook (`apply_patch\|Edit\|Write`) | `hooks/feedback.py` | Runs quality commands on edited files under a 3s/command budget | +| `Stop` hook | `hooks/stop.py` | TODO-sniff safety net — nudges when deferrals went unregistered | +| `UserPromptSubmit` hook | `hooks/drop.py` | Handles `drop A` / `drop A,C` / `drop all` shorthand | +| `$add` skill | `skills/add/` (+ `scripts/register.py`) | Registers a debt entry, assigns a batch letter | +| `$review` skill | `skills/review/` (+ `scripts/review.py`) | Audits + ranks the registry; walks paydown | +| `$init` skill *(explicit-only)* | `skills/init/` | Writes the `## Tech debt operations` charter into `AGENTS.md` | +| `$metrics` skill | `skills/metrics/` | Read-only health summary from the metrics log | + +## Codex-specific notes + +- **Charter file is `AGENTS.md`**, not `CLAUDE.md` — `$init` writes the managed `## Tech debt operations` section there, and the hooks read quality commands from it. 
+- **Edits are `apply_patch`.** The feedback hook parses the V4A patch envelope (`*** Add/Update File:`, `*** Move to:`) to learn which files changed, since there's no `tool_input.file_path`. +- **Cache** lives at `~/.cache/debt-ops/cache/{repo-hash}/` (override with `DEBT_OPS_CACHE`) so the hooks and skill Bash always agree on one path ([ADR 0012](../docs/adr/0012-codex-deterministic-cache-base.md)). +- **Skill invocation** is `$add` / `$review` / `$init` / `$metrics` (or the `/skills` picker). `$init` is explicit-only (`skills/init/agents/openai.yaml`). +- **Debug:** set `DEBT_OPS_DEBUG=1` to log every hook fire to `{cache-dir}/debug.log`. + +## Research + +Disciplines map to the [nine tool-agnostic pillars](../docs/tech-debt-pillars.md); the [Claude Code mapping](../docs/tech-debt-plugin-plan.md) explains why each hook exists. Same evidence base, different agent. diff --git a/plugins/bcanfield/agentic-tech-debt/assets/icon.svg b/plugins/bcanfield/agentic-tech-debt/assets/icon.svg new file mode 100644 index 00000000..4fc1a657 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/assets/icon.svg @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/plugins/bcanfield/agentic-tech-debt/hooks/drop.py b/plugins/bcanfield/agentic-tech-debt/hooks/drop.py new file mode 100644 index 00000000..7fb5744f --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/hooks/drop.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +"""debt-ops UserPromptSubmit hook: handle 'drop A,B' / 'drop all' shorthand. + +Codex adapter. When the user types `drop A`, `drop A,C`, or `drop all` as the +entire prompt, this hook deletes the matching entries from the most recent +batch and blocks the prompt with a one-line confirmation — no model turn +consumed. + +Other "drop" forms ("drop it", "drop foo-slug") aren't matched and fall through +to the model's normal handling per the add skill. 
import hashlib
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path

# Strict: the whole prompt must be `drop` followed by `all` OR by 1-3 letter
# tokens separated by commas/spaces. Trailing period optional. Case-insensitive.
# NOTE: "drop it" DOES match (`it` is a valid 1-3 letter token); main() guards
# against that one case explicitly so the model can handle it conversationally.
DROP_RE = re.compile(
    r"^\s*drop\s+(all|[a-z]{1,3}(?:[\s,]+[a-z]{1,3})*)\s*\.?\s*$",
    re.IGNORECASE,
)

DEFAULT_REGISTRY_DIR = "docs/debt"


# Single deterministic cache base so hook subprocesses and skill Bash (which
# never sees PLUGIN_DATA) resolve the same path. Override with DEBT_OPS_CACHE.
def cache_base():
    override = os.environ.get("DEBT_OPS_CACHE")
    return Path(override) if override else (Path.home() / ".cache" / "debt-ops")


# Read the cached `registry-dir` file from cache_dir and return its stripped
# content; fall back to DEFAULT_REGISTRY_DIR when it is missing, unreadable,
# or empty.
def read_registry_dir(cache_dir):
    f = cache_dir / "registry-dir"
    if f.is_file():
        try:
            val = f.read_text(encoding="utf-8").strip()
            if val:
                return val
        except OSError:
            pass
    return DEFAULT_REGISTRY_DIR


# Repo root as a Path, or None when git is missing, fails, prints nothing, or
# does not answer within 2 seconds.
def git_toplevel():
    try:
        out = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True, timeout=2,
        )
        s = out.stdout.strip()
        return Path(s) if s else None
    except (subprocess.SubprocessError, FileNotFoundError):
        return None


# First 12 hex chars of the SHA-1 of the repo path; names the cache subdirectory.
def repo_hash(toplevel):
    return hashlib.sha1(str(toplevel).encode()).hexdigest()[:12]


# Write one JSON line to stdout that blocks the prompt and carries `reason`
# both as the block reason and as additional context.
def emit_block(reason):
    payload = {
        "decision": "block",
        "hookSpecificOutput": {
            "hookEventName": "UserPromptSubmit",
            "additionalContext": reason,
        },
        "reason": reason,
    }
    sys.stdout.write(json.dumps(payload) + "\n")


# Append one compact JSON line (payload plus a UTC `ts`) to metrics.jsonl.
# No-op when cache_dir does not exist; write failures are ignored on purpose
# (metrics are best-effort). The caller's dict is left untouched.
def log_metric(cache_dir, payload):
    if not cache_dir.is_dir():
        return
    record = dict(payload)
    record["ts"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    try:
        with (cache_dir / "metrics.jsonl").open("a", encoding="utf-8") as f:
            f.write(json.dumps(record, separators=(",", ":")) + "\n")
    except OSError:
        pass


def read_batch(path):
    """Parse `LETTER\tslug\tfname` rows into a dict keyed by uppercase letter.

    Returns {} when the file is missing or unreadable. Rows with fewer than
    three tab-separated fields or an empty letter are skipped; a repeated
    letter keeps the last row.
    """
    if not path.is_file():
        return {}
    try:
        text = path.read_text(encoding="utf-8")
    except OSError:
        return {}
    mapping = {}
    for ln in text.splitlines():
        parts = ln.split("\t")
        if len(parts) >= 3 and parts[0].strip():
            mapping[parts[0].strip().upper()] = (parts[1].strip(), parts[2].strip())
    return mapping


# Persist `mapping` ({LETTER: (slug, fname)}) as tab-separated rows; an empty
# mapping removes the file instead. I/O errors are ignored (best-effort cache).
def write_batch(path, mapping):
    try:
        if not mapping:
            path.unlink(missing_ok=True)
            return
        lines = [f"{L}\t{slug}\t{fname}" for L, (slug, fname) in mapping.items()]
        path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    except OSError:
        pass


# True when `fname` is a non-empty relative path with no `..` component, i.e.
# joining it onto the registry dir cannot point outside that directory.
def is_safe_entry_name(fname):
    if not fname:
        return False
    p = Path(fname)
    return not p.is_absolute() and ".." not in p.parts


# Delete the registry files for `letters` (repeats are collapsed, order kept).
# Returns (dropped_letters, dropped_slugs, missed_letters). A letter is missed
# when it is not in `mapping`, its file name would escape `registry_dir`, or
# the unlink fails. An already-absent file still counts as dropped.
def drop_entries(letters, mapping, registry_dir):
    dropped, slugs, missed = [], [], []
    for letter in dict.fromkeys(letters):
        entry = mapping.get(letter)
        if entry is None:
            missed.append(letter)
            continue
        slug, fname = entry
        if not is_safe_entry_name(fname):
            missed.append(letter)
            continue
        try:
            (registry_dir / fname).unlink(missing_ok=True)
        except OSError:
            missed.append(letter)
            continue
        dropped.append(letter)
        slugs.append(slug)
    return dropped, slugs, missed


# Hook entry point. Reads the UserPromptSubmit JSON from stdin; when the prompt
# is a `drop ...` shorthand and at least one entry was deleted, emits a block
# decision with a confirmation. Always returns 0 so the prompt is never
# blocked by a hook failure; every early return lets the prompt through.
def main():
    try:
        raw = sys.stdin.read()
    except OSError:
        return 0
    if not raw:
        return 0
    try:
        data = json.loads(raw)
    except (json.JSONDecodeError, ValueError):
        return 0
    # Valid JSON that is not an object (or a non-string prompt) is not ours.
    if not isinstance(data, dict):
        return 0
    prompt = data.get("prompt")
    if not isinstance(prompt, str):
        return 0
    prompt = prompt.strip()
    if not prompt:
        return 0

    m = DROP_RE.match(prompt)
    if not m:
        return 0

    # Guard: don't intercept "drop it" — let the model handle it conversationally.
    tokens = re.split(r"[\s,]+", m.group(1).strip().lower())
    if tokens == ["it"]:
        return 0

    toplevel = git_toplevel()
    if toplevel is None:
        return 0

    cache_dir = cache_base() / "cache" / repo_hash(toplevel)

    # Look in both files: current-turn.txt (just-finished turn, not yet rotated)
    # and last-batch.txt (turn before that). Merge with current-turn winning.
    current = read_batch(cache_dir / "current-turn.txt")
    last = read_batch(cache_dir / "last-batch.txt")
    mapping = {**last, **current}
    if not mapping:
        return 0

    if tokens == ["all"]:
        letters = list(mapping.keys())
    else:
        letters = [t.upper() for t in tokens]

    registry_dir = toplevel / read_registry_dir(cache_dir)
    dropped, deleted, not_found = drop_entries(letters, mapping, registry_dir)

    if not deleted:
        # Nothing actually deleted — pass through to the model so they can ask why.
        return 0

    # Remove each dropped letter from whichever source file held it.
    for letter in dropped:
        current.pop(letter, None)
        last.pop(letter, None)
    write_batch(cache_dir / "current-turn.txt", current)
    write_batch(cache_dir / "last-batch.txt", last)

    log_metric(cache_dir, {"event": "drop", "slugs": deleted, "missed": not_found})

    parts = [f"Dropped: {', '.join(deleted)}."]
    if not_found:
        parts.append(f"Not in batch: {', '.join(not_found)}.")
    emit_block(" ".join(parts))
    return 0


if __name__ == "__main__":
    try:
        sys.exit(main())
    except Exception:
        # A hook bug must never block the user's prompt — exit clean.
        sys.exit(0)
import concurrent.futures
import hashlib
import json
import os
import re
import shlex
import subprocess
import sys
import time
from pathlib import Path

# NOTE(review): both markers are empty strings in this copy of the patch —
# they look like HTML-comment markers lost in extraction. With "" every
# `MARKER_OPEN in text` test is true; confirm the values against upstream.
MARKER_OPEN = ""
MARKER_CLOSE = ""
PER_COMMAND_TIMEOUT = 3
SNIPPET_LEN = 200
DEBUG_ENV = "DEBT_OPS_DEBUG"
SKIP_DIRS = {".git", "node_modules", "target", "dist", "build"}
TEST_PATTERNS = (
    re.compile(r"^test_"),
    re.compile(r"_test\."),
    re.compile(r"\.test\."),
    re.compile(r"\.spec\."),
)
HEADING_RE = re.compile(r"^##\s")
# V4A patch envelope: files that exist after the edit (Delete targets are gone).
PATCH_ADD_UPDATE_RE = re.compile(r"^\*\*\* (?:Add|Update) File: (.+?)\s*$")
PATCH_MOVE_RE = re.compile(r"^\*\*\* Move to: (.+?)\s*$")


# Single deterministic cache base so hook subprocesses and skill Bash (which
# never sees PLUGIN_DATA) resolve the same path. Override with DEBT_OPS_CACHE.
def cache_base():
    override = os.environ.get("DEBT_OPS_CACHE")
    return Path(override) if override else (Path.home() / ".cache" / "debt-ops")


# Wraps text in the JSON envelope Codex expects from a PostToolUse hook and
# writes it to stdout as a single line.
def emit(context):
    payload = {
        "hookSpecificOutput": {
            "hookEventName": "PostToolUse",
            "additionalContext": context,
        }
    }
    sys.stdout.write(json.dumps(payload) + "\n")


# Repo root, or None if we're not in a git repo (or git is missing/hung).
# Bounded to 2 seconds so a stuck git can't burn the hook's whole time budget;
# SubprocessError covers both a non-zero exit and the timeout.
def git_toplevel():
    try:
        out = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True, timeout=2,
        )
        s = out.stdout.strip()
        return Path(s) if s else None
    except (subprocess.SubprocessError, FileNotFoundError):
        return None


# Short stable hash of the repo path — used as the cache subdirectory name.
def repo_hash(toplevel):
    return hashlib.sha1(str(toplevel).encode()).hexdigest()[:12]
# Pull the V4A patch text out of an apply_patch tool_input. Codex may place it
# in any string field (or hand tool_input as a raw string), so we sniff for the
# envelope markers rather than guessing the field name. Returns "" when no
# patch-looking string is found (including for None or other types).
def patch_text(tool_input):
    if isinstance(tool_input, str):
        return tool_input if "*** " in tool_input else ""
    if isinstance(tool_input, dict):
        for v in tool_input.values():
            if isinstance(v, str) and (
                "*** Begin Patch" in v or "*** Update File:" in v or "*** Add File:" in v
            ):
                return v
    return ""


# The just-edited file path(s) from the hook's stdin JSON. Edit/Write expose
# file_path directly; apply_patch carries them inside the patch envelope.
# Returns a de-duplicated list in first-seen order, or [] when stdin is empty,
# unreadable, not JSON, not a JSON object, or names no files.
def changed_files_from_stdin():
    try:
        raw = sys.stdin.read()
    except OSError:
        return []
    if not raw:
        return []
    try:
        data = json.loads(raw)
    except (json.JSONDecodeError, ValueError):
        return []
    # Valid JSON that isn't an object has no tool_input; treat it as "no files"
    # instead of letting an AttributeError abort the whole hook.
    if not isinstance(data, dict):
        return []
    ti = data.get("tool_input")
    if isinstance(ti, dict):
        fp = ti.get("file_path")
        if isinstance(fp, str) and fp:
            return [fp]
    patch = patch_text(ti)
    if not patch:
        return []
    files = []
    for line in patch.splitlines():
        m = PATCH_ADD_UPDATE_RE.match(line)
        if m:
            files.append(m.group(1))
            continue
        m = PATCH_MOVE_RE.match(line)
        if m:
            # Rename: the new path is what now exists; it supersedes the
            # Update-File source captured on the preceding line.
            if files:
                files[-1] = m.group(1)
            else:
                files.append(m.group(1))
    # De-dup, preserve order (dict keys keep insertion order).
    return list(dict.fromkeys(files))
# Loads quality commands. AGENTS.md marker block wins if present; otherwise the cached list.
# Returns the raw multi-line text ("" when neither source is available). The
# marker block ends at the close marker or at the next `## ` heading.
def read_commands(toplevel, cache_dir):
    agents_md = toplevel / "AGENTS.md"
    if agents_md.is_file():
        try:
            text = agents_md.read_text(encoding="utf-8", errors="replace")
        except OSError:
            text = ""
        if MARKER_OPEN in text:
            block = []
            collecting = False
            for line in text.splitlines():
                if not collecting:
                    if MARKER_OPEN in line:
                        collecting = True
                    continue
                if MARKER_CLOSE in line or HEADING_RE.match(line):
                    break
                block.append(line)
            return "\n".join(block)
    list_file = cache_dir / "feedback.list"
    if list_file.is_file():
        try:
            return list_file.read_text(encoding="utf-8")
        except OSError:
            return ""
    return ""


# Debug log path — only when DEBT_OPS_DEBUG is set (non-empty) in the environment.
def debug_path(cache_dir):
    if not os.environ.get(DEBUG_ENV):
        return None
    return cache_dir / "debug.log"


# Appends one tab-separated line to the debug log; silently no-ops on failure
# or when `path` is None (debugging disabled).
def dlog(path, *fields):
    if path is None:
        return
    ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    try:
        with path.open("a", encoding="utf-8") as f:
            f.write("\t".join((ts, *fields)) + "\n")
    except OSError:
        pass


# Runs one quality command under the per-command timeout.
# Returns (line, status, snippet) where status is one of PASS, FAIL, TIMEOUT,
# SKIP_NO_FILE. The snippet is the first SNIPPET_LEN characters of
# stdout+stderr (or an error description) on FAIL and "" otherwise.
def run_one(line, changed_files, env):
    has_var = "$CHANGED_FILES" in line or "${CHANGED_FILES}" in line
    if has_var and not changed_files:
        return line, "SKIP_NO_FILE", ""

    try:
        args = shlex.split(line)
    except ValueError as e:
        return line, "FAIL", f"parse error: {e}"
    if not args:
        return line, "SKIP_NO_FILE", ""

    # Only $CHANGED_FILES is expanded; other shell features (pipes, &&, globs)
    # are not, so we don't need bash on PATH. Wrap in `bash -c '...'` to opt in.
    # A bare $CHANGED_FILES token becomes one argument per file (tools like
    # pytest/eslint need separate argv entries); embedded uses get the joined string.
    if changed_files:
        joined = " ".join(changed_files)
        expanded = []
        for tok in args:
            if tok in ("$CHANGED_FILES", "${CHANGED_FILES}"):
                expanded.extend(changed_files)
            else:
                expanded.append(
                    tok.replace("${CHANGED_FILES}", joined).replace("$CHANGED_FILES", joined)
                )
        args = expanded

    try:
        # errors="replace": tool output in another encoding must not raise
        # UnicodeDecodeError and take down every other command's result.
        result = subprocess.run(
            args,
            capture_output=True, text=True, errors="replace",
            timeout=PER_COMMAND_TIMEOUT, env=env,
        )
    except subprocess.TimeoutExpired:
        return line, "TIMEOUT", ""
    except FileNotFoundError:
        return line, "FAIL", f"command not found: {args[0]}"
    except (OSError, ValueError) as e:
        # ValueError: e.g. an embedded NUL byte in an argument.
        return line, "FAIL", str(e)[:SNIPPET_LEN]
    if result.returncode == 0:
        return line, "PASS", ""
    snippet = ((result.stdout or "") + (result.stderr or ""))[:SNIPPET_LEN]
    return line, "FAIL", snippet


# How many .md entries currently live in the (cached or default) registry dir.
# 0 when the directory is missing or cannot be listed.
def registry_count(toplevel, registry_dir):
    reg = toplevel / registry_dir
    if not reg.is_dir():
        return 0
    try:
        return sum(1 for p in reg.iterdir() if p.is_file() and p.suffix == ".md")
    except OSError:
        return 0


DEFAULT_REGISTRY_DIR = "docs/debt"


# Read the cached `registry-dir` file from cache_dir and return its stripped
# content; default if missing, unreadable, or empty.
def read_registry_dir(cache_dir):
    f = cache_dir / "registry-dir"
    if f.is_file():
        try:
            val = f.read_text(encoding="utf-8").strip()
            if val:
                return val
        except OSError:
            pass
    return DEFAULT_REGISTRY_DIR


# Appends one JSON line (payload plus a UTC `ts`) to metrics.jsonl in the cache
# dir; silent no-op when the dir is absent or the write fails. Works on a copy
# so the caller's dict is not modified.
def log_metric(cache_dir, payload):
    if not cache_dir.is_dir():
        return
    record = dict(payload)
    record["ts"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    try:
        with (cache_dir / "metrics.jsonl").open("a", encoding="utf-8") as f:
            f.write(json.dumps(record, separators=(",", ":")) + "\n")
    except OSError:
        pass
# Counts test-shaped filenames anywhere in the repo (test_*, *_test.*, *.test.*, *.spec.*),
# pruning the directories named in SKIP_DIRS. Returns None if the walk raises OSError.
def test_count(toplevel):
    try:
        total = 0
        for _root, subdirs, filenames in os.walk(toplevel):
            # In-place slice assignment is what tells os.walk to skip these dirs.
            subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]
            total += sum(
                1
                for name in filenames
                if any(pattern.search(name) for pattern in TEST_PATTERNS)
            )
        return total
    except OSError:
        return None


# PostToolUse entry point: record the edit, run the configured quality commands
# in parallel, and emit a summary (plus a warning when the number of test files
# dropped since the last recorded count). Always returns 0.
def main():
    # Idle out cleanly if we're not in a git repo.
    repo_root = git_toplevel()
    if repo_root is None:
        return 0

    cache_dir = cache_base() / "cache" / repo_hash(repo_root)

    changed_files = changed_files_from_stdin()
    changed = " ".join(changed_files)
    env = os.environ.copy()
    env["CHANGED_FILES"] = changed

    # One line per edit — the dogfood tripwire signal (edits vs registry growth).
    entry_total = registry_count(repo_root, read_registry_dir(cache_dir))
    log_metric(cache_dir, {
        "event": "edit",
        "file": changed,
        "registry_count": entry_total,
    })

    # Keep non-blank, non-comment lines; nothing left means nothing to run.
    commands = []
    for candidate in read_commands(repo_root, cache_dir).splitlines():
        if not candidate.strip():
            continue
        if candidate.lstrip().startswith("#"):
            continue
        commands.append(candidate.rstrip())
    if not commands:
        return 0

    dpath = debug_path(cache_dir)
    dlog(dpath, "FIRE", f"changed={changed or ''}", f"cmds={len(commands)}")

    def run_and_log(command):
        started = time.monotonic()
        outcome = run_one(command, changed_files, env)
        dlog(dpath, outcome[1], f"{time.monotonic() - started:.2f}s", outcome[0])
        return outcome

    # One worker per command; the per-command 3s timeout enforces the budget.
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(commands)) as pool:
        results = list(pool.map(run_and_log, commands))

    # Log overall result so the $metrics skill can detect FAIL → PASS self-corrections.
    statuses = [status for _, status, _ in results]
    agg = "fail" if ("FAIL" in statuses or "TIMEOUT" in statuses) else "pass"
    log_metric(cache_dir, {"event": "feedback", "file": changed, "result": agg})

    # One tab-separated row per command; the snippet is shown only for a FAIL that has one.
    summary = "\n".join(
        f"{cmd}\tFAIL\t{snippet}" if status == "FAIL" and snippet else f"{cmd}\t{status}"
        for cmd, status, snippet in results
    )

    # Warn if this edit dropped the test-file count (Beck's "agent deletes tests" anti-pattern).
    warn = ""
    baseline = cache_dir / "test-count"
    now = test_count(repo_root)
    if now is not None:
        if not baseline.is_file():
            # Seed the baseline on first run instead of relying on the agent to do it.
            try:
                baseline.parent.mkdir(parents=True, exist_ok=True)
                baseline.write_text(str(now), encoding="utf-8")
            except OSError:
                pass
        if baseline.is_file():
            try:
                prev = int(baseline.read_text(encoding="utf-8").strip())
            except (OSError, ValueError):
                prev = None
            if prev is not None and now != prev:
                if now < prev:
                    warn = f"WARNING: this edit removed {prev - now} test file(s) (was {prev}, now {now})."
                # Any change (up or down) becomes the new baseline.
                try:
                    baseline.write_text(str(now), encoding="utf-8")
                except OSError:
                    pass

    if not summary and not warn:
        return 0

    parts = []
    if summary:
        parts.append(f"debt-ops feedback (3s budget per command):\n{summary}")
    if warn:
        parts.append(warn)
    emit("\n\n".join(parts))
    return 0


if __name__ == "__main__":
    try:
        sys.exit(main())
    except Exception:
        # A bug here must never block the tool cycle.
        sys.exit(0)
import hashlib
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path

# NOTE(review): the marker is an empty string in this copy of the patch — it
# looks like an HTML-comment marker lost in extraction. With "" has_charter()
# is true for any readable AGENTS.md; confirm the value against upstream.
CHARTER_MARKER = ""
MANIFEST_FILES = ("Cargo.toml", "package.json", "pyproject.toml", "Makefile", "go.mod", "Gemfile")

ADR_CANDIDATE_PATHS = (
    "doc/adr", "docs/adr", "doc/adrs", "docs/adrs",
    "adr", "adrs",
    "architecture/decisions", "docs/architecture/decisions", "doc/architecture/decisions",
)

REGISTRY_CANDIDATE_PATHS = (
    "docs/debt", "docs/registry", "doc/debt",
    "debt/registry", "tech-debt/registry", "debt-registry", "registry",
)

ADR_FILENAME_RE = re.compile(r"^\d+[-_].*\.md$", re.IGNORECASE)
# Co-located default home (ADR 0009): both artifacts under one `docs/` parent.
DEFAULT_ADR_DIR = "docs/adr"
DEFAULT_REGISTRY_DIR = "docs/debt"


# Single deterministic cache base so hook subprocesses and skill Bash (which
# never sees PLUGIN_DATA) resolve the same path. Override with DEBT_OPS_CACHE.
def cache_base():
    override = os.environ.get("DEBT_OPS_CACHE")
    return Path(override) if override else (Path.home() / ".cache" / "debt-ops")


# Wraps text in the SessionStart hook JSON envelope and writes it to stdout
# as a single line.
def emit(context):
    payload = {
        "hookSpecificOutput": {
            "hookEventName": "SessionStart",
            "additionalContext": context,
        }
    }
    sys.stdout.write(json.dumps(payload) + "\n")


# Repo root, or None if we're not in a git repo (or git is missing/hung).
# Bounded to 2 seconds so a stuck git can't stall session start;
# SubprocessError covers both a non-zero exit and the timeout.
def git_toplevel():
    try:
        out = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True, timeout=2,
        )
        s = out.stdout.strip()
        return Path(s) if s else None
    except (subprocess.SubprocessError, FileNotFoundError):
        return None


# First 12 hex chars of the SHA-1 of the repo path; names the cache subdirectory.
def repo_hash(toplevel):
    return hashlib.sha1(str(toplevel).encode()).hexdigest()[:12]


# Fingerprint of the manifest files present at the repo root, built from each
# file's whole-second mtime and path. Returns "no-manifest" when none exist
# and "stat-failed" when a stat call fails.
def manifest_hash(toplevel):
    paths = [toplevel / n for n in MANIFEST_FILES if (toplevel / n).is_file()]
    if not paths:
        return "no-manifest"
    try:
        joined = "\n".join(f"{int(p.stat().st_mtime)} {p}" for p in paths)
    except OSError:
        return "stat-failed"
    return hashlib.sha1(joined.encode()).hexdigest()[:12]


# Number of regular `.md` files directly inside dir_path (not recursive);
# 0 when the directory is missing or cannot be listed.
def md_count(dir_path):
    if not dir_path.is_dir():
        return 0
    try:
        return sum(1 for p in dir_path.iterdir() if p.is_file() and p.suffix == ".md")
    except OSError:
        return 0


# Number of `.md` files directly inside registry_dir whose text contains the
# literal `ai_authored: true`. Unreadable files are skipped; a failed
# directory listing yields 0.
def ai_authored_count(registry_dir):
    if not registry_dir.is_dir():
        return 0
    n = 0
    try:
        for p in registry_dir.iterdir():
            if p.is_file() and p.suffix == ".md":
                try:
                    if "ai_authored: true" in p.read_text(encoding="utf-8", errors="replace"):
                        n += 1
                except OSError:
                    pass
    except OSError:
        return 0
    return n


# Appends one JSON line (payload plus a UTC `ts`) to metrics.jsonl in the cache
# dir; silent no-op when the dir is absent or the write fails. Works on a copy
# so the caller's dict is not modified.
def log_metric(cache_dir, payload):
    if not cache_dir.is_dir():
        return
    record = dict(payload)
    record["ts"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    try:
        with (cache_dir / "metrics.jsonl").open("a", encoding="utf-8") as f:
            f.write(json.dumps(record, separators=(",", ":")) + "\n")
    except OSError:
        pass


# True when AGENTS.md exists at the repo root, is readable, and contains
# CHARTER_MARKER.
def has_charter(toplevel):
    agents_md = toplevel / "AGENTS.md"
    if not agents_md.is_file():
        return False
    try:
        return CHARTER_MARKER in agents_md.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return False


# Read a cached relative path. Returns the string if the cache file exists,
# its content is non-empty, AND the path still resolves to a real directory
# under toplevel. Otherwise None — triggers re-detection.
def read_cached_dir(cache_file, toplevel):
    if not cache_file.is_file():
        return None
    try:
        rel = cache_file.read_text(encoding="utf-8").strip()
    except OSError:
        return None
    if not rel:
        return None
    # Enforce "under toplevel": joining an absolute path would discard
    # toplevel entirely, and `..` components can climb out of the repo.
    rel_path = Path(rel)
    if rel_path.is_absolute() or ".." in rel_path.parts:
        return None
    if not (toplevel / rel).is_dir():
        return None
    return rel


# Store rel_path in cache_file, creating parent directories as needed.
# Failures are ignored: the cache is an optimisation, not a requirement.
def write_cached_dir(cache_file, rel_path):
    try:
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        cache_file.write_text(rel_path, encoding="utf-8")
    except OSError:
        pass
+def probe_adr_dir(toplevel): + for rel in ADR_CANDIDATE_PATHS: + p = toplevel / rel + if not p.is_dir(): + continue + try: + for f in p.iterdir(): + if f.is_file() and ADR_FILENAME_RE.match(f.name): + return rel + except OSError: + pass + return None + + +# Look for an existing registry dir with any .md content. Less variable than +# ADRs (plugin owns the convention), so we just check known shapes and fall +# back to the default. +def probe_registry_dir(toplevel): + for rel in REGISTRY_CANDIDATE_PATHS: + p = toplevel / rel + if not p.is_dir(): + continue + try: + if any(f.is_file() and f.suffix == ".md" for f in p.iterdir()): + return rel + except OSError: + pass + return None + + +# When the ADR or registry dir doesn't exist yet, the inject states the +# co-location convention as fact and lets Codex pick the home — it reads the +# repo's doc conventions better than a fixed probe (ADR 0009). Codex writes +# the resolved repo-relative path(s) to the cache file(s) the other hooks read. +def placement_instruction(cache_dir, adr_dir, registry_dir): + lines = ["Directory placement — resolve silently, no announcement:"] + if adr_dir: + lines.append(f"- ADRs already live at `{adr_dir}`.") + if registry_dir: + lines.append(f"- The debt registry already lives at `{registry_dir}`.") + # ADRs vary more than the plugin-owned registry; check for a non-standard + # existing dir before treating it as greenfield. + if adr_dir is None: + lines.append( + "- No ADR directory was detected. First check whether one already exists " + "in a non-standard spot — a dir of markdown with numbered filenames " + "(`NNNN-title.md`) or Nygard sections (`## Context`, `## Decision`, " + "`## Consequences`). Use it if it exists." + ) + lines.append( + "- debt-ops co-locates ADRs and the debt registry under one documentation " + "home. When creating either from scratch, prefer an existing docs home " + "(`docs/`, `documentation/`, or `doc/`); otherwise the home is `docs/`." 
+ ) + if adr_dir is None and registry_dir is None: + lines.append( + f"- With no home yet, use `/adr` and `/debt` " + f"(defaults: `{DEFAULT_ADR_DIR}` + `{DEFAULT_REGISTRY_DIR}`)." + ) + elif adr_dir is None: + lines.append( + f"- Place the ADR dir under the same home as the registry above so they " + f"stay co-located, or default to `{DEFAULT_ADR_DIR}`." + ) + else: + lines.append( + f"- Place the registry dir under the same home as the ADRs above so they " + f"stay co-located, or default to `{DEFAULT_REGISTRY_DIR}`." + ) + targets = [] + if adr_dir is None: + targets.append(f"the ADR path to {cache_dir}/adr-dir") + if registry_dir is None: + targets.append(f"the registry path to {cache_dir}/registry-dir") + lines.append( + "- Write " + " and ".join(targets) + ", repo-relative, one path per file. " + "Directories are created lazily on first write — no need to create them now." + ) + return "\n".join(lines) + + +def commands_block(cache_dir, manifest, charter, stateless): + if charter: + return ( + f"Quality commands: read the AGENTS.md section marked `{CHARTER_MARKER}`. " + "AGENTS.md is the source of truth when present." + ) + if stateless: + return ( + "debt-ops: cache disabled (read-only cache dir). " + "For this session, detect quality commands once and run them yourself under timeout 3." + ) + list_file = cache_dir / "feedback.list" + hash_file = cache_dir / "manifest.hash" + cached = "" + if hash_file.is_file(): + try: + cached = hash_file.read_text(encoding="utf-8").strip() + except OSError: + pass + if list_file.is_file() and cached == manifest: + try: + return ( + "Cached quality commands (one per line; comment lines are estimates):\n" + + list_file.read_text(encoding="utf-8") + ) + except OSError: + pass + return ( + "First session for this repo (or manifest changed). Please do the following silently — no need to announce:\n\n" + "1. 
Detect this project's quality commands by scanning manifest files (Cargo.toml, package.json, pyproject.toml, Makefile, go.mod, Gemfile) and tool-specific config (eslint, biome, ruff, mypy, tsconfig, etc.).\n" + "2. Prefer commands that accept a changed-file or changed-package argument (e.g., `eslint $CHANGED_FILES`, `cargo clippy --no-deps -p $CHANGED_PACKAGE`, `pytest path/to/dir`) over project-wide ones.\n" + "3. Reject any command whose typical wall-clock on this repo exceeds 3 seconds. Project-wide commands almost always exceed this on non-trivial repos.\n" + f"4. Write to {cache_dir}/feedback.list. Format: one command per line, with the wall-clock estimate as a preceding comment, e.g.:\n" + " # est ~0.8s — fast type check\n" + " tsc --noEmit -p tsconfig.json\n" + " Comments (#) and empty lines are skipped when feedback.py reads the file.\n" + f"5. Write the manifest hash to {cache_dir}/manifest.hash with this exact value: {manifest}\n" + f"6. Count test-shaped filenames in the repo (filenames matching test_*, *_test.*, *.test.*, or *.spec.*) and write the integer count to {cache_dir}/test-count. feedback.py recomputes this on every edit and warns when it drops." + ) + + +# Substitutes the detected paths into the disciplines text. Discipline 2 names +# the ADR dir Codex should write new ADRs into; Discipline 3 names the +# registry to read before editing referenced files. +def disciplines_text(adr_dir, registry_dir): + return ( + '1. The test for debt: would a future reader ask "why this way?" If yes, register via the ' + '`$add` skill immediately — no prompt. This is judgment, not a marker scan: a ' + '`TODO`/`FIXME`/`HACK`/`XXX` is the obvious case, but an unmade decision, a stub, a loosened ' + 'type, or a default picked "for now" all count even with no marker in the diff. Use ' + '`payoff_trigger: unknown` if unsure. The helper prints one line: `+1 entry: ' + '()` — don\'t paraphrase or add commentary. 
Over-register freely; the developer drops ' + 'with `drop A`, `drop A,C`, `drop all`, or `drop `.\n\n' + '2. When making an architecturally significant change — a data model, public interface, ' + 'security boundary, release pipeline, or a dep-manifest change that is a major-version bump ' + f'or a *new* top-level dependency — draft an ADR under {adr_dir}/ in Nygard format: a ' + '`# NNNN — Title` heading, `**Date:**` and `**Status:**` lines, then Context, Decision, Consequences, ' + 'Alternatives, Payoff trigger. Create the directory if needed. Only draft an ADR when there ' + 'are two credible alternatives; if you cannot list two, it is a comment, not an ADR. An ADR ' + 'with a payoff trigger *is* deliberate debt — when you write one, also invoke `$add` so the ' + 'registry entry mirrors the ADR (don\'t conclude "no markers, no debt").\n\n' + f'3. Read entries under {registry_dir}/ before changing files they reference.' + ) + + +def main(): + toplevel = git_toplevel() + if toplevel is None: + emit("debt-ops: not a git repo, plugin idle this session") + return 0 + + cache_dir = cache_base() / "cache" / repo_hash(toplevel) + stateless = False + try: + cache_dir.mkdir(parents=True, exist_ok=True) + except OSError: + stateless = True + + # Resolve ADR and registry paths: cached → existing-content probe. Either + # may stay None (greenfield) — the placement inject below then has Codex + # choose a co-located home and write the cache file(s). See ADR 0009. 
+ adr_cache = cache_dir / "adr-dir" + registry_cache = cache_dir / "registry-dir" + + adr_dir = read_cached_dir(adr_cache, toplevel) + if adr_dir is None: + probed = probe_adr_dir(toplevel) + if probed: + adr_dir = probed + if not stateless: + write_cached_dir(adr_cache, probed) + + registry_dir = read_cached_dir(registry_cache, toplevel) + if registry_dir is None: + probed = probe_registry_dir(toplevel) + if probed: + registry_dir = probed + if not stateless: + write_cached_dir(registry_cache, probed) + + effective_adr_dir = adr_dir or DEFAULT_ADR_DIR + effective_registry_dir = registry_dir or DEFAULT_REGISTRY_DIR + + log_metric(cache_dir, { + "event": "session", + "registry_count": md_count(toplevel / effective_registry_dir), + "adr_count": md_count(toplevel / effective_adr_dir), + "ai_authored_count": ai_authored_count(toplevel / effective_registry_dir), + "adr_dir": effective_adr_dir, + "registry_dir": effective_registry_dir, + }) + + context = ( + "Tech-debt-operations disciplines (debt-ops plugin):\n\n" + f"{disciplines_text(effective_adr_dir, effective_registry_dir)}\n\n" + f"{commands_block(cache_dir, manifest_hash(toplevel), has_charter(toplevel), stateless)}" + ) + if (adr_dir is None or registry_dir is None) and not stateless: + context += "\n\n" + placement_instruction(cache_dir, adr_dir, registry_dir) + if not stateless: + context += ( + f"\n\nDebug: set DEBT_OPS_DEBUG=1 in the environment to log every hook fire " + f"and command result to {cache_dir}/debug.log (tab-separated; tail -f to watch)." + ) + emit(context) + return 0 + + +if __name__ == "__main__": + try: + sys.exit(main()) + except Exception: + sys.exit(0) diff --git a/plugins/bcanfield/agentic-tech-debt/hooks/stop.py b/plugins/bcanfield/agentic-tech-debt/hooks/stop.py new file mode 100644 index 00000000..15378d0e --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/hooks/stop.py @@ -0,0 +1,423 @@ +#!/usr/bin/env python3 +"""debt-ops Stop hook: TODO-sniff safety net for Discipline 1. 
+ +Codex adapter. Fires at the end of every turn. Counts newly-added marker lines +(TODO/FIXME/HACK/XXX) in the working tree vs newly-added entries under the +registry dir. If markers > registrations, nudges Codex on the next turn via a +`decision: block` continuation. + +Tripwire, not precision: false positives are cheap (the dev drops +spurious entries with "drop it"); false negatives defeat the point. +""" + +import hashlib +import json +import os +import re +import subprocess +import sys +import time +from pathlib import Path + +DEBUG_ENV = "DEBT_OPS_DEBUG" +MARKER_RE = re.compile(r"\b(TODO|FIXME|HACK|XXX)\b") +# Plugin-source prefixes are always excluded (this monorepo hosts all four +# adapters); debt-registry and ADR prefixes are looked up per-repo from the +# cache files written by session-start.py. See build_excluded_prefixes below. +STATIC_EXCLUDED_PREFIXES = ("claude-code/", "codex/", "copilot/", "skills/") +DEFAULT_REGISTRY_DIR = "docs/debt" +DEFAULT_ADR_DIR = "docs/adr" +MAX_UNTRACKED_BYTES = 1_000_000 +# Hard cap on Stop-hook blocks per session. After this many blocks fire +# for a given session_id, all subsequent Stop calls in that session stay +# silent — bounds any pathological loop and respects the "more behind +# the scenes" posture. SessionStart resets the counter implicitly via a +# new session_id from the hook payload. +SESSION_BLOCK_CAP = 1 + + +# Single deterministic cache base so hook subprocesses and skill Bash (which +# never sees PLUGIN_DATA) resolve the same path. Override with DEBT_OPS_CACHE. +def cache_base(): + override = os.environ.get("DEBT_OPS_CACHE") + return Path(override) if override else (Path.home() / ".cache" / "debt-ops") + + +# Emit a block decision. `decision: "block"` + `reason` is the documented way +# to make Codex continue working on the supplied message before stopping. 
def emit(reason):
    # One JSON object on stdout is the whole protocol: "block" asks the agent
    # to keep working, and `reason` is the message it is handed.
    payload = {"decision": "block", "reason": reason}
    sys.stdout.write(json.dumps(payload) + "\n")


# Resolve repo root; returns None outside a git repo so we idle cleanly.
# A missing git binary or a call slower than 2s is treated the same way.
def git_toplevel():
    try:
        out = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True, timeout=2,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        return None
    root = out.stdout.strip()
    return Path(root) if root else None


# Short per-repo key: first 12 hex chars of SHA-1 over the repo path string.
# Used as the name of this repo's cache subdirectory.
def repo_hash(toplevel):
    return hashlib.sha1(str(toplevel).encode()).hexdigest()[:12]


# Debug log path — only when DEBT_OPS_DEBUG is set in the environment.
# Any non-empty value enables logging; unset or empty returns None.
def debug_path(cache_dir):
    if not os.environ.get(DEBUG_ENV):
        return None
    return cache_dir / "debug.log"


# Appends one tab-separated line to the debug log; silently no-ops on failure.
# `path` is None when debugging is off. Fields are coerced with str() so a
# non-string field (an int count, None) cannot raise TypeError — dlog runs
# before emit(), and an exception here would abort the hook's decision.
def dlog(path, *fields):
    if path is None:
        return
    ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    record = "\t".join(str(field) for field in (ts, *fields))
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as f:
            f.write(record + "\n")
    except OSError:
        pass


# Canonicalise a repo-relative directory read from a cache file. The value is
# free text (written by session-start.py or by the agent), while git reports
# paths as "docs/debt/x.md" — so "./docs/debt" and "docs/debt/" must collapse
# to "docs/debt" for prefix matching to work. Returns "" when nothing usable
# remains (blank, "." or "./").
def normalize_rel_dir(value):
    rel = value.strip()
    while rel.startswith("./"):
        rel = rel[2:].lstrip("/")
    rel = rel.rstrip("/")
    return "" if rel == "." else rel


# Resolve registry/ADR paths from cache (written by session-start.py); fall
# back to defaults if the cache file is missing, unreadable, undecodable or
# empty. Returns a (registry_dir, adr_dir) pair, e.g. ("docs/debt", "docs/adr").
def resolve_dirs(cache_dir):
    def read(name, default):
        try:
            raw = (cache_dir / name).read_text(encoding="utf-8")
        except (OSError, UnicodeError):
            # UnicodeError: a corrupt cache file must not take the hook down.
            return default
        return normalize_rel_dir(raw) or default

    return read("registry-dir", DEFAULT_REGISTRY_DIR), read("adr-dir", DEFAULT_ADR_DIR)


# Path prefixes ignored by marker counting: the per-repo registry and ADR
# dirs (each normalised to exactly one trailing slash) followed by the static
# plugin-source prefixes.
def build_excluded_prefixes(registry_dir, adr_dir):
    per_repo = tuple(f"{d.rstrip('/')}/" for d in (registry_dir, adr_dir))
    return per_repo + STATIC_EXCLUDED_PREFIXES


# True if the file path is excluded from marker counting.
def is_excluded(path, excluded_prefixes):
    # str.startswith accepts a tuple of candidates; an empty tuple never matches.
    return path.startswith(tuple(excluded_prefixes))


# Extract the post-image path from a "+++ " header line of a unified diff.
# Returns None for "/dev/null" (deleted file) or any target without the "b/"
# prefix. Git may append a tab after names containing spaces and may wrap
# unusual names in double quotes; both are stripped. Escapes inside a quoted
# name are left as-is because the result is only used for prefix matching.
def diff_header_path(line):
    target = line[4:].rstrip("\t\r")
    if len(target) >= 2 and target[0] == '"' and target[-1] == '"':
        target = target[1:-1]
    if target.startswith("b/"):
        return target[2:]
    return None


# Count added ("+") lines in unified-diff text that match `marker_re`,
# skipping files whose path starts with one of `excluded_prefixes`.
#
# The parser tracks whether it is inside a hunk. "+++ " is only a file header
# between a "diff --git" line and the first "@@"; inside a hunk it is an added
# line whose content happens to start with "++" (e.g. "++i;"). Treating it as
# a header there would drop the current path and silently skip every later
# marker in that file.
def count_added_markers(diff_text, excluded_prefixes, marker_re):
    total = 0
    current_path = None
    in_hunk = False
    # split("\n"), not splitlines(): form feeds and similar characters inside
    # a content line must not start a new "line" for the parser.
    for line in diff_text.split("\n"):
        if line.startswith("diff --git "):
            # Hunk content always carries a ' ', '+', '-' or '\' prefix, so
            # this can only be the start of the next file.
            in_hunk = False
            current_path = None
        elif line.startswith("@@"):
            in_hunk = True
        elif not in_hunk:
            if line.startswith("+++ "):
                current_path = diff_header_path(line)
        elif line.startswith("+"):
            if (
                current_path is not None
                and not is_excluded(current_path, excluded_prefixes)
                and marker_re.search(line)
            ):
                total += 1
    return total


# Counts marker hits in `+` lines from `git diff HEAD` (modified-tracked files).
# Returns 0 when git is missing, times out (2s) or exits with an unexpected code.
def markers_in_diff(toplevel, excluded_prefixes):
    try:
        out = subprocess.run(
            [
                "git", "diff",
                # Pin the output shape so user config (external diff drivers,
                # forced colour, diff.noprefix / diff.mnemonicPrefix) cannot
                # change the "+++ b/<path>" headers the parser relies on.
                "--no-ext-diff", "--no-color",
                "--src-prefix=a/", "--dst-prefix=b/",
                "HEAD", "--", ".",
            ],
            cwd=toplevel,
            # Diff bodies can hold arbitrary bytes; replace rather than raise
            # UnicodeDecodeError, which would abort the whole hook.
            capture_output=True, encoding="utf-8", errors="replace", timeout=2,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        return 0
    # rc 0 = no diff or success; rc 1 from git diff also signals "differences found" in some contexts.
    if out.returncode not in (0, 1):
        return 0
    return count_added_markers(out.stdout, excluded_prefixes, MARKER_RE)


# Counts marker hits in untracked files (whole file = new lines).
# Files larger than MAX_UNTRACKED_BYTES and unreadable files are skipped.
def markers_in_untracked(toplevel, excluded_prefixes):
    try:
        out = subprocess.run(
            # -z: NUL-separated, unquoted names. Without it git C-quotes
            # unusual names and the quoted string is not a real path on disk.
            ["git", "ls-files", "-o", "--exclude-standard", "-z"],
            cwd=toplevel,
            capture_output=True, encoding="utf-8", errors="replace", timeout=2,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        return 0
    if out.returncode != 0:
        return 0
    n = 0
    for path in out.stdout.split("\0"):
        if not path or is_excluded(path, excluded_prefixes):
            continue
        full = toplevel / path
        try:
            if not full.is_file() or full.stat().st_size > MAX_UNTRACKED_BYTES:
                continue
            text = full.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        n += sum(1 for line in text.splitlines() if MARKER_RE.search(line))
    return n


# True if this turn produced any tracked-or-untracked file change outside
# the excluded paths (registry/ADR). Used to gate stage-2 broad-judgment
# blocks so we don't fire on no-op turns or doc-only edits.
def has_code_changes(toplevel, excluded_prefixes):
    # Two probes, in order: tracked files that differ from HEAD, then
    # untracked (non-ignored) files. Each pairs a command with the exit codes
    # that count as success. A probe that cannot run (no git, 2s timeout) or
    # exits with another code is skipped, not treated as "changed".
    #
    # -z makes git emit NUL-separated, unquoted names. In the default output
    # unusual names are wrapped in double quotes, so they never start with an
    # exclusion prefix and a registry/ADR-only turn could look like a code
    # change.
    probes = (
        (["git", "diff", "--name-only", "-z", "HEAD"], (0, 1)),
        (["git", "ls-files", "-o", "--exclude-standard", "-z"], (0,)),
    )
    for argv, ok_codes in probes:
        try:
            out = subprocess.run(
                argv,
                cwd=toplevel,
                # File names are arbitrary bytes; never raise on decode.
                capture_output=True, encoding="utf-8", errors="replace", timeout=2,
            )
        except (subprocess.SubprocessError, FileNotFoundError):
            continue
        if out.returncode not in ok_codes:
            continue
        for path in out.stdout.split("\0"):
            if path.strip() and not is_excluded(path, excluded_prefixes):
                return True
    return False


# Fingerprint of the current decidable state. The Stop hook re-runs at the
# end of every assistant turn; without this, stages 1 and 2 would re-fire
# every turn on an unchanged pending diff and trap the agent in an
# "Acknowledged, no changes" loop until the user commits.
+def state_fingerprint(toplevel, stage, markers, entries): + h = hashlib.sha1() + h.update(f"{stage}|{markers}|{entries}|".encode()) + try: + out = subprocess.run( + ["git", "diff", "HEAD"], + cwd=toplevel, capture_output=True, text=True, timeout=2, + ) + h.update(out.stdout.encode("utf-8", errors="replace")) + except (subprocess.SubprocessError, FileNotFoundError): + pass + h.update(b"\x00") + try: + out = subprocess.run( + ["git", "ls-files", "-o", "--exclude-standard"], + cwd=toplevel, capture_output=True, text=True, timeout=2, + ) + h.update(out.stdout.encode("utf-8", errors="replace")) + except (subprocess.SubprocessError, FileNotFoundError): + pass + return h.hexdigest() + + +def already_nudged(state_path, fingerprint): + try: + return state_path.read_text(encoding="utf-8").strip() == fingerprint + except OSError: + return False + + +def record_nudge(state_path, fingerprint): + try: + state_path.parent.mkdir(parents=True, exist_ok=True) + state_path.write_text(fingerprint, encoding="utf-8") + except OSError: + pass + + +# Counts new (untracked or staged-add) .md files under the registry dir. +# Pathspec scopes the call so `--untracked-files=all` (needed to walk into +# fully-untracked registry dirs) doesn't expand work over the whole repo. +def new_registry_entries(toplevel, registry_dir): + pathspec = f"{registry_dir.rstrip('/')}/" + try: + out = subprocess.run( + ["git", "status", "--porcelain", "--untracked-files=all", "--", pathspec], + cwd=toplevel, + capture_output=True, text=True, timeout=2, + ) + except (subprocess.SubprocessError, FileNotFoundError): + return 0 + if out.returncode != 0: + return 0 + n = 0 + for line in out.stdout.splitlines(): + if len(line) < 4: + continue + status = line[:2] + path = line[3:].strip() + # Renames look like "R oldname -> newname"; we want the new name. + if " -> " in path: + path = path.split(" -> ", 1)[1] + # Strip git's quoting around paths with special chars. 
+ if path.startswith('"') and path.endswith('"'): + path = path[1:-1] + if not path.startswith(pathspec) or not path.endswith(".md"): + continue + # New = untracked (??) or any add in either status column. + if status == "??" or "A" in status: + n += 1 + return n + + +def parse_stdin(): + try: + raw = sys.stdin.read() + except OSError: + return {} + if not raw: + return {} + try: + return json.loads(raw) + except (json.JSONDecodeError, ValueError): + return {} + + +# Read the per-session block counter. Returns 0 if file missing OR stored +# session_id doesn't match — the latter implicitly resets the count on a +# new session without needing SessionStart to do anything. +def session_block_count(path, session_id): + if not path.is_file(): + return 0 + try: + text = path.read_text(encoding="utf-8").strip() + except OSError: + return 0 + parts = text.split("\t", 1) + if len(parts) != 2: + return 0 + if parts[0] != session_id: + return 0 + try: + return int(parts[1]) + except ValueError: + return 0 + + +def record_session_block(path, session_id, count): + try: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f"{session_id}\t{count}", encoding="utf-8") + except OSError: + pass + + +def main(): + data = parse_stdin() + session_id = str(data.get("session_id") or "") + + toplevel = git_toplevel() + if toplevel is None: + return 0 + + cache_dir = cache_base() / "cache" / repo_hash(toplevel) + dpath = debug_path(cache_dir) + state_path = cache_dir / "stop-state" + session_blocks_path = cache_dir / "session-blocks" + + registry_dir, adr_dir = resolve_dirs(cache_dir) + excluded_prefixes = build_excluded_prefixes(registry_dir, adr_dir) + + markers = markers_in_diff(toplevel, excluded_prefixes) + markers_in_untracked(toplevel, excluded_prefixes) + entries = new_registry_entries(toplevel, registry_dir) + + # Per-session block cap (outer gate). 
If we've already blocked once + # this session, stay silent regardless of state — bounds loops and + # respects the "more behind the scenes" posture. + blocks_this_session = session_block_count(session_blocks_path, session_id) + capped = blocks_this_session >= SESSION_BLOCK_CAP + + if markers > entries: + # Stage 1: specific marker-count block. + fp = state_fingerprint(toplevel, "stage1", markers, entries) + if already_nudged(state_path, fp): + dlog(dpath, "STOP", f"markers={markers}", f"new_registry={entries}", "stage=1", "skipped=dup") + return 0 + if capped: + dlog(dpath, "STOP", f"markers={markers}", f"new_registry={entries}", "stage=1", f"skipped=cap({blocks_this_session})") + return 0 + record_nudge(state_path, fp) + record_session_block(session_blocks_path, session_id, blocks_this_session + 1) + dlog(dpath, "STOP", f"markers={markers}", f"new_registry={entries}", "stage=1") + delta = markers - entries + reason = ( + f"debt-ops: {markers} marker(s), {entries} entry/entries — " + f"register {delta} more via the $add skill." + ) + emit(reason) + return 0 + + # Stage 2: no markers and no registrations, but code changed — let Codex + # judge whether the diff contains broader Discipline 1 deferrals (stubs, + # loosened types, swallowed errors, deferred-via-prose, mocked calls). 
+ if markers == 0 and entries == 0 and has_code_changes(toplevel, excluded_prefixes): + fp = state_fingerprint(toplevel, "stage2", markers, entries) + if already_nudged(state_path, fp): + dlog(dpath, "STOP", f"markers={markers}", f"new_registry={entries}", "stage=2", "skipped=dup") + return 0 + if capped: + dlog(dpath, "STOP", f"markers={markers}", f"new_registry={entries}", "stage=2", f"skipped=cap({blocks_this_session})") + return 0 + record_nudge(state_path, fp) + record_session_block(session_blocks_path, session_id, blocks_this_session + 1) + dlog(dpath, "STOP", f"markers={markers}", f"new_registry={entries}", "stage=2") + # One-line nudge — the loaded add skill carries the full definition. + reason = ( + "debt-ops: turn changed code, no entries registered — " + "review your diff for deferrals." + ) + emit(reason) + return 0 + + # Rotate this turn's batch into last-batch.txt so `drop A` resolves + # against the just-completed turn on the next UserPromptSubmit. Only + # runs on clean stops (stage 1/2 didn't fire) so re-fires under a + # blocked stop don't clobber an earlier batch before Codex resolves. + rotate_batch(cache_dir) + dlog(dpath, "STOP", f"markers={markers}", f"new_registry={entries}", "silent") + return 0 + + +# Move current-turn.txt -> last-batch.txt atomically. Silent no-op if there's +# nothing to rotate. +def rotate_batch(cache_dir): + src = cache_dir / "current-turn.txt" + dst = cache_dir / "last-batch.txt" + if not src.is_file(): + return + try: + os.replace(src, dst) + except OSError: + pass + + +if __name__ == "__main__": + try: + sys.exit(main()) + except Exception: + # A hook bug must never block the tool cycle — exit clean. 
+ sys.exit(0) diff --git a/plugins/bcanfield/agentic-tech-debt/skills/add/SKILL.md b/plugins/bcanfield/agentic-tech-debt/skills/add/SKILL.md new file mode 100644 index 00000000..e0a90143 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/skills/add/SKILL.md @@ -0,0 +1,53 @@ +--- +name: add +description: 'Register a deferred decision in the debt registry. Trigger by judgment, not a marker scan, whenever a future reader would ask "why this way?": an unmade decision, stub, loosened type, bypassed check, swallowed error, a default picked "for now", or a TODO/FIXME/HACK/XXX marker. Trigger immediately whenever you defer work, or when the user invokes $add. Over-register freely; the developer drops with "drop A", "drop A,C", or "drop all".' +--- + +# add — register a tech-debt entry + +Call the bundled `register.py` via Bash — it lives in this skill's `scripts/` directory, so reference it with the relative path below (Codex resolves it against the skill root). The helper writes the entry under the repo's detected registry dir (default `docs/debt/`), assigns a short batch letter (A, B, C…), and prints exactly one line: `+1 entry: <slug> (<letter>)`. That stdout IS the user-facing announcement — add no commentary before or after. + +## The call + +```bash +python3 scripts/register.py \ + --slug <slug> \ + --principal <effort-to-fix|unknown> \ + --interest <ongoing-cost|unknown> \ + --hotspot <path-or-module|unknown> \ + --business-capability <capability> \ + --payoff-trigger <trigger|unknown> \ + --quadrant <quadrant> \ + --category <category> \ + --ai-authored <true|false> <<'EOF' +<markdown body> +EOF +``` + +The helper: +- Generates the timestamp `id` itself (no `date` call needed). +- Resolves filename collisions when two registrations land in the same second. +- Tracks the letter mapping in `~/.cache/debt-ops/cache/<repo-hash>/current-turn.txt` (override the base with `DEBT_OPS_CACHE`) so the user can drop by letter. + +## Slug + +1–4 word kebab-case label of what the debt is. Examples: `cancelled-promotion-callback`, `legacy-auth-shim`, `unfinished-rate-limiter`. Keep it short — the body carries the context.
+ +## Schema notes + +- **Quadrant** (Fowler): `reckless-inadvertent` (didn't know better), `reckless-deliberate` (knew, did it anyway), `prudent-inadvertent` (learned afterward), `prudent-deliberate` (deliberate, with a payoff plan). +- **Category** (Google / Jaspan-Green): pick the closest match. +- **payoff_trigger: unknown** is first-class. Don't manufacture a trigger to fill the field — `unknown` ages into stale review and that's the point. +- **ai_authored: true** is the leading behavioral signal — be honest. + +## Drops + +- `drop A`, `drop A,C`, `drop all` — the user types this; a UserPromptSubmit hook deletes the matching entries and surfaces a one-line confirmation. You don't act on those. +- `drop it` or `drop <slug>` — you delete it yourself: `rm <registry-dir>/<id>-<slug>.md` (the registry dir named in Discipline 3; default `docs/debt/`). Treat dropping as cheap — over-registering is the intended posture. + +## Don't + +- Don't ask the developer for confirmation before writing. Discipline 1 says "no permission prompt; just do it." +- Don't write the file directly with an editor tool — letter assignment depends on going through `register.py`. +- Don't echo or paraphrase the helper's output. The Bash tool result is already visible to the user. +- Don't fill `payoff_trigger` with a guess to seem certain. diff --git a/plugins/bcanfield/agentic-tech-debt/skills/add/scripts/register.py b/plugins/bcanfield/agentic-tech-debt/skills/add/scripts/register.py new file mode 100644 index 00000000..9cd11152 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/skills/add/scripts/register.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +"""debt-ops register helper: silent entry writer + turn-batch letter assigner. + +Codex adapter. Writing through this helper keeps the agent's mid-turn footprint +to one line — the helper's own stdout — and assigns a short letter (A, B, C, ...) +per entry so the user can later say `drop A` (handled by drop.py).
+""" + +import argparse +import hashlib +import json +import os +import subprocess +import sys +import time +from pathlib import Path + +REQUIRED_FIELDS = ( + "slug", "principal", "interest", "hotspot", + "business_capability", "payoff_trigger", "quadrant", "category", "ai_authored", +) + +DEFAULT_REGISTRY_DIR = "docs/debt" + + +# Single deterministic cache base, shared verbatim with the hooks (ADR 0011). +# Both the hook subprocess (which gets PLUGIN_DATA) and this skill Bash env +# (which does not) land on the same dir, so the letter file this writes is the +# one drop.py reads. Override with DEBT_OPS_CACHE. +def cache_base(): + override = os.environ.get("DEBT_OPS_CACHE") + return Path(override) if override else (Path.home() / ".cache" / "debt-ops") + + +# Read session-start.py's cached registry-dir path; default if missing/empty. +def read_registry_dir(cache_dir): + f = cache_dir / "registry-dir" + if f.is_file(): + try: + val = f.read_text(encoding="utf-8").strip() + if val: + return val + except OSError: + pass + return DEFAULT_REGISTRY_DIR + + +def git_toplevel(): + try: + out = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, text=True, check=True, timeout=2, + ) + s = out.stdout.strip() + return Path(s) if s else None + except (subprocess.SubprocessError, FileNotFoundError): + return None + + +def repo_hash(toplevel): + return hashlib.sha1(str(toplevel).encode()).hexdigest()[:12] + + +# A, B, ..., Z, AA, AB, ..., ZZ, AAA — base-26 column-style labels. +def letter_for(n): + s = "" + n += 1 + while n > 0: + n -= 1 + s = chr(ord("A") + n % 26) + s + n //= 26 + return s + + +# Append one JSON line to metrics.jsonl in the cache dir; silent no-op on failure. 
+def log_metric(cache_dir, payload): + if not cache_dir.is_dir(): + return + payload["ts"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + try: + with (cache_dir / "metrics.jsonl").open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, separators=(",", ":")) + "\n") + except OSError: + pass + + +def parse_args(): + p = argparse.ArgumentParser(description="Register a debt entry silently.") + p.add_argument("--slug", required=True, help="kebab-case label, 1-4 words") + p.add_argument("--principal", required=True, help="effort to fix, or 'unknown'") + p.add_argument("--interest", required=True, help="ongoing cost, or 'unknown'") + p.add_argument("--hotspot", required=True, help="path or module, or 'unknown'") + p.add_argument("--business-capability", required=True, dest="business_capability") + p.add_argument("--payoff-trigger", required=True, dest="payoff_trigger") + p.add_argument("--quadrant", required=True, + choices=["reckless-inadvertent", "reckless-deliberate", + "prudent-inadvertent", "prudent-deliberate"]) + p.add_argument("--category", required=True, + choices=["migration", "documentation", "testing", "code_quality", + "dead_code", "code_rot", "expertise", "release", + "infrastructure", "planning"]) + p.add_argument("--ai-authored", required=True, choices=["true", "false"], + dest="ai_authored") + return p.parse_args() + + +def main(): + args = parse_args() + + toplevel = git_toplevel() + if toplevel is None: + sys.stderr.write("debt-ops: not in a git repo\n") + return 2 + + body = sys.stdin.read().strip() + if not body: + sys.stderr.write("debt-ops: empty body on stdin (pipe markdown via heredoc)\n") + return 2 + + now = time.localtime() + entry_id = time.strftime("%Y%m%d%H%M%S", now) + created = time.strftime("%Y-%m-%d", now) + + cache_dir = cache_base() / "cache" / repo_hash(toplevel) + registry_rel = read_registry_dir(cache_dir) + registry = toplevel / registry_rel + registry.mkdir(parents=True, exist_ok=True) + + # Collision-safe path: 
parallel registrations within the same second get -2, -3, ... + candidate = registry / f"{entry_id}-{args.slug}.md" + i = 2 + while candidate.exists(): + candidate = registry / f"{entry_id}-{args.slug}-{i}.md" + i += 1 + + frontmatter = ( + "---\n" + f"id: {entry_id}\n" + f"title: {args.slug}\n" + f"principal: {args.principal}\n" + f"interest: {args.interest}\n" + f"hotspot: {args.hotspot}\n" + f"business_capability: {args.business_capability}\n" + f"payoff_trigger: {args.payoff_trigger}\n" + f"quadrant: {args.quadrant}\n" + f"category: {args.category}\n" + f"ai_authored: {args.ai_authored}\n" + f"created: {created}\n" + "---\n\n" + ) + candidate.write_text(frontmatter + body + "\n", encoding="utf-8") + + # Assign a turn-batch letter by counting existing rows in current-turn.txt. + try: + cache_dir.mkdir(parents=True, exist_ok=True) + except OSError: + # Read-only cache: file is written, but letter shorthand won't work. + sys.stdout.write(f"+1 entry: {args.slug}\n") + return 0 + + turn_file = cache_dir / "current-turn.txt" + existing = 0 + if turn_file.is_file(): + try: + existing = sum(1 for ln in turn_file.read_text(encoding="utf-8").splitlines() if ln.strip()) + except OSError: + existing = 0 + letter = letter_for(existing) + try: + with turn_file.open("a", encoding="utf-8") as f: + f.write(f"{letter}\t{args.slug}\t{candidate.name}\n") + except OSError: + pass + + log_metric(cache_dir, { + "event": "register", + "slug": args.slug, + "ai_authored": args.ai_authored == "true", + "letter": letter, + }) + + sys.stdout.write(f"+1 entry: {args.slug} ({letter})\n") + return 0 + + +if __name__ == "__main__": + try: + sys.exit(main()) + except Exception as e: + sys.stderr.write(f"debt-ops register: {e}\n") + sys.exit(1) diff --git a/plugins/bcanfield/agentic-tech-debt/skills/init/SKILL.md b/plugins/bcanfield/agentic-tech-debt/skills/init/SKILL.md new file mode 100644 index 00000000..596c05df --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/skills/init/SKILL.md @@ 
-0,0 +1,80 @@ +--- +name: init +description: Write or refresh the "## Tech debt operations" section in AGENTS.md so a team shares one source of truth for debt-ops disciplines and cached quality commands. Idempotent. Only the managed section changes; other sections are untouched. Invoke explicitly via $init (solo users get the same content from the SessionStart inject). +--- + +# init + +Write or update a `## Tech debt operations` section in `./AGENTS.md`. Idempotent — only the managed section changes. + +## 1. Read the cached commands + +```bash +TOPLEVEL=$(git rev-parse --show-toplevel) +REPO_HASH=$(printf '%s' "$TOPLEVEL" | shasum | cut -c1-12) + +# Hooks and skills share one deterministic cache base (ADR 0011). Override the +# base with DEBT_OPS_CACHE; default is ~/.cache/debt-ops. +CACHE_DIR="${DEBT_OPS_CACHE:-$HOME/.cache/debt-ops}/cache/$REPO_HASH" + +LIST="$CACHE_DIR/feedback.list" +[ -f "$LIST" ] && cat "$LIST" + +# Detected ADR/registry dirs. Fall back to the co-located defaults if the +# cache files aren't written yet. +ADR_DIR=$( [ -s "$CACHE_DIR/adr-dir" ] && cat "$CACHE_DIR/adr-dir" || echo "docs/adr" ) +REGISTRY_DIR=$( [ -s "$CACHE_DIR/registry-dir" ] && cat "$CACHE_DIR/registry-dir" || echo "docs/debt" ) +echo "adr-dir: $ADR_DIR" +echo "registry-dir: $REGISTRY_DIR" +``` + +If the file doesn't exist, the SessionStart discovery prompt hasn't run yet. Tell the developer: + +> No cached quality commands yet. Start a new session so I can detect them, then re-run $init. + +…and stop. + +## 2. Compose the section (template) + +Substitute `{{COMMANDS}}` with the cache contents verbatim, `{{ADR_DIR}}` with the detected `adr-dir`, and `{{REGISTRY_DIR}}` with the detected `registry-dir` (from step 1). + +```markdown +## Tech debt operations + + + +### Disciplines + +1. The test for debt: would a future reader ask "why this way?" If yes, register via the `$add` skill immediately — no prompt. 
This is judgment, not a marker scan: a `TODO`/`FIXME`/`HACK`/`XXX` is the obvious case, but an unmade decision, a stub, a loosened type, or a default picked "for now" all count even with no marker in the diff. Use `payoff_trigger: unknown` if unsure. Announce: `+1 entry: (drop?)`. Over-register freely; the developer drops with "drop it". + +2. When making an architecturally significant change — a data model, public interface, security boundary, release pipeline, or a dep-manifest change that is a major-version bump or a *new* top-level dependency — draft an ADR under `{{ADR_DIR}}/` in Nygard format: a `# NNNN — Title` heading, `**Date:**` and `**Status:**` lines, then Context, Decision, Consequences, Alternatives, Payoff trigger. Create the directory if needed. Only draft an ADR when there are two credible alternatives; if you cannot list two, it is a comment, not an ADR. An ADR with a payoff trigger *is* deliberate debt — when you write one, also invoke `$add` so the registry entry mirrors the ADR (don't conclude "no markers, no debt"). + +3. Read entries under `{{REGISTRY_DIR}}/` before changing files they reference. + +### Quality commands + +These run after every edit under a 3 s budget per command. Edit freely; the plugin reads tolerantly. Lines starting with `#` are estimates/comments and are skipped at run time. + + +{{COMMANDS}} + +``` + +## 3. Apply + +- **If `./AGENTS.md` doesn't exist:** Write it with the section above as the entire file. +- **If `./AGENTS.md` has a `## Tech debt operations` section:** Edit to replace exactly that section — from the heading through (but not including) the next `## ` heading, or through EOF if no next heading. Leave every other byte unchanged. +- **If `./AGENTS.md` exists without the section:** Edit to append the section after the last existing line, with a single blank line between. + +## 4. 
Announce + +`charter updated: ./AGENTS.md — disciplines + N quality commands` + +(N = count of non-comment, non-blank lines inside the marker block.) + +## Marker contract — do not deviate + +- `` is the open marker `feedback.py` keys on. Exact string; the `v1` is part of the marker. +- `` is the close marker. +- The self-explaining `` line is mandatory — a teammate without the plugin reads that to understand what they're seeing. +- Never touch any byte outside the `## Tech debt operations` section. diff --git a/plugins/bcanfield/agentic-tech-debt/skills/init/agents/openai.yaml b/plugins/bcanfield/agentic-tech-debt/skills/init/agents/openai.yaml new file mode 100644 index 00000000..7c95b337 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/skills/init/agents/openai.yaml @@ -0,0 +1,3 @@ +# init is opt-in: solo users already get the same content from the SessionStart +# inject, so the model should never auto-trigger it. Explicit $init only. +allow_implicit_invocation: false diff --git a/plugins/bcanfield/agentic-tech-debt/skills/metrics/SKILL.md b/plugins/bcanfield/agentic-tech-debt/skills/metrics/SKILL.md new file mode 100644 index 00000000..94819176 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/skills/metrics/SKILL.md @@ -0,0 +1,80 @@ +--- +name: metrics +description: Print a debt-ops health summary from the metrics log, covering registration rate, hook feedback action rate, ADR creation, and AI-authored share. Use when the user asks for "debt-ops metrics", "debt health", "registry stats", or invokes $metrics. Read-only, never writes the log. +--- + +# metrics + +Read the hidden metrics log and tell the user whether v1's tripwires are tripping. + +## 1. Find the log + +```bash +TOPLEVEL=$(git rev-parse --show-toplevel) +REPO_HASH=$(printf '%s' "$TOPLEVEL" | shasum | cut -c1-12) + +# Hooks and skills share one deterministic cache base (ADR 0011). Override the +# base with DEBT_OPS_CACHE; default is ~/.cache/debt-ops. 
+CACHE_DIR="${DEBT_OPS_CACHE:-$HOME/.cache/debt-ops}/cache/$REPO_HASH" + +LOG="$CACHE_DIR/metrics.jsonl" +if [ -f "$LOG" ]; then + tail -n 500 "$LOG" +else + echo "MISSING: no metrics.jsonl found for repo hash $REPO_HASH" +fi +``` + +If the file is missing or empty, tell the user the hooks haven't fired yet in this repo and stop. + +## 2. The log format + +One JSON object per line, three event shapes: + +- `{"event":"edit","file":"...","registry_count":N,"ts":"..."}` — every agent edit +- `{"event":"feedback","file":"...","result":"pass|fail","ts":"..."}` — every quality-check fire +- `{"event":"session","registry_count":N,"adr_count":M,"ai_authored_count":K,"ts":"..."}` — start of each session + +Timestamps are ISO-8601 UTC. + +## 3. Compute the tripwires + +Filter to the last 7 days. Then compute: + +- **Edits / sessions** — counts of `event:edit` and `event:session`. +- **Registry growth** — last `registry_count` minus first (across either edit or session events). >0 means Discipline 1 is firing. +- **ADR growth** — last `adr_count` minus first (session events only). +- **AI-authored share trend** — first vs. last session percentage (`ai_authored_count / registry_count`, when registry_count>0). +- **Feedback pass rate** — `count(result:pass) / count(event:feedback)`. +- **FAIL → PASS rate** — for each feedback event with `result:fail`, look at the *next* feedback event for the *same* file. Count those that flipped to `pass`. Divide by total fails. Below 50% means the agent isn't reliably acting on hook output — the architectural alarm bell. + +If there are fewer than 5 sessions in the window, say "need more data" and skip the verdict. + +## 4. Report + +One screen. No padding. Use `→` and `↑/↓` for trends. 
Example shape: + +``` +debt-ops metrics — last 7 days +───────────────────────────────── +edits : 142 (8 sessions, ~18 edits/session) +registry : +3 +adrs : +1 +ai-authored : 50% → 60% ↑ + +feedback ran : 89 times +pass rate : 88% +fail → pass rate: 80% (8/10) + +verdict: ok +``` + +End with one judgment line: +- **ok** — registry growth >0 AND fail→pass rate ≥50%. +- **investigate: ** — name the specific tripwire that tripped. + +## Don't + +- Don't write to the log. +- Don't compute metrics not listed above. +- Don't guess at health when data is thin — say "need more data" instead. diff --git a/plugins/bcanfield/agentic-tech-debt/skills/review/SKILL.md b/plugins/bcanfield/agentic-tech-debt/skills/review/SKILL.md new file mode 100644 index 00000000..ca1482a8 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/skills/review/SKILL.md @@ -0,0 +1,57 @@ +--- +name: review +description: Audit the debt registry, rank survivors by churn × Fowler quadrant, surface a top-N list, then walk paydown on user follow-up. Use when the user asks to review debt, see what to pay down, work through entries, or invokes $review. Stale entries drop with "drop A,B,C". +--- + +# review — audit + (on follow-up) walk paydown + +Two modes. First turn: print the audit and stop. On a user follow-up ("fix the top one," "walk these," "do A," "pay some down"), apply the rubric below. + +## First turn: print the audit + +Run the bundled `review.py` (it lives in this skill's `scripts/` directory — reference it with the relative path; Codex resolves it against the skill root): + +```bash +python3 scripts/review.py +``` + +Optional: `--top N` to surface more than the default 3 candidates. + +**Re-emit the helper's stdout verbatim in a fenced code block.** Codex may collapse long bash outputs — if you don't print it yourself, the user might not see it. Copy exactly: no preamble, no summary, no "want me to fix the top one?" The fenced block preserves column alignment. + +Then stop. 
The user picks the next move. + +## Paydown mode (only on user follow-up) + +Work through requested entries one at a time. Confirm before each fix. Never auto-batch. Never auto-commit. + +For each entry, read the registry file, the hotspot, and adjacent tests. Apply this rubric: + +- **Already fixed?** If the marker/symptom the entry describes no longer appears in the hotspot file, say so and add the entry's letter to the drop list. Don't re-fix. +- **Cold area?** Churn=0 since `created:` and age >90d → propose deferring. ~20% of files generate ~80% of debt-related rework; don't pay down vanity refactors. +- **Prudent-deliberate with payoff_trigger not met?** Honor the trigger. Skip with a one-line "trigger not met: ." +- **Fix candidate?** Propose the smallest change that resolves the entry. Improvement, not perfection — don't refactor surrounding code. + +### When you fix + +- **Read the repo first.** Check the test framework, adjacent tests, the cached feedback commands. Adapt to what exists; don't impose a new style. +- **TDD where tests exist.** Write a failing test that pins the deferral, then make it pass. Don't weaken or delete existing tests to make a fix pass. +- **No tests in this area?** Surface that and ask: write one, or fix without? +- **Explain why this resolves the entry.** Cite the entry's `payoff_trigger` or body — don't commit code you can't explain. +- **Risky fix?** Auth, payments, migrations, public APIs, or `ai_authored: true` → run a fresh-context review of the diff before suggesting commit. Fresh-context review catches what the writer's motivated reasoning misses. +- **Don't commit.** Show the diff. The user runs the gates, drops the entry with `drop A`, and commits. + +### Pacing + +Aim for 3–10 entries per session — continuous paydown outperforms stop-the-world batches. If the user says "do them all," push back once: unsupervised AI cleanup measurably increases duplicate blocks and short-term churn. 
If they insist, still one-at-a-time with diffs surfaced. + +## Speak plainly + +The frontmatter uses a research taxonomy (`quadrant`, `category`) for ranking and grounding — it is not user-facing vocabulary. When you talk about an entry, describe it in plain words; never say "prudent-inadvertent", "reckless-deliberate", "code_rot", etc. to the user. Use the entry's body and a plain phrase (e.g. "a planned tradeoff", "a shortcut you knew about", "came up later") instead. The `review.py` output is already translated — match its tone. + +## Don't + +- Don't ask the user to confirm before running `review.py`. +- Don't paraphrase the helper's stdout. Copy it verbatim into the fenced code block. +- Don't enter paydown mode on the first turn. Stop after the report. Wait for the user's intent. +- Don't auto-commit. Ever. diff --git a/plugins/bcanfield/agentic-tech-debt/skills/review/scripts/review.py b/plugins/bcanfield/agentic-tech-debt/skills/review/scripts/review.py new file mode 100644 index 00000000..544933e9 --- /dev/null +++ b/plugins/bcanfield/agentic-tech-debt/skills/review/scripts/review.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""debt-ops review: audit registry for stale entries + rank survivors by churn × quadrant. + +Codex adapter. Audit signals (deterministic, Git-as-oracle): +- (a) hotspot path exists in the repo — strong "stale" signal when missing +- (b) commits touching the hotspot since the entry's created date — used for + ranking and for cold-area detection, not for staleness alone + +The marker-presence heuristic was considered and cut: most registry entries +describe architectural deferrals (unfinished features, undecided shapes, +validation gaps) that never had an in-code TODO. A "no markers in file" +check fired on 100% of slack-agent's 83 entries in dogfood — a clear sign +the signal is wrong for this registry shape. Without ground-truth on the +marker at register time, we err on the side of not auto-flagging stale. 
# (Continuation of review.py's module docstring, which opens on the preceding
# patch line.)
#
# Ranking (survivors only):
# - score = churn × quadrant_weight + ai_authored_bonus + age_bonus
# - quadrant weights track Fowler triage: reckless-inadvertent first (3),
#   reckless-deliberate (2), prudent-inadvertent (2), prudent-deliberate (1).
#
# Side effects:
# - Writes letter mappings for stale entries to current-turn.txt so the user
#   can drop them with the existing `drop A,B,C` UX.
#
# The output is a one-screen, three-bucket report intended to be the entire
# user-facing reply when the review skill fires.

import argparse
import hashlib
import json
import os
import subprocess
import sys
import time
from pathlib import Path

DEFAULT_REGISTRY_DIR = "docs/debt"

# Ranking weight per Fowler quadrant; higher means pay down sooner.
QUADRANT_WEIGHT = {
    "reckless-inadvertent": 3,
    "reckless-deliberate": 2,
    "prudent-inadvertent": 2,
    "prudent-deliberate": 1,
}

# Plain-language labels for display. The canonical Fowler quadrant stays in the
# frontmatter + scoring; developers read these instead of the academic terms.
QUADRANT_PLAIN = {
    "reckless-inadvertent": "accidental",
    "reckless-deliberate": "knowing shortcut",
    "prudent-inadvertent": "came up later",
    "prudent-deliberate": "planned tradeoff",
}


def cache_base():
    """Return the base directory of the debt-ops cache.

    Single deterministic cache base, shared verbatim with the hooks (ADR 0011).

    Returns:
        ``Path($DEBT_OPS_CACHE)`` when that variable is set and non-empty,
        otherwise ``~/.cache/debt-ops``.
    """
    override = os.environ.get("DEBT_OPS_CACHE")
    return Path(override) if override else (Path.home() / ".cache" / "debt-ops")


def git_toplevel():
    """Return the current git work tree's top-level directory, or None.

    Runs ``git rev-parse --show-toplevel`` with a 2 s timeout. None is
    returned when git exits non-zero, times out, prints nothing, or cannot be
    started at all (missing binary, permission problem, ...).
    """
    try:
        out = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True, timeout=2,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError plus other spawn failures.
        return None
    top = out.stdout.strip()
    return Path(top) if top else None


def repo_hash(toplevel):
    """Return the first 12 hex digits of the SHA-1 of ``str(toplevel)``.

    Used as the per-repo cache sub-directory name. The digest is an
    identifier, not a security measure.
    """
    return hashlib.sha1(str(toplevel).encode()).hexdigest()[:12]


# Read session-start.py's cached registry-dir path; default if missing.
+def read_registry_dir(cache_dir): + f = cache_dir / "registry-dir" + if f.is_file(): + try: + val = f.read_text(encoding="utf-8").strip() + if val: + return val + except OSError: + pass + return DEFAULT_REGISTRY_DIR + + +# Parse YAML-ish frontmatter into a dict; tolerant of missing fields. +def parse_frontmatter(text): + if not text.startswith("---"): + return {} + end = text.find("\n---", 3) + if end < 0: + return {} + fm = {} + for ln in text[3:end].strip().splitlines(): + if ":" not in ln: + continue + k, _, v = ln.partition(":") + fm[k.strip()] = v.strip() + return fm + + +# git log commit count touching since (YYYY-MM-DD); 0 on failure. +def churn_since(toplevel, path, since): + if not path or path == "unknown": + return 0 + args = ["git", "-C", str(toplevel), "log", "--oneline"] + if since: + args += [f"--since={since}"] + args += ["--", path] + try: + out = subprocess.run(args, capture_output=True, text=True, timeout=3) + return sum(1 for ln in out.stdout.splitlines() if ln.strip()) + except (subprocess.SubprocessError, FileNotFoundError): + return 0 + + +def days_since(date_str): + if not date_str: + return 0 + try: + t = time.strptime(date_str, "%Y-%m-%d") + return max(0, int((time.time() - time.mktime(t)) / 86400)) + except ValueError: + return 0 + + +# Body preview: first non-empty line of prose after the frontmatter, ~85 chars. +def body_preview(text, max_chars=85): + if text.startswith("---"): + end = text.find("\n---", 3) + if end >= 0: + text = text[end + 4:] + for ln in text.splitlines(): + s = ln.strip() + if s and not s.startswith("#"): + if len(s) <= max_chars: + return s + return s[: max_chars - 1].rstrip() + "…" + return "" + + +# A, B, ..., Z, AA, AB — base-26 column-style (matches register.py). 
def letter_for(n):
    """Return the base-26 column-style label for a zero-based index.

    0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB", ..., 702 -> "AAA".
    A negative ``n`` yields the empty string.
    """
    label = ""
    n += 1  # switch to 1-based so the bijective base-26 arithmetic works
    while n > 0:
        n, rem = divmod(n - 1, 26)
        label = chr(ord("A") + rem) + label
    return label


def audit_entry(toplevel, entry_path):
    """Return a dict of frontmatter + audit signals for one registry entry.

    Returns None when the file cannot be read or is not valid UTF-8, so a
    single bad file does not abort the whole review.

    Missing frontmatter fields default to: title -> file stem, hotspot ->
    "unknown", created -> "", quadrant -> "reckless-inadvertent",
    ai_authored -> false. ``file_exists`` is None when there is no hotspot
    path to check; churn is only computed for a hotspot that exists.
    """
    try:
        text = entry_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None
    fm = parse_frontmatter(text)
    hotspot = fm.get("hotspot", "unknown")
    created = fm.get("created", "")

    has_path = bool(hotspot) and hotspot != "unknown"
    file_exists = (toplevel / hotspot).exists() if has_path else None
    churn = churn_since(toplevel, hotspot, created) if file_exists else 0

    return {
        "fname": entry_path.name,
        "slug": fm.get("title") or entry_path.stem,
        "hotspot": hotspot,
        "created": created,
        "age_days": days_since(created),
        "quadrant": fm.get("quadrant", "reckless-inadvertent"),
        "ai_authored": fm.get("ai_authored", "false").lower() == "true",
        "file_exists": file_exists,
        "churn_since_created": churn,
        "preview": body_preview(text),
    }


def classify(entry):
    """Return ('stale', reason) | ('cold', reason) | ('active', None).

    Only file-missing flags as stale outright (the one signal we trust at 100%).
    Long-dormant files become cold (deprioritize). Everything else stays active
    so the user — not the script — decides what's still real.
    """
    if entry["file_exists"] is False:
        return ("stale", "hotspot file missing")
    dormant = entry["churn_since_created"] == 0 and entry["age_days"] > 90
    if entry["file_exists"] is True and dormant:
        return ("cold", f"unchanged in {entry['age_days']}d")
    return ("active", None)


def score(entry):
    """Higher = more important to pay down. Behavioral signal × Fowler triage.

    score = churn × quadrant weight (1 for an unknown quadrant),
    +2 when the entry is AI-authored, +1 when it is older than 30 days.
    """
    weight = QUADRANT_WEIGHT.get(entry["quadrant"], 1)
    total = entry["churn_since_created"] * weight
    if entry["ai_authored"]:
        total += 2  # leading-indicator bonus, per docs/tech-debt-plugin-plan.md:815
    if entry["age_days"] > 30:
        total += 1  # mild age bonus: older entries rank slightly higher
    return total


def log_metric(cache_dir, payload):
    """Append one JSON line to ``metrics.jsonl`` in the cache dir.

    Best-effort: nothing happens when ``cache_dir`` is not a directory or the
    write fails. A UTC ``ts`` field is added to a copy of ``payload``; the
    caller's dict is left untouched.
    """
    if not cache_dir.is_dir():
        return
    record = dict(payload)
    record["ts"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    try:
        with (cache_dir / "metrics.jsonl").open("a", encoding="utf-8") as f:
            f.write(json.dumps(record, separators=(",", ":")) + "\n")
    except OSError:
        pass  # metrics are optional; never fail a review over them


def write_stale_letters(cache_dir, stale_entries):
    """Append letter mappings for stale entries to current-turn.txt so `drop A,B,C` works.

    Letters continue after the non-blank rows already in the file.

    Returns:
        ``{letter: entry}`` for the rows actually written; empty when there is
        nothing to write or the cache directory cannot be created, and partial
        when a write fails midway.
    """
    if not stale_entries:
        return {}
    try:
        cache_dir.mkdir(parents=True, exist_ok=True)
    except OSError:
        return {}
    turn_file = cache_dir / "current-turn.txt"
    existing = 0
    if turn_file.is_file():
        try:
            rows = turn_file.read_text(encoding="utf-8").splitlines()
            existing = sum(1 for ln in rows if ln.strip())
        except (OSError, UnicodeDecodeError):
            existing = 0
    mapping = {}
    try:
        with turn_file.open("a", encoding="utf-8") as f:
            for offset, entry in enumerate(stale_entries):
                letter = letter_for(existing + offset)
                f.write(f"{letter}\t{entry['slug']}\t{entry['fname']}\n")
                mapping[letter] = entry
    except OSError:
        pass
    return mapping


def print_report(stale_map, top, cold, active_kept_count, unlettered_stale=()):
    """Write the one-screen review report to stdout.

    Args:
        stale_map: ``{letter: entry}`` of stale entries that got a drop letter.
        top: active entries to surface, already ranked.
        cold: dormant entries; only the first five are listed.
        active_kept_count: number of active entries not shown in ``top``.
        unlettered_stale: stale entries whose letter could not be recorded
            (cache not writable); listed without a letter so they still show
            up in the report and the total.
    """
    lines = []
    n_stale = len(stale_map) + len(unlettered_stale)
    lines.append(f"debt-ops review — {n_stale + len(top) + len(cold) + active_kept_count} entries")
    lines.append("─────────────────────────────────")

    if n_stale:
        if stale_map:
            lines.append(f"likely stale ({n_stale}) — drop with `drop A,B,…` or `drop all`")
        else:
            lines.append(f"likely stale ({n_stale}) — drop letters unavailable (cache not writable)")
        for L, e in stale_map.items():
            lines.append(f" {L} {e['slug']:<40} ({e['_reason']})")
        for e in unlettered_stale:
            lines.append(f" - {e['slug']:<40} ({e['_reason']})")
        lines.append("")

    if top:
        lines.append(f"top {len(top)} to pay down")
        for e in top:
            tag = " [ai]" if e["ai_authored"] else ""
            plain = QUADRANT_PLAIN.get(e["quadrant"], e["quadrant"])
            n = e["churn_since_created"]
            edits = f"{n} edit{'' if n == 1 else 's'} since logged"
            lines.append(
                f" • {e['slug']:<40} {e['hotspot']} · {plain} · {edits}{tag}"
            )
            if e["preview"]:
                lines.append(f" {e['preview']}")
        lines.append("")

    if cold:
        lines.append(f"cold ({len(cold)}) — deprioritize; revisit on next hot edit")
        for e in cold[:5]:
            lines.append(f" · {e['slug']:<40} ({e['_reason']})")
        if len(cold) > 5:
            lines.append(f" · …and {len(cold) - 5} more")
        lines.append("")

    if active_kept_count:
        lines.append(f"kept ({active_kept_count}) — active, lower-ranked; rerun the review skill after paydown")

    sys.stdout.write("\n".join(lines).rstrip() + "\n")


def parse_args(argv=None):
    """Parse the review script's command line (``--top N``, default 3).

    Args:
        argv: argument list; ``None`` (the default) means ``sys.argv[1:]``.
    """
    p = argparse.ArgumentParser(description="Audit + rank the debt registry.")
    p.add_argument("--top", type=int, default=3, help="how many top-paydown entries to surface (default 3)")
    return p.parse_args(argv)


def main():
    """Audit the registry, bucket and rank the entries, then print the report.

    Returns:
        0 on success (including "no registry" / "registry empty");
        2 when not inside a git repo.
    """
    args = parse_args()

    toplevel = git_toplevel()
    if toplevel is None:
        sys.stderr.write("debt-ops: not in a git repo\n")
        return 2

    cache_dir = cache_base() / "cache" / repo_hash(toplevel)
    registry = toplevel / read_registry_dir(cache_dir)

    if not registry.is_dir():
        try:
            shown = registry.relative_to(toplevel)
        except ValueError:
            # The cached registry dir points outside the repo; show it as-is.
            shown = registry
        sys.stdout.write(f"debt-ops review: no registry at {shown} — nothing to review.\n")
        return 0

    audited = (audit_entry(toplevel, f) for f in sorted(registry.glob("*.md")))
    entries = [e for e in audited if e is not None]

    if not entries:
        sys.stdout.write("debt-ops review: registry empty — nothing to review.\n")
        return 0

    buckets = {"stale": [], "cold": [], "active": []}
    for e in entries:
        bucket, reason = classify(e)
        e["_reason"] = reason
        buckets[bucket].append(e)
    stale, cold, active = buckets["stale"], buckets["cold"], buckets["active"]

    active.sort(key=score, reverse=True)
    limit = max(0, args.top)  # a negative --top would slice from the wrong end
    top = active[:limit]
    kept = active[limit:]

    stale_map = write_stale_letters(cache_dir, stale)
    # Stale entries that did not get a letter must still appear in the report.
    lettered = {id(e) for e in stale_map.values()}
    unlettered = [e for e in stale if id(e) not in lettered]

    log_metric(cache_dir, {
        "event": "review",
        "total": len(entries),
        "stale": len(stale),
        "cold": len(cold),
        "active": len(active),
    })
    print_report(stale_map, top, cold, len(kept), unlettered_stale=unlettered)
    return 0


if __name__ == "__main__":
    try:
        sys.exit(main())
    except Exception as e:
        sys.stderr.write(f"debt-ops review: {e}\n")
        sys.exit(1)

# ---------------------------------------------------------------------------
# Patch residue that shares this line with review.py (hunk for
# scripts/generate_plugins_json.py), kept as comments so no patch content is
# dropped:
#
# diff --git a/scripts/generate_plugins_json.py b/scripts/generate_plugins_json.py
# index 0923e96f..4ed55d24 100644
# --- a/scripts/generate_plugins_json.py
# +++ b/scripts/generate_plugins_json.py
# @@ -52,6 +52,9 @@ "mturac/everything-openai-codex", }
# EXTRA_MIRROR_PATHS = {
# + # debt-ops's manifest points hooks at hooks/hooks.json; the hook commands
# + # invoke sibling Python scripts in the same hooks/ directory at runtime.
# + "bcanfield/agentic-tech-debt": ("hooks",),
# # Staff Engineer Mode exposes one router skill and loads routed specialist
# # files from a top-level specialists/ directory at runtime.
# "sirmarkz/staff-engineer-mode": ("specialists",),