diff --git a/src/bcli_cli/app.py b/src/bcli_cli/app.py index 97dc6e8..eaf9f43 100644 --- a/src/bcli_cli/app.py +++ b/src/bcli_cli/app.py @@ -174,6 +174,7 @@ def _emit_command_summary() -> None: post_cmd, query_cmd, registry_cmd, + skill_init_cmd, test_cmd, ) @@ -186,6 +187,7 @@ def _emit_command_summary() -> None: app.add_typer(test_cmd.app, name="test", help="Connection and endpoint testing") app.add_typer(batch_cmd.app, name="batch", help="Batch operations from YAML files") app.add_typer(attach_cmd.app, name="attach", help="Document-attachment workflows (two-phase /attachments upload)") +app.add_typer(skill_init_cmd.app, name="skill", help="Generate a per-user bcli skill bundle (AIP Phase 7)") app.command(name="get")(get_cmd.get_command) app.command(name="post")(post_cmd.post_command) app.command(name="patch")(patch_cmd.patch_command) diff --git a/src/bcli_cli/commands/skill_init_cmd.py b/src/bcli_cli/commands/skill_init_cmd.py new file mode 100644 index 0000000..0a32411 --- /dev/null +++ b/src/bcli_cli/commands/skill_init_cmd.py @@ -0,0 +1,755 @@ +"""``bcli skill init`` — Phase 7 mechanism (AIP v0.1 §Phase 8). + +Per-user setup wizard that consumes ``bcli describe`` and writes a +right-sized Claude Code skill bundle plus optional new saved-query +entries. **Mechanism only** — the OSS package ships no Beautech-specific +role templates. Third-party packages (``bcli-beautech-bootstrap``) plug +in via the ``bcli.skill_init.role_templates`` entry-point group. + +Hard guarantees this module enforces: + +* Reads ``bcli describe`` via subprocess; never imports the CLI's + internals. The wizard works against any installed bcli version that + emits the AIP §Phase 1 schema. +* Writes ONLY under ``~/.config/bcli/queries/`` (append-only edits) and + ``~/.claude/skills/bcli-/`` (atomic file replacement). Any other + destination raises :class:`SkillInitError` *before* the file is opened. +* New saved-query proposals require explicit per-query ``[y/N]`` + approval. Nothing is written without the operator's consent. +* Provenance frontmatter on every generated file so a later + ``bcli skill update`` can find and regenerate just its own work. +* Atomic rollback: writes are staged in memory; if any commit step + fails, no partial file lands. +* Idempotent: an interview-state cache at + ``~/.config/bcli/skills/.last-init.json`` lets ``bcli skill update + --non-interactive`` replay the prior interview unchanged. A schema + drift (different describe ``version`` or registry hash) refuses the + silent replay and asks for an interactive re-run. + +The wizard is the first AIP consumer in bcli's own repo — proof the +contract works for a real downstream tool. +""" + +from __future__ import annotations + +import getpass +import hashlib +import json +import os +import subprocess +import tempfile +from dataclasses import asdict, dataclass +from datetime import datetime, timezone +from difflib import SequenceMatcher +from pathlib import Path +from typing import Any, Callable, Sequence + +import typer +import yaml +from rich.console import Console +from rich.prompt import Confirm, Prompt + + +app = typer.Typer(no_args_is_help=True, help="Generate a per-user bcli skill bundle") +console = Console() +_stderr = Console(stderr=True) + + +PROVENANCE_VERSION = "0.1" +_ENTRYPOINT_GROUP = "bcli.skill_init.role_templates" +_STATE_CACHE_FILENAME = ".last-init.json" + + +# ─── Errors ─────────────────────────────────────────────────────────── + + +class SkillInitError(Exception): + """Raised by the wizard's pre-flight checks and atomic commit phase. + + Two callers expect this exception: + + * Tests, which pattern-match the message. + * The Typer entry points, which translate it to ``typer.Exit(1)`` with + a user-friendly stderr render. + """ + + +# ─── Path resolution + allow-list ───────────────────────────────────── + + +def _config_dir() -> Path: + """``~/.config/bcli`` resolved from ``Path.home()``.""" + return Path.home() / ".config" / "bcli" + + +def _queries_dir() -> Path: + return _config_dir() / "queries" + + +def _skills_root() -> Path: + """``~/.claude/skills`` — the directory Claude Code reads skill + bundles from. We write into a ``bcli-/`` subdir.""" + return Path.home() / ".claude" / "skills" + + +def _state_cache_path() -> Path: + return _config_dir() / "skills" / _STATE_CACHE_FILENAME + + +def _user_skill_dir() -> Path: + """``~/.claude/skills/bcli-/`` — the only place the wizard + writes Markdown.""" + return _skills_root() / f"bcli-{getpass.getuser()}" + + +def _assert_writable(path: Path) -> None: + """Refuse any path outside the wizard's allow-list. + + Allow-list has three roots: + + * ``~/.config/bcli/queries/`` — user-facing saved queries. + * ``~/.claude/skills/bcli-*/`` — Claude Code skill bundles. + * ``~/.config/bcli/skills/`` — wizard bookkeeping (state cache for + ``skill update``). Not user content; the directory is wizard-owned. + + Resolves both ``path`` and the allow-list roots without requiring + the files to exist (``Path.resolve(strict=False)``) so the check + runs before any directory has been created. + """ + resolved = Path(path).resolve() + queries_root = _queries_dir().resolve() + skills_root = _skills_root().resolve() + wizard_state_root = (_config_dir() / "skills").resolve() + + if resolved.is_relative_to(queries_root): + return + if resolved.is_relative_to(wizard_state_root): + # Wizard's own bookkeeping directory. Only the cache file lands + # here; user content never does. + return + if resolved.is_relative_to(skills_root): + # Within skills, only ``bcli-*`` subdirs are allowed. + parts_after = resolved.relative_to(skills_root).parts + if parts_after and parts_after[0].startswith("bcli-"): + return + raise SkillInitError( + f"refusing to write to {resolved} — outside the bcli skill-init " + "allow-list (queries dir, bcli-* skill dirs, wizard state dir)" + ) + + +# ─── Describe loader ────────────────────────────────────────────────── + + +def _load_describe_payload(profile: str | None = None) -> dict[str, Any]: + """Subprocess ``bcli describe --format json`` and return parsed JSON. + + Mirrors :func:`bcli_mcp._server._load_describe_payload` but lives + here so the skill wizard doesn't depend on the optional ``[mcp]`` + extra. Production calls this; tests monkeypatch it directly. + """ + argv = ["bcli"] + if profile: + argv.extend(["--profile", profile]) + argv.extend(["describe", "--format", "json"]) + proc = subprocess.run( + argv, capture_output=True, text=True, timeout=60.0, check=False, + ) + if proc.returncode != 0: + raise SkillInitError( + f"bcli describe exited {proc.returncode}: " + f"{(proc.stderr or proc.stdout or '').strip()}" + ) + try: + return json.loads(proc.stdout) + except json.JSONDecodeError as exc: + raise SkillInitError(f"bcli describe returned non-JSON: {exc}") from exc + + +def _payload_hash(payload: dict[str, Any]) -> str: + """sha256 over the describe payload — drives the idempotency check. + + Sorting keys keeps the hash stable across describe runs that may + reorder dict members. + """ + canonical = json.dumps(payload, sort_keys=True, default=str).encode("utf-8") + return "sha256:" + hashlib.sha256(canonical).hexdigest() + + +# ─── Interview state ────────────────────────────────────────────────── + + +@dataclass(frozen=True) +class InterviewState: + """Captured interview answers. Frozen so it round-trips cleanly + through the idempotency cache without accidental mutation.""" + + role: str + top_three: str + style: str # "flat" | "meta" | "both" + generate_new: bool + + +@dataclass(frozen=True) +class ProposedQuery: + """A new saved-query proposal awaiting operator approval.""" + + name: str + body: dict[str, Any] + + +@dataclass(frozen=True) +class SurfacedSlashCommand: + """An existing saved query the wizard chose to surface for this user.""" + + query_name: str + description: str + endpoint: str + + +@dataclass(frozen=True) +class SkillPlan: + """The complete set of writes the wizard is about to commit. + + Kept frozen so we can compute the plan, run safety checks, then + commit — and a failure in the commit phase doesn't leave the plan + object in a half-mutated state. + """ + + profile: str + describe_version: str + payload_hash: str + interview: InterviewState + slash_commands: tuple[SurfacedSlashCommand, ...] + approved_new_queries: tuple[ProposedQuery, ...] + generated_at: str + target_skills_dir: Path + + +# ─── Saved-queries discovery ────────────────────────────────────────── + + +def _load_existing_queries(profile: str) -> dict[str, dict[str, Any]]: + """Read ``~/.config/bcli/queries/.yaml`` if present. + + Returns ``{}`` when the file doesn't exist or is empty so the + wizard's projection step degrades to "no slash commands surfaced" + rather than crashing. + """ + path = _queries_dir() / f"{profile}.yaml" + if not path.is_file(): + return {} + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) or {} + except yaml.YAMLError: + return {} + queries = data.get("queries", {}) + if not isinstance(queries, dict): + return {} + return queries + + +def _surface_queries_for_interests( + queries: dict[str, dict[str, Any]], + top_three: str, +) -> tuple[SurfacedSlashCommand, ...]: + """Pick the saved queries that fuzzy-match the user's free-text top-3. + + No role-keyed logic — that would smuggle Beautech-specific + affinities into the OSS package. We tokenise the top-3 phrase on + whitespace + commas, then take any query whose ``description``, + ``name``, or ``endpoint`` contains the token (or has a SequenceMatcher + similarity ≥ 0.6 to it). + """ + needles = { + tok.strip().lower() + for chunk in top_three.split(",") + for tok in chunk.split() + if tok.strip() + } + surfaced: list[SurfacedSlashCommand] = [] + seen: set[str] = set() + for name, body in sorted(queries.items()): + if name in seen: + continue + hay = " ".join([ + name.lower(), + str(body.get("description", "")).lower(), + str(body.get("endpoint", "")).lower(), + ]) + match = False + for needle in needles: + if needle and needle in hay: + match = True + break + # Substring miss → fuzzy. + ratio = SequenceMatcher(None, needle, hay).ratio() + if needle and ratio >= 0.6: + match = True + break + if match: + surfaced.append(SurfacedSlashCommand( + query_name=name, + description=str(body.get("description", "")), + endpoint=str(body.get("endpoint", "")), + )) + seen.add(name) + return tuple(surfaced) + + +# ─── Role-template proposer (entry-point dispatch) ───────────────────── + + +def _default_role_template_proposer( + interview: InterviewState, payload: dict[str, Any], +) -> list[ProposedQuery]: + """OSS default: no proposals. + + Beautech (or any third party) plugs in via the + ``bcli.skill_init.role_templates`` entry-point group — each provider + is a callable with the same signature returning a list of + :class:`ProposedQuery`. The OSS mechanism stays opinion-free; role + content lives where it belongs (downstream). + """ + return [] + + +def _collect_proposed_new_queries( + interview: InterviewState, payload: dict[str, Any], +) -> list[ProposedQuery]: + """Run every registered role-template provider and concatenate. + + Discovers providers via ``importlib.metadata.entry_points`` so the + OSS package needs zero knowledge of which downstream packages exist. + Errors loading individual providers are logged and skipped — one + broken plugin must not break the wizard. + """ + proposals: list[ProposedQuery] = [] + # OSS default first (empty list today). + proposals.extend(_default_role_template_proposer(interview, payload)) + + try: + from importlib.metadata import entry_points + eps = entry_points(group=_ENTRYPOINT_GROUP) + except Exception: # pragma: no cover — defensive + eps = [] + + for ep in eps: + try: + provider: Callable[[InterviewState, dict[str, Any]], list[ProposedQuery]] + provider = ep.load() + proposals.extend(provider(interview, payload)) + except Exception as exc: # noqa: BLE001 + _stderr.print( + f"[yellow]skill init: provider '{ep.name}' failed " + f"({exc}); skipping.[/yellow]" + ) + return proposals + + +# ─── Interview prompts ──────────────────────────────────────────────── + + +def _interview_interactively() -> InterviewState: + """Ask the four contract-doc-mandated questions via Rich prompts.""" + role = Prompt.ask( + "Role (finance / ops / aviation / sales / dev / custom)", + default="custom", + ) + top_three = Prompt.ask( + "Top three things you ask BC about daily (free text, comma-separated)", + default="", + ) + style = Prompt.ask( + "Slash command style — flat / meta / both", + default="flat", + ) + generate_new = Confirm.ask( + "Generate suggested NEW queries from your role?", + default=False, + ) + return InterviewState( + role=role.strip(), + top_three=top_three.strip(), + style=style.strip(), + generate_new=bool(generate_new), + ) + + +def _approve_each_proposal( + proposed: Sequence[ProposedQuery], +) -> tuple[ProposedQuery, ...]: + """Show each proposal and require explicit ``y/N`` per query.""" + approved: list[ProposedQuery] = [] + for q in proposed: + console.print( + f"[bold]Proposed new query:[/bold] {q.name}\n" + f" description: {q.body.get('description', '')}\n" + f" endpoint: {q.body.get('endpoint', '')}\n" + ) + if Confirm.ask(f"Approve '{q.name}'?", default=False): + approved.append(q) + return tuple(approved) + + +# ─── Provenance helpers ─────────────────────────────────────────────── + + +def _provenance_dict(plan: SkillPlan) -> dict[str, Any]: + return { + "generated-by": "bcli skill init", + "version": PROVENANCE_VERSION, + "profile": plan.profile, + "role": plan.interview.role, + "generated_at": plan.generated_at, + "source_hash": plan.payload_hash, + } + + +def _render_skill_md(plan: SkillPlan) -> str: + """Compose the SKILL.md text for this user. + + Frontmatter is YAML-formatted (Claude Code reads it that way). The + body is a short, role-keyed orientation: which existing saved + queries the user picked + a discovery-first prose pointer to + ``bcli describe`` so the agent can drill deeper without our + enumerating every entity. + """ + meta_yaml = yaml.safe_dump( + _provenance_dict(plan), sort_keys=False, default_flow_style=False, + ).strip() + lines = [ + "---", + meta_yaml, + "---", + "", + f"# bcli — {plan.interview.role} skill bundle", + "", + "Personalised entry-points for this operator's daily workflow. " + "Generated by `bcli skill init` from the live describe surface.", + "", + ] + if plan.slash_commands: + lines.append("## Existing saved queries surfaced for you") + lines.append("") + for sc in plan.slash_commands: + line = f"- `bcli q {sc.query_name}`" + if sc.description: + line += f" — {sc.description}" + if sc.endpoint: + line += f" _(endpoint: `{sc.endpoint}`)_" + lines.append(line) + lines.append("") + if plan.approved_new_queries: + lines.append("## New queries you approved") + lines.append("") + for q in plan.approved_new_queries: + desc = q.body.get("description", "") + lines.append(f"- `bcli q {q.name}` — {desc}") + lines.append("") + lines.extend([ + "## Discovering more", + "", + "Run `bcli describe --format json` to see the full surface, or", + "`bcli q` (no args) for the current saved-query catalog.", + "", + ]) + return "\n".join(lines) + "\n" + + +# ─── Atomic commit ──────────────────────────────────────────────────── + + +def _stage_queries_file_update( + plan: SkillPlan, +) -> tuple[Path, str] | None: + """Build the new ``queries.yaml`` text without writing it. + + Reads the existing file, appends each approved new query with an + inline ``provenance`` block, and returns ``(path, serialised_text)``. + Returns ``None`` when there are no new queries to commit. + """ + if not plan.approved_new_queries: + return None + path = _queries_dir() / f"{plan.profile}.yaml" + existing = _load_existing_queries(plan.profile) + if path.is_file(): + raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {} + else: + raw = {} + queries = dict(existing) + for q in plan.approved_new_queries: + body = dict(q.body) + body["provenance"] = _provenance_dict(plan) + queries[q.name] = body + raw["queries"] = queries + text = yaml.safe_dump(raw, sort_keys=False, default_flow_style=False) + return path, text + + +def _atomic_write(path: Path, data: str) -> None: + """Write ``data`` to ``path`` via tmp + os.replace. + + Mirrors :func:`bcli.result_envelope.write_envelope`'s atomicity so a + SIGKILL between write and rename never leaves a half-written file + where Claude Code would read it. + """ + _assert_writable(path) + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_name = tempfile.mkstemp(prefix=path.name + ".", dir=str(path.parent)) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(data) + os.replace(tmp_name, str(path)) + except Exception: + try: + os.unlink(tmp_name) + except OSError: + pass + raise + + +def _commit_plan(plan: SkillPlan) -> None: + """Stage every write, validate paths, then commit atomically. + + All targets are validated by :func:`_assert_writable` *before* any + file is written, so a guardrail violation aborts cleanly with no + partial state. Roll-back semantics: each staged write only happens + after every other write succeeds; if one fails, prior writes are + reverted to their pre-commit content (or deleted if they didn't + exist before). + """ + # 1. Build the in-memory plan. + queries_change = _stage_queries_file_update(plan) + skill_md_path = plan.target_skills_dir / "SKILL.md" + skill_md_text = _render_skill_md(plan) + state_cache_path = _state_cache_path() + # Persist both the interview AND the approved-query bodies so a + # later ``bcli skill update --non-interactive`` replays the same + # writes (the operator already approved them — re-asking would + # break the idempotency contract documented in §Phase 8). + state_cache_text = json.dumps({ + "describe_payload_hash": plan.payload_hash, + "describe_version": plan.describe_version, + "profile": plan.profile, + "interview": asdict(plan.interview), + "approved_new_queries": [ + {"name": q.name, "body": q.body} + for q in plan.approved_new_queries + ], + "generated_at": plan.generated_at, + }, indent=2) + + # 2. Pre-flight: every destination must be in the allow-list. All + # three writes (queries YAML, SKILL.md, state cache) are checked + # before any file is opened so a guardrail violation aborts cleanly. + _assert_writable(skill_md_path) + _assert_writable(state_cache_path) + if queries_change is not None: + _assert_writable(queries_change[0]) + + # 3. Compute rollback snapshot. + snapshots: dict[Path, str | None] = {} + targets: list[tuple[Path, str]] = [] + if queries_change is not None: + targets.append(queries_change) + targets.append((skill_md_path, skill_md_text)) + + for target_path, _new_text in targets: + snapshots[target_path] = ( + target_path.read_text(encoding="utf-8") + if target_path.is_file() else None + ) + + # 4. Commit. Roll back on first failure. + written: list[Path] = [] + try: + for target_path, new_text in targets: + _atomic_write(target_path, new_text) + written.append(target_path) + # State cache is best-effort: failure here doesn't trigger + # rollback because no user-facing artefact has changed (the + # cache is just for ``skill update`` replay). + state_cache_path.parent.mkdir(parents=True, exist_ok=True) + try: + state_cache_path.write_text(state_cache_text, encoding="utf-8") + except OSError as exc: # pragma: no cover — disk full at flush + _stderr.print( + f"[yellow]skill init: could not write state cache " + f"({exc}); future ``skill update`` may need ``--non-" + f"interactive`` re-prompting.[/yellow]" + ) + except Exception: + for target_path in written: + snap = snapshots.get(target_path) + try: + if snap is None: + target_path.unlink(missing_ok=True) + else: + target_path.write_text(snap, encoding="utf-8") + except OSError: + pass + raise + + +# ─── State cache (idempotency) ───────────────────────────────────────── + + +def _load_state_cache() -> dict[str, Any] | None: + path = _state_cache_path() + if not path.is_file(): + return None + try: + return json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return None + + +# ─── Wizard orchestration ───────────────────────────────────────────── + + +def _iso_utc_now() -> str: + return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + + +def run_wizard( + *, + profile: str | None, + target_skills_dir: Path | None, + non_interactive: bool, +) -> SkillPlan: + """Run the wizard end-to-end and return the committed plan. + + Tests call this directly with monkeypatched prompts; the Typer + entry points wrap it with output-formatting + exit-code mapping. + """ + payload = _load_describe_payload(profile=profile) + profile_name = payload.get("profile") or profile or "default" + describe_version = payload.get("version", "0.1") + payload_hash = _payload_hash(payload) + + cache: dict[str, Any] | None = None + if non_interactive: + cache = _load_state_cache() + if cache is None: + raise SkillInitError( + "no interview cache found; run ``bcli skill init`` " + "interactively first." + ) + if cache.get("describe_payload_hash") != payload_hash: + raise SkillInitError( + "describe payload changed since the last init " + "(source_hash mismatch); re-run ``bcli skill init`` " + "interactively to refresh." + ) + interview = InterviewState(**cache["interview"]) + else: + interview = _interview_interactively() + + queries = _load_existing_queries(profile_name) + slash_commands = _surface_queries_for_interests(queries, interview.top_three) + + approved: tuple[ProposedQuery, ...] + if non_interactive: + # Replay the previously-approved queries verbatim. The operator + # already consented during the prior interactive ``init``, so + # re-asking would break the documented idempotency contract. + # ``cache`` is guaranteed non-None here (set above). + assert cache is not None + cached_approved = cache.get("approved_new_queries", []) or [] + approved = tuple( + ProposedQuery(name=entry["name"], body=dict(entry["body"])) + for entry in cached_approved + ) + else: + proposed: list[ProposedQuery] = [] + if interview.generate_new: + proposed = _collect_proposed_new_queries(interview, payload) + approved = _approve_each_proposal(proposed) + + target_dir = Path(target_skills_dir) if target_skills_dir else _user_skill_dir() + + plan = SkillPlan( + profile=profile_name, + describe_version=describe_version, + payload_hash=payload_hash, + interview=interview, + slash_commands=slash_commands, + approved_new_queries=approved, + generated_at=_iso_utc_now(), + target_skills_dir=target_dir, + ) + _commit_plan(plan) + return plan + + +# ─── Typer entry points ─────────────────────────────────────────────── + + +@app.command("init") +def init_command( + profile: str | None = typer.Option( + None, "--profile", help="Profile to project (defaults to active).", + ), + target_skills_dir: Path | None = typer.Option( + None, + "--target-skills-dir", + help="Override the output skill-bundle directory (default: " + "``~/.claude/skills/bcli-/``).", + ), + non_interactive: bool = typer.Option( + False, + "--non-interactive", + help="Replay a prior interview from the local cache; refuses if " + "the describe payload changed.", + ), +) -> None: + """Interview the user and generate a per-user bcli skill bundle. + + Run once per profile to bootstrap. ``bcli skill update`` re-runs + the same flow against the current describe surface. + """ + try: + plan = run_wizard( + profile=profile, + target_skills_dir=target_skills_dir, + non_interactive=non_interactive, + ) + except SkillInitError as exc: + _stderr.print(f"[red]skill init: {exc}[/red]") + raise typer.Exit(1) from exc + + console.print( + f"[green]✓[/green] Wrote skill bundle to " + f"{plan.target_skills_dir}/SKILL.md " + f"({len(plan.slash_commands)} surfaced, " + f"{len(plan.approved_new_queries)} new query/queries)." + ) + + +@app.command("update") +def update_command( + profile: str | None = typer.Option( + None, "--profile", help="Profile to refresh (defaults to active).", + ), + non_interactive: bool = typer.Option( + False, + "--non-interactive", + help="Replay the cached interview without prompting.", + ), +) -> None: + """Re-run the wizard against the current describe surface. + + With ``--non-interactive`` the previous interview is replayed + verbatim; if the describe surface changed (different source_hash) + the command refuses so the operator notices and runs ``init``. + """ + # ``update`` and ``init`` share the same implementation; the only + # behavioural difference is the help string and the user's mental + # model. Keeping them as separate Typer commands lets us evolve them + # independently in v0.2. + init_command( + profile=profile, + target_skills_dir=None, + non_interactive=non_interactive, + ) diff --git a/tests/test_skill_init/__init__.py b/tests/test_skill_init/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_skill_init/conftest.py b/tests/test_skill_init/conftest.py new file mode 100644 index 0000000..ec72834 --- /dev/null +++ b/tests/test_skill_init/conftest.py @@ -0,0 +1,147 @@ +"""Shared fixtures for the ``bcli skill init`` wizard tests. + +The wizard reads ``bcli describe --format json``, asks 3-4 questions via +Rich prompts, and writes: + +* zero or more new saved-query entries into + ``~/.config/bcli/queries/.yaml`` (each gated by ``[y/N]``); +* a fresh ``~/.claude/skills/bcli-/SKILL.md``; +* a state cache at ``~/.config/bcli/skills/.last-init.json``. + +Every test isolates ``HOME`` to ``tmp_path`` so the suite can run in +parallel without polluting the developer's real config dirs. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +import pytest + + +_FAKE_DESCRIBE: dict[str, Any] = { + "version": "0.1", + "tool": "bcli", + "tool_version": "0.4.0", + "profile": "finance_sandbox", + "commands": [ + { + "path": ["get"], "summary": "GET records from a Business Central entity.", + "options": [], "positionals": [], "effects": ["read"], + "supported_formats": ["json"], + }, + { + "path": ["q"], "summary": "Run a saved query (no OData required)", + "options": [], "positionals": [], "effects": ["read"], + "supported_formats": ["json"], + }, + ], + "registry": { + "tier_1_custom_count": 3, + "tier_2_standard_enabled": True, + "endpoints": [ + {"entity": "customers", "tier": "standard", "domain": "standard", + "ops": ["GET", "POST"], "fields_known": 12, "fields_discovered_at": None}, + {"entity": "vendors", "tier": "custom", "domain": "finance", + "ops": ["GET", "POST"], "fields_known": 8, "fields_discovered_at": None}, + {"entity": "salesInvoices", "tier": "standard", "domain": "standard", + "ops": ["GET"], "fields_known": 0, "fields_discovered_at": None}, + ], + }, + "profile_constraints": { + "disable_writes": False, + "disable_standard_api": False, + "allowed_categories": None, + }, +} + + +_FAKE_SAVED_QUERIES: dict[str, Any] = { + "queries": { + "vendor-by-no": { + "description": "Look up a vendor by number", + "endpoint": "vendors", + "params": {"no": {"required": True}}, + "filter": "number eq '${{ params.no }}'", + }, + "open-invoices": { + "description": "Outstanding sales invoices", + "endpoint": "salesInvoices", + "filter": "status eq 'Open'", + }, + "customer-by-name": { + "description": "Customer lookup by display name", + "endpoint": "customers", + "params": {"name": {"required": True}}, + "filter": "displayName eq '${{ params.name }}'", + }, + }, +} + + +@pytest.fixture +def isolated_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + """Scope HOME (and Path.home) to tmp_path so the wizard's writes + land in test-only directories.""" + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + return tmp_path + + +@pytest.fixture +def fake_describe(monkeypatch: pytest.MonkeyPatch): + """Stub the ``bcli describe`` subprocess so the wizard sees a known + payload instead of trying to fork a real CLI.""" + def _payload(profile: str | None = None) -> dict[str, Any]: + # Return a copy so tests that mutate don't leak across cases. + return json.loads(json.dumps(_FAKE_DESCRIBE)) + + monkeypatch.setattr( + "bcli_cli.commands.skill_init_cmd._load_describe_payload", + _payload, + ) + return _FAKE_DESCRIBE + + +@pytest.fixture +def seeded_saved_queries(isolated_home: Path) -> Path: + """Drop a saved-queries YAML under the isolated config dir so the + wizard has something to project.""" + import yaml + queries_dir = isolated_home / ".config" / "bcli" / "queries" + queries_dir.mkdir(parents=True, exist_ok=True) + path = queries_dir / "finance_sandbox.yaml" + path.write_text(yaml.safe_dump(_FAKE_SAVED_QUERIES), encoding="utf-8") + return path + + +@pytest.fixture +def interview_answers(monkeypatch: pytest.MonkeyPatch): + """Inject scripted answers to the Rich prompts. Tests pass a list of + ``(prompt_substring, answer)`` pairs; the patched ``Prompt.ask`` / + ``Confirm.ask`` walks them in order.""" + scripted: list[tuple[str, str]] = [] + + def _ask(prompt: str, *, default: Any = None, **kwargs): + for needle, answer in scripted: + if needle in prompt: + return answer + return default + + def _confirm(prompt: str, *, default: bool = False, **kwargs): + for needle, answer in scripted: + if needle in prompt: + return _coerce_bool(answer) + return default + + monkeypatch.setattr("rich.prompt.Prompt.ask", _ask) + monkeypatch.setattr("rich.prompt.Confirm.ask", _confirm) + return scripted + + +def _coerce_bool(answer: Any) -> bool: + if isinstance(answer, bool): + return answer + return str(answer).strip().lower() in {"y", "yes", "true", "1"} diff --git a/tests/test_skill_init/test_skill_init_wizard.py b/tests/test_skill_init/test_skill_init_wizard.py new file mode 100644 index 0000000..9f1010d --- /dev/null +++ b/tests/test_skill_init/test_skill_init_wizard.py @@ -0,0 +1,555 @@ +"""Tests for ``bcli skill init`` — Phase 7 wizard mechanism. + +The wizard is mechanism only. No Beautech-specific role content — the +OSS package emits an *empty* set of new-query proposals by default, and +defers role-aware proposals to entry-point providers (``bcli-beautech- +bootstrap`` ships one such provider). Tests here exercise the +mechanism: read describe, interview, project existing queries, write +provenance-headed files atomically, refuse to write outside the allow- +listed dirs. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +import yaml + +from bcli_cli.commands import skill_init_cmd + + +def _read_yaml(path: Path) -> dict: + return yaml.safe_load(path.read_text(encoding="utf-8")) or {} + + +# ─── 1. Wizard reads describe ─────────────────────────────────────── + + +class TestReadDescribe: + def test_wizard_loads_describe_payload( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + """Wizard must subprocess ``bcli describe`` (mocked here) and + produce a non-empty plan. The plan reflects the profile name + the describe payload reported.""" + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor lookups, open invoices, customer search"), + ("style", "flat"), + ("Generate", "n"), + ]) + + plan = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + assert plan.profile == "finance_sandbox" # from fake_describe + assert plan.describe_version == "0.1" + + +# ─── 2. Interview flow ───────────────────────────────────────────── + + +class TestInterviewFlow: + def test_full_interview_captures_all_answers( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendors, invoices, customers"), + ("style", "flat"), + ("Generate", "n"), + ]) + + plan = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + assert plan.interview.role == "finance" + assert plan.interview.top_three == "vendors, invoices, customers" + assert plan.interview.style == "flat" + assert plan.interview.generate_new is False + + def test_style_meta_and_both_round_trip( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + interview_answers.extend([ + ("Role", "ops"), + ("daily", "stuff"), + ("style", "both"), + ("Generate", "n"), + ]) + plan = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + assert plan.interview.style == "both" + + +# ─── 3. Existing-queries projection (mechanism-only behaviour) ───── + + +class TestExistingQueriesProjection: + def test_top_three_fuzzy_matches_existing_descriptions( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + """Top-3 free text fuzzy-matches against existing saved queries' + descriptions + endpoints. ``vendor`` should match ``vendor-by-no``. + ``customer`` should match ``customer-by-name``. + + Importantly: the wizard surfaces matches; it does NOT make role- + keyed assumptions. Plain Python ``difflib`` is the matcher. + """ + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor, customer"), + ("style", "flat"), + ("Generate", "n"), + ]) + plan = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + surfaced = {sc.query_name for sc in plan.slash_commands} + assert "vendor-by-no" in surfaced + assert "customer-by-name" in surfaced + + +# ─── 4. New-query proposals require y per query ──────────────────── + + +class TestApprovalGate: + def test_new_query_proposal_y_per_query( + self, isolated_home, fake_describe, seeded_saved_queries, + interview_answers, monkeypatch, + ): + """If a role-template provider proposes a new saved query, the + user must explicitly approve it. ``N`` → not written. ``y`` → + written with provenance header.""" + # Stub an entry-point provider that proposes one new query. + from bcli_cli.commands.skill_init_cmd import ProposedQuery + + proposed = ProposedQuery( + name="vendor-recent", + body={ + "description": "Vendors created in the last 30 days", + "endpoint": "vendors", + "filter": "createdDateTime gt '2026-04-17'", + }, + ) + monkeypatch.setattr( + skill_init_cmd, "_collect_proposed_new_queries", + lambda interview, payload: [proposed], + ) + # User says: generate-new=yes, then approve the one proposal. + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendors"), + ("style", "flat"), + ("Generate", "y"), + ("Approve 'vendor-recent'", "y"), + ]) + + plan = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + approved = {q.name for q in plan.approved_new_queries} + assert approved == {"vendor-recent"} + + def test_new_query_proposal_n_skips( + self, isolated_home, fake_describe, seeded_saved_queries, + interview_answers, monkeypatch, + ): + from bcli_cli.commands.skill_init_cmd import ProposedQuery + + monkeypatch.setattr( + skill_init_cmd, "_collect_proposed_new_queries", + lambda interview, payload: [ProposedQuery( + name="vendor-recent", + body={"description": "x", "endpoint": "vendors"}, + )], + ) + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendors"), + ("style", "flat"), + ("Generate", "y"), + ("Approve 'vendor-recent'", "n"), + ]) + plan = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + assert plan.approved_new_queries == () + + +# ─── 5. Provenance header ────────────────────────────────────────── + + +class TestProvenance: + def test_skill_md_carries_provenance_frontmatter( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor, customer"), + ("style", "flat"), + ("Generate", "n"), + ]) + skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + + skills_dirs = list((isolated_home / ".claude" / "skills").glob("bcli-*")) + assert len(skills_dirs) == 1 + skill_md = skills_dirs[0] / "SKILL.md" + assert skill_md.is_file() + text = skill_md.read_text(encoding="utf-8") + # YAML-style frontmatter wrapped in --- markers. + assert text.startswith("---\n") + head, _, _body = text.partition("\n---\n") + meta = yaml.safe_load(head.lstrip("-").lstrip("\n")) + assert meta["generated-by"] == "bcli skill init" + assert meta["version"] == "0.1" + assert meta["profile"] == "finance_sandbox" + assert meta["role"] == "finance" + assert "generated_at" in meta + assert meta["source_hash"].startswith("sha256:") + + def test_appended_query_carries_inline_provenance( + self, isolated_home, fake_describe, seeded_saved_queries, + interview_answers, monkeypatch, + ): + """Existing queries in the YAML file MUST NOT be modified. New + queries are appended with a per-query ``provenance`` block so a + ``bcli skill update`` can find and regenerate just its own work.""" + from bcli_cli.commands.skill_init_cmd import ProposedQuery + + monkeypatch.setattr( + skill_init_cmd, "_collect_proposed_new_queries", + lambda interview, payload: [ProposedQuery( + name="vendor-recent", + body={"description": "x", "endpoint": "vendors"}, + )], + ) + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendors"), + ("style", "flat"), + ("Generate", "y"), + ("Approve 'vendor-recent'", "y"), + ]) + skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + + data = _read_yaml(seeded_saved_queries) + assert "vendor-recent" in data["queries"] + # Existing entries preserved unchanged. + assert "vendor-by-no" in data["queries"] + # Provenance attached to the new query only. + new_q = data["queries"]["vendor-recent"] + assert new_q.get("provenance", {}).get("generated-by") == "bcli skill init" + assert "vendor-by-no" not in data["queries"] or ( + "provenance" not in data["queries"]["vendor-by-no"] + ) + + +# ─── 6. Guardrails: never writes outside the allow-listed dirs ──── + + +class TestGuardrails: + def test_writer_refuses_path_outside_allow_list(self, isolated_home): + """Defence-in-depth: ``_assert_writable`` rejects any path that + isn't under ``~/.config/bcli/queries/`` or + ``~/.claude/skills/bcli-*/``. Tests pin the rejection path so a + future refactor that loosens it tips a red light.""" + bad_path = isolated_home / ".bashrc" + with pytest.raises(skill_init_cmd.SkillInitError, match=r"refusing to write"): + skill_init_cmd._assert_writable(bad_path) + + def test_writer_accepts_queries_path(self, isolated_home): + ok = isolated_home / ".config" / "bcli" / "queries" / "finance.yaml" + skill_init_cmd._assert_writable(ok) # no raise + + def test_writer_accepts_skill_bundle_path(self, isolated_home): + ok = isolated_home / ".claude" / "skills" / "bcli-alice" / "SKILL.md" + skill_init_cmd._assert_writable(ok) # no raise + + def test_writer_rejects_other_skill_dirs(self, isolated_home): + """Only ``bcli-*`` subdirs of ``~/.claude/skills/`` are allowed. + A path under ``~/.claude/skills/other-skill/`` is rejected.""" + bad = isolated_home / ".claude" / "skills" / "other-skill" / "SKILL.md" + with pytest.raises(skill_init_cmd.SkillInitError): + skill_init_cmd._assert_writable(bad) + + +# ─── 7. Wizard never touches profile / registry / disable_* ──────── + + +class TestReadonlyConsumption: + def test_wizard_does_not_write_to_config_toml( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + config_path = isolated_home / ".config" / "bcli" / "config.toml" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text("# untouched\n", encoding="utf-8") + + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor"), + ("style", "flat"), + ("Generate", "n"), + ]) + skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + + assert config_path.read_text(encoding="utf-8") == "# untouched\n" + + def test_wizard_does_not_write_to_registry_dir( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + reg_dir = isolated_home / ".config" / "bcli" / "registries" + reg_dir.mkdir(parents=True, exist_ok=True) + sentinel = reg_dir / "finance_sandbox.json" + sentinel.write_text("{}", encoding="utf-8") + + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor"), + ("style", "flat"), + ("Generate", "n"), + ]) + skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + + assert sentinel.read_text(encoding="utf-8") == "{}" + + +# ─── 8. Idempotency ──────────────────────────────────────────────── + + +class TestIdempotency: + def test_update_non_interactive_reuses_cached_answers( + self, isolated_home, fake_describe, seeded_saved_queries, interview_answers, + ): + """First ``init`` writes the cache; ``update --non-interactive`` + replays the same interview AND produces the same SKILL.md + content. Asserting only on the interview state would be a + tautology (both plans read the same cache); pinning the on-disk + output is the real contract.""" + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor, customer"), + ("style", "flat"), + ("Generate", "n"), + ]) + plan1 = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + cache = isolated_home / ".config" / "bcli" / "skills" / ".last-init.json" + assert cache.is_file() + + skill_md_dir = isolated_home / ".claude" / "skills" + bundle_dirs = list(skill_md_dir.glob("bcli-*")) + assert len(bundle_dirs) == 1 + skill_md_path = bundle_dirs[0] / "SKILL.md" + skill_md_before = skill_md_path.read_text(encoding="utf-8") + + # Clear scripted answers — if a prompt fires now the test fails. + interview_answers.clear() + plan2 = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=True, + ) + assert plan1.interview == plan2.interview + + # The real idempotency contract: SKILL.md is unchanged on + # replay. Frontmatter's ``generated_at`` ticks forward (that's + # fine — it's a timestamp, not a content marker), so compare + # everything BELOW the frontmatter close. + skill_md_after = skill_md_path.read_text(encoding="utf-8") + + def _body(text: str) -> str: + _head, _sep, body = text.partition("\n---\n") + return body + assert _body(skill_md_before) == _body(skill_md_after) + + def test_update_replays_previously_approved_queries( + self, isolated_home, fake_describe, seeded_saved_queries, + interview_answers, monkeypatch, + ): + """The big idempotency invariant: a prior interactive ``init`` + that approved N new queries → a later ``update --non-interactive`` + re-writes the SAME N queries with refreshed provenance. Without + this, a non-interactive replay silently drops every approved + new query from SKILL.md.""" + from bcli_cli.commands.skill_init_cmd import ProposedQuery + + # Stub two proposals so we can assert both come back on replay. + monkeypatch.setattr( + skill_init_cmd, "_collect_proposed_new_queries", + lambda interview, payload: [ + ProposedQuery(name="vendor-recent", + body={"description": "vendor recent", + "endpoint": "vendors"}), + ProposedQuery(name="invoice-late", + body={"description": "invoices past due", + "endpoint": "salesInvoices"}), + ], + ) + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendors, invoices"), + ("style", "flat"), + ("Generate", "y"), + ("Approve 'vendor-recent'", "y"), + ("Approve 'invoice-late'", "y"), + ]) + plan1 = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + assert {q.name for q in plan1.approved_new_queries} == { + "vendor-recent", "invoice-late", + } + + interview_answers.clear() # any prompt now would be a regression + plan2 = skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=True, + ) + assert {q.name for q in plan2.approved_new_queries} == { + "vendor-recent", "invoice-late", + } + # Queries YAML still carries both entries after the replay. + data = yaml.safe_load(seeded_saved_queries.read_text(encoding="utf-8")) + assert "vendor-recent" in data["queries"] + assert "invoice-late" in data["queries"] + + def test_update_reinterviews_when_describe_version_changes( + self, isolated_home, fake_describe, seeded_saved_queries, + interview_answers, monkeypatch, + ): + """If the cached payload's hash differs from the live describe, + ``update --non-interactive`` aborts with a clear error so the + operator runs ``init`` interactively to refresh.""" + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor"), + ("style", "flat"), + ("Generate", "n"), + ]) + skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + + # Mutate the describe payload — wizard should refuse to reuse + # the cache because the source hash changed. + def _new_payload(profile: str | None = None) -> dict: + payload = json.loads(json.dumps(fake_describe)) + payload["version"] = "0.2" + return payload + monkeypatch.setattr( + "bcli_cli.commands.skill_init_cmd._load_describe_payload", + _new_payload, + ) + + interview_answers.clear() # any prompt would fail in non-interactive mode + with pytest.raises(skill_init_cmd.SkillInitError, match=r"changed"): + skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=True, + ) + + +# ─── 9. Atomic rollback ──────────────────────────────────────────── + + +class TestAtomicRollback: + def test_partial_write_failure_leaves_no_files( + self, isolated_home, fake_describe, seeded_saved_queries, + interview_answers, monkeypatch, + ): + """Inject a failure into the second commit and assert nothing + was written — neither the queries YAML change nor the skill MD.""" + before_yaml = seeded_saved_queries.read_text(encoding="utf-8") + + # Patch the final commit so the SKILL.md write fails after the + # queries YAML edit has been staged. The wizard's commit phase + # must roll back the queries change. + original_commit = skill_init_cmd._commit_plan + + def _flaky_commit(plan, *args, **kwargs): + # Force a failure once the staging dict has been computed. + raise RuntimeError("disk full simulation") + + monkeypatch.setattr(skill_init_cmd, "_commit_plan", _flaky_commit) + + from bcli_cli.commands.skill_init_cmd import ProposedQuery + + monkeypatch.setattr( + skill_init_cmd, "_collect_proposed_new_queries", + lambda interview, payload: [ProposedQuery( + name="vendor-recent", + body={"description": "x", "endpoint": "vendors"}, + )], + ) + interview_answers.extend([ + ("Role", "finance"), + ("daily", "vendor"), + ("style", "flat"), + ("Generate", "y"), + ("Approve 'vendor-recent'", "y"), + ]) + + with pytest.raises(RuntimeError, match="disk full"): + skill_init_cmd.run_wizard( + profile=None, target_skills_dir=None, non_interactive=False, + ) + + # The pre-existing queries YAML is untouched. + assert seeded_saved_queries.read_text(encoding="utf-8") == before_yaml + # No SKILL.md got created. + skill_dirs = list((isolated_home / ".claude" / "skills").glob("bcli-*")) + for d in skill_dirs: + assert not (d / "SKILL.md").exists() + + # Restore for any subsequent tests in the suite. + skill_init_cmd._commit_plan = original_commit + + +# ─── 10. OSS mechanism emits no Beautech-specific role content ──── + + +class TestOssMechanismHasNoRoleContent: + """Belt-and-braces: the OSS package's default new-query proposer + returns an empty list regardless of role. Beautech (or any third + party) plugs in via the ``bcli.skill_init.role_templates`` entry + point group; nothing in this PR ships role-keyed BC entity + affinities.""" + + def test_default_proposer_empty_for_finance_role( + self, isolated_home, fake_describe, seeded_saved_queries, + interview_answers, + ): + from bcli_cli.commands.skill_init_cmd import ( + InterviewState, + _default_role_template_proposer, + ) + interview = InterviewState( + role="finance", + top_three="vendors, invoices, customers", + style="flat", + generate_new=True, + ) + proposals = _default_role_template_proposer(interview, fake_describe) + assert proposals == [] + + def test_default_proposer_empty_for_any_role( + self, isolated_home, fake_describe, interview_answers, + ): + from bcli_cli.commands.skill_init_cmd import ( + InterviewState, + _default_role_template_proposer, + ) + for role in ("ops", "aviation", "sales", "dev", "custom"): + interview = InterviewState( + role=role, top_three="x", style="flat", generate_new=True, + ) + assert _default_role_template_proposer(interview, fake_describe) == []