From f636bb75337456d92a6cf9b85b3e23697545163d Mon Sep 17 00:00:00 2001 From: Max Haarhaus Date: Sun, 21 Jun 2026 16:32:14 -0400 Subject: [PATCH] chore(cli): rename artifacts directories --- .gitignore | 4 +++ README.md | 10 ++++---- src/cli/args.rs | 4 +-- src/cli/commands/pipeline.rs | 4 +-- src/cli/commands/workspace.rs | 4 +-- src/cli/mod.rs | 6 ++--- src/cli/run/orchestrate/build.rs | 5 +--- src/cli/run/runbook.rs | 6 ++--- src/cli/run/staging/mod.rs | 2 +- src/cli/run/util.rs | 4 +-- src/core/context.rs | 4 +-- src/pipeline/detect_stray_writes.rs | 4 +-- src/sandbox/decide.rs | 15 +++++++----- src/sandbox/guard.rs | 4 +-- src/sandbox/install.rs | 4 +-- src/sandbox/policy.rs | 38 +++++++++-------------------- src/workspace/promote.rs | 4 +-- src/workspace/snapshot.rs | 4 +-- src/workspace/teardown.rs | 22 ++++++++--------- tests/cli/aggregate.rs | 2 +- tests/cli/grade.rs | 12 ++++----- tests/cli/grade_models.rs | 2 +- tests/cli/guard.rs | 8 +++--- tests/cli/stray_writes.rs | 20 +++++++-------- tests/cli/workspace.rs | 19 +++++++-------- tests/run/claude_cli.rs | 2 +- tests/run/env_layout.rs | 4 +-- tests/run/helpers.rs | 2 +- tests/run/lifecycle.rs | 8 +++--- tests/run/reset_batch.rs | 4 +-- tests/run/staging.rs | 2 +- tests/run/switch_condition.rs | 2 +- 32 files changed, 109 insertions(+), 126 deletions(-) diff --git a/.gitignore b/.gitignore index 0592392..24b97b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ /target .DS_Store + +# eval-magic run artifacts (workspace root + per-env outputs) — churn every run +.eval-magic/ +.eval-magic-outputs/ diff --git a/README.md b/README.md index 4522da0..4d8ec07 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ environment. ```bash # 1. Build the iteration's isolated env (arm --guard — see Cost & confirmation). -# run stages skills into skills-workspace/my-skill/iteration-1/env/, copies +# run stages skills into .eval-magic/my-skill/iteration-1/env/, copies # fixtures in, and writes RUNBOOK.md. It does NOT dispatch — it prints a handoff. # Add --runs to dispatch every eval N times per condition for variance # reduction (a per-eval "runs" field in evals.json overrides the flag). @@ -112,7 +112,7 @@ eval-magic ingest # armed, finalize reminds you to run teardown-guard before editing source. eval-magic finalize -# 5. Read skills-workspace/my-skill/iteration-1/benchmark.json (the prep session +# 5. Read .eval-magic/my-skill/iteration-1/benchmark.json (the prep session # resumes here), then clean up: eval-magic teardown ``` @@ -201,7 +201,7 @@ Read `validity_warnings` **before** trusting any delta — a low skill-invocatio Per skill being evaluated, the runner produces this tree (everything but `evals/evals.json` is generated): ``` -skills-workspace// # outside the skill directory, gitignore it +.eval-magic// # outside the skill directory, gitignore it snapshots/ # Mode B baselines, persist across iterations