From f636bb75337456d92a6cf9b85b3e23697545163d Mon Sep 17 00:00:00 2001
From: Max Haarhaus <samiamorwas@gmail.com>
Date: Sun, 21 Jun 2026 16:32:14 -0400
Subject: [PATCH] chore(cli): rename artifact directories

---
 .gitignore                          |  4 +++
 README.md                           | 10 ++++----
 src/cli/args.rs                     |  4 +--
 src/cli/commands/pipeline.rs        |  4 +--
 src/cli/commands/workspace.rs       |  4 +--
 src/cli/mod.rs                      |  6 ++---
 src/cli/run/orchestrate/build.rs    |  5 +---
 src/cli/run/runbook.rs              |  6 ++---
 src/cli/run/staging/mod.rs          |  2 +-
 src/cli/run/util.rs                 |  4 +--
 src/core/context.rs                 |  4 +--
 src/pipeline/detect_stray_writes.rs |  4 +--
 src/sandbox/decide.rs               | 15 +++++++-----
 src/sandbox/guard.rs                |  4 +--
 src/sandbox/install.rs              |  4 +--
 src/sandbox/policy.rs               | 38 +++++++++--------------------
 src/workspace/promote.rs            |  4 +--
 src/workspace/snapshot.rs           |  4 +--
 src/workspace/teardown.rs           | 22 ++++++++---------
 tests/cli/aggregate.rs              |  2 +-
 tests/cli/grade.rs                  | 12 ++++-----
 tests/cli/grade_models.rs           |  2 +-
 tests/cli/guard.rs                  |  8 +++---
 tests/cli/stray_writes.rs           | 20 +++++++--------
 tests/cli/workspace.rs              | 19 +++++++--------
 tests/run/claude_cli.rs             |  2 +-
 tests/run/env_layout.rs             |  4 +--
 tests/run/helpers.rs                |  2 +-
 tests/run/lifecycle.rs              |  8 +++---
 tests/run/reset_batch.rs            |  4 +--
 tests/run/staging.rs                |  2 +-
 tests/run/switch_condition.rs       |  2 +-
 32 files changed, 109 insertions(+), 126 deletions(-)
diff --git a/.gitignore b/.gitignore
index 0592392..24b97b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,6 @@
 /target
 .DS_Store
+
+# eval-magic run artifacts (workspace root + per-env outputs) — these churn on every run
+.eval-magic/
+.eval-magic-outputs/
diff --git a/README.md b/README.md
index 4522da0..4d8ec07 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ environment.
 
 ```bash
 # 1. Build the iteration's isolated env (arm --guard — see Cost & confirmation).
-#    run stages skills into skills-workspace/my-skill/iteration-1/env/, copies
+#    run stages skills into .eval-magic/my-skill/iteration-1/env/, copies
 #    fixtures in, and writes RUNBOOK.md. It does NOT dispatch — it prints a handoff.
 #    Add --runs <N> to dispatch every eval N times per condition for variance
 #    reduction (a per-eval "runs" field in evals.json overrides the flag).
@@ -112,7 +112,7 @@ eval-magic ingest
 #    armed, finalize reminds you to run teardown-guard before editing source.
 eval-magic finalize
 
-# 5. Read skills-workspace/my-skill/iteration-1/benchmark.json (the prep session
+# 5. Read .eval-magic/my-skill/iteration-1/benchmark.json (the prep session
 #    resumes here), then clean up:
 eval-magic teardown
 ```
@@ -201,7 +201,7 @@ Read `validity_warnings` **before** trusting any delta — a low skill-invocatio
 Per skill being evaluated, the runner produces this tree (everything but `evals/evals.json` is generated):
 
 ```
-skills-workspace/<skill>/                # outside the skill directory, gitignore it
+.eval-magic/<skill>/                     # outside the skill directory, gitignore it
   snapshots/                             # Mode B baselines, persist across iterations
     <label>/SKILL.md
   iteration-N/
@@ -228,7 +228,7 @@ independently and the benchmark's per-condition `mean`/`stddev`/`n` cover all of
         run-2/  outputs/  run.json  timing.json  grading.json
 ```
 
-The only source file you author for evals is `<skill>/evals/evals.json` (or create it with `eval-magic init`). Keep `skills-workspace/` out of version control — it churns on every run. Snapshot retention is manual: delete `<workspace>/<skill>/snapshots/<label>/` when no longer needed.
+The only source file you author for evals is `<skill>/evals/evals.json` (or create it with `eval-magic init`). Keep `.eval-magic/` out of version control — it churns on every run. Snapshot retention is manual: delete `<workspace>/<skill>/snapshots/<label>/` when no longer needed.
 
 ## Version-controlled baselines
 
@@ -294,7 +294,7 @@ Support today:
 
 ### Claude Code (fully wired)
 
-The run loop above *is* the Claude Code loop. By default this is the **fully-interactive** run mode (see [Run modes](#run-modes)) — subagents are dispatched in-session via the Task tool; the **hybrid** and **headless** (`claude -p`) modes are now wired too (pass `--run-mode hybrid` or `--run-mode headless`, see below). `eval-magic run` itself only *prepares* the isolated env (`skills-workspace/<skill>/iteration-N/env/`) and writes `RUNBOOK.md` into it, then prints a handoff: `cd` into `env/`, start a **fresh** Claude Code session there, and say *Read and follow RUNBOOK.md*. That fresh session — clean cwd, staged skills present at session start — drives the whole dispatch → switch-condition → ingest → finalize loop and writes `benchmark.json`, which the prep session resumes on. These are the Claude-Code-specific details:
+The run loop above *is* the Claude Code loop. By default this is the **fully-interactive** run mode (see [Run modes](#run-modes)) — subagents are dispatched in-session via the Task tool; the **hybrid** and **headless** (`claude -p`) modes are now wired too (pass `--run-mode hybrid` or `--run-mode headless`, see below). `eval-magic run` itself only *prepares* the isolated env (`.eval-magic/<skill>/iteration-N/env/`) and writes `RUNBOOK.md` into it, then prints a handoff: `cd` into `env/`, start a **fresh** Claude Code session there, and say *Read and follow RUNBOOK.md*. That fresh session — clean cwd, staged skills present at session start — drives the whole dispatch → switch-condition → ingest → finalize loop and writes `benchmark.json`, which the prep session resumes on. These are the Claude-Code-specific details:
 
 **Isolating from installed plugins.** Read this first if the skill you're evaluating shares a name with one an installed, enabled plugin provides. Subagents are dispatched via the **Task tool**, so they inherit *this session's* enabled plugins — the staging slug avoids an on-disk collision but does not stop the installed copy from being discoverable, contaminating both arms (the `without_skill` arm is then not truly skill-absent). Plugins load at session start and can't be unloaded mid-session, so the runner only *detects and warns* (the plugin-shadow banner). The isolated env gives a clean *cwd* but does not unload user/global plugins, so this still applies. To actually isolate, launch the **fresh session you start in `env/`** one of these ways — subagents inherit it:
 
diff --git a/src/cli/args.rs b/src/cli/args.rs
index 34bf654..0bdb0e7 100644
--- a/src/cli/args.rs
+++ b/src/cli/args.rs
@@ -95,7 +95,7 @@ pub struct CommonArgs {
     /// commands already carry it.
     #[arg(long)]
     pub run_mode: Option<RunMode>,
-    /// Workspace directory (defaults to `<cwd>/skills-workspace`).
+    /// Workspace directory (defaults to `<cwd>/.eval-magic`).
     ///
     /// The artifact root. Pass the same value to every command of a run, including
     /// `teardown`.
@@ -456,7 +456,7 @@ pub(crate) enum Commands {
     /// Swap the active isolation batch in a single-session isolated run.
     ///
     /// Wipes the shared `env/` working tree (keeping `.claude/skills/` and the
-    /// `.eval-magic/` outputs tree) and re-seeds it with `--group`'s fixtures — the
+    /// `.eval-magic-outputs/` tree) and re-seeds it with `--group`'s fixtures — the
     /// per-batch isolation barrier between eval groups in an interactive isolated run
     /// (see `RUNBOOK.md`). `--group` names the group you are
     /// about to dispatch next. Run it only after every Task subagent of the prior
diff --git a/src/cli/commands/pipeline.rs b/src/cli/commands/pipeline.rs
index 719d6d3..501ae96 100644
--- a/src/cli/commands/pipeline.rs
+++ b/src/cli/commands/pipeline.rs
@@ -238,7 +238,7 @@ pub(crate) fn run_switch_condition(args: SwitchConditionArgs) -> anyhow::Result<
 
 /// Swap the active isolation batch in a single-session (in-session) isolated run:
 /// wipe the shared `env/` working tree — keeping the staged skills and the
-/// `.eval-magic/` outputs tree — and re-seed it with `--group`'s fixtures, so the
+/// `.eval-magic-outputs/` tree — and re-seed it with `--group`'s fixtures, so the
 /// next batch starts from a clean tree the prior batch's fixtures and stray writes
 /// can't taint. A hard barrier: the runbook joins every Task subagent of the prior
 /// batch first. Resolves the iteration from `--workspace-dir`, so it runs from
@@ -300,7 +300,7 @@ pub(crate) fn run_reset_batch(args: ResetBatchArgs) -> anyhow::Result<()> {
         ".agents",
         ".codex",
         ".opencode",
-        ".eval-magic",
+        ".eval-magic-outputs",
         "RUNBOOK.md",
     ];
     for entry in std::fs::read_dir(&env_dir)? {
diff --git a/src/cli/commands/workspace.rs b/src/cli/commands/workspace.rs
index 289ede9..2905485 100644
--- a/src/cli/commands/workspace.rs
+++ b/src/cli/commands/workspace.rs
@@ -134,9 +134,7 @@ pub(crate) fn run_teardown(args: CommonArgs) -> anyhow::Result<()> {
         eprintln!(
             "⚠ Kept {} workspace iteration(s) with results not yet committed:\n{lines}\n   Commit them, e.g.:\n     eval-magic promote-baseline{target_args} --iteration <N>\n   or delete {}/ manually to discard.",
             ws.kept_iterations.len(),
-            Path::new("skills-workspace")
-                .join(&ctx.skill_name)
-                .display()
+            Path::new(".eval-magic").join(&ctx.skill_name).display()
         );
     }
     Ok(())
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index 95a7b45..a83aca7 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -120,7 +120,7 @@ pub(crate) fn parse_id_list(v: Option<&str>) -> Option<Vec<String>> {
 /// "Next:" commands are copy-pasteable from any cwd — not just the one `run`
 /// happened to start in. The absolute `--workspace-dir` is what lets the isolated
 /// session run `ingest`/`finalize`/`switch-condition` from `cwd = iteration-N/env/`:
-/// without it, `workspace_root` would default to `<cwd>/skills-workspace`
+/// without it, `workspace_root` would default to `<cwd>/.eval-magic`
 /// (`detect_run_context`) and the iteration tree above the env would not resolve.
 pub(crate) fn command_target_args(ctx: &RunContext) -> String {
     format!(
@@ -314,7 +314,7 @@ mod tests {
 
     /// The isolated session runs `ingest`/`finalize`/`switch-condition` from
     /// `cwd = iteration-N/env/`. Without an explicit workspace root those commands
-    /// default `workspace_root` to `<cwd>/skills-workspace` and bail "not found",
+    /// default `workspace_root` to `<cwd>/.eval-magic` and bail "not found",
     /// so the selector must carry an absolute `--workspace-dir` pointing at the
     /// real workspace above the env.
     #[test]
@@ -342,7 +342,7 @@ mod tests {
 
         // Round-trip from an env-like cwd below the workspace: feeding the
         // selector's roots back resolves the SAME workspace, not
-        // `<cwd>/skills-workspace`.
+        // `<cwd>/.eval-magic`.
         let env_like = ctx
             .workspace_root
             .join("mr-review")
diff --git a/src/cli/run/orchestrate/build.rs b/src/cli/run/orchestrate/build.rs
index 2526b9c..7eefc59 100644
--- a/src/cli/run/orchestrate/build.rs
+++ b/src/cli/run/orchestrate/build.rs
@@ -184,10 +184,7 @@ pub(super) fn write_dispatch(
                         None => format!("eval-{}/{cond_name}", ev.id),
                         Some(k) => format!("eval-{}/{cond_name}/run-{k}", ev.id),
                     };
-                    let outputs_dir = env_root
-                        .join(".eval-magic")
-                        .join("outputs")
-                        .join(outputs_rel);
+                    let outputs_dir = env_root.join(".eval-magic-outputs").join(outputs_rel);
                     fs::create_dir_all(&outputs_dir)?;
 
                     let fixtures = fixtures_by_eval
diff --git a/src/cli/run/runbook.rs b/src/cli/run/runbook.rs
index e2baeca..fb76b90 100644
--- a/src/cli/run/runbook.rs
+++ b/src/cli/run/runbook.rs
@@ -255,7 +255,7 @@ mod tests {
 
     #[test]
     fn interactive_runbook_carries_run_specifics_and_full_loop() {
-        let dir = PathBuf::from("/work/skills-workspace/widget-skill/iteration-5");
+        let dir = PathBuf::from("/work/.eval-magic/widget-skill/iteration-5");
         let book = build_runbook(&claude_ctx(&dir));
 
         // Run-specific identity.
@@ -326,7 +326,7 @@ mod tests {
 
     #[test]
     fn interactive_runbook_with_multiple_groups_carries_reset_batch_barriers() {
-        let dir = PathBuf::from("/work/skills-workspace/widget-skill/iteration-5");
+        let dir = PathBuf::from("/work/.eval-magic/widget-skill/iteration-5");
         let groups = ["g1".to_string(), "g2".to_string()];
         let book = build_runbook(&RunbookContext {
             groups: &groups,
@@ -362,7 +362,7 @@ mod tests {
 
     #[test]
     fn headless_runbook_is_human_followed_cli_recipe() {
-        let dir = PathBuf::from("/work/skills-workspace/widget-skill/iteration-2");
+        let dir = PathBuf::from("/work/.eval-magic/widget-skill/iteration-2");
         let ctx = RunbookContext {
             harness: Harness::Codex,
             run_mode: RunMode::Hybrid,
diff --git a/src/cli/run/staging/mod.rs b/src/cli/run/staging/mod.rs
index d6bea4b..d15ba80 100644
--- a/src/cli/run/staging/mod.rs
+++ b/src/cli/run/staging/mod.rs
@@ -257,7 +257,7 @@ pub fn stage_skill_for_harness(opts: &StageSkillOpts) -> Result<String, RunError
                 "SKILL.md"
                     | "evals"
                     | SNAPSHOT_META
-                    | "skills-workspace"
+                    | ".eval-magic"
                     | ".claude"
                     | ".agents"
                     | ".codex"
diff --git a/src/cli/run/util.rs b/src/cli/run/util.rs
index 31625c6..0a5738b 100644
--- a/src/cli/run/util.rs
+++ b/src/cli/run/util.rs
@@ -287,10 +287,10 @@ mod tests {
 
     #[test]
     fn isolated_handoff_points_into_env_and_at_the_runbook() {
-        let env = Path::new("/work/skills-workspace/widget/iteration-3/env");
+        let env = Path::new("/work/.eval-magic/widget/iteration-3/env");
         let handoff = insession_isolated_handoff(env);
         assert!(
-            handoff.contains("/work/skills-workspace/widget/iteration-3/env"),
+            handoff.contains("/work/.eval-magic/widget/iteration-3/env"),
             "names the env to cd into: {handoff}"
         );
         assert!(handoff.contains("cd "), "spells out the cd step: {handoff}");
diff --git a/src/core/context.rs b/src/core/context.rs
index 54754f9..9939f8c 100644
--- a/src/core/context.rs
+++ b/src/core/context.rs
@@ -207,7 +207,7 @@ pub fn detect_run_context(input: DetectInput) -> Result<RunContext, ContextError
 
     let workspace_root = match input.workspace_dir {
         Some(raw) => absolutize(&cwd, &raw)?,
-        None => cwd.join("skills-workspace"),
+        None => cwd.join(".eval-magic"),
     };
     let stage_root = cwd;
 
@@ -444,7 +444,7 @@ mod tests {
         let tmp = TempDir::new().unwrap();
         let skill_dir = make_skill_dir(tmp.path(), &["foo"]);
         let ctx = detect_run_context(input(&skill_dir, "foo")).unwrap();
-        let expected = std::env::current_dir().unwrap().join("skills-workspace");
+        let expected = std::env::current_dir().unwrap().join(".eval-magic");
         assert_eq!(ctx.workspace_root, expected);
     }
 
diff --git a/src/pipeline/detect_stray_writes.rs b/src/pipeline/detect_stray_writes.rs
index f30db32..a1a5c57 100644
--- a/src/pipeline/detect_stray_writes.rs
+++ b/src/pipeline/detect_stray_writes.rs
@@ -338,7 +338,7 @@ pub fn detect_stray_writes_report(
                 invocations_inspected += run.tool_invocations.len();
                 // `dispatch.json` is the authoritative source of the outputs
                 // boundary: an absolute path into the isolated env
-                // (`env/.eval-magic/outputs/...`). Without it we cannot honor the
+                // (`env/.eval-magic-outputs/...`). Without it we cannot honor the
                 // outputs-only contract, so we skip out-of-bounds *write*
                 // classification for that run rather than guess a boundary — the old
                 // `<slot>/outputs` convention no longer matches where agents write and
@@ -747,7 +747,7 @@ mod tests {
         let f = detect_live_source_reads(
             &[
                 inv("Read", json!({"file_path": format!("{OUTPUTS}/x.md")}), 0),
-                inv("Bash", json!({"command": "ls skills-workspace"}), 1),
+                inv("Bash", json!({"command": "ls .eval-magic"}), 1),
                 // Write tools are detect_stray_writes' jurisdiction — reads only here.
                 inv(
                     "Write",
diff --git a/src/sandbox/decide.rs b/src/sandbox/decide.rs
index 7beeb58..ba417a2 100644
--- a/src/sandbox/decide.rs
+++ b/src/sandbox/decide.rs
@@ -139,7 +139,7 @@ mod tests {
     use crate::sandbox::now_ms;
     use serde_json::json;
 
-    const ROOTS: [&str; 2] = ["/work/skills-workspace", "/work/.claude/skills"];
+    const ROOTS: [&str; 2] = ["/work/.eval-magic", "/work/.claude/skills"];
 
     /// An RFC3339 timestamp `offset_ms` from now — `future`/`past` bracket the
     /// current wall clock used by `decide`.
@@ -209,7 +209,7 @@ mod tests {
     fn allows_a_write_under_an_allowed_root() {
         let d = decide_now(
             "Write",
-            json!({ "file_path": "/work/skills-workspace/x/outputs/a.md" }),
+            json!({ "file_path": "/work/.eval-magic/x/outputs/a.md" }),
             Some(&marker()),
         );
         assert!(d.allow);
@@ -241,7 +241,7 @@ mod tests {
     fn allows_a_bash_command_scoped_to_an_allowed_root() {
         let d = decide_now(
             "Bash",
-            json!({ "command": "echo hi > /work/skills-workspace/x/outputs/log" }),
+            json!({ "command": "echo hi > /work/.eval-magic/x/outputs/log" }),
             Some(&marker()),
         );
         assert!(d.allow);
@@ -286,7 +286,7 @@ mod tests {
     fn allows_apply_patch_inside_allowed_roots() {
         let d = decide_now(
             "apply_patch",
-            json!({ "files": ["/work/skills-workspace/eval/outputs/out.md"] }),
+            json!({ "files": ["/work/.eval-magic/eval/outputs/out.md"] }),
             Some(&marker()),
         );
         assert!(d.allow);
@@ -363,10 +363,13 @@ mod tests {
     }
 
     #[test]
-    fn does_not_flag_skills_workspace_as_a_bare_skills_write() {
+    fn does_not_flag_a_skills_prefixed_dir_as_a_bare_skills_write() {
+        // A `skills`-prefixed path that is NOT an allowed root: the bare-`skills/`
+        // heuristic only fires on a bare `skills` at a path boundary, so a
+        // `skills-`-prefixed dir must not be flagged and the write is allowed.
         let d = decide_now(
             "Bash",
-            json!({ "command": "mkdir -p /work/skills-workspace/x/outputs" }),
+            json!({ "command": "mkdir -p /work/skills-data/x/outputs" }),
             Some(&marker()),
         );
         assert!(d.allow);
diff --git a/src/sandbox/guard.rs b/src/sandbox/guard.rs
index a4ff94b..51af2db 100644
--- a/src/sandbox/guard.rs
+++ b/src/sandbox/guard.rs
@@ -118,7 +118,7 @@ mod tests {
     fn marker() -> GuardMarker {
         GuardMarker {
             active: Some(true),
-            allowed_roots: Some(vec!["/work/skills-workspace".to_string()]),
+            allowed_roots: Some(vec!["/work/.eval-magic".to_string()]),
             expires_at: None,
         }
     }
@@ -170,7 +170,7 @@ mod tests {
 
     #[test]
     fn codex_apply_patch_inside_allowed_roots_allows() {
-        let payload = r#"{ "hook_event_name": "PreToolUse", "tool_name": "apply_patch", "tool_input": { "files": ["/work/skills-workspace/out.md"] } }"#;
+        let payload = r#"{ "hook_event_name": "PreToolUse", "tool_name": "apply_patch", "tool_input": { "files": ["/work/.eval-magic/out.md"] } }"#;
         assert_eq!(codex_guard_decision(payload, Some(marker())), None);
     }
 
diff --git a/src/sandbox/install.rs b/src/sandbox/install.rs
index a39754e..6f9d774 100644
--- a/src/sandbox/install.rs
+++ b/src/sandbox/install.rs
@@ -72,7 +72,7 @@ fn write_json(path: &Path, value: &Value) -> io::Result<()> {
 /// agent-under-test's cwd) and the OS temp dir. The staged skills dir
 /// (`stage_root/.claude/skills` or `.agents/skills`) and the per-task outputs dir
 /// both live *inside* `stage_root`, so a single env root covers every legitimate
-/// agent write. Scoping to the env — not the parent `skills-workspace/` — keeps the
+/// agent write. Scoping to the env — not the parent `.eval-magic/` — keeps the
 /// guard boundary identical to the isolation boundary: the agent can't reach a
 /// sibling iteration or the `iteration-N/` meta tree above its cwd. eval-magic's own
 /// above-env writes (e.g. `benchmark.json`) are not gated here: they run as
@@ -408,7 +408,7 @@ mod tests {
         let temp = absolutize(&std::env::temp_dir()).display().to_string();
         assert_eq!(roots, vec![env, temp]);
         assert!(
-            !roots.iter().any(|r| r.ends_with("skills-workspace")),
+            !roots.iter().any(|r| r.ends_with(".eval-magic")),
             "workspace_root must not be an allowed root: {roots:?}"
         );
     }
diff --git a/src/sandbox/policy.rs b/src/sandbox/policy.rs
index 6f5b99e..02e9d0a 100644
--- a/src/sandbox/policy.rs
+++ b/src/sandbox/policy.rs
@@ -51,7 +51,7 @@ static BASH_MUTATION_PATTERNS: LazyLock<Vec<(Regex, &'static str)>> = LazyLock::
             "path under .claude",
         ),
         // The same create verbs whose operand is a top-level `skills/` directory —
-        // catches a bare `skills/` left in the cwd. `skills-workspace` and other
+        // catches a bare `skills/` left in the cwd. `skills-data` and other
         // `skills`-prefixed names are excluded by the trailing `/`, whitespace, or
         // end-of-string boundary.
         (
@@ -156,7 +156,7 @@ fn absolutize(target: &str, repo_root: &Path) -> std::path::PathBuf {
 
 /// True when `target` resolves to `dir` or a descendant of it. Relative `target`s
 /// resolve against `repo_root`. `Path::starts_with` matches whole path
-/// components, so `skills-workspace2` is correctly not under `skills-workspace`.
+/// components, so `.eval-magic2` is correctly not under `.eval-magic`.
 pub fn is_under(target: &str, dir: &str, repo_root: &Path) -> bool {
     let base = absolutize(dir, repo_root);
     let abs = absolutize(target, repo_root);
@@ -189,7 +189,7 @@ mod tests {
     use super::*;
     use serde_json::json;
 
-    const ROOTS: [&str; 2] = ["/work/skills-workspace", "/work/.claude/skills"];
+    const ROOTS: [&str; 2] = ["/work/.eval-magic", "/work/.claude/skills"];
 
     fn roots() -> Vec<String> {
         ROOTS.iter().map(|s| s.to_string()).collect()
@@ -243,37 +243,21 @@ mod tests {
     #[test]
     fn is_under_matches_dir_and_descendants() {
         let repo = Path::new("/work");
+        assert!(is_under("/work/.eval-magic", "/work/.eval-magic", repo));
         assert!(is_under(
-            "/work/skills-workspace",
-            "/work/skills-workspace",
-            repo
-        ));
-        assert!(is_under(
-            "/work/skills-workspace/x/out.md",
-            "/work/skills-workspace",
-            repo
-        ));
-        assert!(!is_under(
-            "/work/runner/run.ts",
-            "/work/skills-workspace",
-            repo
-        ));
-        // `skills-workspace2` is not under `skills-workspace` (separator boundary).
-        assert!(!is_under(
-            "/work/skills-workspace2/x",
-            "/work/skills-workspace",
+            "/work/.eval-magic/x/out.md",
+            "/work/.eval-magic",
             repo
         ));
+        assert!(!is_under("/work/runner/run.ts", "/work/.eval-magic", repo));
+        // `.eval-magic2` is not under `.eval-magic` (separator boundary).
+        assert!(!is_under("/work/.eval-magic2/x", "/work/.eval-magic", repo));
     }
 
     #[test]
     fn is_under_resolves_relative_targets_against_repo_root() {
         let repo = Path::new("/work");
-        assert!(is_under(
-            "skills-workspace/x",
-            "/work/skills-workspace",
-            repo
-        ));
+        assert!(is_under(".eval-magic/x", "/work/.eval-magic", repo));
     }
 
     #[test]
@@ -303,7 +287,7 @@ mod tests {
     fn classify_bash_allows_scoped_and_readonly_commands() {
         // Textually references an allowed root → scoped → allowed.
         assert_eq!(
-            classify_bash("echo hi > /work/skills-workspace/x/log", &roots()),
+            classify_bash("echo hi > /work/.eval-magic/x/log", &roots()),
             None
         );
         assert_eq!(classify_bash("ls -la /", &roots()), None);
diff --git a/src/workspace/promote.rs b/src/workspace/promote.rs
index 4688b16..d4193fa 100644
--- a/src/workspace/promote.rs
+++ b/src/workspace/promote.rs
@@ -269,7 +269,7 @@ fn provenance(opts: &PromoteOptions, conditions: Option<&ConditionsRecord>, head
             "`eval-magic promote-baseline --iteration {}` after aggregating. The ephemeral workspace (run records, timing,",
             opts.iteration
         ),
-        "dispatch files, produced outputs) stays gitignored under `skills-workspace/`".to_string(),
+        "dispatch files, produced outputs) stays gitignored under `.eval-magic/`".to_string(),
         "and is reclaimable by `eval-magic teardown` once promoted (this commit's marker)."
             .to_string(),
         String::new(),
@@ -322,7 +322,7 @@ mod tests {
             &skill_subdir.join("SKILL.md"),
             "---\nname: mr-review\ndescription: review MRs\n---\n\nbody\n",
         );
-        let workspace_root = tmp.path().join("work").join("skills-workspace");
+        let workspace_root = tmp.path().join("work").join(".eval-magic");
         let iteration_dir = workspace_root
             .join("mr-review")
             .join(format!("iteration-{iteration}"));
diff --git a/src/workspace/snapshot.rs b/src/workspace/snapshot.rs
index 861124f..11455fa 100644
--- a/src/workspace/snapshot.rs
+++ b/src/workspace/snapshot.rs
@@ -1,7 +1,7 @@
 //! Skill snapshotting.
 //!
 //! Capture a skill's `SKILL.md` plus
-//! sibling assets into `skills-workspace/<skill>/snapshots/<label>/`, either from
+//! sibling assets into `.eval-magic/<skill>/snapshots/<label>/`, either from
 //! the working tree or — read straight from the git object database without
 //! touching the working tree — as it existed at a git ref. The
 //! `evals/` directory is always excluded; a `.snapshot-meta.json` records the
@@ -225,7 +225,7 @@ mod tests {
         // Working tree diverges to v2; the commit still holds v1.
         write(&skill_subdir.join("SKILL.md"), "v2 working tree\n");
 
-        let workspace_root = root.join("work").join("skills-workspace");
+        let workspace_root = root.join("work").join(".eval-magic");
         Repo {
             _tmp: tmp,
             skill_subdir,
diff --git a/src/workspace/teardown.rs b/src/workspace/teardown.rs
index e4920be..fb9f649 100644
--- a/src/workspace/teardown.rs
+++ b/src/workspace/teardown.rs
@@ -1,7 +1,7 @@
 //! End-of-run workspace cleanup.
 //!
 //! Reclaim a skill's ephemeral
-//! `skills-workspace/<skill>/` subtree without ever destroying results the user
+//! `.eval-magic/<skill>/` subtree without ever destroying results the user
 //! hasn't moved into version control.
 
 use std::fs;
@@ -42,7 +42,7 @@ pub struct WorkspaceCleanupSummary {
 /// The reason string attached to a kept, unpromoted iteration.
 const UNCOMMITTED_REASON: &str = "uncommitted results — not promoted to evals/baseline/";
 
-/// End-of-run cleanup of a skill's `skills-workspace/<skill>/` subtree.
+/// End-of-run cleanup of a skill's `.eval-magic/<skill>/` subtree.
 ///
 /// Per iteration: promoted (marker present) → removed; unpromoted but holding
 /// captured results → kept and reported; unpromoted scaffolding → removed. Per
@@ -251,7 +251,7 @@ mod tests {
     #[test]
     fn removes_promoted_iteration_and_prunes_workspace() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
         let iter = make_iteration(
             &ws,
@@ -277,7 +277,7 @@ mod tests {
     #[test]
     fn keeps_unpromoted_iteration_with_benchmark_and_reports_it() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
         let iter = make_iteration(
             &ws,
@@ -300,7 +300,7 @@ mod tests {
     #[test]
     fn keeps_unpromoted_iteration_with_only_a_run_record() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
         let iter = make_iteration(
             &ws,
@@ -321,7 +321,7 @@ mod tests {
     #[test]
     fn removes_unpromoted_scaffolding_only_iteration() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
         let iter = make_iteration(
             &ws,
@@ -342,7 +342,7 @@ mod tests {
     #[test]
     fn mixed_promoted_removed_kept_with_results_skill_dir_not_pruned() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
         let promoted = make_iteration(
             &ws,
@@ -377,7 +377,7 @@ mod tests {
     #[test]
     fn removes_ref_snapshots_keeps_working_tree_and_legacy() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
         let ref_snap = make_snapshot(&ws, "mr-review", "old-ref", Some("ref"));
         let wt_snap = make_snapshot(&ws, "mr-review", "wt", Some("working-tree"));
@@ -393,9 +393,9 @@ mod tests {
     }
 
     #[test]
-    fn never_touches_another_skills_workspace_and_leaves_root_intact() {
+    fn never_touches_another_skill_and_leaves_workspace_root_intact() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
         make_iteration(
             &ws,
@@ -426,7 +426,7 @@ mod tests {
     #[test]
     fn empty_summary_when_skill_has_no_workspace() {
         let tmp = TempDir::new().unwrap();
-        let ws = tmp.path().join("skills-workspace");
+        let ws = tmp.path().join(".eval-magic");
         fs::create_dir_all(&ws).unwrap();
 
         let summary = cleanup_workspace(&ws, "never-ran");
diff --git a/tests/cli/aggregate.rs b/tests/cli/aggregate.rs
index a47d97c..d7f51b4 100644
--- a/tests/cli/aggregate.rs
+++ b/tests/cli/aggregate.rs
@@ -25,7 +25,7 @@ fn setup_agg(
     let skill_md = skill_sub.join("SKILL.md").to_string_lossy().into_owned();
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     fs::create_dir_all(&iteration_dir).unwrap();
diff --git a/tests/cli/grade.rs b/tests/cli/grade.rs
index 655b3eb..6bbb434 100644
--- a/tests/cli/grade.rs
+++ b/tests/cli/grade.rs
@@ -52,7 +52,7 @@ fn grade_codex_staged_run_uses_llm_meta_check_with_skill_content() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-pos-eval").join("with_skill");
@@ -126,7 +126,7 @@ fn grade_omits_meta_check_for_negative_evals() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -202,7 +202,7 @@ fn grade_emits_and_finalizes_per_nested_run_dir() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -304,7 +304,7 @@ fn grade_fails_fast_on_malformed_run_record() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -363,7 +363,7 @@ fn grade_writes_prompt_files_and_drops_inline_prompt() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -446,7 +446,7 @@ fn grade_finalize_folds_responses_into_grading() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-pos-eval").join("with_skill");
diff --git a/tests/cli/grade_models.rs b/tests/cli/grade_models.rs
index 4e9df7a..6c21e3c 100644
--- a/tests/cli/grade_models.rs
+++ b/tests/cli/grade_models.rs
@@ -53,7 +53,7 @@ fn grade_defaults_judge_tasks_to_recorded_judge_model() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-pos-eval").join("with_skill");
diff --git a/tests/cli/guard.rs b/tests/cli/guard.rs
index 7efaee5..c05cd20 100644
--- a/tests/cli/guard.rs
+++ b/tests/cli/guard.rs
@@ -58,7 +58,7 @@ fn write_codex_armed_marker(
 #[test]
 fn guard_denies_out_of_bounds_write() {
     let tmp = TempDir::new().unwrap();
-    let marker = write_armed_marker(tmp.path(), &tmp.path().join("skills-workspace"));
+    let marker = write_armed_marker(tmp.path(), &tmp.path().join(".eval-magic"));
 
     skill_eval()
         .arg("guard")
@@ -73,7 +73,7 @@ fn guard_denies_out_of_bounds_write() {
 #[test]
 fn guard_allows_in_bounds_write() {
     let tmp = TempDir::new().unwrap();
-    let workspace = tmp.path().join("skills-workspace");
+    let workspace = tmp.path().join(".eval-magic");
     let marker = write_armed_marker(tmp.path(), &workspace);
 
     skill_eval()
@@ -91,7 +91,7 @@ fn guard_allows_in_bounds_write() {
 #[test]
 fn guard_codex_subcommand_blocks_with_codex_verdict_shape() {
     let tmp = TempDir::new().unwrap();
-    let marker = write_codex_armed_marker(tmp.path(), &tmp.path().join("skills-workspace"));
+    let marker = write_codex_armed_marker(tmp.path(), &tmp.path().join(".eval-magic"));
 
     skill_eval()
         .arg("guard-codex")
@@ -134,7 +134,7 @@ fn teardown_guard_reports_nothing_to_remove() {
 #[test]
 fn teardown_guard_removes_installed_guard() {
     let tmp = TempDir::new().unwrap();
-    write_armed_marker(tmp.path(), &tmp.path().join("skills-workspace"));
+    write_armed_marker(tmp.path(), &tmp.path().join(".eval-magic"));
 
     skill_eval()
         .arg("teardown-guard")
diff --git a/tests/cli/stray_writes.rs b/tests/cli/stray_writes.rs
index 96c284d..1e29f88 100644
--- a/tests/cli/stray_writes.rs
+++ b/tests/cli/stray_writes.rs
@@ -27,7 +27,7 @@ fn detect_stray_writes_reports_live_source_reads() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-e1").join("old_skill");
@@ -119,7 +119,7 @@ fn detect_stray_writes_flags_unverifiable_when_nothing_was_inspected() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-e1").join("old_skill");
@@ -176,7 +176,7 @@ fn detect_stray_writes_flags_unverifiable_when_nothing_was_inspected() {
 
 /// Without a `dispatch.json` outputs_dir for the run, the detector must NOT
 /// fabricate the old flat-layout boundary (`<cond_dir>/outputs`). Under the
-/// isolated env layout the agent writes into `env/.eval-magic/outputs/...`, an
+/// isolated env layout the agent writes into `env/.eval-magic-outputs/...`, an
 /// absolute path only `dispatch.json` carries; guessing the old convention would
 /// mis-flag every legitimate write as a violation. The detector instead skips
 /// out-of-bounds write classification for that run and logs why.
@@ -198,7 +198,7 @@ fn detect_stray_writes_skips_write_classification_without_dispatch_outputs_dir()
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-e1").join("old_skill");
@@ -223,8 +223,7 @@ fn detect_stray_writes_skips_write_classification_without_dispatch_outputs_dir()
     // location, which is NOT under the old `<cond_dir>/outputs` fallback path.
     let env_output = iteration_dir
         .join("env")
-        .join(".eval-magic")
-        .join("outputs")
+        .join(".eval-magic-outputs")
         .join("eval-e1")
         .join("old_skill")
         .join("answer.md")
@@ -273,7 +272,7 @@ fn detect_stray_writes_skips_write_classification_without_dispatch_outputs_dir()
 }
 
 /// With `dispatch.json` carrying the env-layout outputs_dir
-/// (`env/.eval-magic/outputs/...`), the detector classifies against that real
+/// (`env/.eval-magic-outputs/...`), the detector classifies against that real
 /// boundary: a write inside it is clean, a write elsewhere in the env (the realistic
 /// repo, outside outputs) is a violation under the outputs-only contract.
 #[test]
@@ -294,7 +293,7 @@ fn detect_stray_writes_uses_env_layout_outputs_dir_from_dispatch() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-e1").join("old_skill");
@@ -303,8 +302,7 @@ fn detect_stray_writes_uses_env_layout_outputs_dir_from_dispatch() {
     // The isolated env's outputs tree — where the agent is supposed to write.
     let outputs_dir = iteration_dir
         .join("env")
-        .join(".eval-magic")
-        .join("outputs")
+        .join(".eval-magic-outputs")
         .join("eval-e1")
         .join("old_skill");
     let in_bounds = outputs_dir.join("answer.md").to_string_lossy().into_owned();
@@ -406,7 +404,7 @@ fn detect_stray_writes_scans_nested_run_dirs_and_reports_run_index() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     let cond_dir = iteration_dir.join("eval-e1").join("old_skill");
diff --git a/tests/cli/workspace.rs b/tests/cli/workspace.rs
index b0c7980..3ccfc2c 100644
--- a/tests/cli/workspace.rs
+++ b/tests/cli/workspace.rs
@@ -40,7 +40,7 @@ fn promote_baseline_copies_artifacts_and_reports() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-2");
     let cond_dir = iteration_dir.join("eval-e1").join("with_skill");
@@ -83,7 +83,7 @@ fn promote_baseline_captures_multi_run_gradings() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-2");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -130,7 +130,7 @@ fn promote_baseline_warns_when_run_cells_missing_gradings() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-2");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -165,7 +165,7 @@ fn promote_baseline_writes_notes_stub_and_reports_it() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-1");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -203,7 +203,7 @@ fn promote_baseline_warns_when_prior_notes_retained() {
 
     let cwd = root.join("work");
     let iteration_dir = cwd
-        .join("skills-workspace")
+        .join(".eval-magic")
         .join("mr-review")
         .join("iteration-2");
     fs::create_dir_all(&iteration_dir).unwrap();
@@ -263,7 +263,7 @@ fn snapshot_working_tree_copies_and_records_provenance() {
         .success()
         .stdout(contains("Snapshotted mr-review →"));
 
-    let snap = cwd.join("skills-workspace/mr-review/snapshots/wt");
+    let snap = cwd.join(".eval-magic/mr-review/snapshots/wt");
     assert_eq!(
         fs::read_to_string(snap.join("SKILL.md")).unwrap(),
         "v2 working tree\n"
@@ -291,8 +291,7 @@ fn snapshot_defaults_to_baseline_label() {
         .stdout(contains("Snapshotted mr-review"));
 
     assert_eq!(
-        fs::read_to_string(cwd.join("skills-workspace/mr-review/snapshots/baseline/SKILL.md"))
-            .unwrap(),
+        fs::read_to_string(cwd.join(".eval-magic/mr-review/snapshots/baseline/SKILL.md")).unwrap(),
         "v2 working tree\n"
     );
 }
@@ -320,7 +319,7 @@ fn snapshot_ref_reads_committed_content() {
         .success()
         .stdout(contains("Snapshotted mr-review at HEAD →"));
 
-    let snap = cwd.join("skills-workspace/mr-review/snapshots/old");
+    let snap = cwd.join(".eval-magic/mr-review/snapshots/old");
     assert_eq!(
         fs::read_to_string(snap.join("SKILL.md")).unwrap(),
         "v1 baseline\n"
@@ -345,7 +344,7 @@ fn teardown_reclaims_promoted_and_keeps_uncommitted() {
     let (skill_dir, _skill_sub) = write_skill_md(&root, "---\nname: mr-review\n---\nbody\n");
 
     let cwd = root.join("work");
-    let skill_ws = cwd.join("skills-workspace").join("mr-review");
+    let skill_ws = cwd.join(".eval-magic").join("mr-review");
     let promoted = skill_ws.join("iteration-1");
     let kept = skill_ws.join("iteration-2");
     fs::create_dir_all(&promoted).unwrap();
diff --git a/tests/run/claude_cli.rs b/tests/run/claude_cli.rs
index 68b3f50..55a167b 100644
--- a/tests/run/claude_cli.rs
+++ b/tests/run/claude_cli.rs
@@ -219,7 +219,7 @@ fn claude_hybrid_record_runs_does_not_require_a_session_id() {
         .args(["record-runs", "--skill-dir"])
         .arg(&skill_dir)
         .args(["--skill", "mr-review", "--workspace-dir"])
-        .arg(cwd.join("skills-workspace"))
+        .arg(cwd.join(".eval-magic"))
         .args(["--harness", "claude-code", "--run-mode", "hybrid"])
         .assert()
         .success()
diff --git a/tests/run/env_layout.rs b/tests/run/env_layout.rs
index f881546..f0f6073 100644
--- a/tests/run/env_layout.rs
+++ b/tests/run/env_layout.rs
@@ -156,13 +156,13 @@ fn dispatch_outputs_live_under_env() {
     // tempdir, so a lexical starts_with would mismatch.
     let env = fs::canonicalize(env_dir(&cwd)).unwrap();
     let iter = fs::canonicalize(iteration_dir(&cwd)).unwrap();
-    let outputs_root = env.join(".eval-magic").join("outputs");
+    let outputs_root = env.join(".eval-magic-outputs");
     for task in tasks {
         // The agent-under-test (cwd = env/) writes only inside its env.
         let outputs_dir = fs::canonicalize(task["outputs_dir"].as_str().unwrap()).unwrap();
         assert!(
             outputs_dir.starts_with(&outputs_root),
-            "outputs_dir under env/.eval-magic/outputs/: {}",
+            "outputs_dir under env/.eval-magic-outputs/: {}",
             outputs_dir.display()
         );
         // run.json / timing.json are eval-magic meta: above the env, in iteration-N/.
diff --git a/tests/run/helpers.rs b/tests/run/helpers.rs
index 39c0948..1c5caab 100644
--- a/tests/run/helpers.rs
+++ b/tests/run/helpers.rs
@@ -31,7 +31,7 @@ pub fn setup(root: &Path, evals_json: &str) -> (PathBuf, PathBuf) {
 }
 
 pub fn iteration_dir(cwd: &Path) -> PathBuf {
-    cwd.join("skills-workspace")
+    cwd.join(".eval-magic")
         .join("mr-review")
         .join("iteration-1")
 }
diff --git a/tests/run/lifecycle.rs b/tests/run/lifecycle.rs
index 2177a14..f7a1b15 100644
--- a/tests/run/lifecycle.rs
+++ b/tests/run/lifecycle.rs
@@ -128,7 +128,7 @@ fn teardown_reclaims_workspace_and_env_guard() {
         .args(["--skill", "mr-review"])
         .assert()
         .success();
-    assert!(!cwd.join("skills-workspace").exists());
+    assert!(!cwd.join(".eval-magic").exists());
     assert!(!settings.exists());
     assert!(!staged.exists());
     assert!(!cwd.join(".claude").exists());
@@ -335,7 +335,7 @@ fn runs_flag_expands_dispatches_into_run_dirs() {
         // same-batch subagents can't collide; run-<k> is the leaf segment.
         let outputs_dir = task["outputs_dir"].as_str().unwrap();
         assert!(
-            outputs_dir.contains(".eval-magic/outputs/")
+            outputs_dir.contains(".eval-magic-outputs/")
                 && outputs_dir.ends_with(&format!("run-{k}")),
             "outputs not namespaced under env per run: {outputs_dir}"
         );
@@ -357,7 +357,7 @@ fn runs_flag_expands_dispatches_into_run_dirs() {
                 assert!(run_dir.is_dir(), "missing meta run dir {run_dir:?}");
                 // Per-run outputs dir inside the env.
                 let out_dir = env_dir(&cwd)
-                    .join(".eval-magic/outputs")
+                    .join(".eval-magic-outputs")
                     .join(format!("eval-{eval}"))
                     .join(cond)
                     .join(format!("run-{k}"));
@@ -397,7 +397,7 @@ fn runs_one_keeps_flat_single_run_layout() {
     assert!(cond_dir.is_dir());
     assert!(!cond_dir.join("run-1").exists());
     // Outputs live inside the env, flat (no run-1/ segment) for a single-run cell.
-    let out_dir = env_dir(&cwd).join(".eval-magic/outputs/eval-e1/with_skill");
+    let out_dir = env_dir(&cwd).join(".eval-magic-outputs/eval-e1/with_skill");
     assert!(out_dir.is_dir());
     assert!(!out_dir.join("run-1").exists());
 }
diff --git a/tests/run/reset_batch.rs b/tests/run/reset_batch.rs
index 142a8d9..f2b7247 100644
--- a/tests/run/reset_batch.rs
+++ b/tests/run/reset_batch.rs
@@ -41,7 +41,7 @@ fn reset_to(cwd: &Path, skill_dir: &Path, group: &str) -> assert_cmd::assert::As
         .args(["reset-batch", "--skill-dir"])
         .arg(skill_dir)
         .args(["--skill", "mr-review", "--workspace-dir"])
-        .arg(cwd.join("skills-workspace"))
+        .arg(cwd.join(".eval-magic"))
         .args(["--iteration", "1", "--group", group])
         .assert()
 }
@@ -73,7 +73,7 @@ fn reset_batch_wipes_working_tree_and_reseeds_group_fixtures() {
             .is_dir(),
         "the staged skill survives reset-batch"
     );
-    assert!(env_dir(&cwd).join(".eval-magic/outputs").exists());
+    assert!(env_dir(&cwd).join(".eval-magic-outputs").exists());
 }
 
 #[test]
diff --git a/tests/run/staging.rs b/tests/run/staging.rs
index 7b1a401..0db4828 100644
--- a/tests/run/staging.rs
+++ b/tests/run/staging.rs
@@ -29,7 +29,7 @@ fn setup_direct_skill(root: &Path) -> (PathBuf, PathBuf, PathBuf) {
 }
 
 fn direct_iteration_dir(cwd: &Path) -> PathBuf {
-    cwd.join("skills-workspace")
+    cwd.join(".eval-magic")
         .join("mr-review")
         .join("iteration-1")
 }
diff --git a/tests/run/switch_condition.rs b/tests/run/switch_condition.rs
index 901d829..cd39279 100644
--- a/tests/run/switch_condition.rs
+++ b/tests/run/switch_condition.rs
@@ -20,7 +20,7 @@ fn switch_to(cwd: &Path, skill_dir: &Path, condition: &str) -> assert_cmd::asser
         .args(["switch-condition", "--skill-dir"])
         .arg(skill_dir)
         .args(["--skill", "mr-review", "--workspace-dir"])
-        .arg(cwd.join("skills-workspace"))
+        .arg(cwd.join(".eval-magic"))
         .args(["--iteration", "1", "--condition", condition])
         .assert()
 }