From 7dfe1a2eb771069a9c26f0cb4962f29a79db17ad Mon Sep 17 00:00:00 2001
From: Max Haarhaus <max@seekinsights.com>
Date: Wed, 24 Jun 2026 22:18:46 -0400
Subject: [PATCH] fix(cli-mode): fix runbook framing and permission issues for
 cli mode

---
 src/adapters/claude_cli.rs       |  19 ++-
 src/adapters/codex_cli.rs        |  22 +++-
 src/adapters/harness.rs          |  15 ++-
 src/cli/commands/pipeline.rs     |  17 ++-
 src/cli/run/dispatch.rs          |   8 +-
 src/cli/run/orchestrate/build.rs |  10 +-
 src/cli/run/runbook.rs           |   5 +-
 src/pipeline/record_runs.rs      | 200 ++++++++++++++++++++++++++++++-
 8 files changed, 266 insertions(+), 30 deletions(-)
diff --git a/src/adapters/claude_cli.rs b/src/adapters/claude_cli.rs
index 2377902..2924b49 100644
--- a/src/adapters/claude_cli.rs
+++ b/src/adapters/claude_cli.rs
@@ -10,6 +10,7 @@
 //! block on a TTY and piped task data cannot become extra prompt context.
 
 use super::cli_command::render_cli_model_arg;
+use std::path::Path;
 
 /// Copy/pasteable Claude Code dispatch command template.
 pub(crate) fn claude_exec_command_template(
@@ -56,8 +57,13 @@ pub(crate) fn claude_parallel_dispatch_recipe(
 }
 
 /// Judge dispatch recipe over `judge-tasks.json`, one `claude -p` per task.
-pub(crate) fn claude_judge_dispatch_recipe(model_flag: Option<&str>) -> String {
+///
+/// Judges run from `judge_cwd` (the iteration dir) — a common ancestor of every
+/// judge prompt, verdict `response_path`, and agent `outputs_dir`, and a dir with
+/// no write-guard hook.
+pub(crate) fn claude_judge_dispatch_recipe(model_flag: Option<&str>, judge_cwd: &Path) -> String {
     let model_flag = model_flag.unwrap_or("--model");
+    let cwd = judge_cwd.display();
     [
         "Dispatch each judge task from judge-tasks.json with:".to_string(),
         String::new(),
@@ -73,7 +79,9 @@ pub(crate) fn claude_judge_dispatch_recipe(model_flag: Option<&str>) -> String {
         "    model_arg=\"\"; [ -n \"$model\" ] && model_arg=\"".to_string()
             + model_flag
             + " $model\"",
-        "    cd <eval-root> && claude -p --output-format stream-json --verbose --permission-mode acceptEdits $model_arg \\".to_string(),
+        format!(
+            "    cd \"{cwd}\" && claude -p --output-format stream-json --verbose --permission-mode acceptEdits $model_arg \\"
+        ),
         "      \"Read the file at $prompt_path and follow it exactly. You are a judge worker only: write the JSON verdict to $response_path, then reply with one sentence. Do not run eval-magic. Do not dispatch other judge tasks. Do not wait for other workers.\" \\".to_string(),
         "      </dev/null \\".to_string(),
         "      > \"$response_base.claude-events.jsonl\" \\".to_string(),
@@ -89,6 +97,7 @@ mod tests {
     use super::{
         claude_exec_command_template, claude_judge_dispatch_recipe, claude_parallel_dispatch_recipe,
     };
+    use std::path::Path;
 
     #[test]
     fn exec_template_carries_required_stream_json_flags() {
@@ -127,10 +136,12 @@ mod tests {
     }
 
     #[test]
-    fn judge_recipe_drives_claude_p() {
-        let recipe = claude_judge_dispatch_recipe(Some("--model"));
+    fn judge_recipe_runs_from_iteration_dir() {
+        let recipe = claude_judge_dispatch_recipe(Some("--model"), Path::new("/work/iter-1"));
         assert!(recipe.contains("claude -p"), "{recipe}");
         assert!(recipe.contains("judge-tasks.json"), "{recipe}");
         assert!(recipe.contains("response_path"), "{recipe}");
+        assert!(!recipe.contains("<eval-root>"), "{recipe}");
+        assert!(recipe.contains("cd \"/work/iter-1\" &&"), "{recipe}");
     }
 }
diff --git a/src/adapters/codex_cli.rs b/src/adapters/codex_cli.rs
index 8b6935a..de64db0 100644
--- a/src/adapters/codex_cli.rs
+++ b/src/adapters/codex_cli.rs
@@ -1,6 +1,7 @@
 //! Codex CLI command rendering for `DispatchMechanism::Cli` guidance.
 
 use super::cli_command::render_cli_model_arg;
+use std::path::Path;
 
 /// Copy/pasteable Codex dispatch command template. Stdin is detached so a
 /// surrounding `xargs`/pipe cannot be treated as extra prompt context.
@@ -60,13 +61,20 @@ pub(crate) fn codex_parallel_dispatch_recipe(
     .join("\n")
 }
 
-pub(crate) fn codex_judge_dispatch_recipe(model_flag: Option<&str>, guard: bool) -> String {
+/// Judges run from `judge_cwd` (the iteration dir) — a common ancestor of every
+/// judge prompt, verdict `response_path`, and agent `outputs_dir`.
+pub(crate) fn codex_judge_dispatch_recipe(
+    model_flag: Option<&str>,
+    guard: bool,
+    judge_cwd: &Path,
+) -> String {
     let hook_trust = if guard {
         " --dangerously-bypass-hook-trust"
     } else {
         ""
     };
     let model_flag = model_flag.unwrap_or("-m");
+    let cwd = judge_cwd.display();
     [
         "Dispatch each judge task from judge-tasks.json with:".to_string(),
         String::new(),
@@ -81,7 +89,7 @@ pub(crate) fn codex_judge_dispatch_recipe(model_flag: Option<&str>, guard: bool)
         "    mkdir -p \"$(dirname \"$response_path\")\"".to_string(),
         "    if [ -n \"$model\" ]; then".to_string(),
         format!(
-            "      codex --ask-for-approval never exec --cd <eval-root> --sandbox workspace-write{hook_trust} {model_flag} \"$model\" --json \\"
+            "      codex --ask-for-approval never exec --cd \"{cwd}\" --sandbox workspace-write{hook_trust} {model_flag} \"$model\" --json \\"
         ),
         "        \"Read the file at $prompt_path and follow it exactly. You are a judge worker only: write the JSON verdict to $response_path, then reply with one sentence. Do not run eval-magic. Do not dispatch other judge tasks. Do not wait for other workers.\" \\".to_string(),
         "        </dev/null \\".to_string(),
@@ -89,7 +97,7 @@ pub(crate) fn codex_judge_dispatch_recipe(model_flag: Option<&str>, guard: bool)
         "        2> \"$response_base.codex-stderr.log\"".to_string(),
         "    else".to_string(),
         format!(
-            "      codex --ask-for-approval never exec --cd <eval-root> --sandbox workspace-write{hook_trust} --json \\"
+            "      codex --ask-for-approval never exec --cd \"{cwd}\" --sandbox workspace-write{hook_trust} --json \\"
         ),
         "        \"Read the file at $prompt_path and follow it exactly. You are a judge worker only: write the JSON verdict to $response_path, then reply with one sentence. Do not run eval-magic. Do not dispatch other judge tasks. Do not wait for other workers.\" \\".to_string(),
         "        </dev/null \\".to_string(),
@@ -107,6 +115,7 @@ mod tests {
     use super::{
         codex_exec_command_template, codex_judge_dispatch_recipe, codex_parallel_dispatch_recipe,
     };
+    use std::path::Path;
 
     #[test]
     fn exec_template_places_approval_policy_before_exec() {
@@ -133,19 +142,20 @@ mod tests {
 
     #[test]
     fn judge_recipe_places_approval_policy_before_exec() {
-        let recipe = codex_judge_dispatch_recipe(Some("-m"), true);
+        let recipe = codex_judge_dispatch_recipe(Some("-m"), true, Path::new("/work/iter-1"));
 
         assert!(
             recipe.contains(
-                "      codex --ask-for-approval never exec --cd <eval-root> --sandbox workspace-write --dangerously-bypass-hook-trust -m \"$model\" --json \\"
+                "      codex --ask-for-approval never exec --cd \"/work/iter-1\" --sandbox workspace-write --dangerously-bypass-hook-trust -m \"$model\" --json \\"
             ),
             "{recipe}"
         );
         assert!(
             recipe.contains(
-                "      codex --ask-for-approval never exec --cd <eval-root> --sandbox workspace-write --dangerously-bypass-hook-trust --json \\"
+                "      codex --ask-for-approval never exec --cd \"/work/iter-1\" --sandbox workspace-write --dangerously-bypass-hook-trust --json \\"
             ),
             "{recipe}"
         );
+        assert!(!recipe.contains("<eval-root>"), "{recipe}");
     }
 }
diff --git a/src/adapters/harness.rs b/src/adapters/harness.rs
index 725a737..07426c3 100644
--- a/src/adapters/harness.rs
+++ b/src/adapters/harness.rs
@@ -120,7 +120,7 @@ pub trait HarnessAdapter {
     /// The post-`grade` / post-`ingest` judge dispatch guidance for a
     /// [`Cli`](crate::core::DispatchMechanism::Cli)-dispatch harness. `None`
     /// leaves the generic in-session-style judge handoff in place.
-    fn cli_judge_next_steps(&self, _ctx: CliJudgeContext) -> Option<String> {
+    fn cli_judge_next_steps(&self, _ctx: CliJudgeContext<'_>) -> Option<String> {
         None
     }
 
@@ -195,8 +195,9 @@ pub struct CliManifestContext<'a> {
 
 /// Context for rendering a harness's one-shot CLI judge-dispatch guidance.
 #[derive(Debug, Clone, Copy)]
-pub struct CliJudgeContext {
+pub struct CliJudgeContext<'a> {
     pub guard: bool,
+    pub iteration_dir: &'a Path,
 }
 
 impl HarnessAdapter for ClaudeCodeAdapter {
@@ -261,8 +262,11 @@ impl HarnessAdapter for ClaudeCodeAdapter {
             String::new(),
         ])
     }
-    fn cli_judge_next_steps(&self, _ctx: CliJudgeContext) -> Option<String> {
-        Some(claude_judge_dispatch_recipe(self.cli_model_flag()))
+    fn cli_judge_next_steps(&self, ctx: CliJudgeContext<'_>) -> Option<String> {
+        Some(claude_judge_dispatch_recipe(
+            self.cli_model_flag(),
+            ctx.iteration_dir,
+        ))
     }
     fn parse_transcript(&self, path: &Path) -> io::Result<Vec<ToolInvocation>> {
         parse_transcript(path)
@@ -350,10 +354,11 @@ impl HarnessAdapter for CodexAdapter {
             String::new(),
         ])
     }
-    fn cli_judge_next_steps(&self, ctx: CliJudgeContext) -> Option<String> {
+    fn cli_judge_next_steps(&self, ctx: CliJudgeContext<'_>) -> Option<String> {
         Some(codex_judge_dispatch_recipe(
             self.cli_model_flag(),
             ctx.guard,
+            ctx.iteration_dir,
         ))
     }
     fn parse_transcript(&self, path: &Path) -> io::Result<Vec<ToolInvocation>> {
diff --git a/src/cli/commands/pipeline.rs b/src/cli/commands/pipeline.rs
index 501ae96..ab185e0 100644
--- a/src/cli/commands/pipeline.rs
+++ b/src/cli/commands/pipeline.rs
@@ -18,7 +18,11 @@ use crate::validation;
 
 const JUDGE_WORKER_PROMPT: &str = "Read the file at <dispatch_prompt_path> and follow it exactly. You are a judge worker only: write the JSON verdict to <response_path>, then reply with one sentence. Do not run eval-magic. Do not dispatch other judge tasks. Do not wait for other workers.";
 
-fn judge_dispatch_guidance(ctx: &RunContext) -> String {
+fn judge_dispatch_guidance(ctx: &RunContext, iteration: u32) -> String {
+    let iteration_dir = ctx
+        .workspace_root
+        .join(&ctx.skill_name)
+        .join(format!("iteration-{iteration}"));
     match ctx.run_mode.mechanism() {
         DispatchMechanism::InSession => {
             format!("Dispatch each task as a judge subagent with:\n  {JUDGE_WORKER_PROMPT}")
@@ -26,6 +30,7 @@ fn judge_dispatch_guidance(ctx: &RunContext) -> String {
         DispatchMechanism::Cli => adapter_for(ctx.harness)
             .cli_judge_next_steps(CliJudgeContext {
                 guard: sandbox::guard_is_armed(&ctx.stage_root),
+                iteration_dir: &iteration_dir,
             })
             .unwrap_or_else(|| {
                 format!(
@@ -107,7 +112,7 @@ pub(crate) fn run_ingest(args: CommonArgs) -> anyhow::Result<()> {
         .and_then(|s| serde_json::from_str::<serde_json::Value>(&s).ok())
         .and_then(|v| v.get("total_tasks").and_then(serde_json::Value::as_u64));
     let target_args = command_target_args(&ctx);
-    let judge_guidance = judge_dispatch_guidance(&ctx);
+    let judge_guidance = judge_dispatch_guidance(&ctx, iteration);
     match total_tasks {
         Some(0) => println!(
             "\n✅ Ingest complete — no judge dispatches needed.\nNext: eval-magic finalize{target_args} --iteration {iteration}"
@@ -351,15 +356,19 @@ pub(crate) fn run_record_runs(args: CommonArgs) -> anyhow::Result<()> {
         pipeline::record_runs(&dir, ctx.harness, mechanism, subagents_dir, args.overwrite)?;
 
     println!(
-        "\nRecorded: {}, skipped (existing run.json): {}, skipped (no final message): {}, missing transcript: {}",
+        "\nRecorded: {}, skipped (existing run.json): {}, skipped (no final message): {}, skipped (prompt unread): {}, missing transcript: {}",
         result.recorded,
         result.skipped_existing,
         result.skipped_no_final_message,
+        result.skipped_prompt_unread,
         result.missing_transcript
     );
     if let Some(warning) = result.transcript_warning(ctx.harness, mechanism) {
         eprintln!("{warning}");
     }
+    if let Some(warning) = result.prompt_unread_warning() {
+        eprintln!("{warning}");
+    }
     Ok(())
 }
 
@@ -502,7 +511,7 @@ pub(crate) fn run_grade(args: GradeArgs) -> anyhow::Result<()> {
             );
         }
         let target_args = command_target_args(&ctx);
-        let judge_guidance = judge_dispatch_guidance(&ctx);
+        let judge_guidance = judge_dispatch_guidance(&ctx, iteration);
         println!(
             "\nNext: {judge_guidance}\nThen run: eval-magic grade{target_args} --iteration {iteration} --finalize"
         );
diff --git a/src/cli/run/dispatch.rs b/src/cli/run/dispatch.rs
index 87ddf94..e610166 100644
--- a/src/cli/run/dispatch.rs
+++ b/src/cli/run/dispatch.rs
@@ -270,7 +270,7 @@ pub fn build_dispatch_task(opts: &DispatchTaskOpts) -> Result<DispatchTask, RunE
         run_record_path: cond_dir.join("run.json").to_string_lossy().into_owned(),
         timing_path: cond_dir.join("timing.json").to_string_lossy().into_owned(),
         agent_description,
-        dispatch_prompt_path: cond_dir
+        dispatch_prompt_path: Path::new(opts.outputs_dir)
             .join("dispatch-prompt.txt")
             .to_string_lossy()
             .into_owned(),
@@ -681,9 +681,11 @@ mod tests {
     }
 
     #[test]
-    fn dispatch_prompt_path_under_cond_dir() {
+    fn dispatch_prompt_path_under_outputs_dir() {
         let task = build_dispatch_task(&base_opts()).unwrap();
-        assert_eq!(task.dispatch_prompt_path, "/tmp/cond/dispatch-prompt.txt");
+        assert_eq!(task.dispatch_prompt_path, "/tmp/out/dispatch-prompt.txt");
+        assert_eq!(task.run_record_path, "/tmp/cond/run.json");
+        assert_eq!(task.timing_path, "/tmp/cond/timing.json");
     }
 
     const SAMPLE_DIRECTORY: &str = "## Active Skills Directory\n\n* **`test-driven-development`**\n  * *Trigger:* Use whenever implementing code.\n* **`systematic-debugging`**\n  * *Trigger:* Use when debugging.";
diff --git a/src/cli/run/orchestrate/build.rs b/src/cli/run/orchestrate/build.rs
index 7eefc59..37d1251 100644
--- a/src/cli/run/orchestrate/build.rs
+++ b/src/cli/run/orchestrate/build.rs
@@ -173,13 +173,13 @@ pub(super) fn write_dispatch(
                     } else {
                         (cond_dir.join(format!("run-{run_idx}")), Some(run_idx))
                     };
-                    // Create the per-run meta dir (run.json / timing.json /
-                    // dispatch-prompt.txt), which lives above the env.
+                    // Create the per-run meta dir (run.json / timing.json), which
+                    // lives above the env.
                     fs::create_dir_all(&run_dir)?;
                     // The agent-under-test's cwd is its env, so its outputs land
-                    // *inside* it — never above its sandbox.
-                    // A hidden, per-(eval, condition, run) subtree keeps concurrent
-                    // same-env subagents from colliding.
+                    // *inside* it — never above its sandbox. A hidden,
+                    // per-(eval, condition, run) subtree keeps concurrent same-env
+                    // subagents from colliding.
                     let outputs_rel = match run_index {
                         None => format!("eval-{}/{cond_name}", ev.id),
                         Some(k) => format!("eval-{}/{cond_name}/run-{k}", ev.id),
diff --git a/src/cli/run/runbook.rs b/src/cli/run/runbook.rs
index fb76b90..ac0b921 100644
--- a/src/cli/run/runbook.rs
+++ b/src/cli/run/runbook.rs
@@ -172,7 +172,10 @@ pub(crate) fn build_runbook(ctx: &RunbookContext) -> String {
                 agent_model: ctx.agent_model,
             });
             judge_recipe = adapter
-                .cli_judge_next_steps(CliJudgeContext { guard: ctx.guard })
+                .cli_judge_next_steps(CliJudgeContext {
+                    guard: ctx.guard,
+                    iteration_dir: ctx.iteration_dir,
+                })
                 .unwrap_or_else(|| {
                     "Dispatch each judge task `ingest` listed through the same harness CLI, \
                      capturing its transcript output, then finalize."
diff --git a/src/pipeline/record_runs.rs b/src/pipeline/record_runs.rs
index 3d84277..c97034b 100644
--- a/src/pipeline/record_runs.rs
+++ b/src/pipeline/record_runs.rs
@@ -31,8 +31,7 @@ struct DispatchFile {
 }
 
 /// The subset of a dispatch task record-runs consumes. `dispatch.json` carries
-/// more fields (e.g. `staged_skill_slug`, `dispatch_prompt_path`); serde ignores
-/// the extras.
+/// more fields (e.g. `staged_skill_slug`); serde ignores the extras.
 #[derive(Debug, Deserialize)]
 struct DispatchTask {
     eval_id: String,
@@ -46,6 +45,8 @@ struct DispatchTask {
     run_record_path: String,
     timing_path: String,
     agent_description: String,
+    #[serde(default)]
+    dispatch_prompt_path: String,
 }
 
 /// Tally of what record-runs did across the dispatch's tasks.
@@ -55,6 +56,7 @@ pub struct RecordRunsResult {
     pub skipped_existing: usize,
     pub skipped_no_final_message: usize,
     pub missing_transcript: usize,
+    pub skipped_prompt_unread: usize,
 }
 
 impl RecordRunsResult {
@@ -102,6 +104,24 @@ impl RecordRunsResult {
              assertions will grade unverifiable."
         ))
     }
+
+    /// A loud, actionable warning when one or more dispatches were excluded
+    /// because their transcript shows a failed read of the dispatch prompt — the
+    /// agent never received its instructions, so the result is a no-op, not data.
+    /// `None` when none were flagged.
+    pub fn prompt_unread_warning(&self) -> Option<String> {
+        if self.skipped_prompt_unread == 0 {
+            return None;
+        }
+        let n = self.skipped_prompt_unread;
+        let plural = if n == 1 { "" } else { "es" };
+        Some(format!(
+            "⚠ {n} dispatch{plural} skipped — the transcript shows a failed read of the dispatch \
+             prompt (the agent never received its instructions). These are NOT recorded, so they \
+             cannot be graded as data. Check the env/sandbox can reach each task's \
+             `dispatch_prompt_path`, then re-dispatch."
+        ))
+    }
 }
 
 /// Assemble `run.json` + `timing.json` for every task in
@@ -137,6 +157,19 @@ pub fn record_runs(
         if run_record_path.exists() && !overwrite {
             // An agent/operator already wrote this run.json — leave it untouched.
             result.skipped_existing += 1;
+        } else if let Some(summary) = &summary
+            && prompt_read_failed(
+                summary,
+                &task.dispatch_prompt_path,
+                &prompt_sentinel(&task.dispatch_prompt_path),
+            )
+        {
+            // The transcript shows the agent tried to read its prompt and the
+            // read returned an error, not the prompt — it never received its
+            // instructions. Skip both run.json and timing so the no-op can't be
+            // graded as data.
+            result.skipped_prompt_unread += 1;
+            continue;
         } else {
             let final_message_path = Path::new(&task.outputs_dir).join("final-message.md");
             let final_message = if final_message_path.exists() {
@@ -192,6 +225,58 @@ pub fn record_runs(
     Ok(result)
 }
 
+/// Positive evidence that the agent tried to read its dispatch prompt and
+/// failed: the transcript has a tool call referencing `prompt_path`, yet no such
+/// call returned the prompt's content (its distinctive first-line `sentinel`).
+///
+/// A run that never references the prompt path is NOT flagged — absence is not
+/// proof of failure (an in-session subagent can receive the prompt another way),
+/// and requiring positive evidence keeps the check free of false positives.
+/// Returns `false` when `sentinel` is empty (the prompt file was missing or
+/// unreadable, so the read cannot be judged).
+fn prompt_read_failed(summary: &TranscriptSummary, prompt_path: &str, sentinel: &str) -> bool {
+    if sentinel.is_empty() {
+        return false;
+    }
+    let mut referenced = false;
+    let mut delivered = false;
+    for inv in &summary.tool_invocations {
+        let mentions_prompt = inv
+            .args
+            .as_ref()
+            .is_some_and(|a| a.to_string().contains(prompt_path));
+        if !mentions_prompt {
+            continue;
+        }
+        referenced = true;
+        if inv
+            .result
+            .as_ref()
+            .and_then(serde_json::Value::as_str)
+            .is_some_and(|r| r.contains(sentinel))
+        {
+            delivered = true;
+        }
+    }
+    referenced && !delivered
+}
+
+/// The dispatch prompt's distinctive first non-empty line, used as the sentinel
+/// for [`prompt_read_failed`]. Empty when the prompt file is missing/unreadable.
+fn prompt_sentinel(prompt_path: &str) -> String {
+    if prompt_path.is_empty() {
+        return String::new();
+    }
+    fs::read_to_string(prompt_path)
+        .ok()
+        .and_then(|p| {
+            p.lines()
+                .find(|l| !l.trim().is_empty())
+                .map(|l| l.trim().to_string())
+        })
+        .unwrap_or_default()
+}
+
 /// Resolve a task's transcript summary, keyed on the dispatch mechanism: a
 /// `Cli`-mechanism harness reads the events file its CLI wrote under the task's
 /// outputs dir (e.g. Codex's `codex-events.jsonl`); an `InSession` harness reads
@@ -299,6 +384,117 @@ mod tests {
         fs::write(outputs_dir.join("claude-events.jsonl"), jsonl(&lines)).unwrap();
     }
 
+    /// A `claude -p` events fixture where the agent reads its dispatch prompt:
+    /// a `Read` tool call whose `input.file_path` is `prompt_path`, a
+    /// `tool_result` carrying `read_result` (the file content on success, an
+    /// error string on a denied/out-of-cwd read), and a terminal `result` event.
+    fn write_claude_events_prompt_read(
+        outputs_dir: &Path,
+        prompt_path: &str,
+        read_result: &str,
+        final_text: &str,
+    ) {
+        let lines = vec![
+            json!({"type": "system", "subtype": "init", "cwd": "/env"}),
+            json!({"type": "assistant", "message": {"id": "msg_1", "role": "assistant", "content": [{"type": "tool_use", "id": "toolu_1", "name": "Read", "input": {"file_path": prompt_path}}]}}),
+            json!({"type": "user", "message": {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "toolu_1", "content": read_result}]}}),
+            json!({"type": "result", "subtype": "success", "is_error": false, "result": final_text, "duration_ms": 30_000, "usage": {"input_tokens": 100, "output_tokens": 20, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 5}}),
+        ];
+        fs::write(outputs_dir.join("claude-events.jsonl"), jsonl(&lines)).unwrap();
+    }
+
+    const PROMPT_SENTINEL: &str =
+        "You are executing a single test case for a skill evaluation framework.";
+
+    #[test]
+    fn flags_dispatch_whose_prompt_read_failed() {
+        // A dispatch that couldn't read its prompt still exits 0 and emits a
+        // final message — but the run is a silent no-op, not data (issue #109).
+        let tmp = TempDir::new().unwrap();
+        let iter = tmp.path();
+        let paths = write_iteration(
+            iter,
+            &[FixtureTask {
+                eval_id: "e1",
+                condition: "with_skill",
+                final_message: Some("I could not read the prompt file."),
+            }],
+        );
+        let prompt_path = iter
+            .join("eval-e1")
+            .join("with_skill")
+            .join("dispatch-prompt.txt");
+        fs::write(
+            &prompt_path,
+            format!("{PROMPT_SENTINEL}\n\nUser request:\ndo it"),
+        )
+        .unwrap();
+        // The transcript shows a Read of the prompt path that ERRORED — the
+        // result is a denial, not the prompt content.
+        write_claude_events_prompt_read(
+            &paths[0].outputs_dir,
+            &prompt_path.to_string_lossy(),
+            "<tool_use_error>File is outside the allowed working directory.</tool_use_error>",
+            "I could not read the prompt file.",
+        );
+
+        let result = record_runs(
+            iter,
+            Harness::ClaudeCode,
+            DispatchMechanism::Cli,
+            None,
+            false,
+        )
+        .unwrap();
+
+        assert_eq!(result.skipped_prompt_unread, 1);
+        assert_eq!(result.recorded, 0);
+        assert!(!paths[0].run_record_path.exists());
+    }
+
+    #[test]
+    fn records_dispatch_when_prompt_read_succeeded() {
+        // The same shape, but the Read returned the prompt content (Read echoes
+        // it with a line-number prefix) — a legitimate run, recorded as data.
+        let tmp = TempDir::new().unwrap();
+        let iter = tmp.path();
+        let paths = write_iteration(
+            iter,
+            &[FixtureTask {
+                eval_id: "e1",
+                condition: "with_skill",
+                final_message: Some("Done."),
+            }],
+        );
+        let prompt_path = iter
+            .join("eval-e1")
+            .join("with_skill")
+            .join("dispatch-prompt.txt");
+        fs::write(
+            &prompt_path,
+            format!("{PROMPT_SENTINEL}\n\nUser request:\ndo it"),
+        )
+        .unwrap();
+        write_claude_events_prompt_read(
+            &paths[0].outputs_dir,
+            &prompt_path.to_string_lossy(),
+            &format!("     1→{PROMPT_SENTINEL}\n     2→\n     3→User request:"),
+            "Done.",
+        );
+
+        let result = record_runs(
+            iter,
+            Harness::ClaudeCode,
+            DispatchMechanism::Cli,
+            None,
+            false,
+        )
+        .unwrap();
+
+        assert_eq!(result.recorded, 1);
+        assert_eq!(result.skipped_prompt_unread, 0);
+    }
+
     struct FixtureTask {
         eval_id: &'static str,
         condition: &'static str,