From 44841b8403c76a93040fd45913cc01c4d1c11bb2 Mon Sep 17 00:00:00 2001
From: "Strix (Claude Opus 4.6)" <noreply@anthropic.com>
Date: Sat, 11 Apr 2026 01:07:10 +0000
Subject: [PATCH] Weave 5 Whys into harness failure response paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four integration points (cycle detection landed separately in PR #80):

1. System prompt: route introspection → 5 Whys when patterns emerge,
   and route prediction misses to structured reflection
2. Post-turn failure context: when a turn ends with an error or circuit
   breaker, inject reflection guidance into the next turn's prompt
3. Prediction-review → 5 Whys bridge: SKILL.md now guides agents to
   decompose surprising misses via 5 Whys instead of just logging
   `prediction_true: false`
4. Chat history scan: prediction-review scheduled job now also scans
   recent chat for corrections, error reactions, and repeated attempts

The 5 Whys skill existed but nothing routed agents toward it when
failure happened. Now four paths converge on it: introspection findings,
prediction misses, turn failures (errors and circuit breaker stops), and
chat history patterns.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 open_strix/app.py                                | 14 ++++++++++++++
 .../builtin_skills/prediction-review/SKILL.md    | 16 ++++++++++++++++
 open_strix/config.py                             |  7 +++++++
 open_strix/prompts.py                            | 13 +++++++++++--
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/open_strix/app.py b/open_strix/app.py
index bdadda7..eef3dbc 100644
--- a/open_strix/app.py
+++ b/open_strix/app.py
@@ -370,6 +370,7 @@ def __init__(self, home: Path) -> None:
         self._send_message_similarity_streak = 0
         self._send_message_circuit_breaker_active = False
         self._send_message_warning_reaction_sent = False
+        self._last_turn_failure: str | None = None
 
         self.phone_book = load_phone_book(self.layout.phone_book_file)
         self.supervisor = Supervisor(self.layout.state_dir / "climbers")
@@ -730,6 +731,7 @@ def _render_prompt(self, event: AgentEvent) -> str:
                 "source_id": event.source_id,
                 "source_platform": event.source_platform,
             },
+            last_turn_failure=self._last_turn_failure,
         )
 
     def _load_blocks_for_prompt(self) -> list[dict[str, Any]]:
@@ -755,7 +757,14 @@ async def _event_worker(self) -> None:
             self.current_turn_start = time.monotonic()
             try:
                 await self._process_event(event)
+                self._last_turn_failure = None
             except SendMessageCircuitBreakerStop as exc:
+                self._last_turn_failure = (
+                    "Your previous turn was terminated by the send_message circuit breaker "
+                    "(repeated near-duplicate messages). Before retrying, reflect on what "
+                    "caused the loop. Consider using the five-whys skill to find the root "
+                    "cause before attempting a different approach."
+                )
                 self.log_event(
                     "warning",
                     where="event_worker",
@@ -766,6 +775,11 @@ async def _event_worker(self) -> None:
                     **_error_log_fields(exc),
                 )
             except Exception as exc:
+                self._last_turn_failure = (
+                    f"Your previous turn ended with an error: {type(exc).__name__}: {exc}. "
+                    "Before retrying, reflect on what went wrong. If this is a recurring "
+                    "failure, consider using the five-whys skill to find the structural cause."
+                )
                 reacted = False
                 if _should_react_to_error(event):
                     reacted = await self._react_to_latest_message(
diff --git a/open_strix/builtin_skills/prediction-review/SKILL.md b/open_strix/builtin_skills/prediction-review/SKILL.md
index 20666c6..1e44a1a 100644
--- a/open_strix/builtin_skills/prediction-review/SKILL.md
+++ b/open_strix/builtin_skills/prediction-review/SKILL.md
@@ -162,6 +162,22 @@ Infrastructure: X/Y (expected ~50%)
 
 If collaborative accuracy is high but observational is low, that's normal — don't let easy wins inflate your overall number. The interesting question is always: **how do I get better at the hard ones?**
 
+## When a Prediction Misses — Root Cause Reflection
+
+Logging `prediction_true: false` is the beginning, not the end. A miss means your world model was wrong about something. The interesting question is: **what structural property of your model produced the wrong prediction?**
+
+For each miss, ask:
+1. **What did I assume that turned out to be false?** (Not "I was wrong" but "I assumed X, and X wasn't true.")
+2. **Is this a one-off or a pattern?** Check your previous prediction reviews — have you missed similar predictions before?
+3. **If it's a pattern or genuinely surprising**, use the **five-whys** skill to decompose it. The prediction miss is the problem statement; the 5 Whys finds the structural gap in your world model.
+
+Don't run 5 Whys on every miss — collaborative misses and obvious infrastructure failures don't need it. Run it when:
+- An observational prediction misses and you can't immediately explain why
+- The same category of prediction keeps missing
+- The miss reveals something about how your human, your environment, or your own reasoning works that you didn't know
+
+The goal: predictions that miss should make you smarter, not just more cautious.
+
 ## Common Miss Patterns (from real data)
 
 - **Infrastructure failures are unpredictable:** "OLMoE results will come in" → FALSE (Strix was down with infinite loop bug). You can't predict other agents' uptime.
diff --git a/open_strix/config.py b/open_strix/config.py
index fddf67d..55fb4d9 100644
--- a/open_strix/config.py
+++ b/open_strix/config.py
@@ -54,6 +54,13 @@
       For each reviewed prediction, append a structured entry:
       `uv run python .open_strix_builtin_skills/scripts/prediction_review_log.py --prediction-datetime ... --is-true ... --comments ...`
       Include evidence and behavior adjustments in comments.
+      For any surprising misses or repeated miss patterns, use the five-whys skill
+      to decompose why your world model was wrong — don't just log the miss.
+
+      Also: scan your recent chat history (last 24h) for things that didn't go well —
+      corrections from your human, error reactions, repeated attempts at the same thing,
+      or moments where you changed approach. If any of these are patterns (not one-offs),
+      use the five-whys skill on them. This catches failures that predictions miss.
 """
 
 DEFAULT_INIT_BLOCK = """\
diff --git a/open_strix/prompts.py b/open_strix/prompts.py
index dcf8d79..f886bac 100644
--- a/open_strix/prompts.py
+++ b/open_strix/prompts.py
@@ -54,7 +54,8 @@
 - **Before writing to memory or files**, read the memory skill for guidance on what goes where.
 - **Periodically** (e.g., during scheduled ticks or quiet moments), review your journal predictions using the prediction-review skill.
 - **When creating new reusable workflows**, use the skill-creator skill.
-- **When something goes wrong or feels off**, use the introspection skill to query your event logs and journal before guessing at causes.
+- **When something goes wrong or feels off**, use the introspection skill to query your event logs and journal before guessing at causes. If introspection reveals a recurring pattern or structural issue, follow up with the five-whys skill to find the root cause — introspection finds *what* happened, five-whys finds *why*.
+- **When a prediction turns out wrong**, don't just log the miss. Ask *why* your world model produced the wrong prediction. If the miss is surprising or part of a pattern, use the five-whys skill to decompose it.
 - **When your human asks what you did** (or why), use the introspection skill to answer from your actual logs — not from memory, which may be incomplete or wrong.
 - Don't wait for your human to say "use the memory skill." If the moment calls for it, reach for it yourself.
 - **Never edit `.open_strix_builtin_skills/`** — these are read-only system skills managed upstream. For custom skills, use the skill-creator skill. To change system skills, PR the open-strix repo.
@@ -282,6 +283,7 @@ def render_turn_prompt(
     memory_blocks: list[dict[str, Any]],
     recent_messages: list[dict[str, Any]],
     current_event: Mapping[str, Any],
+    last_turn_failure: str | None = None,
 ) -> str:
     journals = render_journal_entries(journal_entries)
     blocks_text = render_memory_blocks(memory_blocks)
@@ -289,6 +291,13 @@ def render_turn_prompt(
     channel_context_text = render_channel_context(current_event)
     current_event_text = render_current_event(current_event)
 
+    failure_section = ""
+    if last_turn_failure:
+        failure_section = f"""
+        6) Previous turn failure:
+        {last_turn_failure}
+        """
+
     return textwrap.dedent(
         f"""\
         Context for this turn:
@@ -309,5 +318,5 @@ def render_turn_prompt(
         {current_event_text}
 
         If you need to message the user, call send_message.
-        """
+        {failure_section}"""
     )