From 44841b8403c76a93040fd45913cc01c4d1c11bb2 Mon Sep 17 00:00:00 2001 From: "Strix (Claude Opus 4.6)" Date: Sat, 11 Apr 2026 01:07:10 +0000 Subject: [PATCH] Weave 5 Whys into harness failure response paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four integration points (cycle detection was PR #80): 1. System prompt: route introspection → 5 Whys when patterns emerge, and route prediction misses to structured reflection 2. Post-turn failure context: when a turn ends with an error or a circuit-breaker stop, inject reflection guidance into the next turn's prompt 3. Prediction-review → 5 Whys bridge: SKILL.md now guides agents to decompose surprising misses via 5 Whys instead of just logging `prediction_true: false` 4. Chat history scan: prediction-review scheduled job now also scans recent chat for corrections, error reactions, and repeated attempts The 5 Whys skill existed but nothing routed agents toward it when a failure happened. Now four paths converge on it: introspection findings, prediction misses, circuit breaker events, and chat history patterns. 
Co-Authored-By: Claude Opus 4.6 --- open_strix/app.py | 14 ++++++++++++++ .../builtin_skills/prediction-review/SKILL.md | 16 ++++++++++++++++ open_strix/config.py | 7 +++++++ open_strix/prompts.py | 13 +++++++++++-- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/open_strix/app.py b/open_strix/app.py index bdadda7..eef3dbc 100644 --- a/open_strix/app.py +++ b/open_strix/app.py @@ -370,6 +370,7 @@ def __init__(self, home: Path) -> None: self._send_message_similarity_streak = 0 self._send_message_circuit_breaker_active = False self._send_message_warning_reaction_sent = False + self._last_turn_failure: str | None = None self.phone_book = load_phone_book(self.layout.phone_book_file) self.supervisor = Supervisor(self.layout.state_dir / "climbers") @@ -730,6 +731,7 @@ def _render_prompt(self, event: AgentEvent) -> str: "source_id": event.source_id, "source_platform": event.source_platform, }, + last_turn_failure=self._last_turn_failure, ) def _load_blocks_for_prompt(self) -> list[dict[str, Any]]: @@ -755,7 +757,14 @@ async def _event_worker(self) -> None: self.current_turn_start = time.monotonic() try: await self._process_event(event) + self._last_turn_failure = None except SendMessageCircuitBreakerStop as exc: + self._last_turn_failure = ( + "Your previous turn was terminated by the send_message circuit breaker " + "(repeated near-duplicate messages). Before retrying, reflect on what " + "caused the loop. Consider using the five-whys skill to find the root " + "cause before attempting a different approach." + ) self.log_event( "warning", where="event_worker", @@ -766,6 +775,11 @@ async def _event_worker(self) -> None: **_error_log_fields(exc), ) except Exception as exc: + self._last_turn_failure = ( + f"Your previous turn ended with an error: {type(exc).__name__}: {exc}. " + "Before retrying, reflect on what went wrong. If this is a recurring " + "failure, consider using the five-whys skill to find the structural cause." 
+ ) reacted = False if _should_react_to_error(event): reacted = await self._react_to_latest_message( diff --git a/open_strix/builtin_skills/prediction-review/SKILL.md b/open_strix/builtin_skills/prediction-review/SKILL.md index 20666c6..1e44a1a 100644 --- a/open_strix/builtin_skills/prediction-review/SKILL.md +++ b/open_strix/builtin_skills/prediction-review/SKILL.md @@ -162,6 +162,22 @@ Infrastructure: X/Y (expected ~50%) If collaborative accuracy is high but observational is low, that's normal — don't let easy wins inflate your overall number. The interesting question is always: **how do I get better at the hard ones?** +## When a Prediction Misses — Root Cause Reflection + +Logging `prediction_true: false` is the beginning, not the end. A miss means your world model was wrong about something. The interesting question is: **what structural property of your model produced the wrong prediction?** + +For each miss, ask: +1. **What did I assume that turned out to be false?** (Not "I was wrong" but "I assumed X, and X wasn't true.") +2. **Is this a one-off or a pattern?** Check your previous prediction reviews — have you missed similar predictions before? +3. **If it's a pattern or genuinely surprising**, use the **five-whys** skill to decompose it. The prediction miss is the problem statement; the 5 Whys finds the structural gap in your world model. + +Don't run 5 Whys on every miss — collaborative misses and obvious infrastructure failures don't need it. Run it when: +- An observational prediction misses and you can't immediately explain why +- The same category of prediction keeps missing +- The miss reveals something about how your human, your environment, or your own reasoning works that you didn't know + +The goal: predictions that miss should make you smarter, not just more cautious. + ## Common Miss Patterns (from real data) - **Infrastructure failures are unpredictable:** "OLMoE results will come in" → FALSE (Strix was down with infinite loop bug). 
You can't predict other agents' uptime. diff --git a/open_strix/config.py b/open_strix/config.py index fddf67d..55fb4d9 100644 --- a/open_strix/config.py +++ b/open_strix/config.py @@ -54,6 +54,13 @@ For each reviewed prediction, append a structured entry: `uv run python .open_strix_builtin_skills/scripts/prediction_review_log.py --prediction-datetime ... --is-true ... --comments ...` Include evidence and behavior adjustments in comments. + For any surprising misses or repeated miss patterns, use the five-whys skill + to decompose why your world model was wrong — don't just log the miss. + + Also: scan your recent chat history (last 24h) for things that didn't go well — + corrections from your human, error reactions, repeated attempts at the same thing, + or moments where you changed approach. If any of these are patterns (not one-offs), + use the five-whys skill on them. This catches failures that predictions miss. """ DEFAULT_INIT_BLOCK = """\ diff --git a/open_strix/prompts.py b/open_strix/prompts.py index dcf8d79..f886bac 100644 --- a/open_strix/prompts.py +++ b/open_strix/prompts.py @@ -54,7 +54,8 @@ - **Before writing to memory or files**, read the memory skill for guidance on what goes where. - **Periodically** (e.g., during scheduled ticks or quiet moments), review your journal predictions using the prediction-review skill. - **When creating new reusable workflows**, use the skill-creator skill. -- **When something goes wrong or feels off**, use the introspection skill to query your event logs and journal before guessing at causes. +- **When something goes wrong or feels off**, use the introspection skill to query your event logs and journal before guessing at causes. If introspection reveals a recurring pattern or structural issue, follow up with the five-whys skill to find the root cause — introspection finds *what* happened, five-whys finds *why*. +- **When a prediction turns out wrong**, don't just log the miss. 
Ask *why* your world model produced the wrong prediction. If the miss is surprising or part of a pattern, use the five-whys skill to decompose it. - **When your human asks what you did** (or why), use the introspection skill to answer from your actual logs — not from memory, which may be incomplete or wrong. - Don't wait for your human to say "use the memory skill." If the moment calls for it, reach for it yourself. - **Never edit `.open_strix_builtin_skills/`** — these are read-only system skills managed upstream. For custom skills, use the skill-creator skill. To change system skills, PR the open-strix repo. @@ -282,6 +283,7 @@ def render_turn_prompt( memory_blocks: list[dict[str, Any]], recent_messages: list[dict[str, Any]], current_event: Mapping[str, Any], + last_turn_failure: str | None = None, ) -> str: journals = render_journal_entries(journal_entries) blocks_text = render_memory_blocks(memory_blocks) @@ -289,6 +291,13 @@ def render_turn_prompt( channel_context_text = render_channel_context(current_event) current_event_text = render_current_event(current_event) + failure_section = "" + if last_turn_failure: + failure_section = f""" + 6) Previous turn failure: + {last_turn_failure} + """ + return textwrap.dedent( f"""\ Context for this turn: @@ -309,5 +318,5 @@ def render_turn_prompt( {current_event_text} If you need to message the user, call send_message. - """ + {failure_section}""" )