From 08d0c5238e5fd5bff7f6a84685f4cb890f63478d Mon Sep 17 00:00:00 2001
From: moss <moss@mosss-MacBook-Pro-2.local>
Date: Sun, 24 May 2026 21:18:06 +0700
Subject: [PATCH 1/2] feat(runtime,bridge): r_t tool_outcome attribution for
 plasticity reweight

Reinforce or weaken concerns on after_tool_call when a reflex policy_id is
present; attach structured reflex metadata to tool_outcome records; document
how to verify activation score changes in the bridge README.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../openclaw-opencoat-bridge/README.md        | 13 +++-
 .../openclaw-opencoat-bridge/src/index.ts     |  2 +-
 .../src/r-t-emit.test.ts                      | 20 ++++++
 .../openclaw-opencoat-bridge/src/r-t-emit.ts  | 12 +++-
 .../src/reflex-monitor.ts                     |  7 +-
 .../credit/plasticity_engine.py               | 41 +++++++----
 .../tests/core/test_plasticity_engine.py      | 68 +++++++++++++++++++
 7 files changed, 147 insertions(+), 16 deletions(-)

diff --git a/integrations/openclaw-opencoat-bridge/README.md b/integrations/openclaw-opencoat-bridge/README.md
index fcfca92..5cad2db 100644
--- a/integrations/openclaw-opencoat-bridge/README.md
+++ b/integrations/openclaw-opencoat-bridge/README.md
@@ -362,7 +362,17 @@ fire-and-forgets structured outcome records to daemon `credit.r_t.append`:
 | `llm_output` | `llm_output` |
 | `agent_end` | `turn_complete` |
 
-Log file: `~/.opencoat/r_t.jsonl`. Each append runs warm-path **reweight** (v0.3 §3.6 subset): reflex `tool_blocked` / `deny` reinforces the matching concern (`policy_id`). Heartbeat also drains unread lines via `RtPlasticityWorker`. Inspect:
+Log file: `~/.opencoat/r_t.jsonl`. Each append runs warm-path **reweight** (v0.3 §3.6 subset):
+
+| `r_t` signal | Plasticity (when `policy_id` present) |
+| --- | --- |
+| `tool_blocked` / reflex `deny` | **reinforce** — policy did its job |
+| `tool_outcome` + `r=1` | **reinforce** — guarded tool succeeded |
+| `tool_outcome` + error / `r=0` | **weaken** — attributed tool failed |
+
+`before_tool_call` stores reflex metadata when a policy matches; `after_tool_call` emits `tool_outcome` with that `policy_id` for daemon reweight. Heartbeat also drains unread lines via `RtPlasticityWorker`.
+
+Inspect:
 
 ```bash
 curl -sS http://127.0.0.1:7878/rpc -H 'Content-Type: application/json' \
@@ -371,6 +381,7 @@ curl -sS http://127.0.0.1:7878/rpc -H 'Content-Type: application/json' \
   -d '{"jsonrpc":"2.0","method":"credit.r_t.consume","params":{},"id":2}' | python3 -m json.tool
 tail -3 ~/.opencoat/r_t.jsonl | python3 -m json.tool
 opencoat concern show demo-tool-block
+# activation_state.score should rise after tool_blocked or a successful tool_outcome (reinforce) and fall after a failed tool_outcome (weaken)
 ```
 
 Requires daemon built from repo (includes `credit.r_t.append` / `credit.r_t.consume` RPCs).
diff --git a/integrations/openclaw-opencoat-bridge/src/index.ts b/integrations/openclaw-opencoat-bridge/src/index.ts
index d9dbedf..0402210 100644
--- a/integrations/openclaw-opencoat-bridge/src/index.ts
+++ b/integrations/openclaw-opencoat-bridge/src/index.ts
@@ -303,7 +303,7 @@ async function handleHook(
                 params: decision.params,
               };
             }
-            if (decision.record) {
+            if (decision.record?.policy_id) {
               lastReflexByRunTool.set(
                 reflexToolKey(run, toolName),
                 decision.record,
diff --git a/integrations/openclaw-opencoat-bridge/src/r-t-emit.test.ts b/integrations/openclaw-opencoat-bridge/src/r-t-emit.test.ts
index 48f2f00..ebe9127 100644
--- a/integrations/openclaw-opencoat-bridge/src/r-t-emit.test.ts
+++ b/integrations/openclaw-opencoat-bridge/src/r-t-emit.test.ts
@@ -41,6 +41,26 @@ describe("r_t record builders", () => {
     assert.equal(row.signal.kind, "tool_outcome");
   });
 
+  it("builds tool_outcome with reflex policy_id for plasticity", () => {
+    const row = buildToolOutcomeRt(
+      "after_tool_call",
+      "after_tool_call",
+      ctx,
+      { toolName: "read", durationMs: 12 },
+      {
+        turn_id: "run-1",
+        action_kind: "tool_call",
+        action_name: "read",
+        decision: "rewrite",
+        policy_id: "demo-tool-block",
+        criticality: "safety_critical",
+      },
+    );
+    assert.equal(row.r, 1);
+    assert.equal(row.signal.reflex?.policy_id, "demo-tool-block");
+    assert.equal(row.signal.reflex?.decision, "rewrite");
+  });
+
   it("builds turn_complete", () => {
     const row = buildTurnCompleteRt(
       "agent_end",
diff --git a/integrations/openclaw-opencoat-bridge/src/r-t-emit.ts b/integrations/openclaw-opencoat-bridge/src/r-t-emit.ts
index 330aafe..3a99073 100644
--- a/integrations/openclaw-opencoat-bridge/src/r-t-emit.ts
+++ b/integrations/openclaw-opencoat-bridge/src/r-t-emit.ts
@@ -80,6 +80,16 @@ export function buildToolOutcomeRt(
     typeof event.durationMs === "number" ? event.durationMs : undefined;
   const blocked = reflex?.decision === "deny";
   const success = !error && !blocked;
+  const reflexPayload =
+    reflex && (reflex.policy_id || reflex.decision)
+      ? {
+          policy_id: reflex.policy_id,
+          decision: reflex.decision,
+          reason: reflex.reason,
+          criticality: reflex.criticality,
+          action_name: reflex.action_name,
+        }
+      : undefined;
 
   return {
     record_version: 1,
@@ -96,7 +106,7 @@ export function buildToolOutcomeRt(
       blocked,
       error,
       duration_ms: durationMs,
-      reflex: reflex ? { ...reflex } : undefined,
+      reflex: reflexPayload,
       payload: {
         has_result: event.result !== undefined,
       },
diff --git a/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts b/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts
index 7a12609..0617fbd 100644
--- a/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts
+++ b/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts
@@ -97,10 +97,12 @@ export class ReflexMonitor {
   mediate(action: Action, state: State): { decision: Decision; record: DecisionRecord } {
     let decision: Decision = { kind: "allow" };
     let winningPolicy: ReflexPolicy | undefined;
+    let matchedPolicyId: string | undefined;
 
     for (const policy of this.policies) {
       try {
         if (!policy.applies(action, state)) continue;
+        matchedPolicyId = policy.id;
         const next = policy.decide(action, state);
         if (next.kind === "allow") continue;
         decision = winningPolicy
@@ -138,7 +140,10 @@ export class ReflexMonitor {
       action_kind: action.kind,
       action_name: action.name,
       decision: decision.kind,
-      policy_id: decision.kind === "allow" ? undefined : decision.policy_id,
+      policy_id:
+        decision.kind === "allow"
+          ? matchedPolicyId
+          : decision.policy_id ?? matchedPolicyId,
       reason: decision.kind === "allow" ? undefined : decision.reason,
       criticality: winningPolicy?.criticality,
     };
diff --git a/packages/opencoat-runtime/opencoat_runtime_core/credit/plasticity_engine.py b/packages/opencoat-runtime/opencoat_runtime_core/credit/plasticity_engine.py
index cc16f91..acc08d5 100644
--- a/packages/opencoat-runtime/opencoat_runtime_core/credit/plasticity_engine.py
+++ b/packages/opencoat-runtime/opencoat_runtime_core/credit/plasticity_engine.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from typing import Any
 
 from opencoat_runtime_core.concern.lifecycle import ConcernLifecycleManager
 from opencoat_runtime_core.credit.r_t_record import RtRecord
@@ -83,27 +84,43 @@ def _attribute(self, record: RtRecord) -> tuple[str | None, float]:
         reflex = record.signal.reflex if isinstance(record.signal.reflex, dict) else None
         policy_id = reflex.get("policy_id") if reflex else None
         if isinstance(policy_id, str) and policy_id.strip():
-            concern_id = policy_id.strip()
-            if record.signal.kind == "tool_blocked":
-                return concern_id, +1.0
-            decision = reflex.get("decision") if reflex else None
-            if decision == "deny":
-                return concern_id, +1.0
+            return self._attribute_policy(record, concern_id=policy_id.strip(), reflex=reflex)
+
+        if record.signal.kind in {"llm_output", "turn_complete"}:
+            return None, 0.0
+
+        return None, 0.0
+
+    def _attribute_policy(
+        self,
+        record: RtRecord,
+        *,
+        concern_id: str,
+        reflex: dict[str, Any] | None,
+    ) -> tuple[str | None, float]:
+        """Attribute rows that carry a reflex ``policy_id`` (tool guard outcomes)."""
+        if record.signal.kind == "tool_blocked":
+            return concern_id, +1.0
+
+        decision = reflex.get("decision") if reflex else None
+        if decision == "deny":
+            return concern_id, +1.0
+
+        if record.signal.kind == "tool_outcome":
             advantage = record.r - record.baseline_b
             if advantage > 0:
-                return concern_id, +advantage
+                return concern_id, advantage
             if advantage < 0:
                 return concern_id, advantage
-            return None, 0.0
-
-        if record.signal.kind in {"llm_output", "turn_complete"}:
+            if record.signal.error:
+                return concern_id, -1.0
             return None, 0.0
 
         advantage = record.r - record.baseline_b
         if advantage > 0:
-            return None, 0.0
+            return concern_id, advantage
         if advantage < 0:
-            return None, 0.0
+            return concern_id, advantage
         return None, 0.0
 
 
diff --git a/packages/opencoat-runtime/tests/core/test_plasticity_engine.py b/packages/opencoat-runtime/tests/core/test_plasticity_engine.py
index 21ab4b3..c5de50a 100644
--- a/packages/opencoat-runtime/tests/core/test_plasticity_engine.py
+++ b/packages/opencoat-runtime/tests/core/test_plasticity_engine.py
@@ -72,3 +72,71 @@ def test_tool_blocked_revives_archived_concern() -> None:
     updated = store.get("demo-tool-block")
     assert updated is not None
     assert updated.lifecycle_state == "reinforced"
+
+
+def test_tool_outcome_success_reinforces_policy() -> None:
+    store = MemoryConcernStore()
+    dcn = MemoryDCNStore()
+    store.upsert(Concern(id="demo-tool-block", name="block"))
+    lifecycle = ConcernLifecycleManager(concern_store=store, dcn_store=dcn)
+    engine = PlasticityEngine(step_delta=0.1)
+
+    record = RtRecord(
+        ts=datetime(2026, 5, 24, tzinfo=UTC),
+        session_id="s1",
+        turn_id="run-1",
+        joinpoint="after_tool_call",
+        hook="after_tool_call",
+        signal=RtSignal(
+            kind="tool_outcome",
+            tool_name="read",
+            reflex={"policy_id": "demo-tool-block", "decision": "rewrite"},
+        ),
+        r=1.0,
+    )
+    stats = engine.reweight([record], concern_store=store, lifecycle=lifecycle)
+
+    assert stats.reinforced == 1
+    updated = store.get("demo-tool-block")
+    assert updated is not None
+    assert updated.activation_state is not None
+    assert updated.activation_state.score > 0.0
+
+
+def test_tool_outcome_error_weakens_policy() -> None:
+    from opencoat_runtime_protocol import ActivationState
+
+    store = MemoryConcernStore()
+    dcn = MemoryDCNStore()
+    store.upsert(
+        Concern(
+            id="demo-tool-block",
+            name="block",
+            activation_state=ActivationState(score=0.5, active=True, decay=0.0),
+            lifecycle_state="active",
+        )
+    )
+    lifecycle = ConcernLifecycleManager(concern_store=store, dcn_store=dcn)
+    engine = PlasticityEngine(step_delta=0.1)
+
+    record = RtRecord(
+        ts=datetime(2026, 5, 24, tzinfo=UTC),
+        session_id="s1",
+        turn_id="run-1",
+        joinpoint="after_tool_call",
+        hook="after_tool_call",
+        signal=RtSignal(
+            kind="tool_outcome",
+            tool_name="shell.exec",
+            error="exit 1",
+            reflex={"policy_id": "demo-tool-block", "decision": "rewrite"},
+        ),
+        r=0.0,
+    )
+    stats = engine.reweight([record], concern_store=store, lifecycle=lifecycle)
+
+    assert stats.weakened == 1
+    updated = store.get("demo-tool-block")
+    assert updated is not None
+    assert updated.activation_state is not None
+    assert updated.activation_state.score < 0.5

From 9c4d6bf5971922b23ad526e1b02dad1bf55bbe1d Mon Sep 17 00:00:00 2001
From: moss <moss@mosss-MacBook-Pro-2.local>
Date: Sun, 24 May 2026 23:27:21 +0700
Subject: [PATCH 2/2] fix(bridge): latch matched policy only after successful
 decide

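Avoid attributing r_t outcomes to a policy when advisory decide() throws
before producing a valid decision. Because the latch now follows the allow
check, a policy whose decide() returns allow is no longer latched either.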

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../src/reflex-monitor.test.ts                 | 18 ++++++++++++++++++
 .../src/reflex-monitor.ts                      |  2 +-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/integrations/openclaw-opencoat-bridge/src/reflex-monitor.test.ts b/integrations/openclaw-opencoat-bridge/src/reflex-monitor.test.ts
index 32f6762..456497e 100644
--- a/integrations/openclaw-opencoat-bridge/src/reflex-monitor.test.ts
+++ b/integrations/openclaw-opencoat-bridge/src/reflex-monitor.test.ts
@@ -78,4 +78,22 @@ describe("ReflexMonitor", () => {
     );
     assert.equal(decision.kind, "deny");
   });
+
+  it("does not latch policy_id when advisory policy throws", () => {
+    const flaky: ReflexPolicy = {
+      id: "flaky-advisory",
+      criticality: "advisory",
+      applies: () => true,
+      decide: () => {
+        throw new Error("predicate bug");
+      },
+    };
+    const monitor = new ReflexMonitor([flaky]);
+    const { decision, record } = monitor.mediate(
+      { kind: "tool_call", name: "read", args: {} },
+      state,
+    );
+    assert.equal(decision.kind, "allow");
+    assert.equal(record.policy_id, undefined);
+  });
 });
diff --git a/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts b/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts
index 0617fbd..7e1695a 100644
--- a/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts
+++ b/integrations/openclaw-opencoat-bridge/src/reflex-monitor.ts
@@ -102,9 +102,9 @@ export class ReflexMonitor {
     for (const policy of this.policies) {
       try {
         if (!policy.applies(action, state)) continue;
-        matchedPolicyId = policy.id;
         const next = policy.decide(action, state);
         if (next.kind === "allow") continue;
+        matchedPolicyId = policy.id;
         decision = winningPolicy
           ? mergeDecisions(decision, next)
           : next;