From 5fd6138d836d9383afb89ba7f34e002df64448ee Mon Sep 17 00:00:00 2001 From: Foo Bender Bot Date: Sat, 21 Feb 2026 20:21:12 +0000 Subject: [PATCH 1/3] feat(policy): add trigger-based runtime policy helper and tool gates --- includes/helpers/class-abilities-helper.php | 120 +++++++++++++- includes/helpers/class-chat-helper.php | 24 ++- includes/helpers/class-policy-helper.php | 164 ++++++++++++++++++++ tests/Unit/AbilitiesHelperTest.php | 45 ++++++ tests/Unit/PolicyHelperTest.php | 61 ++++++++ 5 files changed, 410 insertions(+), 4 deletions(-) create mode 100644 includes/helpers/class-policy-helper.php create mode 100644 tests/Unit/PolicyHelperTest.php diff --git a/includes/helpers/class-abilities-helper.php b/includes/helpers/class-abilities-helper.php index 69f5037..998a592 100644 --- a/includes/helpers/class-abilities-helper.php +++ b/includes/helpers/class-abilities-helper.php @@ -64,6 +64,13 @@ final class Abilities_Helper { */ private Action_Log_Helper $action_log_helper; + /** + * Policy helper. + * + * @var Policy_Helper + */ + private Policy_Helper $policy_helper; + /** * Constructor. */ @@ -71,6 +78,7 @@ private function __construct() { $this->settings_helper = Settings_Helper::get_instance(); $this->security = Security::get_instance(); $this->action_log_helper = Action_Log_Helper::get_instance(); + $this->policy_helper = Policy_Helper::get_instance(); } /** @@ -194,6 +202,20 @@ public function execute_tool_call( string $tool_name, $raw_args = null, array $e $allowed_confirmation_tokens = $this->normalize_allowed_confirmation_tokens( $execution_context['allowed_confirmation_tokens'] ?? null ); + $trigger_type = isset( $execution_context['trigger_type'] ) + ? (string) $execution_context['trigger_type'] + : 'chat'; + $runtime_policy = isset( $execution_context['runtime_policy'] ) && is_array( $execution_context['runtime_policy'] ) + ? 
$execution_context['runtime_policy'] + : $this->policy_helper->resolve_runtime_policy( + $trigger_type, + isset( $execution_context['session_metadata'] ) && is_array( $execution_context['session_metadata'] ) + ? $execution_context['session_metadata'] + : [], + isset( $execution_context['policy_overrides'] ) && is_array( $execution_context['policy_overrides'] ) + ? $execution_context['policy_overrides'] + : [] + ); $args_json = wp_json_encode( $args ); $args_hash = false !== $args_json ? hash( 'sha256', (string) $args_json ) : ''; @@ -255,8 +277,52 @@ public function execute_tool_call( string $tool_name, $raw_args = null, array $e return $payload; } - $safety_class = $this->infer_safety_class( $ability ); - if ( ! $skip_confirmation && $this->security->requires_confirmation_for_safety_class( $safety_class ) ) { + $safety_class = $this->infer_safety_class( $ability ); + $is_destructive = 'destructive' === $safety_class; + + if ( ! $this->is_policy_enabled( $runtime_policy['allow_tools'] ?? true ) ) { + $payload = $this->build_policy_violation_payload( + 'clawpress_policy_tools_denied', + __( 'Tool execution is blocked by runtime policy.', 'clawpress' ), + $normalized_tool_name, + $ability_name, + $safety_class, + $runtime_policy, + 'deny_tools' + ); + $this->log_tool_call( $normalized_tool_name, $ability_name, $requesting_user_id, $execution_user_id, 'warning', $args_hash, $payload ); + return $payload; + } + + if ( $is_destructive && ! $this->is_policy_enabled( $runtime_policy['allow_destructive_tools'] ?? 
true ) ) { + $payload = $this->build_policy_violation_payload( + 'clawpress_policy_destructive_tools_denied', + __( 'Destructive tools are not allowed for this runtime trigger.', 'clawpress' ), + $normalized_tool_name, + $ability_name, + $safety_class, + $runtime_policy, + 'deny_destructive_tools' + ); + $this->log_tool_call( $normalized_tool_name, $ability_name, $requesting_user_id, $execution_user_id, 'warning', $args_hash, $payload ); + return $payload; + } + + if ( 'file_delete' === $normalized_tool_name && ! $this->is_policy_enabled( $runtime_policy['allow_file_delete'] ?? true ) ) { + $payload = $this->build_policy_violation_payload( + 'clawpress_policy_file_delete_denied', + __( 'File delete is blocked by runtime policy.', 'clawpress' ), + $normalized_tool_name, + $ability_name, + $safety_class, + $runtime_policy, + 'deny_file_delete' + ); + $this->log_tool_call( $normalized_tool_name, $ability_name, $requesting_user_id, $execution_user_id, 'warning', $args_hash, $payload ); + return $payload; + } + + if ( $is_destructive && ! $skip_confirmation && $this->is_policy_enabled( $runtime_policy['require_confirmation_for_destructive'] ?? true ) && $this->security->requires_confirmation_for_safety_class( $safety_class ) ) { if ( 'batch' === $confirmation_scope ) { $payload = [ 'success' => false, @@ -469,6 +535,56 @@ private function infer_safety_class( \WP_Ability $ability ): string { return 'write'; } + /** + * Check whether a policy field is enabled. + * + * @param mixed $value Raw value. + */ + private function is_policy_enabled( $value ): bool { + return function_exists( 'clawpress_sanitize_boolean' ) + ? clawpress_sanitize_boolean( $value ) + : (bool) $value; + } + + /** + * Build a structured policy-violation payload. + * + * @param string $code Error code. + * @param string $message Error message. + * @param string $tool_name Tool name. + * @param string $ability_name Ability ID. + * @param string $safety_class Safety class. 
+ * @param array $runtime_policy Resolved runtime policy. + * @param string $decision Decision outcome. + * @return array + */ + private function build_policy_violation_payload( + string $code, + string $message, + string $tool_name, + string $ability_name, + string $safety_class, + array $runtime_policy, + string $decision + ): array { + return [ + 'success' => false, + 'error' => [ + 'code' => $code, + 'message' => $message, + ], + 'tool' => $tool_name, + 'ability' => $ability_name, + 'safety_class' => $safety_class, + 'policy' => [ + 'trigger_type' => isset( $runtime_policy['trigger_type'] ) ? (string) $runtime_policy['trigger_type'] : 'chat', + 'policy_profile' => isset( $runtime_policy['policy_profile'] ) ? (string) $runtime_policy['policy_profile'] : 'default', + 'on_violation' => isset( $runtime_policy['on_policy_violation'] ) ? (string) $runtime_policy['on_policy_violation'] : 'deny', + 'decision' => $decision, + ], + ]; + } + /** * Write one tool-call action ledger row. * diff --git a/includes/helpers/class-chat-helper.php b/includes/helpers/class-chat-helper.php index 4c8e9c5..9a9f216 100644 --- a/includes/helpers/class-chat-helper.php +++ b/includes/helpers/class-chat-helper.php @@ -106,6 +106,13 @@ final class Chat_Helper { */ private Command_Confirmation_Store $confirmation_store; + /** + * Policy helper. + * + * @var Policy_Helper + */ + private Policy_Helper $policy_helper; + /** * LLM reply generator. * @@ -137,6 +144,7 @@ private function __construct( $this->context_helper = $context_helper ?? Context_Helper::get_instance(); $this->abilities_helper = Abilities_Helper::get_instance(); $this->confirmation_store = new Command_Confirmation_Store(); + $this->policy_helper = Policy_Helper::get_instance(); $this->online_reply_generator = $online_reply_generator ?? [ $this, 'generate_online_reply' ]; $this->provider_model_resolver = $provider_model_resolver ?? 
[ $this, 'resolve_provider_and_model' ]; } @@ -270,6 +278,16 @@ private function generate_online_reply( array $context, string $provider, string $tool_declarations = $this->normalize_tool_declarations( $context ); $requesting_user_id = isset( $context['requesting_user_id'] ) ? (int) $context['requesting_user_id'] : 0; $execution_user_id = isset( $context['execution_user_id'] ) ? (int) $context['execution_user_id'] : 0; + $trigger_type = isset( $context['trigger_type'] ) ? (string) $context['trigger_type'] : 'chat'; + $runtime_policy = $this->policy_helper->resolve_runtime_policy( + $trigger_type, + isset( $context['session_metadata'] ) && is_array( $context['session_metadata'] ) + ? $context['session_metadata'] + : [], + isset( $context['policy_overrides'] ) && is_array( $context['policy_overrides'] ) + ? $context['policy_overrides'] + : [] + ); $this->confirmation_store->clear_tool_batch( $requesting_user_id > 0 ? $requesting_user_id : null ); $conversation = $history_messages; @@ -284,7 +302,7 @@ private function generate_online_reply( array $context, string $provider, string $latest_context_usage = null; $tool_call_trace = []; - for ( $round = 0; $round < self::MAX_TOOL_ROUNDS; ++$round ) { + for ( $round = 0; $round < (int) $runtime_policy['max_tool_rounds']; ++$round ) { $builder = AiClient::prompt( $conversation )->usingProvider( $provider ); if ( '' !== $system_prompt ) { $builder = $builder->usingSystemInstruction( $system_prompt ); @@ -312,7 +330,7 @@ private function generate_online_reply( array $context, string $provider, string } $function_responses = []; - foreach ( array_slice( $function_calls, 0, self::MAX_TOOL_CALLS_PER_ROUND ) as $index => $function_call ) { + foreach ( array_slice( $function_calls, 0, (int) $runtime_policy['max_tool_calls_per_round'] ) as $index => $function_call ) { $tool_name = trim( (string) $function_call->getName() ); if ( '' === $tool_name ) { continue; @@ -330,6 +348,8 @@ private function generate_online_reply( array 
$context, string $provider, string 'requesting_user_id' => $requesting_user_id, 'execution_user_id' => $execution_user_id, 'confirmation_scope' => 'batch', + 'trigger_type' => $trigger_type, + 'runtime_policy' => $runtime_policy, ] ); $tool_call_trace[] = $this->build_tool_call_trace_entry( diff --git a/includes/helpers/class-policy-helper.php b/includes/helpers/class-policy-helper.php new file mode 100644 index 0000000..21d42aa --- /dev/null +++ b/includes/helpers/class-policy-helper.php @@ -0,0 +1,164 @@ + + */ + private const BASE_POLICY = [ + 'allow_tools' => true, + 'allow_destructive_tools' => true, + 'require_confirmation_for_destructive' => true, + 'allow_file_delete' => true, + 'max_tool_rounds' => 4, + 'max_tool_calls_per_round' => 6, + 'max_wall_time_seconds' => 120, + 'allow_network' => false, + 'allow_background_followups' => true, + 'on_policy_violation' => 'deny', + ]; + + /** + * Trigger-specific policy overrides. + * + * @var array> + */ + private const TRIGGER_POLICY_OVERRIDES = [ + 'chat' => [], + 'heartbeat' => [ + 'allow_destructive_tools' => false, + 'allow_file_delete' => false, + 'max_tool_rounds' => 2, + 'max_tool_calls_per_round' => 3, + 'max_wall_time_seconds' => 45, + 'allow_background_followups' => false, + ], + 'spawned_agent' => [ + 'allow_destructive_tools' => false, + 'allow_file_delete' => false, + 'max_tool_rounds' => 3, + 'max_tool_calls_per_round' => 4, + 'max_wall_time_seconds' => 90, + ], + ]; + + /** + * Get singleton instance. + */ + public static function get_instance(): self { + if ( null === self::$instance ) { + self::$instance = new self(); + } + + return self::$instance; + } + + /** + * Resolve a normalized runtime policy for the current run. + * + * @param string $trigger_type Trigger type (`chat`, `heartbeat`, `spawned_agent`). + * @param array $session_metadata Optional session metadata. + * @param array $profile_overrides Optional direct policy overrides. 
+ * @return array + */ + public function resolve_runtime_policy( string $trigger_type, array $session_metadata = [], array $profile_overrides = [] ): array { + $normalized_trigger = $this->normalize_trigger_type( $trigger_type ); + $policy_profile = isset( $session_metadata['policy_profile'] ) + ? $this->normalize_key( (string) $session_metadata['policy_profile'] ) + : 'default'; + + $resolved = self::BASE_POLICY; + $resolved = array_merge( $resolved, self::TRIGGER_POLICY_OVERRIDES[ $normalized_trigger ] ?? [] ); + + if ( isset( $session_metadata['policy_overrides'] ) && is_array( $session_metadata['policy_overrides'] ) ) { + $resolved = array_merge( $resolved, $session_metadata['policy_overrides'] ); + } + + if ( [] !== $profile_overrides ) { + $resolved = array_merge( $resolved, $profile_overrides ); + } + + return [ + 'trigger_type' => $normalized_trigger, + 'policy_profile' => '' !== $policy_profile ? $policy_profile : 'default', + 'allow_tools' => clawpress_sanitize_boolean( $resolved['allow_tools'] ?? true ), + 'allow_destructive_tools' => clawpress_sanitize_boolean( $resolved['allow_destructive_tools'] ?? true ), + 'require_confirmation_for_destructive' => clawpress_sanitize_boolean( $resolved['require_confirmation_for_destructive'] ?? true ), + 'allow_file_delete' => clawpress_sanitize_boolean( $resolved['allow_file_delete'] ?? true ), + 'max_tool_rounds' => max( 1, (int) ( $resolved['max_tool_rounds'] ?? 4 ) ), + 'max_tool_calls_per_round' => max( 1, (int) ( $resolved['max_tool_calls_per_round'] ?? 6 ) ), + 'max_wall_time_seconds' => max( 1, (int) ( $resolved['max_wall_time_seconds'] ?? 120 ) ), + 'allow_network' => clawpress_sanitize_boolean( $resolved['allow_network'] ?? false ), + 'allow_background_followups' => clawpress_sanitize_boolean( $resolved['allow_background_followups'] ?? true ), + 'on_policy_violation' => $this->normalize_violation_mode( $resolved['on_policy_violation'] ?? 'deny' ), + ]; + } + + /** + * Normalize trigger type. 
+ * + * @param string $trigger_type Raw trigger type. + */ + private function normalize_trigger_type( string $trigger_type ): string { + $normalized = $this->normalize_key( $trigger_type ); + if ( '' === $normalized ) { + return 'chat'; + } + + if ( isset( self::TRIGGER_POLICY_OVERRIDES[ $normalized ] ) ) { + return $normalized; + } + + return 'chat'; + } + + /** + * Normalize policy violation handling mode. + * + * @param mixed $raw_mode Raw mode value. + */ + private function normalize_violation_mode( $raw_mode ): string { + $mode = $this->normalize_key( (string) $raw_mode ); + if ( in_array( $mode, [ 'deny', 'degrade', 'fail' ], true ) ) { + return $mode; + } + + return 'deny'; + } + + /** + * Normalize key-like text without requiring WordPress globals. + * + * @param string $value Raw key-like text. + */ + private function normalize_key( string $value ): string { + if ( function_exists( 'sanitize_key' ) ) { + return sanitize_key( $value ); + } + + $normalized = strtolower( trim( $value ) ); + return (string) preg_replace( '/[^a-z0-9_\-]/', '', $normalized ); + } +} diff --git a/tests/Unit/AbilitiesHelperTest.php b/tests/Unit/AbilitiesHelperTest.php index 1ef0a45..62a0758 100644 --- a/tests/Unit/AbilitiesHelperTest.php +++ b/tests/Unit/AbilitiesHelperTest.php @@ -106,6 +106,51 @@ public function test_tool_execution_logs_requesting_and_execution_actors(): void $this->assertSame( 'tool_call', $GLOBALS['wpdb']->insert_calls[0]['data']['event_type'] ); } + public function test_destructive_tools_are_denied_for_heartbeat_trigger_policy(): void { + $result = Abilities_Helper::get_instance()->execute_tool_call( + 'file_delete', + [ + 'path' => 'notes.md', + ], + [ + 'requesting_user_id' => 1, + 'execution_user_id' => 1, + 'trigger_type' => 'heartbeat', + ] + ); + + $this->assertFalse( $result['success'] ); + $this->assertSame( 'clawpress_policy_destructive_tools_denied', $result['error']['code'] ); + $this->assertSame( 'heartbeat', $result['policy']['trigger_type'] ); + 
$this->assertSame( 'deny_destructive_tools', $result['policy']['decision'] ); + } + + public function test_runtime_policy_can_enforce_file_delete_gate_for_spawned_agent(): void { + $result = Abilities_Helper::get_instance()->execute_tool_call( + 'file_delete', + [ + 'path' => 'notes.md', + ], + [ + 'requesting_user_id' => 1, + 'execution_user_id' => 1, + 'runtime_policy' => [ + 'trigger_type' => 'spawned_agent', + 'policy_profile' => 'default', + 'allow_tools' => true, + 'allow_destructive_tools' => true, + 'require_confirmation_for_destructive' => true, + 'allow_file_delete' => false, + 'on_policy_violation' => 'deny', + ], + ] + ); + + $this->assertFalse( $result['success'] ); + $this->assertSame( 'clawpress_policy_file_delete_denied', $result['error']['code'] ); + $this->assertSame( 'spawned_agent', $result['policy']['trigger_type'] ); + } + public function test_destructive_confirmation_token_must_be_allowlisted_by_execution_context(): void { $initial = Abilities_Helper::get_instance()->execute_tool_call( 'file_delete', diff --git a/tests/Unit/PolicyHelperTest.php b/tests/Unit/PolicyHelperTest.php new file mode 100644 index 0000000..dc0c1f5 --- /dev/null +++ b/tests/Unit/PolicyHelperTest.php @@ -0,0 +1,61 @@ +resolve_runtime_policy( 'chat' ); + + $this->assertSame( 'chat', $policy['trigger_type'] ); + $this->assertTrue( $policy['allow_tools'] ); + $this->assertTrue( $policy['allow_destructive_tools'] ); + $this->assertTrue( $policy['require_confirmation_for_destructive'] ); + $this->assertSame( 4, $policy['max_tool_rounds'] ); + $this->assertSame( 6, $policy['max_tool_calls_per_round'] ); + } + + public function test_heartbeat_trigger_resolves_stricter_defaults(): void { + $policy = Policy_Helper::get_instance()->resolve_runtime_policy( 'heartbeat' ); + + $this->assertSame( 'heartbeat', $policy['trigger_type'] ); + $this->assertTrue( $policy['allow_tools'] ); + $this->assertFalse( $policy['allow_destructive_tools'] ); + $this->assertFalse( 
$policy['allow_file_delete'] ); + $this->assertSame( 2, $policy['max_tool_rounds'] ); + $this->assertSame( 3, $policy['max_tool_calls_per_round'] ); + } + + public function test_profile_overrides_are_applied_deterministically(): void { + $first = Policy_Helper::get_instance()->resolve_runtime_policy( + 'spawned_agent', + [ 'policy_profile' => 'trusted-runner' ], + [ + 'allow_destructive_tools' => true, + 'allow_file_delete' => true, + ] + ); + $second = Policy_Helper::get_instance()->resolve_runtime_policy( + 'spawned_agent', + [ 'policy_profile' => 'trusted-runner' ], + [ + 'allow_destructive_tools' => true, + 'allow_file_delete' => true, + ] + ); + + $this->assertSame( $first, $second ); + $this->assertSame( 'trusted-runner', $first['policy_profile'] ); + $this->assertTrue( $first['allow_destructive_tools'] ); + $this->assertTrue( $first['allow_file_delete'] ); + } +} From 7125293c150b2a0c9b5479293e4e48833229ebfd Mon Sep 17 00:00:00 2001 From: Brad Vincent Date: Mon, 23 Feb 2026 19:40:05 +0000 Subject: [PATCH 2/3] added agent loop specs --- docs/agent-loop-spec-edits.md | 270 ++++++++++++++++++++++++++++++++++ docs/agent-loop-spec.md | 201 +++++++++++++++++++++++++ 2 files changed, 471 insertions(+) create mode 100644 docs/agent-loop-spec-edits.md create mode 100644 docs/agent-loop-spec.md diff --git a/docs/agent-loop-spec-edits.md b/docs/agent-loop-spec-edits.md new file mode 100644 index 0000000..b4771d9 --- /dev/null +++ b/docs/agent-loop-spec-edits.md @@ -0,0 +1,270 @@ +## Issue #12 Spec Update: Non-Streaming Now, Streaming Later (Transport-Agnostic + Time-Sliced Runner) + +### Background / Constraint (NEW) + +* Current LLM integration uses **WP AI Client**, which **does not support streaming yet**. +* Therefore, long-running “run-to-completion” turns inside a single web request are **not reliable** (timeouts / proxy buffering / request aborts). +* The agent loop must support: + + 1. **non-streaming execution** with **polling-based progress**, and + 2. 
an **upgrade path** to streaming **without rewriting** core loop logic. + +### Non-Negotiable Design Rule (NEW) + +> The **agent loop logic** must be deterministic and resumable **independent of delivery transport**. +> Streaming is an optimization layer (transport), not a control-plane rewrite. + +--- + +## Updated Goal + +Keep the original goal (“one core loop used by chat + heartbeat + spawning”). ([GitHub][1]) +**Add**: the loop must support **time-sliced execution** and **event persistence** so it can run safely under Action Scheduler with non-streaming providers. + +--- + +## Architecture Changes / Additions + +### 0) Introduce explicit layers (NEW) + +1. **Loop Engine** (pure logic; no DB; no HTTP; no Action Scheduler assumptions) +2. **Session Store** (DB-backed, already implied by issue) +3. **Runner** (Action Scheduler tick executor; uses store + lock) +4. **Transport** (how progress is delivered: polling now, streaming later) + +This clarifies “transport-agnostic” in a concrete way. + +--- + +### 1) Agent Loop Helper becomes a “Loop Engine” (UPDATED) + +Current proposal: `class-agent-loop-helper.php` extracted from `Chat_Helper`. ([GitHub][1]) +Adjust it to: + +#### A) Provide **step-based execution** (NEW) + +Instead of “run loop until done”, expose: + +* `run_turn(TurnRequest $req, AgentSession $session, LoopOptions $opts, AgentTransport $transport): TurnResult` +* `run_slice(RunSliceRequest $req): RunSliceResult` + + * Executes **bounded work** (one LLM call OR a limited batch of tool calls), then returns a resumable state marker. + +**Why:** enables safe background execution even when LLM calls/tool runs are slow. 
+ +#### B) Make the engine emit events (NEW) + +The engine must emit structured events as it progresses (even non-streaming): + +* `agent_start`, `turn_start`, `message_start`, `message_end` +* `tool_execution_start`, `tool_execution_update`, `tool_execution_end` +* `turn_end`, `agent_end` +* optionally `message_update` (no-op for non-streaming today, real deltas later) + +These events go to the **Transport** layer. + +--- + +### 2) Expand TurnRequest / TurnResult contracts (UPDATED) + +Existing contracts are good. ([GitHub][1]) +Add the fields needed for slicing + resumability: + +#### `TurnRequest` additions + +* `run_id` (unique per attempt) +* `attempt` (int) +* `slice_budget_ms` (hard per-tick time budget; e.g., 2000–5000ms) +* `max_steps_per_slice` (hard cap; e.g., 1 LLM call or N tool calls) +* `transport_mode` (`polling` | `streaming`) +* `resume_cursor` (opaque engine cursor/state token; optional) + +#### `TurnResult` additions + +* `status` expands to include: + + * `success`, `requires_confirmation`, `error`, `timeout`, + * **`in_progress`** (paused due to slice budget) +* `next_action` expands to: + + * `continue_now` (enqueue next tick ASAP) + * `continue_later` (backoff/retry scheduling) + * `stop` +* `resume_cursor` (present when `status=in_progress`) +* `events_cursor` (cursor for UI polling; optional) + +--- + +### 3) Formalize Store Models: Thread vs Run vs Event (NEW) + +Issue already calls for thread/session state + locking. ([GitHub][1]) +Make it explicit: + +#### A) `agent_threads` (long-lived) + +* `thread_id` +* `status` (idle|running|paused|error|dead) +* `policy_profile` (by trigger) +* scheduling: `last_run_at`, `next_run_at` +* lock fields (or separate lock table; see below) + +#### B) `agent_runs` (per attempt) + +* `run_id`, `thread_id` +* `trigger` (chat|heartbeat|spawned_agent|...) 
+* `status` (queued|running|waiting_llm|waiting_tools|paused|done|error) +* `attempt`, `retry_at`, `error_code`, `error_message` +* `resume_cursor` (opaque engine cursor) +* usage totals (tokens/cost if available) + +#### C) `agent_events` (append-only log for polling + debugging) + +* `event_id` (monotonic) +* `run_id`, `thread_id` +* `type` +* `payload` (json) +* `created_at` + +**Polling UI reads events**: `GET /runs/{run_id}/events?after={event_id}` + +--- + +### 4) Transport abstraction (NEW) + +Add an internal interface: + +* `AgentTransport::emit(AgentEvent $event): void` +* `AgentTransport::close(): void` + +Implementations: + +1. **PollingTransport** (default today): writes events to `agent_events` table +2. **StreamingTransport** (future): emits SSE/websocket updates *and optionally also persists events* (debug mode) + +**Core loop engine must never care which transport is in use.** + +--- + +### 5) Runner: Action Scheduler ticks become first-class (UPDATED) + +Issue already proposes a heartbeat consumer that claims runnable threads and runs the loop helper. ([GitHub][1]) +Adjust so that the heartbeat worker executes **run slices**: + +#### Runner algorithm (per tick) + +1. Claim runnable `agent_threads` +2. Acquire lock/lease (existing requirement) +3. Load or create `agent_run` in `running` state +4. Execute **one slice**: + + * time budget enforcement + * either: perform the next LLM call OR next tool batch +5. Persist: + + * updated `resume_cursor` + * `agent_run.status` + `next_action` + * emitted events (via PollingTransport) +6. If `next_action=continue_now`, enqueue another AS action immediately +7. Release lock/lease + +--- + +## Lock/Lease Semantics (UPDATED) + +Issue already calls for lock/lease + stale recovery. ([GitHub][1]) +Clarify: + +* Lock covers a **single slice execution**, not “the entire multi-slice run”. +* Lease must be renewed each tick; stale lease recovery should requeue the run safely. 
+* Store must support idempotency: + + * if a tick repeats (duplicate AS run), it should detect already-advanced `resume_cursor`/status and no-op. + +--- + +## UI / API Implications (NEW) + +Because WP AI Client is non-streaming today, “real time” must be achieved via polling: + +### Endpoints (internal or REST) + +* `POST /agent/runs` (chat/spawn) -> returns `run_id` +* `POST /agent/runs/{run_id}/enqueue` (optional) -> enqueue tick +* `GET /agent/runs/{run_id}` -> status + summary +* `GET /agent/runs/{run_id}/events?after=...` -> incremental event feed + +Chat can remain synchronous for small turns, but must have a fallback: + +* if request budget exceeded, return `{ run_id, status: in_progress }` and client switches to polling. + +--- + +## Streaming Upgrade Path (NEW) + +When WP AI Client supports streaming: + +* Implement `StreamingTransport` that emits `message_update` events live. +* Update the LLM adapter to emit deltas to the transport. +* **No change** to: + + * Run/session store schemas + * Lock/lease mechanism + * Tool execution logic + * State machine + * Runner (still valid; streaming can be used for UI only) + +Optional: allow a “streaming-only” immediate path for chat if hosting supports it, but do not remove the slice runner. 
+ +--- + +## Updated Implementation Phases + +### Phase 1: Extract Loop Engine + Events (behavior-preserving) + +* Extract core loop out of `Chat_Helper` into Loop Engine +* Introduce `AgentTransport` and implement `PollingTransport` +* Emit events, even if chat endpoint doesn’t use them yet + +### Phase 2: Session Store + Run/Event tables + Lock manager + +* Implement DB-backed thread/run/event storage +* Implement lock/lease with stale recovery + idempotency keys +* Wire minimal admin debugging (at least inspect by run_id) + +### Phase 3: Runner (Action Scheduler) with slice execution + +* Implement slice/tick runner +* Enqueue follow-up ticks until run completes +* Ensure time budget enforcement + retry/backoff + +### Phase 4: Spawn adapter + hardening + +* Spawn endpoint creates thread + run and enqueues tick +* Dead-letter state after N failures +* Policy profiles by trigger (chat vs heartbeat vs spawned) + +### Phase 5: Streaming transport (future) + +* Add `StreamingTransport` + LLM delta emission when WP AI Client supports it +* Keep polling mode as config fallback + +--- + +## Acceptance Criteria (UPDATED) + +Keep existing acceptance criteria, and add: + +* Engine supports **time-sliced** execution (`status=in_progress` + `resume_cursor`). +* Background runner can complete multi-step runs without a long-lived HTTP request. +* Event log exists and UI can poll incremental progress (minimum viable observability). +* Transport is configurable: `polling` now; `streaming` later, without loop rewrite. +* Lock/lease is safe across multiple slices and resilient to duplicate scheduler invocations. + +--- + +### Notes / Definitions + +* “Slice” = bounded unit of work (one LLM call OR bounded tool batch). +* “Transport” = how progress events are delivered (persisted polling vs streaming). 
+ +[1]: https://github.com/bradvin/clawpress/issues/12 "Refactor to reusable Agent Loop Helper for chat + heartbeat + future spawning · Issue #12 · bradvin/clawpress · GitHub" diff --git a/docs/agent-loop-spec.md b/docs/agent-loop-spec.md new file mode 100644 index 0000000..fae18cf --- /dev/null +++ b/docs/agent-loop-spec.md @@ -0,0 +1,201 @@ +## Summary +Refactor current chat-bound agent execution into a reusable **Agent Loop Helper** (runtime service) that can be called from: + +1. current synchronous chat requests, +2. heartbeat/background jobs, +3. future agent spawning APIs. + +This should make agent execution transport-agnostic and enable true async/background runs without duplicating core loop logic. + +--- + +## Current State (as of now) + +### What exists +- Chat execution path currently drives model/tool loop: + - `includes/rest/class-chat-controller.php` → `Chat_Helper::generate_ai_reply()` + - Core loop currently in `includes/helpers/class-chat-helper.php` +- Heartbeat scheduler exists: + - `includes/class-heartbeat.php` + - Schedules `clawpress_heartbeat_tick` every 15 min using Action Scheduler + - Tick triggers: `do_action( 'clawpress_run_scheduled_tasks' )` + +### What is missing +- No consumer/handler currently attached to `clawpress_run_scheduled_tasks` in plugin code. +- No independent agent thread/session runtime (outside chat request path). +- No spawn manager / spawn endpoint that creates and runs separate agent threads. +- No async run coordinator (claim/lock/retry/failure lifecycle) for autonomous runs. + +Result: loop logic is effectively request-bound to chat right now. + +--- + +## Goal +Create a reusable **Agent Loop Helper** so one core loop can be used by multiple entry points (chat, heartbeat, spawning) with consistent behavior and policy. + +--- + +## Proposed Architecture + +### 1) Extract a core runtime service +Create e.g. 
`includes/helpers/class-agent-loop-helper.php` (name flexible) and move loop responsibilities out of `Chat_Helper`: + +- provider/model resolution +- context assembly + prompt prep +- model call +- tool-call loop (`MAX_TOOL_ROUNDS`, `MAX_TOOL_CALLS_PER_ROUND`) +- confirmation batching behavior +- context/token usage collection +- normalized result payload + +`Chat_Helper` should become an adapter that invokes this service. + +--- + +### 2) Define canonical request/response contracts +Introduce internal DTO-like arrays/classes: + +#### `TurnRequest` +- `thread_id` (or session id) +- `trigger` (`chat`, `heartbeat`, `spawned_agent`, etc.) +- `message` (optional for heartbeat-driven turns) +- `requesting_user_id` +- `execution_user_id` +- policy knobs: `allow_tools`, `require_confirmation`, limits/timeouts + +#### `TurnResult` +- `assistant_text` +- `tool_calls` trace +- `card` payload (optional) +- `status` (`success`, `requires_confirmation`, `error`, `timeout`) +- usage/context metadata +- optional `next_action` hint (`continue`, `stop`, `reschedule`) + +This contract is key to reusability across adapters. + +--- + +### 3) Separate state persistence from execution +Add explicit agent-thread/session state (rather than implicitly relying on chat history only). + +Minimum state needed: +- thread/session identity +- lifecycle status +- `last_run_at`, `next_run_at` +- lock/lease metadata (owner + expiry) +- failure/retry counters +- trigger metadata + +Storage can be CPT-based or custom table (table likely better for concurrency/locking). + +--- + +### 4) Add background runner adapter (heartbeat path) +Implement a consumer for `clawpress_run_scheduled_tasks` that: + +1. finds/claims runnable agent threads +2. acquires lock/lease +3. builds `TurnRequest` +4. calls Agent Loop Helper +5. persists run outputs/logs/state +6. schedules follow-up if required +7. releases lock + +This enables async operation without rewriting loop logic. 
+ +--- + +### 5) Keep chat path synchronous but thin +`Chat_Controller` flow should be: +1. persist inbound user message +2. call Agent Loop Helper synchronously +3. persist assistant response/meta +4. return response + +No duplicated loop logic in chat layer. + +--- + +### 6) Add spawn entry point later as another adapter +Future spawn endpoint should: +- create a new thread/session record, +- seed initial context/message, +- enqueue first run via Action Scheduler, +- return spawned thread id. + +Spawn endpoint should not contain loop internals. + +--- + +## Policy & Safety Considerations + +### Confirmation/destructive tools by trigger +Define policy by trigger source: +- `chat`: current confirmation behavior acceptable +- `heartbeat` / `spawned_agent`: likely deny or queue destructive calls by default +- optionally allow policy profiles per agent/thread + +### Runtime guardrails +- max wall time per run +- max tool calls per run +- bounded retries + exponential backoff +- dead-letter/failure terminal state after N failures +- idempotency key per run attempt + +### Concurrency controls +- at-most-one active run per thread/session (lock/lease) +- stale lock recovery +- avoid duplicate processing by concurrent scheduler invocations + +--- + +## Observability / Debuggability +Add structured run logging (reuse/extend action log): +- `run_id`, `thread_id`, trigger source +- tool trace + per-call status +- provider/model + token/context usage +- error classification + retry count +- final run outcome + +Without this, async failures will be hard to diagnose.
+ +--- + +## Suggested Implementation Phases + +### Phase 1: Internal refactor (no behavior change) +- Introduce Agent Loop Helper +- Move loop logic from `Chat_Helper` into helper +- Keep current chat behavior identical + +### Phase 2: Background execution wiring +- Implement `clawpress_run_scheduled_tasks` consumer +- Add minimal thread/run state + locking +- Run one background thread safely + +### Phase 3: Spawn support +- Add spawn API/service +- Create separate threads and schedule independent runs + +### Phase 4: Hardening +- retries/backoff/dead-letter +- policy profiles by trigger +- richer observability and admin inspection UI + +--- + +## Acceptance Criteria +- Chat uses Agent Loop Helper (no duplicated loop logic in chat layer). +- Heartbeat can run at least one agent thread asynchronously. +- Background runs are lock-safe (no duplicate concurrent turn execution per thread). +- Destructive tool behavior is explicitly policy-controlled per trigger. +- Run status/errors are inspectable via logs. + +--- + +## Why this is worth doing +This makes ClawPress extensible: +- one “agent brain,” many transports/triggers, +- clean path to autonomous agents, +- clean path to true spawned parallel threads, +- less tech debt than cloning chat logic into heartbeat/spawn flows. From a62c88ffc48c598c230e737e7e2a4fb60c885051 Mon Sep 17 00:00:00 2001 From: Brad Vincent Date: Mon, 23 Feb 2026 19:48:37 +0000 Subject: [PATCH 3/3] added doc for policy helper --- docs/policy-helper.md | 213 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 docs/policy-helper.md diff --git a/docs/policy-helper.md b/docs/policy-helper.md new file mode 100644 index 0000000..da2fad8 --- /dev/null +++ b/docs/policy-helper.md @@ -0,0 +1,213 @@ +# ClawPress Policy Helper + +Version: 0.1 +Owner: ClawPress +Status: Draft + +## Purpose + +`Policy_Helper` provides a single runtime policy contract for tool execution. 
It translates trigger context (`chat`, `heartbeat`, `spawned_agent`) plus optional overrides into a normalized policy array that downstream helpers can enforce consistently. + +Implementation: `includes/helpers/class-policy-helper.php` + +## What It Owns + +`Policy_Helper` is responsible for: + +1. Defining default policy fields. +2. Defining trigger-specific policy overrides. +3. Applying override precedence. +4. Normalizing output types (bool/int/enum-like strings). + +`Policy_Helper` is not responsible for: + +1. Executing tool calls. +2. Performing permission checks. +3. Enforcing all policy fields itself. + +Enforcement is done by consumers such as `Abilities_Helper` and `Chat_Helper`. + +## Runtime Policy Contract + +Current resolved fields: + +1. `trigger_type` (`chat|heartbeat|spawned_agent`) +2. `policy_profile` (normalized profile key, default `default`) +3. `allow_tools` (bool) +4. `allow_destructive_tools` (bool) +5. `require_confirmation_for_destructive` (bool) +6. `allow_file_delete` (bool) +7. `max_tool_rounds` (int >= 1) +8. `max_tool_calls_per_round` (int >= 1) +9. `max_wall_time_seconds` (int >= 1) +10. `allow_network` (bool) +11. `allow_background_followups` (bool) +12. `on_policy_violation` (`deny|degrade|fail`) + +## Resolution Algorithm + +`Policy_Helper::resolve_runtime_policy( $trigger_type, $session_metadata, $profile_overrides )` resolves policy in this order: + +1. Start with `BASE_POLICY`. +2. Merge trigger profile from `TRIGGER_POLICY_OVERRIDES[normalized_trigger]`. +3. Merge `session_metadata['policy_overrides']` if present. +4. Merge explicit `$profile_overrides` (highest precedence). +5. Normalize and clamp values before return. + +Precedence summary (lowest to highest): + +1. Base defaults +2. Trigger defaults +3. Session metadata overrides +4. Direct method overrides + +Unknown or empty trigger types are normalized to `chat`. + +## Current Trigger Profiles + +### `chat` + +Uses base defaults (least restrictive profile). 
+ +### `heartbeat` + +More restrictive than chat: + +1. `allow_destructive_tools = false` +2. `allow_file_delete = false` +3. `max_tool_rounds = 2` +4. `max_tool_calls_per_round = 3` +5. `max_wall_time_seconds = 45` +6. `allow_background_followups = false` + +### `spawned_agent` + +Restrictive but less constrained than heartbeat: + +1. `allow_destructive_tools = false` +2. `allow_file_delete = false` +3. `max_tool_rounds = 3` +4. `max_tool_calls_per_round = 4` +5. `max_wall_time_seconds = 90` + +## Where It Is Used + +### 1) Chat loop policy resolution + +`Chat_Helper::generate_online_reply()` resolves policy once per model run and then uses it to bound loop execution: + +1. `max_tool_rounds` controls tool-call rounds. +2. `max_tool_calls_per_round` caps calls per round. +3. The resolved policy is passed into each `Abilities_Helper::execute_tool_call()` invocation as `runtime_policy`. + +Implementation: `includes/helpers/class-chat-helper.php` + +### 2) Tool execution policy enforcement + +`Abilities_Helper::execute_tool_call()` enforces policy gates in this order: + +1. `allow_tools` +2. `allow_destructive_tools` (for destructive abilities) +3. `allow_file_delete` (for `file_delete`) +4. `require_confirmation_for_destructive` (confirmation workflow gate) + +On policy violation, it returns a structured payload with: + +1. `success: false` +2. error code/message +3. 
`policy` block (`trigger_type`, `policy_profile`, `on_violation`, `decision`) + +Implementation: `includes/helpers/class-abilities-helper.php` + +## Practical Usage Patterns + +### Let helper resolve from trigger + overrides + +Use this when the caller has context but does not need to pre-resolve policy: + +```php +$result = Abilities_Helper::get_instance()->execute_tool_call( + 'file_delete', + [ 'path' => 'notes.md' ], + [ + 'trigger_type' => 'heartbeat', + 'session_metadata' => [ + 'policy_profile' => 'default', + ], + 'policy_overrides' => [ + 'allow_file_delete' => false, + ], + ] +); +``` + +### Pre-resolve once and pass `runtime_policy` + +Use this in loops so every call uses the same resolved contract. Note that the override below only re-enables `allow_destructive_tools`; `allow_file_delete` remains `false` for `spawned_agent`, so the `file_delete` call is still denied by the file-delete gate unless that field is overridden as well: + +```php +$policy = Policy_Helper::get_instance()->resolve_runtime_policy( + 'spawned_agent', + [ 'policy_profile' => 'trusted-runner' ], + [ 'allow_destructive_tools' => true ] +); + +$result = Abilities_Helper::get_instance()->execute_tool_call( + 'file_delete', + [ 'path' => 'notes.md' ], + [ + 'runtime_policy' => $policy, + ] +); +``` + +## Extension Guide + +### Add a new trigger profile + +1. Add a key to `TRIGGER_POLICY_OVERRIDES` in `class-policy-helper.php`. +2. Keep values partial: only overrides that differ from base. +3. Add unit coverage in `tests/Unit/PolicyHelperTest.php`. +4. Add enforcement coverage in `tests/Unit/AbilitiesHelperTest.php` (if behavior affects tool gates). + +### Add a new policy field + +1. Add a default in `BASE_POLICY`. +2. Add optional per-trigger overrides. +3. Add normalization in `resolve_runtime_policy()`. +4. Enforce in the correct consumer (`Chat_Helper`, `Abilities_Helper`, command handlers, or scheduler jobs). +5. Add tests for: + - default behavior + - override behavior + - enforcement behavior + +### Extend policy violation handling + +`on_policy_violation` is currently normalized and returned in violation payload metadata. 
If richer behavior is needed (`degrade`, `fail` semantics), implement that behavior in enforcement consumers, not in the helper. + +Recommended pattern: + +1. Keep helper deterministic and side-effect free. +2. Implement runtime action semantics where the violation occurs. +3. Maintain structured, machine-readable violation payloads. + +## Suggested Future Improvements + +1. Enforce `max_wall_time_seconds` in `Chat_Helper` loop using elapsed wall-clock checks. +2. Wire `allow_network` to network-capable tools or provider request constraints. +3. Use `allow_background_followups` in scheduler/spawn logic. +4. Add a policy filter hook (for example, `clawpress_runtime_policy_resolved`) if third-party plugins need policy customization without patching core code. + +## Test Coverage Today + +Current coverage includes: + +1. Base and trigger policy contracts (`PolicyHelperTest`). +2. Deterministic override application (`PolicyHelperTest`). +3. Destructive/file-delete gates under policy (`AbilitiesHelperTest`). + +Recommended additional coverage: + +1. Chat loop behavior under non-default `max_tool_rounds` and `max_tool_calls_per_round`. +2. Any new enforcement path added for currently informational fields. +