diff --git a/.conductor/registry/workflows/ado-pr.yaml b/.conductor/registry/workflows/ado-pr.yaml index 4ed1968..5e0c4dd 100644 --- a/.conductor/registry/workflows/ado-pr.yaml +++ b/.conductor/registry/workflows/ado-pr.yaml @@ -23,9 +23,9 @@ # decides whether the implementer needs to revise: # • has_negative_feedback=true → approvals_policy → revise_counter # → pr_fixer → push commits → back to poll_status. -# • has_negative_feedback=false → pending_poll_counter → either the -# pending throttle gate (poll cap not reached) or the stuck-review -# escalation (cap reached). +# • has_negative_feedback=false → notify_pr_pending (emit domain signal) +# → poll_pr_state_delta (blocks until reaction or timeout) +# → reaction_kind routing (pr_merged, pr_closed, timeout, new_review_*, etc.). # # ADO terminal signals (encoded in PrPollTerminalRoute.Classify): # - status == "completed" → already_merged @@ -63,8 +63,7 @@ workflow: single advisory bot review (marker-only — no native approval vote), then loops poll → analyze → fix → push until the operator completes, abandons, votes -10, or the revise cap fires. - version: "2.4.8" - entry_point: ado_inputs_validator + version: "2.5.0" metadata: min_polyphony_version: "2.4.8" limits: @@ -73,6 +72,39 @@ workflow: # bounded by revise_counter cap, not max_iterations. max_iterations: 200 + notifications: + namespace: polyphony.ado_pr + correlation: + - work_item_id + types: + pr_review_required: + version: 1 + description: ADO PR awaiting human action (vote, complete, comment, or abandon) + payload: + kind: {type: string} + severity: {type: string} + title: {type: string} + message: {type: string} + cta_url: {type: string} + cta_kind: {type: string} + correlation_id: {type: string} + expires_at: {type: string} + disposition: {type: string} + details: {type: object} + pr_ci_attention_required: + version: 1 + description: CI build policy on ADO PR changed to failed — operator should push a fix + payload: + kind: {type: string} + severity: {type: string} + title: {type: string} + message: {type: string} + cta_url: {type: string} + cta_kind: {type: string} + correlation_id: {type: string} + disposition: {type: string} + details: {type: object} + input: pr_number: type: number @@ -127,6 +159,19 @@ workflow: Root work-item id of the run's root. Used by `pr merge-feature-ado` to derive the head branch (`feature/{root_id}`). When zero, ado_inputs_validator routes to the inputs-missing gate. + poll_timeout_seconds: + type: integer + required: false + default: 86400 + description: >- + Seconds before poll_pr_state_delta times out and escalates to + stuck_review_gate_policy_router. Default 24 h (86400 s). + poll_interval_seconds: + type: integer + required: false + default: 30 + description: >- + Seconds between polls inside Poll-PrStateDelta.ps1. Default 30 s. tools: - filesystem @@ -530,6 +575,33 @@ agents: value: abort route: abort_run + # ── PR review resolved signal ───────────────────────────────────────── + # + # Emits disposition:resolved on the pr_review_required notification type + # so platespinner closes the pending CTA opened by notify_pr_pending. + # Fires only on the poll_pr_state_delta pr_merged path. + # + # TODO(post-upstream-merge): rename type: notification → type: emit. + - name: notify_pr_review_resolved + type: notification + notification: pr_review_required + payload: + kind: pr_review_required + severity: info + title: "ADO PR Completed" + message: >- + ADO PR #{{ workflow.input.pr_number }} on `{{ workflow.input.branch_name }}` + was completed. Workflow continuing. + cta_url: "{{ poll_status.output.pr_url }}" + cta_kind: open_pr + correlation_id: "{{ workflow.input.work_item_id }}:pr-review:{{ workflow.input.pr_number }}" + disposition: resolved + details: + pr_number: "{{ workflow.input.pr_number }}" + branch_name: "{{ workflow.input.branch_name }}" + routes: + - to: already_merged_emitter + # ── Already-merged emitter ──────────────────────────────────────────── # # Operator completed the PR through the ADO UI (or it was completed via @@ -731,7 +803,7 @@ agents: routes: - to: approvals_policy when: "{{ pr_feedback_analyzer.output.has_negative_feedback == true }}" - - to: pending_poll_counter + - to: notify_pr_pending # ── Approvals policy resolver ───────────────────────────────────────── # @@ -1063,120 +1135,116 @@ agents: routes: - to: poll_status - # ── Pending poll counter (stuck-review timeout MVP) ─────────────────── + # ── PR pending — emit domain signal ────────────────────────────────── # - # Increments each time pr_feedback_analyzer concludes there is no - # actionable negative feedback. Counter file is keyed by composite - # (pr_number + work_item_id) so siblings don't collide. + # Emits a pr_review_required domain signal once when the gate opens. + # ADO-specific payload includes organization and project. # - # When the count reaches POLL_CAP (60), routes to - # stuck_review_gate_policy_router so the operator can decide whether - # to keep waiting, override-approve, or abort. Below the cap, routes - # to pending_review_gate_policy_router (the throttle gate) for the - # unchanged operator UX in the common case. - - name: pending_poll_counter - type: script - description: Track pending-review poll count (cap 60 — stuck-review timeout MVP) - command: pwsh - args: - - "-NoProfile" - - "-Command" - - | - $counterFile = Join-Path ([System.IO.Path]::GetTempPath()) "conductor-ado-pr-pending-poll-{{ workflow.input.work_item_id }}-{{ workflow.input.pr_number }}" - if (Test-Path $counterFile) { - $count = [int](Get-Content $counterFile -Raw) + 1 - } else { - $count = 1 - } - Set-Content $counterFile $count - $cap = 60 - @{ count = $count; cap = $cap; cap_reached = ($count -ge $cap) } | ConvertTo-Json + # Replaces: pending_poll_counter + pending_review_gate_policy_router + + # pending_review_gate (all three collapsed per the + # gate-compression-pattern ADR). + # + # ADO vote mapping reference (for Liszt / Poll-PrStateDelta.ps1): + # +10 Approved / +5 Approved with suggestions → new_review_approved + # -10 Rejected / -5 Waiting for author → new_review_changes_requested + # PR Completed → pr_merged + # PR Abandoned → pr_closed + # + # TODO(post-upstream-merge): rename type: notification → type: emit. + - name: notify_pr_pending + type: notification + notification: pr_review_required + payload: + kind: pr_review_required + severity: warning + title: "ADO PR Review Required" + message: >- + ADO PR #{{ workflow.input.pr_number }} on `{{ workflow.input.branch_name }}` + is open and awaiting action (vote +5/+10, complete, comment, or abandon) + before the workflow continues. + cta_url: "{{ poll_status.output.pr_url }}" + cta_kind: review_pr + correlation_id: "{{ workflow.input.work_item_id }}:pr-review:{{ workflow.input.pr_number }}" + expires_at: "" + disposition: pending + details: + pr_number: "{{ workflow.input.pr_number }}" + branch_name: "{{ workflow.input.branch_name }}" + work_item_id: "{{ workflow.input.work_item_id }}" + organization: "{{ workflow.input.organization }}" + project: "{{ workflow.input.project }}" routes: - - to: stuck_review_gate_policy_router - when: "{{ pending_poll_counter.output.cap_reached == true }}" - - to: pending_review_gate_policy_router - when: "{{ pending_poll_counter.output.cap_reached == false }}" - # M10 defensive catch-all (#222): if cap_reached evaluates to - # anything other than true/false (counter script crash, malformed - # JSON, template eval edge case), fail safe to the stuck-review - # gate rather than re-entering the polling cycle via the policy - # router's skip-mode bypass. - - to: stuck_review_gate_policy_router + - to: poll_pr_state_delta - # ── Pending-review gate policy router ───────────────────────────────── + # ── PR state delta poll (ADO) ───────────────────────────────────────── # - # Reads `policy.unattended.review_wait_mode`. Three modes: - # • `wait` (default) → fall through to the human throttle gate. - # • `skip` → bypass the gate, route directly back to re-poll. - # • `auto` → REJECTED. The old `auto` mode self-approved via - # magic comments; that mechanism is gone in the - # sentiment-driven loop. Treated as a hard - # misconfiguration so it surfaces immediately - # instead of silently degrading to `skip`. - - name: pending_review_gate_policy_router + # ADO vote reaction mapping (handled by Poll-PrStateDelta.ps1): + # +10 / +5 → new_review_approved + # -10 / -5 → new_review_changes_requested (routes to pr_fixer via poll_status) + # Completed → pr_merged + # Abandoned → pr_closed + # + # TODO(AB#3257): add on_error: to: poll_error_gate once conductor RFC + # Phase 2 ships. + - name: poll_pr_state_delta type: script - description: Bypass pending_review_gate when policy.unattended.review_wait_mode is skip; reject auto. - command: polyphony + description: Poll for ADO PR state delta — blocks until reaction or timeout (Poll-PrStateDelta.ps1) + command: pwsh args: - - "policy" - - "load" + - "-NoProfile" + - "-File" + - "{{ workflow.dir }}/../scripts/Poll-PrStateDelta.ps1" + - "-Platform" + - "ado" + - "-PrUrl" + - "{{ poll_status.output.pr_url }}" + - "-TimeoutSeconds" + - "{{ workflow.input.poll_timeout_seconds | default(86400) }}" + - "-PollIntervalSeconds" + - "{{ workflow.input.poll_interval_seconds | default(30) }}" routes: - - to: abort_auto_mode_unsupported - when: "{{ pending_review_gate_policy_router.output.unattended.review_wait_mode == 'auto' }}" + - to: poll_pr_state_delta + when: "{{ poll_pr_state_delta.output.reaction_kind == 'initial_observation' }}" + - to: notify_pr_review_resolved + when: "{{ poll_pr_state_delta.output.reaction_kind == 'pr_merged' }}" + - to: closed_unmerged_emitter + when: "{{ poll_pr_state_delta.output.reaction_kind == 'pr_closed' }}" + - to: stuck_review_gate_policy_router + when: "{{ poll_pr_state_delta.output.reaction_kind == 'timeout' }}" + # Q3 Option C: CI build policy failed → emit attention + re-poll. + - to: notify_pr_ci_attention + when: "{{ poll_pr_state_delta.output.reaction_kind == 'ci_status_changed' and poll_pr_state_delta.output.delta_details.new == 'failed' }}" + # new_review_*, new_commit, new_comment, ci_status_changed (non-failed), catch-all: - to: poll_status - when: "{{ pending_review_gate_policy_router.output.unattended.review_wait_mode == 'skip' }}" - - to: pending_review_gate - # ── Pending-review gate (throttle in the sentiment-driven loop) ─────── + # ── CI attention notification (ADO) ────────────────────────────────── # - # Reached when pr_feedback_analyzer found no actionable negative - # feedback this poll. The PR is open and awaiting an explicit positive - # signal (operator completes, +5 or +10 vote) or a negative one - # (operator abandons, -10 vote, or new comments that change the - # analyzer's verdict next poll). - - name: pending_review_gate - type: human_gate - prompt: | - ## ⏳ PR Awaiting Action — ADO PR #{{ workflow.input.pr_number }} - - The PR is open. The polyphony bot reviewer has posted (see the - marker comment thread on the PR) and the sentiment-driven analyzer - did NOT find actionable negative feedback this poll. - - - **PR:** {{ poll_status.output.pr_url }} - - **Branch:** `{{ workflow.input.branch_name }}` - - To proceed, take one of these actions in ADO, then click - **Continue** to re-poll: - - - **Complete the PR** — workflow detects already_merged next poll. - - **Vote +5 or +10** — workflow detects merge_now next poll - (assuming no unresolved blocking threads or -5/-10 votes). - - **Vote -10 (reject)** — workflow exits this PR leg as unmerged - on the next poll. The parent feature-pr.yaml decides whether to - accept the abandonment or run its remediation cycle. - - **Add a comment requesting changes** — the analyzer reads the - comment on the next poll and routes pr_fixer back to revise. - - **Abandon the PR** — workflow exits this PR leg as unmerged on - the next poll. - - --- + # Emits pr_ci_attention_required when Poll-PrStateDelta detects the ADO + # build policy has flipped to failed. Routes back to poll_pr_state_delta. + # + # TODO(post-upstream-merge): rename type: notification → type: emit. + - name: notify_pr_ci_attention + type: notification + notification: pr_ci_attention_required + payload: + kind: pr_ci_attention_required + severity: warning + title: "CI Build Policy Failed on ADO PR" + message: >- + CI build policy on ADO PR #{{ workflow.input.pr_number }} changed + to failed. Push a fix commit to restart the build policy. + cta_url: "{{ poll_status.output.pr_url }}" + cta_kind: open_pr + correlation_id: "{{ workflow.input.work_item_id }}:pr-ci:{{ workflow.input.pr_number }}" + disposition: pending + details: + pr_number: "{{ workflow.input.pr_number }}" + branch_name: "{{ workflow.input.branch_name }}" + old_ci_status: "{{ poll_pr_state_delta.output.delta_details.prev | default('unknown') }}" + new_ci_status: "{{ poll_pr_state_delta.output.delta_details.new | default('failed') }}" + routes: + - to: poll_pr_state_delta - Choose an action: - - **Continue** — re-poll the PR for the updated state - - **I manually verified merge** — rare; covers PRs completed - externally between poll and decision. Emits merged=true. - - **Abort** — halt the entire run (terminates the conductor process) - options: - - label: "🔄 Continue" - value: continue - route: poll_status - - label: "✅ I manually verified merge" - value: verified_merge - route: treat_as_merged_emitter - - label: "🛑 Abort" - value: abort - route: abort_run # ── Stuck-review gate policy router ─────────────────────────────────── # @@ -1198,24 +1266,24 @@ agents: when: "{{ stuck_review_gate_policy_router.output.unattended.review_wait_mode == 'skip' }}" - to: stuck_review_gate - # ── Stuck-review gate (poll-cap escalation) ─────────────────────────── + # ── Stuck-review gate (timeout escalation) ─────────────────────────── # - # Fires when pending_poll_counter hits POLL_CAP (60). Reviewers have - # gone silent — surface that fact rather than letting the operator keep - # clicking "Continue" on pending_review_gate forever. + # Fires when poll_pr_state_delta hits the TimeoutSeconds budget with no + # reaction. Reviewers have gone silent — operator decides whether to + # keep waiting, override-approve, or abort. - name: stuck_review_gate type: human_gate - description: ADO PR review pending past poll cap — operator decides wait, override, or abort. + description: ADO PR review pending past poll timeout — operator decides wait, override, or abort. prompt: | - ## ⏰ Stuck Review — Poll Cap Reached for ADO PR #{{ workflow.input.pr_number }} + ## ⏰ Stuck Review — Poll Timeout for ADO PR #{{ workflow.input.pr_number }} - The PR has been awaiting reviewer action for - **{{ pending_poll_counter.output.count }}** polls - (cap: **{{ pending_poll_counter.output.cap }}**). + The PR has been awaiting reviewer action for the full poll timeout + period ({{ workflow.input.poll_timeout_seconds | default(86400) }} s) + with no reaction detected. Reviewers may be unavailable or unblocked but silent. You can keep - waiting (the counter resets), bypass review and merge anyway, or - abort. + waiting (re-enters the poll loop), bypass review and merge anyway, + or abort. - **PR:** {{ poll_status.output.pr_url }} - **Branch:** `{{ workflow.input.branch_name }}` @@ -1223,7 +1291,7 @@ agents: --- Choose an action: - - **Continue waiting** — reset the poll counter and return to the pending-review gate + - **Continue waiting** — re-enter the poll loop with a fresh budget - **Override approved** — bypass review and proceed to merge via `pr merge-feature-ado` - **Abort** — halt the entire run (terminates the conductor process) options: @@ -1239,23 +1307,20 @@ agents: # ── Stuck-review reset ──────────────────────────────────────────────── # - # Zeros the pending poll counter file when the operator chose - # "Continue waiting" at stuck_review_gate, then routes back to - # pending_review_gate_policy_router so the operator gets the regular - # pending UX again with a fresh cap budget. + # Re-enters the poll loop after the operator chose "Continue waiting" + # at stuck_review_gate. Routes directly to poll_pr_state_delta + # (pending_review_gate_policy_router is gone post-compression). - name: stuck_review_reset type: script - description: Reset pending-poll counter after stuck-review continue-waiting choice + description: Re-enter poll loop after stuck-review continue-waiting choice command: pwsh args: - "-NoProfile" - "-Command" - - | - $counterFile = Join-Path ([System.IO.Path]::GetTempPath()) "conductor-ado-pr-pending-poll-{{ workflow.input.work_item_id }}-{{ workflow.input.pr_number }}" - Set-Content $counterFile 0 - @{ count = 0; reset = $true } | ConvertTo-Json + - >- + @{ reset = $true } | ConvertTo-Json -Compress routes: - - to: pending_review_gate_policy_router + - to: poll_pr_state_delta # ── Pre-merge policy router (AB#3184 + AB#3217 warning-stamp split) ─── # @@ -1336,7 +1401,7 @@ agents: # fallback). Operator decides whether to merge, defer, or abort. # # - approve → pr_merger (proceed with noFastForward merge) - # - defer → pending_review_gate (wait for more input / re-poll) + # - defer → poll_pr_state_delta (re-enter poll loop; pending signal already live) # - abort → abort_run - name: pr_pre_merge_gate type: human_gate @@ -1357,7 +1422,7 @@ agents: Choose: - **Approve merge** → pr_merger fires `polyphony pr merge-feature-ado` - - **Defer** → back to pending_review_gate (operator can re-evaluate later) + - **Defer** → re-enter poll loop (poll signal already live; workflow waits for next reaction) - **Abort** → halt the run cleanly options: - label: "✅ Approve merge" @@ -1365,7 +1430,7 @@ agents: route: pr_merger - label: "⏸️ Defer (back to pending)" value: defer - route: pending_review_gate + route: poll_pr_state_delta - label: "🛑 Abort" value: abort route: abort_run diff --git a/.conductor/registry/workflows/github-pr.yaml b/.conductor/registry/workflows/github-pr.yaml index e35b0bf..9b28363 100644 --- a/.conductor/registry/workflows/github-pr.yaml +++ b/.conductor/registry/workflows/github-pr.yaml @@ -21,9 +21,9 @@ # decides whether the implementer needs to revise: # • has_negative_feedback=true → approvals_policy → revise_counter # → pr_fixer → push commits → back to poll_status. -# • has_negative_feedback=false → pending_poll_counter → either the -# pending throttle gate (poll cap not reached) or the stuck-review -# escalation (cap reached). +# • has_negative_feedback=false → notify_pr_pending (emit domain signal) +# → poll_pr_state_delta (blocks until reaction or timeout) +# → reaction_kind routing (merged, closed, timeout, new_review, ci_changed). # # revise_counter (AB#3236): increments unconditionally on every loop # iteration, AND separately tracks consecutive no-commit iterations by @@ -51,13 +51,45 @@ workflow: single advisory bot review (marker-only — no native approval vote), then loops poll → analyze → fix → push until the operator merges, the operator closes the PR, or the revise cap fires. - version: "2.4.8" - entry_point: guidance_loader + version: "2.5.0" metadata: min_polyphony_version: "2.4.8" limits: max_iterations: 200 + notifications: + namespace: polyphony.github_pr + correlation: + - work_item_id + types: + pr_review_required: + version: 1 + description: PR awaiting human action (review, approval, merge, or close) + payload: + kind: {type: string} + severity: {type: string} + title: {type: string} + message: {type: string} + cta_url: {type: string} + cta_kind: {type: string} + correlation_id: {type: string} + expires_at: {type: string} + disposition: {type: string} + details: {type: object} + pr_ci_attention_required: + version: 1 + description: CI status on PR changed to FAILURE — operator should push a fix + payload: + kind: {type: string} + severity: {type: string} + title: {type: string} + message: {type: string} + cta_url: {type: string} + cta_kind: {type: string} + correlation_id: {type: string} + disposition: {type: string} + details: {type: object} + input: pr_number: type: number @@ -84,6 +116,22 @@ workflow: type-refinement lookup and is part of the composite counter keys so the sentiment loop's pending-poll + revise counters don't collide across siblings. + poll_timeout_seconds: + type: integer + required: false + default: 86400 + description: >- + Seconds before poll_pr_state_delta times out and escalates to + stuck_review_gate_policy_router. Default 24 h (86400 s). Set to + a smaller value (e.g. 3600) for short-lived CI environments. + poll_interval_seconds: + type: integer + required: false + default: 30 + description: >- + Seconds between polls inside Poll-PrStateDelta.ps1. Default 30 s. + Increase to reduce API rate consumption; decrease for faster + reaction detection in test environments. tools: - gh @@ -428,6 +476,37 @@ agents: value: abort route: abort_run + # ── PR review resolved signal ───────────────────────────────────────── + # + # Emits disposition:resolved on the pr_review_required notification type + # so platespinner closes the pending CTA that notify_pr_pending opened. + # Fires only on the poll_pr_state_delta pr_merged path. Routes to + # already_merged_emitter which emits the canonical merged=true terminal. + # + # See gate-compression-pattern ADR §"Resolved-Disposition Signal". + # + # TODO(post-upstream-merge): rename type: notification → type: emit, + # and notification: → emit: when conductor PR #213 cherry-pick lands. + - name: notify_pr_review_resolved + type: notification + notification: pr_review_required + payload: + kind: pr_review_required + severity: info + title: "PR Merged" + message: >- + PR #{{ workflow.input.pr_number }} on `{{ workflow.input.branch_name }}` + was merged. Workflow continuing. + cta_url: "{{ poll_status.output.pr_url }}" + cta_kind: open_pr + correlation_id: "{{ workflow.input.work_item_id }}:pr-review:{{ workflow.input.pr_number }}" + disposition: resolved + details: + pr_number: "{{ workflow.input.pr_number }}" + branch_name: "{{ workflow.input.branch_name }}" + routes: + - to: already_merged_emitter + # ── Already-merged emitter ──────────────────────────────────────────── # # Operator-merged the PR through the GitHub UI. Emit the canonical @@ -603,7 +682,7 @@ agents: routes: - to: approvals_policy when: "{{ pr_feedback_analyzer.output.has_negative_feedback == true }}" - - to: pending_poll_counter + - to: notify_pr_pending # ── Approvals policy resolver ───────────────────────────────────────── # @@ -942,117 +1021,129 @@ agents: routes: - to: poll_status - # ── Pending poll counter (stuck-review timeout MVP) ─────────────────── + # ── PR pending — emit domain signal ────────────────────────────────── # - # Increments each time pr_feedback_analyzer concludes there is no - # actionable negative feedback. Counter file is keyed by composite - # (pr_number + work_item_id) so siblings don't collide. + # Emits a pr_review_required domain signal once when the gate opens + # (pr_feedback_analyzer found no actionable negative feedback). The + # poll loop handles waiting silently until a reaction is detected. # - # When the count reaches POLL_CAP (60), routes to - # stuck_review_gate_policy_router so the operator can decide whether - # to keep waiting, override-approve, or abort. Below the cap, routes - # to pending_review_gate_policy_router (the throttle gate) for the - # unchanged operator UX in the common case. - - name: pending_poll_counter - type: script - description: Track pending-review poll count (cap 60 — stuck-review timeout MVP) - command: pwsh - args: - - "-NoProfile" - - "-Command" - - | - $counterFile = Join-Path ([System.IO.Path]::GetTempPath()) "conductor-github-pr-pending-poll-{{ workflow.input.work_item_id }}-{{ workflow.input.pr_number }}" - if (Test-Path $counterFile) { - $count = [int](Get-Content $counterFile -Raw) + 1 - } else { - $count = 1 - } - Set-Content $counterFile $count - $cap = 60 - @{ count = $count; cap = $cap; cap_reached = ($count -ge $cap) } | ConvertTo-Json + # Replaces: pending_poll_counter + pending_review_gate_policy_router + + # pending_review_gate (all three collapsed into emit + poll + # per the gate-compression-pattern ADR). + # + # The old review_wait_mode=skip/wait/auto distinction is superseded: + # wait (default) → emit notification + poll (the new behaviour) + # skip → same (poll script IS the "skip" — no human gate) + # auto → flagged in the ADR as deprecated; no longer needed + # + # TODO(post-upstream-merge): rename type: notification → type: emit, + # and notification: → emit: when conductor PR #213 cherry-pick lands. + - name: notify_pr_pending + type: notification + notification: pr_review_required + payload: + kind: pr_review_required + severity: warning + title: "PR Review Required" + message: >- + PR #{{ workflow.input.pr_number }} on `{{ workflow.input.branch_name }}` + is open and awaiting action (approve, merge, comment, or close) before + the workflow continues. + cta_url: "{{ poll_status.output.pr_url }}" + cta_kind: review_pr + correlation_id: "{{ workflow.input.work_item_id }}:pr-review:{{ workflow.input.pr_number }}" + expires_at: "" + disposition: pending + details: + pr_number: "{{ workflow.input.pr_number }}" + branch_name: "{{ workflow.input.branch_name }}" + work_item_id: "{{ workflow.input.work_item_id }}" routes: - - to: stuck_review_gate_policy_router - when: "{{ pending_poll_counter.output.cap_reached == true }}" - - to: pending_review_gate_policy_router - when: "{{ pending_poll_counter.output.cap_reached == false }}" - # M10 defensive catch-all (#222): if cap_reached evaluates to - # anything other than true/false (counter script crash, malformed - # JSON, template eval edge case), fail safe to the stuck-review - # gate rather than re-entering the polling cycle via the policy - # router's skip-mode bypass. - - to: stuck_review_gate_policy_router + - to: poll_pr_state_delta - # ── Pending-review gate policy router ───────────────────────────────── + # ── PR state delta poll ─────────────────────────────────────────────── # - # Reads `policy.unattended.review_wait_mode`. Three modes: - # • `wait` (default) → fall through to the human throttle gate. - # • `skip` → bypass the gate, route directly back to re-poll. - # • `auto` → REJECTED. The old `auto` mode self-approved via - # magic comments; that mechanism is gone in the - # sentiment-driven loop. Treated as a hard - # misconfiguration so it surfaces immediately - # instead of silently degrading to `skip` (which - # keeps polling forever with no human in the - # loop — an unattended operator might wait days - # for a PR they thought was being auto-merged). - - name: pending_review_gate_policy_router + # Calls Poll-PrStateDelta.ps1 which blocks internally (polling every + # PollIntervalSeconds) until a reaction is detected or TimeoutSeconds + # elapses. Script exits 0 for all normal domain outcomes; non-zero + # only for infrastructure failures (auth, network, invalid args). + # + # reaction_kind routing (Poll-PrStateDelta.ps1 enum values): + # initial_observation → poll_pr_state_delta (watermark just established; re-poll) + # pr_merged → notify_pr_review_resolved → already_merged_emitter + # pr_closed → closed_unmerged_emitter + # timeout → stuck_review_gate_policy_router (human escalation) + # ci_status_changed → notify_pr_ci_attention (if delta_details.new == FAILURE) + # poll_status (green/pending — re-read absolute state) + # new_review_*, new_commit, new_comment, catch-all → poll_status + # + # TODO(AB#3257): add on_error: to: poll_error_gate once conductor RFC + # Phase 2 (on_error: in routes) ships. Until then, infrastructure + # failures exit non-zero and surface as conductor step errors. + - name: poll_pr_state_delta type: script - description: Bypass pending_review_gate when policy.unattended.review_wait_mode is skip; reject auto. - command: polyphony + description: Poll for GitHub PR state delta — blocks until reaction or timeout (Poll-PrStateDelta.ps1) + command: pwsh args: - - "policy" - - "load" + - "-NoProfile" + - "-File" + - "{{ workflow.dir }}/../scripts/Poll-PrStateDelta.ps1" + - "-Platform" + - "github" + - "-PrUrl" + - "{{ poll_status.output.pr_url }}" + - "-TimeoutSeconds" + - "{{ workflow.input.poll_timeout_seconds | default(86400) }}" + - "-PollIntervalSeconds" + - "{{ workflow.input.poll_interval_seconds | default(30) }}" routes: - - to: abort_auto_mode_unsupported - when: "{{ pending_review_gate_policy_router.output.unattended.review_wait_mode == 'auto' }}" + # Watermark just established on first call — re-enter poll loop immediately. + - to: poll_pr_state_delta + when: "{{ poll_pr_state_delta.output.reaction_kind == 'initial_observation' }}" + - to: notify_pr_review_resolved + when: "{{ poll_pr_state_delta.output.reaction_kind == 'pr_merged' }}" + - to: closed_unmerged_emitter + when: "{{ poll_pr_state_delta.output.reaction_kind == 'pr_closed' }}" + - to: stuck_review_gate_policy_router + when: "{{ poll_pr_state_delta.output.reaction_kind == 'timeout' }}" + # Q3 Option C: CI red → emit attention signal + re-poll. + - to: notify_pr_ci_attention + when: "{{ poll_pr_state_delta.output.reaction_kind == 'ci_status_changed' and poll_pr_state_delta.output.delta_details.new == 'FAILURE' }}" + # new_review_*, new_commit, new_comment, ci_status_changed (non-failure), catch-all: + # re-read absolute state via poll_status; routes to analyzer or merge path based on fresh data. - to: poll_status - when: "{{ pending_review_gate_policy_router.output.unattended.review_wait_mode == 'skip' }}" - - to: pending_review_gate - # ── Pending-review gate (throttle in the sentiment-driven loop) ─────── + # ── CI attention notification ───────────────────────────────────────── # - # Reached when pr_feedback_analyzer found no actionable negative - # feedback this poll. The PR is open and awaiting an explicit positive - # signal (operator merges) or a negative one (operator closes the PR, - # or new comments that change the analyzer's verdict next poll). - - name: pending_review_gate - type: human_gate - prompt: | - ## ⏳ PR Awaiting Action — PR #{{ workflow.input.pr_number }} - - The PR is open. The polyphony bot reviewer has posted (see the - marker comment on the PR) and the sentiment-driven analyzer did - NOT find actionable negative feedback this poll. - - - **PR:** {{ poll_status.output.pr_url }} - - **Branch:** `{{ workflow.input.branch_name }}` - - To proceed, take one of these actions on GitHub, then click - **Continue** to re-poll: - - - **Merge the PR** — workflow detects merged state next poll. - - **Comment requesting changes** — the analyzer reads the comment - on the next poll and routes pr_fixer back to revise. - - **Close the PR** — workflow exits this PR leg as unmerged on the - next poll. The parent feature-pr.yaml decides whether to accept - the abandonment or run its remediation cycle. - - _Note: GitHub blocks PR-author self-approval. If you opened this - PR yourself, the merge happens once a second reviewer approves — - or you can simply use the **Merge** button directly._ - - --- - - Choose an action: - - **Continue** — re-poll the PR for the updated state - - **Abort** — halt the entire run (terminates the conductor process) - options: - - label: "🔄 Continue" - value: continue - route: poll_status - - label: "🛑 Abort" - value: abort - route: abort_run + # Emits pr_ci_attention_required when Poll-PrStateDelta detects CI has + # flipped to FAILURE (Q3 Option C). Routes back to poll_pr_state_delta + # to continue waiting — the operator is informed but the workflow does + # not stall. A subsequent new_commit reaction (after the fix push) will + # re-read absolute state via poll_status. + # + # TODO(post-upstream-merge): rename type: notification → type: emit. + - name: notify_pr_ci_attention + type: notification + notification: pr_ci_attention_required + payload: + kind: pr_ci_attention_required + severity: warning + title: "CI Failed on PR" + message: >- + CI status on PR #{{ workflow.input.pr_number }} changed to FAILURE. + Push a fix commit to restart CI; the workflow will detect the new + commit and re-evaluate. + cta_url: "{{ poll_status.output.pr_url }}" + cta_kind: open_pr + correlation_id: "{{ workflow.input.work_item_id }}:pr-ci:{{ workflow.input.pr_number }}" + disposition: pending + details: + pr_number: "{{ workflow.input.pr_number }}" + branch_name: "{{ workflow.input.branch_name }}" + old_ci_status: "{{ poll_pr_state_delta.output.delta_details.prev | default('unknown') }}" + new_ci_status: "{{ poll_pr_state_delta.output.delta_details.new | default('FAILURE') }}" + routes: + - to: poll_pr_state_delta # ── Stuck-review gate policy router ─────────────────────────────────── # @@ -1074,24 +1165,25 @@ agents: when: "{{ stuck_review_gate_policy_router.output.unattended.review_wait_mode == 'skip' }}" - to: stuck_review_gate - # ── Stuck-review gate (poll-cap escalation) ─────────────────────────── + # ── Stuck-review gate (timeout escalation) ─────────────────────────── # - # Fires when pending_poll_counter hits POLL_CAP (60). The reviewer has - # gone silent — surface that fact rather than letting the operator keep - # clicking "Continue" on pending_review_gate forever. + # Fires when poll_pr_state_delta hits the TimeoutSeconds budget with no + # reaction. Reviewer / approver has gone silent — surface that fact so + # the operator can decide whether to keep waiting, override-approve, or + # abort. "Continue waiting" resets the poll loop via stuck_review_reset. - name: stuck_review_gate type: human_gate - description: PR review pending past poll cap — operator decides wait, override, or abort. + description: PR review pending past poll timeout — operator decides wait, override, or abort. prompt: | - ## ⏰ Stuck Review — Poll Cap Reached for PR #{{ workflow.input.pr_number }} + ## ⏰ Stuck Review — Poll Timeout for PR #{{ workflow.input.pr_number }} - The PR has been awaiting reviewer action for - **{{ pending_poll_counter.output.count }}** polls - (cap: **{{ pending_poll_counter.output.cap }}**). + The PR has been awaiting reviewer action for the full poll timeout + period ({{ workflow.input.poll_timeout_seconds | default(86400) }} s) + with no reaction detected. The reviewer may be unavailable or unblocked but silent. You can - keep waiting (the counter resets), bypass review and merge anyway, - or abort. + keep waiting (re-enters the poll loop), bypass review and merge + anyway, or abort. - **PR:** {{ poll_status.output.pr_url }} - **Branch:** `{{ workflow.input.branch_name }}` @@ -1099,7 +1191,7 @@ agents: --- Choose an action: - - **Continue waiting** — reset the poll counter and return to the pending-review gate + - **Continue waiting** — re-enter the poll loop with a fresh budget - **Override approved** — treat the PR as approved and proceed to merge - **Abort** — halt the entire run (terminates the conductor process) options: @@ -1115,23 +1207,20 @@ agents: # ── Stuck-review reset ──────────────────────────────────────────────── # - # Zeros the pending poll counter file when the operator chose - # "Continue waiting" at stuck_review_gate, then routes back to - # pending_review_gate_policy_router so the operator gets the regular - # pending UX again with a fresh cap budget. + # Re-enters the poll loop after the operator chose "Continue waiting" + # at stuck_review_gate. Routes directly to poll_pr_state_delta + # (pending_review_gate_policy_router is gone post-compression). - name: stuck_review_reset type: script - description: Reset pending-poll counter after stuck-review continue-waiting choice + description: Re-enter poll loop after stuck-review continue-waiting choice command: pwsh args: - "-NoProfile" - "-Command" - - | - $counterFile = Join-Path ([System.IO.Path]::GetTempPath()) "conductor-github-pr-pending-poll-{{ workflow.input.work_item_id }}-{{ workflow.input.pr_number }}" - Set-Content $counterFile 0 - @{ count = 0; reset = $true } | ConvertTo-Json + - >- + @{ reset = $true } | ConvertTo-Json -Compress routes: - - to: pending_review_gate_policy_router + - to: poll_pr_state_delta # ── Pre-merge policy router (AB#3184 + AB#3217 warning-stamp split) ─── # @@ -1206,7 +1295,7 @@ agents: # fallback). Operator decides whether to merge, defer, or abort. # # - approve → pr_merger (proceed with squash merge) - # - defer → pending_review_gate (wait for more input / re-poll) + # - defer → poll_pr_state_delta (re-enter poll loop; pending signal already live) # - abort → abort_run - name: pr_pre_merge_gate type: human_gate @@ -1227,7 +1316,7 @@ agents: Choose: - **Approve merge** → squash merge proceeds (pr_merger fires `gh pr merge --squash --delete-branch`) - - **Defer** → back to pending_review_gate (operator can re-evaluate later) + - **Defer** → re-enter poll loop (poll signal already live; workflow waits for next reaction) - **Abort** → halt the run cleanly options: - label: "✅ Approve merge" @@ -1235,7 +1324,7 @@ agents: route: pr_merger - label: "⏸️ Defer (back to pending)" value: defer - route: pending_review_gate + route: poll_pr_state_delta - label: "🛑 Abort" value: abort route: abort_run diff --git a/docs/decisions/domain-signal-envelope.md b/docs/decisions/domain-signal-envelope.md new file mode 100644 index 0000000..7308a59 --- /dev/null +++ b/docs/decisions/domain-signal-envelope.md @@ -0,0 +1,293 @@ +# ADR: Domain Signal Envelope + +**Status:** Accepted +**Date:** 2026-05-28 +**Authors:** Bach (Architect) +**Vocabulary authority:** Daniel Green — "domain signal" is the polyphony-side +noun; `emit:` (conductor YAML primitive) is the verb. A workflow _emits_ a +_domain signal_. +**Platespinner gaps:** Closes design dependency for issues #541 and #542. + +--- + +## Context + +Conductor PR #213 adds user-defined notification support to the workflow +engine. Workflows declare a `notifications:` block, define typed notification +schemas, and emit them via `type: notification` steps (renamed to `type: emit` +via commit `27006af` — the rename is transparent to this ADR). + +When a step fires, conductor writes a JSON envelope to: +``` +{TMPDIR}/conductor/conductor-{workflow_name}-{ts}-{run_id}.notifications.jsonl +``` + +Each line is: +```json +{"type": "notification", "timestamp": 1748472194.42, "data": { /* envelope */ }} +``` + +The `data` object is the **conductor notification envelope** — a fixed shape +owned by the conductor engine. Polyphony contributes the `payload` sub-object +of that envelope. + +### The Conductor Envelope (data field) + +The `data` field is the conductor notification envelope built by +`NotificationExecutor.build_envelope()`. Fields confirmed by the dogfood smoke +capture at `.squad/experiments/dogfood-smoke/README.md` (2026-05-28). + +| Field | Type | Notes | +|---|---|---| +| `emission_id` | string | `"{run_id}:{agent_name}:{iteration}"` — deterministic, dedup key | +| `schema_id` | string | `"{namespace}.{notification_type}@{version}"` — stable type identifier | +| `notification_type` | string | e.g. `"pr_review_required"` | +| `namespace` | string | e.g. `"polyphony.feature_pr"` | +| `version` | int | starts at 1; bumped on breaking payload changes | +| `run_id` | string | hex run identifier shared with the `.events.jsonl` filename stem | +| `workflow` | string | workflow name | +| `source_agent` | string | name of the YAML step that fired | +| `subworkflow_path` | string[] | nesting path for sub-workflow steps | +| `correlation` | object | workflow-declared correlation keys, e.g. `{"root_id": "AB#4567"}` | +| `workflow_metadata` | object | engine-supplied metadata dict (may be `{}`) | +| `payload` | object | **polyphony-owned domain signal fields — defined by this ADR** | + +### The Problem + +Polyphony's workflows are moving from `human_gate` nodes to a +`(emit domain signal + script-poll-loop)` gate-compression pattern (see the +`gate-compression-pattern` ADR). For this pattern to work, domain signals must +carry enough context for: + +1. **Platespinner** to render a CTA (call-to-action) notification — e.g. a + "Review PR" button that opens the right URL +2. **Poll scripts** to identify whether they are responding to the right + signal instance (correlation) +3. **Lifecycle management** — knowing when a signal is still actionable vs + resolved or expired + +No schema for `payload` exists today. This ADR defines it. + +### Vocabulary + +- **Domain signal:** the polyphony-side unit of user-actionable information, + emitted by a workflow step. This ADR defines its wire shape. +- **`emit:`** (or currently `notification:` in dogfood): the conductor YAML + primitive that fires a domain signal. +- **Conductor envelope:** the outer wrapper that conductor adds (schema_id, + emission_id, correlation, etc.) — NOT owned by polyphony, not defined here. +- **Payload:** the `payload` sub-object of the conductor envelope — THIS IS + what polyphony owns and what this ADR defines. + +--- + +## Decision + +Polyphony owns the `payload` field of the conductor notification envelope. +The schema below is the polyphony domain signal payload contract. + +### Required Payload Fields + +| Field | Type | Constraints | Description | +|---|---|---|---| +| `kind` | string | non-empty, machine-readable | Signal type identifier. See Kind Vocabulary below. | +| `severity` | string | `info` \| `warning` \| `error` \| `critical` | Rendering hint and routing input. | +| `title` | string | ≤80 chars | Short human-readable title for toast/notification header. | +| `message` | string | non-empty | Full human-readable body. | + +### Optional Payload Fields + +| Field | Type | Constraints | Description | +|---|---|---|---| +| `cta_url` | string | valid URI | Primary action URL. Platespinner renders as a "→ Open" button. | +| `cta_kind` | string | see CTA Kind Vocabulary | Machine-readable hint for how to handle the URL. | +| `correlation_id` | string | stable within a gate lifecycle | Opaque ID for this gate/action instance. Shared between the `emit` step and the poll script. | +| `expires_at` | string | ISO 8601 datetime | When this signal is no longer actionable. | +| `disposition` | string | `pending` \| `resolved` \| `expired` | Lifecycle state. Defaults to `pending` if absent. | +| `details` | object | any | Freeform enrichment dict for tooling and diagnostics (e.g. `pr_number`, `repo`, `work_item_id`). | + +### Kind Vocabulary + +| Kind | When to use | +|---|---| +| `pr_review_required` | A PR is awaiting reviewer approval | +| `pr_checks_pending` | CI/CD checks running on a PR | +| `gate_pending` | A workflow gate is open and awaiting resolution | +| `work_item_pending` | An ADO work item needs a state transition | +| `run_completed` | A workflow run completed successfully | +| `run_failed` | A workflow run failed | +| `ci_awaited` | Waiting for a CI run to complete | +| `merge_conflict` | A PR has a merge conflict requiring resolution | + +New kinds may be added without a version bump as long as existing consumers +ignore unknown values. + +### CTA Kind Vocabulary + +| CTA Kind | Meaning for platespinner | +|---|---| +| `review_pr` | Open PR page and expect the user to leave a review | +| `open_pr` | Open PR page (no specific action expected) | +| `open_gate` | Open the conductor gate interaction UI | +| `open_run` | Open a CI or conductor run page | +| `open_work_item` | Open an ADO work item | +| `external` | Open an arbitrary external URL | + +### Complete Wire Example (payload only — goes inside conductor envelope's `payload` field) + +```json +{ + "kind": "pr_review_required", + "severity": "warning", + "title": "PR Review Required", + "message": "PR #123 (feat/my-feature) is awaiting at least one reviewer approval before the workflow can continue.", + "cta_url": "https://github.com/org/repo/pull/123", + "cta_kind": "review_pr", + "correlation_id": "a3f1b2c4:pr-review:AB#4567", + "expires_at": "2026-05-29T23:43:14Z", + "disposition": "pending", + "details": { + "pr_number": 123, + "repo": "org/repo", + "work_item_id": "AB#4567" + } +} +``` + +### Full `.notifications.jsonl` Line (for platespinner implementors) + +Grounded in the dogfood smoke capture (`.squad/experiments/dogfood-smoke/README.md`, +2026-05-28). Field order matches actual conductor output. + +```json +{"type":"notification","timestamp":1748472194.42,"data":{"emission_id":"a3f1b2c4:notify_review_required:1","schema_id":"polyphony.feature_pr.pr_review_required@1","notification_type":"pr_review_required","namespace":"polyphony.feature_pr","version":1,"run_id":"a3f1b2c4","workflow":"feature-pr","source_agent":"notify_review_required","subworkflow_path":[],"correlation":{"root_id":"AB#4567"},"workflow_metadata":{},"payload":{"kind":"pr_review_required","severity":"warning","title":"PR Review Required","message":"PR #123 (feat/my-feature) is awaiting at least one reviewer approval before the workflow can continue.","cta_url":"https://github.com/org/repo/pull/123","cta_kind":"review_pr","correlation_id":"a3f1b2c4:pr-review:AB#4567","expires_at":"2026-05-29T23:43:14Z","disposition":"pending","details":{"pr_number":123,"repo":"org/repo","work_item_id":"AB#4567"}}}} +``` + +### Workflow YAML Declaration (canonical `emit:` syntax) + +```yaml +workflow: + name: feature-pr + notifications: + namespace: polyphony.feature_pr + correlation: + - root_id + types: + pr_review_required: + version: 1 + payload: + kind: {type: string} + severity: {type: string} + title: {type: string} + message: {type: string} + cta_url: {type: string} + cta_kind: {type: string} + correlation_id: {type: string} + expires_at: {type: string} + disposition: {type: string} + details: {type: object} + +agents: + - name: notify_review_required + type: emit # dogfood pre-refresh: type: notification + emit: pr_review_required # dogfood pre-refresh: notification: pr_review_required + payload: + kind: pr_review_required + severity: warning + title: "PR Review Required" + message: "PR #{{ workflow.input.pr_number }} is awaiting review before the workflow continues." + cta_url: "{{ workflow.input.pr_url }}" + cta_kind: review_pr + correlation_id: "{{ workflow.input.run_id }}:pr-review:{{ workflow.input.root_id }}" + expires_at: "{{ workflow.input.deadline }}" + disposition: pending + details: + pr_number: "{{ workflow.input.pr_number }}" + repo: "{{ workflow.input.repo }}" + routes: + - to: poll_pr_approved +``` + +--- + +## One-Way Contract + +The domain signal envelope travels in ONE direction only: + +``` +polyphony workflow → conductor → .notifications.jsonl → platespinner → toast/UI → user +``` + +No system downstream of the `.notifications.jsonl` write (platespinner, toast +subscribers, external tooling) writes back to conductor or to polyphony. +Resolution of the gate is signalled by **external state change** (PR merged, +review approved) that is detected by the **poll script running inside the +workflow** — not by platespinner. + +--- + +## Alternatives Considered + +**Reuse conductor's error envelope shape:** Rejected. The error envelope is +`{kind, message, details}` — no severity, no CTA, no lifecycle. The domain +signal envelope serves a fundamentally different purpose (user notification vs +error routing). + +**Minimal payload (title + message only):** Rejected. Without `cta_url`, the +gate-compression pattern cannot surface actionable notifications. Without +`correlation_id`, poll scripts cannot safely associate a pending signal with +their current run context. Without `disposition`, platespinner cannot manage +signal lifecycle (resolved signals should not fire new toasts). + +**CTA URL as a top-level conductor field:** Rejected. Conductor's envelope +fields (schema_id, emission_id, correlation) are engine-level concepts. +Polyphony-specific semantics belong in `payload`. This keeps the polyphony +contract additive on top of conductor without requiring conductor changes. + +--- + +## Consequences + +### Positive +- Platespinner can render CTA-bearing notifications from polyphony domain + signals (closes design dependency for issues #541 and #542) +- Poll scripts can correlate signals to their gate instance via `correlation_id` +- `disposition` enables lifecycle management — platespinner can suppress new + toasts for resolved/expired signals +- The schema is additive: new optional fields can be added without version bumps + +### Negative +- Workflow authors must set `expires_at` explicitly from workflow input; there + is no engine-level TTL (see Open Questions in `gate-compression-pattern` ADR) +- `details` is freeform — not schema-validated — which trades safety for + flexibility + +--- + +## Evolution Policy + +1. **Adding new optional fields:** permitted without a `version` bump on the + `NotificationTypeDef`. All consumers MUST ignore unknown fields. +2. **Adding new `kind` or `cta_kind` values:** permitted without version bump. + Consumers that switch on `kind` MUST have a default/fallback case. +3. **Changing field type or making an optional field required:** requires a + `version` bump on the `NotificationTypeDef` in the workflow YAML. +4. **Removing fields:** requires a `version` bump AND a migration plan for + existing consumers. + +--- + +## Invariants + +1. **`payload` is polyphony-owned.** The conductor engine wraps it but does + not interpret it. Only polyphony and its downstream consumers (platespinner) + may define field semantics. +2. **Consumers MUST ignore unknown fields.** Forward compatibility is non- + negotiable. +3. **`kind` and `severity` are ALWAYS required.** A domain signal without + these fields is a polyphony implementation defect. +4. **`cta_url` MUST be present when `cta_kind` is present.** `cta_kind` is + meaningless without a URL. +5. **No write-back.** No system that reads a domain signal from + `.notifications.jsonl` writes to conductor, polyphony, or any conductor- + managed file. diff --git a/docs/decisions/gate-compression-pattern.md b/docs/decisions/gate-compression-pattern.md new file mode 100644 index 0000000..12d689a --- /dev/null +++ b/docs/decisions/gate-compression-pattern.md @@ -0,0 +1,296 @@ +# ADR: Gate Compression Pattern + +**Status:** Accepted +**Date:** 2026-05-28 +**Authors:** Bach (Architect) +**Depends on:** `domain-signal-envelope.md`, `polyphony-verb-error-boundary.md` +**Companion issues:** #536 (verb error boundary), #541/#542 (platespinner gaps) + +--- + +## Context + +Polyphony's workflows contain two broad categories of gate node: + +1. **Deterministic-outcome gates** — the workflow pauses waiting for an + observable external condition: PR approved, CI checks passed, work item + in target state, review timeout elapsed. The human's _role_ is to take an + external action (approve the PR, fix the tests), not to make a judgment + _inside the workflow_. + +2. **Judgment gates** — the workflow genuinely requires a human to evaluate + something and decide: does this scope change make sense? is this design + acceptable? should we proceed despite the conflict? No poll script can + detect resolution here because "resolution" is defined by human judgment, + not by observable state. + +The current workflow suite treats both categories as `human_gate` nodes, which +means every pause requires a human to click through a gate UI — even when the +resolution is "PR was approved, continue." This wastes human attention on +logistics, obscures the gates that actually need judgment, and creates a +brittle dependency on a human being present and attentive. + +**Gate compression** is the architectural pattern that converts category-1 +gates from `human_gate` nodes to an `(emit domain signal + script-poll-loop)` +structure. The workflow emits a domain signal (platespinner renders a CTA +notification), then immediately begins polling for the observable condition. +When the condition resolves, the workflow continues automatically. The human +takes the external action (merges the PR, approves the review); the poll +script detects it. + +This ADR defines: +- Which gate shapes compress vs which stay `human_gate` +- The canonical YAML structure for the compressed pattern +- The timing/backoff policy +- Open questions that require further input + +--- + +## Decision + +### The Compression Rule + +A gate node is a candidate for compression if AND ONLY IF: + +1. Resolution is detectable by a poll script (i.e. there exists a CLI command + or API call that returns a deterministic boolean: "condition met or not") +2. The human's role is to take an _external action_, not to make a workflow + decision +3. Timeout is acceptable as a fallback — the workflow can handle "condition + never resolved within N minutes" gracefully + +A gate stays `human_gate` if: +- Resolution requires a judgment call made _inside the workflow_ (scope + approval, design sign-off, conflict resolution) +- The action is irreversible or high-risk (production deploy, destructive + operation) — even if technically pollable, these warrant explicit human + confirmation in the gate UI +- No reliable poll mechanism exists (e.g. external system has no queryable API) + +### Gate Shape Catalogue + +| Gate shape | Compresses? | Pattern | +|---|---|---| +| PR review approval | ✅ Yes | emit `pr_review_required` + poll `polyphony pr-status --approved` | +| CI/CD checks passing | ✅ Yes | emit `pr_checks_pending` + poll `polyphony pr-status --checks-passed` | +| PR merge | ✅ Yes | emit `pr_review_required` + poll `polyphony pr-status --merged` | +| Work item state transition | ✅ Yes | emit `work_item_pending` + poll `twig state` | +| Review timeout (wait N hours) | ✅ Yes | conductor `type: wait` (no emit needed — no human action required) | +| Scope violation decision | ❌ No — stays `human_gate` | Human evaluates, workflow adapts | +| Design / architectural sign-off | ❌ No — stays `human_gate` | Judgment inside workflow | +| Conflict resolution | ❌ No — stays `human_gate` | Human judgment + possible replan | +| Production deploy confirmation | ❌ No — stays `human_gate` | High-risk irreversible action | +| Stuck review (human override) | ❌ No — stays `human_gate` | Human decides to proceed or escalate | + +### Canonical Compressed Gate Pattern (YAML) + +The pattern is: **emit domain signal → poll → wait-on-miss → loop**. + +```yaml +# Step 1: emit domain signal (CTA fires in platespinner) +- name: notify_pr_review_required + type: emit # dogfood pre-refresh: type: notification + emit: pr_review_required # dogfood pre-refresh: notification: pr_review_required + payload: + kind: pr_review_required + severity: warning + title: "PR Review Required" + message: "PR #{{ workflow.input.pr_number }} is awaiting review before the workflow continues." + cta_url: "{{ workflow.input.pr_url }}" + cta_kind: review_pr + correlation_id: "{{ workflow.input.run_id }}:pr-review:{{ workflow.input.root_id }}" + expires_at: "{{ workflow.input.review_deadline }}" + disposition: pending + details: + pr_number: "{{ workflow.input.pr_number }}" + routes: + - to: poll_pr_approved + +# Step 2: poll the observable condition +- name: poll_pr_approved + type: script + script: | + $result = polyphony pr-status --pr-id "{{ workflow.input.pr_number }}" --repo "{{ workflow.input.repo }}" + $out = $result | ConvertFrom-Json + $out | ConvertTo-Json -Compress | Out-File -Encoding utf8 $env:CONDUCTOR_OUTPUT + routes: + - condition: "{{ agent.output.approved == true }}" + to: post_review_step + - condition: "{{ agent.output.approved == false }}" + to: wait_before_retry_poll + +# Step 3: wait before retrying (backoff) +- name: wait_before_retry_poll + type: wait + seconds: 120 + routes: + - to: poll_pr_approved +``` + +#### Notes on the Pattern + +- The emit step fires **once** when the gate opens. The poll loop does NOT + re-emit on every miss — that would spam platespinner with duplicate + notifications. +- If the poll script fails with a non-zero exit (infrastructure failure, not + a "condition not met" result), the `on_error:` handler on `poll_pr_approved` + should route to the appropriate retry or escalation path. +- The wait step's `seconds` value is a policy choice — see Open Asks below. +- The poll script should write a deterministic JSON output: `{"approved": true}` + or `{"approved": false}`. It should NOT return a non-zero exit for "condition + not yet met" — that is a domain outcome, not an infrastructure failure + (per the `polyphony-verb-error-boundary` ADR). + +### When to Emit a "Resolved" Domain Signal + +When the poll loop detects resolution, workflows SHOULD emit a second domain +signal with `disposition: resolved` so platespinner can update the notification +lifecycle (suppress future toasts, mark the item resolved). This is optional +but strongly encouraged for gates with `cta_url`. + +```yaml +- name: notify_pr_review_approved + type: emit # dogfood pre-refresh: type: notification + emit: pr_review_required # dogfood pre-refresh: notification: pr_review_required + payload: + kind: pr_review_required + severity: info + title: "PR Review Approved" + message: "PR #{{ workflow.input.pr_number }} received approval. Workflow continuing." + cta_url: "{{ workflow.input.pr_url }}" + cta_kind: review_pr + correlation_id: "{{ workflow.input.run_id }}:pr-review:{{ workflow.input.root_id }}" + disposition: resolved + routes: + - to: post_review_step +``` + +### Maximum Poll Iterations + +To prevent infinite loops, every compressed gate MUST have a maximum poll +count or a wall-clock expiry. The pattern uses the `expires_at` field in the +domain signal as the semantic expiry, but the workflow itself enforces it +via one of: + +1. A conductor `on_error: timeout` at the workflow level +2. An iteration counter in the poll script that routes to a fallback + `human_gate` after N misses +3. A sentinel date check in the poll script against `expires_at` + +Without one of these, a compressed gate over an unresolvable condition will +loop forever. Wagner must choose which approach to use in each workflow. + +--- + +## Alternatives Considered + +**Compress ALL gates including judgment calls:** Rejected. Judgment gates +require human evaluation that no poll script can substitute. Compressing them +would either cause infinite poll loops or require complex "did the human +decide" detection that would reinvent the gate UI. + +**Keep all gates as `human_gate`, add CTA rendering only:** Rejected. +This is a half-measure — the human still has to click through the gate UI even +when the workflow can self-advance once the external condition resolves. The +poll loop is what makes the gate truly automatic. + +**Script-only with no domain signal (silent poll):** Rejected. Without a +domain signal, the user has no visibility that the workflow is waiting for +them to take an action. Platespinner shows no notification; the user may not +know a PR review is needed. The domain signal is the user's actionable prompt. + +--- + +## Consequences + +### Positive +- Human attention is reserved for genuine judgment calls (scope, design, + conflict resolution) — the only gates that remain as `human_gate` +- Observable conditions (PR approval, CI, work item state) resolve + automatically; operators do not need to be present at their workflow + dashboard to advance the run +- Platespinner's CTA notifications give operators an ambient awareness of + what is pending without requiring them to actively monitor the gate UI +- Gate infrastructure failures (twig unavailable, ADO unreachable) route + through `on_error:` chains rather than blocking on a human gate + +### Negative +- Poll loops consume workflow execution time; a long review can hold a + workflow active for hours (mitigated by `type: wait` backoff) +- Workflow YAML is more verbose: each compressed gate is 3 steps + (emit + poll + wait) vs 1 `human_gate` node +- If the poll script misclassifies a domain outcome (condition not met) as a + script failure (non-zero exit), `on_error:` will trigger spuriously — + careful implementation of poll scripts is required + +--- + +## OPEN ASKS FOR DANIEL + +These are specific judgment calls where the architectural decision has an +impact on operator experience and workflow authoring conventions. Bach has +made a recommendation for each; Daniel's input confirms or overrides. + +### Ask 1 — Default Poll Backoff + +**Question:** What is the default wait duration between poll retries? +**Current pattern:** 120 seconds (2 minutes) hardcoded in `type: wait` +**Options:** +- A) Fixed 120s (simple, predictable, may be slow for fast CI) +- B) Fixed 60s (faster, more aggressive) +- C) Configurable per workflow via a workflow input (most flexible, most + verbose for authors) +- D) Exponential backoff (1 min → 2 min → 4 min → cap at 15 min) starting + fresh on each workflow run + +**Bach's recommendation:** C — configurable per workflow input with a +documented default of 120s. This way operator-facing docs can specify the +default, and high-urgency workflows (fast CI runs) can dial it down. + +### Ask 2 — Expiry Behavior + +**Question:** When `expires_at` is reached while the poll loop is still +running, what should the workflow do? +**Options:** +- A) Abort the poll loop and fail the workflow with an error +- B) Emit an "expired" domain signal (`disposition: expired`) and continue + polling indefinitely (the expiry is advisory only) +- C) Route to a fallback `human_gate` where the operator decides whether to + extend or abort +- D) Emit an "expired" signal and abort without human intervention + +**Bach's recommendation:** C — route to a `human_gate` fallback. The expiry +is a signal that the automated wait has hit its limit; the human should decide +whether to extend, escalate, or abort. This preserves the principle that +judgment calls stay human. + +### Ask 3 — Resolved Signal Convention + +**Question:** Should emitting a `disposition: resolved` signal on poll +success be a workflow authoring REQUIREMENT or a RECOMMENDATION? +**Options:** +- A) Required — polyphony's linter will flag a compressed gate that has no + resolved-disposition emit on its success path +- B) Recommended — documented convention, not enforced +- C) Optional — left entirely to workflow authors + +**Bach's recommendation:** B — recommended but not linted for now, since the +linter infrastructure for this check does not yet exist. Revisit to A when the +Jinja-resolver lint (ADR #175 companion) ships. + +### Ask 4 — Maximum Poll Iteration Cap + +**Question:** Should polyphony's workflow conventions specify a maximum number +of poll iterations (a hard cap) in addition to the `expires_at` field? +**Options:** +- A) Yes — workflow template includes a `max_poll_attempts` input with a + documented default (e.g. 50); poll script increments a counter and routes + to `human_gate` when exceeded +- B) No — `expires_at` is the only cap; wall-clock expiry is sufficient +- C) Yes — but implemented as a conductor-level `on_error: timeout` at the + workflow level, not as a poll-script counter + +**Bach's recommendation:** C — conductor-level timeout is cleaner than a poll +counter (no counter state to thread through YAML), but requires Mahler to +confirm that conductor's `on_error: timeout` works correctly for long-running +poll loops. If Mahler confirms, prefer C; otherwise fall back to A. diff --git a/docs/decisions/polyphony-verb-error-boundary.md b/docs/decisions/polyphony-verb-error-boundary.md new file mode 100644 index 0000000..956d804 --- /dev/null +++ b/docs/decisions/polyphony-verb-error-boundary.md @@ -0,0 +1,158 @@ +# ADR: Polyphony Verb Error Boundary + +**Status:** Accepted +**Date:** 2026-05-28 +**Authors:** Bach (Architect) +**Implements:** GitHub issue #536 (verb error emission retrofit) +**Implementation owner:** Mozart (.NET/C#) + +--- + +## Context + +Polyphony CLI verbs currently exit 0 regardless of execution outcome and write +structured JSON to stdout. On domain failure (e.g. work item not found, PR +already closed) verbs emit `{"error": "..."}` to stdout — still exit 0. + +Conductor's upcoming `on_error:` routing (PR #229) routes workflow execution +based on the exit code of the previous step and on the presence of a typed +error envelope written to `CONDUCTOR_ERROR_OUT`. For `on_error:` to catch +polyphony-verb failures, verbs must emit non-zero exits for the failure classes +conductor needs to route on. Without this change, every polyphony verb failure +passes silently through `on_error:` as if it were a success. + +This ADR defines the **policy** for when polyphony verbs emit non-zero exits +and what they write to `CONDUCTOR_ERROR_OUT`. Mozart owns the per-verb +implementation and the exit-code catalogue. + +### Constraints + +- Polyphony workflows today check stdout JSON for error conditions. Any regime + change must not silently break workflows that currently work. +- Conductor's `on_error:` routing requires **both** a non-zero exit AND a + typed envelope on `CONDUCTOR_ERROR_OUT` to give downstream routes meaningful + context. +- Domain outcomes (expected failure states that a workflow should handle + programmatically — e.g. "this PR is already merged, skip step") must NOT + become non-zero exits; they are valid workflow signals, not infrastructure + failures. + +--- + +## Decision + +**Option C — Hybrid exit codes:** polyphony verbs exit **0 for all domain +outcomes** (success and expected/named failure states) and exit **non-zero for +infrastructure failures only**. + +| Class | Exit code | CONDUCTOR_ERROR_OUT | +|---|---|---| +| Domain success | 0 | not written | +| Domain failure (expected, named) | 0 | not written; failure signalled via stdout JSON `error` field | +| Infrastructure failure (see catalogue) | non-zero (typed) | typed error envelope | + +### Exit Code Catalogue (Mozart owns — this table is the policy seed) + +| Code | Meaning | +|---|---| +| 0 | Domain outcome (success or named domain failure) | +| 1 | Generic unclassified infrastructure failure | +| 2 | Configuration error (missing or invalid `.polyphony-config`) | +| 3 | Twig CLI unavailable or unresponsive | +| 4 | Git operation failure (not a domain outcome — infrastructure I/O) | +| 5 | ADO / PR platform unreachable (not a domain outcome — infrastructure I/O) | +| 6 | Polyphony-level timeout (verb-level, distinct from conductor-level `on_error: timeout`) | + +Mozart may extend this table when retrofitting individual verbs. The catalogue +lives in `docs/polyphony-architecture.md` under a new § Exit Code Catalogue +section. Mozart adds entries there; this ADR defines the taxonomy only. + +### What Verbs Write on Infrastructure Failure + +On a non-zero exit, a verb MUST also write a typed error envelope to +`CONDUCTOR_ERROR_OUT`. Minimum required fields: + +```json +{ + "kind": "twig_unavailable", + "message": "twig CLI did not respond within the expected timeout", + "details": { + "exit_code": 3, + "command": "twig state Active --id AB#4567" + } +} +``` + +The `kind` field is a dotted string matching the verb's failure class (e.g. +`twig_unavailable`, `git_push_rejected`, `config_missing`). Mozart defines the +kind vocabulary per verb. + +### What Verbs Write on Domain Outcome (Unchanged) + +All domain outcomes — success and named domain failures — continue to produce +structured JSON on stdout and exit 0. This preserves backward compatibility +with all existing workflows that check stdout for `error` fields. + +```json +{"status": "merged", "pr_number": 42} // success +{"error": "pr_not_found", "pr_number": 999, "retryable": false} // domain failure +``` + +--- + +## Alternatives Considered + +**Option A — Exit 0 always (current behavior):** Rejected. Conductor's +`on_error:` routing cannot detect polyphony verb failures. The 14 error gates +removed in #535 cannot be replaced with automated `on_error:` chains unless +verbs signal failures. + +**Option B — Non-zero for any non-success:** Rejected. Too coarse. Domain +failures (PR already merged, work item in terminal state) are expected workflow +signals that should be routed programmatically, not surfaced as infrastructure +errors. Conflating them forces every workflow author to handle "PR already +merged" as if it were a network failure. + +**Option C (chosen) — Hybrid:** Preserves backward compatibility (domain +outcomes keep exit 0), enables `on_error:` routing for infrastructure failures +that polyphony cannot recover from automatically. + +--- + +## Consequences + +### Positive +- Conductor `on_error:` can now catch and route infrastructure failures + without human gate intervention +- Verbs remain deterministic about what is a "domain outcome" vs an + "infrastructure failure" — the distinction is explicit and testable +- Existing workflows are not broken: exit-0 domain outcomes still produce the + same stdout JSON they always did + +### Negative +- Mozart must audit ~50 verb implementations and classify each failure path + as domain-outcome vs infrastructure-failure — this is a non-trivial retrofit +- Any verb that currently exits 0 on an infrastructure failure (e.g. swallows + a twig timeout and returns `{"error": "..."}`) needs to be reclassified; + this may surface latent issues + +### Neutral +- The `CONDUCTOR_ERROR_OUT` mechanism is conductor-specific. Polyphony verbs + that are invoked outside conductor (e.g. in tests, in scripts) will still + emit non-zero on infrastructure failure — callers should check the exit code + and optionally read `CONDUCTOR_ERROR_OUT` if set + +--- + +## Invariants + +1. **Exit 0 NEVER means "nothing happened."** A verb that exits 0 MUST produce + structured JSON on stdout describing the outcome. +2. **Domain failures are domain outcomes.** Expected, named failure states + (e.g. work item already in terminal state, PR already merged) are exit 0 + with a JSON `error` field — NOT non-zero exits. +3. **Infrastructure failures exit non-zero AND write `CONDUCTOR_ERROR_OUT`.** + A non-zero exit without a typed envelope on `CONDUCTOR_ERROR_OUT` is a + polyphony implementation defect. +4. **Mozart owns the exit-code catalogue.** This ADR establishes the taxonomy; + per-verb assignments live in `docs/polyphony-architecture.md`. diff --git a/docs/north-star.md b/docs/north-star.md new file mode 100644 index 0000000..d087199 --- /dev/null +++ b/docs/north-star.md @@ -0,0 +1,462 @@ +# Polyphony North Star + +> **Status:** Living document. Sections labelled **(locked)** are the result of explicit decisions reached through grilling sessions; sections labelled **(open)** are deferred to follow-up sessions. Re-grill before changing a locked decision. +> +> **Scope:** What polyphony is, who it's for, why it exists, where its seams live, the shape of operator engagement, the philosophy that governs when polyphony surfaces a moment for operator input, the trust-ramp posture between polyphony and the operator, the time-scale agnosticism of the engine, and the failure/recovery posture. Observability, multi-run concurrency, and success signal are still open (§5.6–§5.8) and will be filled as the corresponding grilling completes. +> +> **Companion docs:** [`docs/glossary.md`](glossary.md) is the ubiquitous-language source of truth; [`docs/polyphony-architecture.md`](polyphony-architecture.md) is the layering/data-flow reference; [`docs/decisions/`](decisions/) holds the ADR catalogue. + +--- + +## 1. Identity (locked) + +Polyphony is a **type-agnostic SDLC routing engine for Azure DevOps work items**. Its architecture is **platform-portable by design**: ADO and twig sit behind a YAML-level seam so a future polyphony can adopt other platforms without re-architecting the engine. Today, ADO is the only supported platform; portability is a property we preserve, not a feature we ship. + +This is the **A-today / B-by-design** framing: Identity-A (ADO-only) describes current reality; Identity-B (platform-portable) describes the architecture we will not regress. + +--- + +## 2. Audience (locked) + +**Primary audience.** A single **operator** driving their own ADO backlog with AI agents. The operator supervises; the agents do the keystrokes; polyphony is the deterministic skeleton both sides trust. + +**Supported beyond primary.** + +- **iii-B (in-goal):** Teams adopting polyphony as a shared pattern — each operator runs their own instance against a committed `.polyphony-config/`. Achieved through docs and conventions, not by building new machinery. +- **iii-A (supported but not optimized):** Concurrent multi-operator runs against the same repo, as a side effect of the existing run-lock model and per-run worktree layout. + +**Deferred — not built for, not foreclosed.** + +- **iii-C:** Hosted/multi-tenant polyphony service. Would require a different product (auth, RBAC, queuing, persistence-across-restarts). We do not build for it, but we do not actively foreclose it either — e.g., we do not hard-wire `~/.polyphony/` deep into the engine. +- **iii-D:** Unattended CI-style runs without a live operator. Requires answering "what does polyphony do at a `human_gate` when nobody is reachable?" — deferred until we have a real use case. + +**Long-arc aspiration informing design without gating features.** + +- **iv:** Agents operating with progressively less human supervision. This is the trajectory polyphony's existence points along; it is not a feature we currently ship. + +--- + +## 3. Mission (locked) + +Polyphony exists to make **AI-agent-driven SDLC trustworthy**. Agents are individually capable but collectively unreliable: they hallucinate states, skip steps, take destructive shortcuts, and lose track of lifecycle. Polyphony pins down the decisions agents shouldn't be making — what phase a work item is in, what branch to cut, when to call the PR done, when to merge, when to escalate — as deterministic verbs over typed inputs. + +**Agents do the work; polyphony decides what work, in what order, against what guardrails. The operator supervises outcomes instead of steps.** + +**"Trustworthy" decomposes as:** + +- **Deterministic** where it can be (routing, validation, state interpretation). +- **Bounded** where it can't be (agent invocations have explicit scope, tool addenda, evidence requirements). +- **Recoverable** when it goes wrong (gates, evidence, reset, restack). + +### The α / β / γ framing + +| | Role | What it is | Notes | +|---|---|---|---| +| **α** | **Mission** | Make agent-driven SDLC trustworthy. | What we exist for. Arbitrates priorities. | +| **γ** | **Mechanism (currently chosen)** | Config-driven type-agnosticism so the same engine works across ADO process templates (Basic / Agile / Scrum / CMMI / custom) without rewriting workflows. | How we currently achieve α. Could change. | +| **β** | **Hook** | Relieve the SDLC busywork (branch naming, state transitions, PR plumbing) humans hate. | How we explain polyphony to skeptics. Does not drive priorities. | + +When α, β, γ conflict in feature prioritization, **α wins**. + +--- + +## 4. Seams + +Polyphony is defined as much by what sits on the *other* side of each seam as by what's inside it. + +### 4.1 Polyphony ↔ conductor (locked, S1) + +Polyphony **authors** SDLC workflows and the deterministic verbs they call; **conductor executes** them. The boundary is **directional** — when polyphony needs a capability conductor lacks, we **grow conductor**, we do not absorb its domain. + +Polyphony may grow adjacent operator surfaces (visualizations, run tooling, and possibly a workflow-authoring DSL that compiles to YAML at build time — see [`docs/discussions/workflow-compiler-evaluation/`](discussions/workflow-compiler-evaluation/)) that complement conductor without replacing its execution role. The YAML interface between authoring and execution is a **stable, jointly-evolved contract**. + +The polyphony C# engine stays engine-agnostic (no conductor types leak in) — **hygiene that keeps the verbs usable standalone**, not a substitutability claim. We are committed to conductor as the engine, not preserving optionality to swap it out. + +**Generates the invariants:** + +- No orchestration runtime inside polyphony. +- No conductor types in `src/Polyphony/{Routing,Configuration,Policy}/`. +- Workflow suite and CLI ship from the same repo and release together (single version stamp). +- "Grow conductor first" is the default response when tempted to build orchestration behaviour polyphony-side. + +### 4.2 Polyphony ↔ twig (locked, S2) + +**Engine ↔ ADO seam (load-bearing).** Polyphony's core engine (routing, validation, hierarchy, state interpretation, policy) reads ADO state through twig's read-side library (`Twig.Domain`, `Twig.Infrastructure`, project-referenced and in-process) and **never directly accesses ADO**. Verified by grep: `src/Polyphony/{Routing,Configuration,Policy}/` holds zero references to `Polyphony.Infrastructure.AzureDevOps`. Promotable to a lint rule. + +**Work-item writes (load-bearing).** ADO work item state writes — fields, states, tags, comments, sync — cross a process/service boundary into twig. Twig owns the write transaction. + +**Bounded carve-outs (current reality, not invariant).** Polyphony's infrastructure layer holds platform transport for capabilities no upstream tool covers today: + +- **ADO Pull Requests:** in-process REST via `src/Polyphony/Infrastructure/AzureDevOps/AdoClient.cs`, used by the `Commands/PrCommands.*Ado.cs` family. Polyphony holds ADO credentials *only for this purpose*. +- **GitHub Pull Requests:** shell-out to `gh` via `src/Polyphony/Infrastructure/Processes/GhClient.cs`. + +These carve-outs live in `Infrastructure/` and the `Commands/Pr*` family, never in the engine. + +**Operating norm (provisional, may evolve).** Delegate to the dedicated upstream tool when one exists; carry transport in-process only when forced to. The current mix follows this rule. May be promoted to a load-bearing invariant after surviving more carve-out comparisons. + +**Open architectural question (deferred to project-level).** Should ADO PR transport migrate to twig (or another tool) over time? "Grow twig, don't absorb twig" points toward yes (i.e., Option A — twig grows PR support, polyphony's `AdoClient` retires). Decision deferred — to be resolved through a stakeholder conversation with twig when capacity permits, not in the north star. + +**Generates the invariants:** + +- No ADO REST in `src/Polyphony/{Routing,Configuration,Policy}/`. +- Polyphony holds no ADO credentials for work-item operations (twig holds them). +- Verb naming discipline: engine verbs decide (`route`, `validate`, `ensure`, `resolve`, `check`); they do not execute ADO writes. +- Twig's CLI surface is a stable contract — arg shapes are load-bearing across releases. +- "Grow twig first" is the default response when tempted to build twig-overlapping behaviour polyphony-side. + +### 4.3 Polyphony ↔ agents (open, S3) + +Not yet grilled. Likely shape: polyphony scopes and bounds the work agents do (facet profiles, addendum composition, tool addenda, evidence requirements); agents produce open-ended content (code, plans, reviews); polyphony reads the artifacts the agent produced. To be verified in a follow-up session. + +### 4.4 Polyphony ↔ ADO (open, S4) + +Not yet grilled. Likely shape: ADO is the system of record; polyphony owns no authoritative state; every read goes through twig's cache; every work-item write goes through twig (with the bounded ADO PR carve-out from S2). To be verified in a follow-up session. + +--- + +## 5. Behavioral / lifecycle + +### 5.1 Operator engagement (locked) + +**Polyphony surfaces moments; the operator chooses the depth of engagement.** + +At each surfaced moment — a gate fires, a phase completes, evidence is ready, a decision needs sign-off — polyphony presents a concise summary and yields. The operator picks the depth: + +- **Rubber-stamp** — "looks fine, continue" — friction-free, one click. +- **Light engagement** — skim the evidence, then continue. +- **Deep engagement** — pause, read carefully, discuss with the agent, redirect. + +Polyphony does not force depth. It guarantees *visibility* and *the option to engage*. The operator's time is theirs. + +**Policy-configured.** Surfacing thresholds, rubber-stamp eligibility, and force-read requirements (for destructive or high-stakes actions) are governed by **policy** — `.polyphony-config/` for the team-shared shape, `~/.polyphony/` for per-operator preferences. Default policy ships safe; operators tune as trust grows. This is the entry point for the trust-ramp question (§5.3). + +**Non-goals.** + +- **Continuous-presence-required chat surface.** The operator is not expected to be present moment-to-moment. Polyphony is not Copilot Chat. +- **Forced-deep-engagement as default.** Rubber-stamp is a first-class path. Force-read is rare and policy-justified (e.g., destructive operations). +- **Surface-less runs (fire-and-forget).** There *are* surface moments; the operator may rubber-stamp every one, but they cannot be skipped entirely. (Consistent with iii-D being deferred.) + +**Deferred — not foreclosed.** + +- **Truly operator-initiated dialogue** — pre-run brainstorming, mid-run pause-and-discuss at moments polyphony did not surface. Plausible future expansion; not built for today; not designed against. + +**Generates the invariants:** + +- Every surfaced moment is **dual-mode**: a 2-second rubber-stamp must be possible *and* a deeper-engagement entry point must be present. Information design at the surface is load-bearing. +- Surfacing behavior is policy-driven; defaults ship safe. +- Polyphony never executes a destructive operation without at least surfacing it, even when policy permits rubber-stamp. + +### 5.2 Gate philosophy (locked) + +**Clock time is the currency of every surface.** A surfaced moment is a *pause point* — the run cannot continue until the operator answers. The operator is human; they are not necessarily reachable when the surface fires. Therefore every surface costs **wall-clock latency**, regardless of how cheaply the operator answers once they see it. + +This makes attention cost (§5.1) and latency cost (§5.2) orthogonal: rubber-stamp lowers attention; only *not surfacing* lowers latency. + +**Three reasons polyphony surfaces** — distinguished by what polyphony *lacks*: + +| Category | Capability gap | Operator's leverage | Canonical examples | +|---|---|---|---| +| **Inflection** | Direction-setting authority — polyphony has a direction, needs operator buy-in before committing | Redirect cheaply now, or rubber-stamp | Open questions before plan commit; PR direction confirmation before merge | +| **Surrender** | Decision authority — polyphony has alternatives but no basis to rank them | Pick between options polyphony cannot rank | Cap-hit gates, root-fallback, renegotiation | +| **Attestation** | Action capability — polyphony cannot perform the work itself | Do the work (or arrange for it) and vouch for completion | Actionable-facet satisfaction gates: "release this, provide evidence" | + +The inflection category corresponds to moments where *the cost of changing course is about to jump* — the plan hasn't been built yet, the PR hasn't merged yet. Course-correction is cheap now, expensive later. Operator input has maximum leverage at inflection moments. + +The surrender category corresponds to moments where polyphony has exhausted its own decision authority — a cap fired, recovery is needed, polyphony lacks context only the operator can supply. + +The attestation category corresponds to work polyphony *constitutionally cannot do* — release a product, deploy to production, sign a contract, confirm a real-world fact. The actionable facet is defined by this kind of gap. + +**Facet ↔ surface category intersection.** The polyphony facet vocabulary determines which surface categories a step can generate. This is load-bearing: + +- **plannable** → inflection (pre-commit to plan) +- **implementable** → inflection (pre-publish PR) + surrender (revise/remediation caps) +- **actionable** → attestation (satisfaction gates) +- **decomposable** → inflection (pre-commit to decomposition) + +**What does NOT justify a surface:** + +- Boundary-crossings in the technical sense (every CLI call crosses some boundary). +- Routine state transitions polyphony is confident in. +- "Important to log" moments — that's observability (§5.6), not a gate. + +**Policy-configured posture per surface.** Even within a category, *how* polyphony surfaces is policy: + +- **`manual`** — always surface. Reserved for the highest-leverage surfaces where input value justifies the wait unconditionally (e.g., root PR merge). +- **`warning`** — surface only when triggered (quality threshold missed, cap hit, severity met). Default for most decisions: silent in the common case, surfaces when polyphony's own confidence is low. +- **`auto`** — never surface; engine picks a deterministic answer. For unattended modes, fast-track runs, and kinds the operator has judged delegable. + +Workflow YAML declares the **decision kind** (approval, PR merge, open-questions, cap-hit, renegotiation, satisfaction). Policy decides which posture applies, scoped by `root` / `type:` / `defaults`. Workflow authors do not hardcode surface posture. + +**Defaults ship cautious.** Root approval & root PR merge default to `manual`; routine decisions default to `warning`; broad `auto` is opt-in via explicit presets (`.polyphony-config/policy-fasttrack.yaml`). + +**Attestation surfaces cannot meaningfully `auto`.** Policy can *suppress* the gate (for fast-track / dogfood / CI), but doing so means the actionable facet *isn't satisfied* — not that polyphony satisfied it on the operator's behalf. **The gate is the satisfaction event.** Actionable facets are constitutionally human-in-the-loop today; this sharpens why iii-D (unattended CI runs) is genuinely deferred-not-trivially-buildable, not merely behind a feature. + +**Non-goals.** + +- **Surface every boundary-crossing action.** Latency cost is real even for "small" interruptions. +- **Surface on low confidence alone.** Confidence is one input to inflection or surrender posture; it does not by itself justify a surface. +- **Treat all gates as `manual` by default.** `manual` is the most expensive posture; reserve it for decisions where input value clearly exceeds the wait. +- **Hardcode latency-vs-value threshold in workflow YAML.** YAML declares decision kind; policy decides posture. +- **Auto-satisfy attestation surfaces on the operator's behalf.** Suppressing an attestation gate means the facet isn't satisfied, not that the engine satisfied it. + +**Deferred — not foreclosed.** + +- **Batching / digest surfaces** — group several pending decisions into a single operator session to amortize latency. Plausible if surface volume grows. +- **Notification (non-blocking) surfaces** — FYI events that don't pause the run. Likely lives in §5.6 (observability) rather than §5.2. +- **Designated-actor attestation** — a service account, on-call rotation, or trusted automation that can attest on the operator's behalf for specific actionable-facet kinds. Plausible iii-D enabler; not designed against; not built today. + +**Generates the invariants** (additions to §7): + +- Surfaces exist at **inflection**, **surrender**, or **attestation** moments; never at every state transition. +- Every gate in workflow YAML declares a **decision kind** scopable by policy. +- The `manual` / `warning` / `auto` posture model (plus decision-kind-specific variants — `auto_proceed`, `auto_fail`, `auto_restart`, `skip`) is the load-bearing surface mechanism. +- Default policy ships cautious; broad `auto` is opt-in via explicit presets. +- **Facet vocabulary determines surface-category eligibility:** plannable / decomposable → inflection; implementable → inflection + surrender; actionable → attestation. Steps inside a facet cannot generate surface categories the facet does not own. +- **Attestation surfaces are constitutionally human-in-the-loop.** Policy may suppress them, but suppression means the facet is unsatisfied — never that polyphony satisfied it itself. + +**Preview of §5.3 (trust ramp):** Trust grows *by domain*, not as a single global dial. The operator shifts inflection and surrender postures rightward (`manual` → `warning` → `auto`) selectively — faster for low-stakes work item kinds, slower for root-level decisions. Already reflected in the active `.polyphony-config/policy.yaml` (`Task: auto`, `root: manual`). Attestation surfaces are largely immune to trust ramp — the operator is the only entity that can satisfy them, unless a designated-actor pattern (deferred above) is built. + +### 5.3 Trust ramp (locked) + +**Trust ramp is the operator's trajectory through policy-space — not a polyphony feature.** + +As operator confidence grows, the operator shifts decision kinds rightward (`manual` → `warning` → `auto`) selectively, by domain. Polyphony's role is to make the trajectory *legible* — current state visible, alternatives discoverable, consequences clear. Polyphony does **not** auto-suggest, auto-escalate, or auto-claw-back trust. Those decisions remain explicit and operator-owned. + +This posture protects three properties of polyphony's identity: + +- **Determinism (§3 mission).** A polyphony whose defaults silently shift over time is not deterministic. The operator must be able to reason about behavior without checking "what trust state am I in today?" +- **Accountability (§5.1).** Auto-escalated trust = polyphony deciding on the operator's behalf about what counts as worth their attention. That's the operator's call. +- **Reversibility.** Any policy posture the operator adopts must be trivially reversible. No baked-in defaults that "learn" and resist reversion. + +**Trust grows in shapes, not on a dial.** Four orthogonal dimensions already in the model: + +1. **Per work-item-type** (`Task: auto`, `root: manual`) +2. **Per decision kind** (approval / pr / open_questions / unattended / etc. — independent postures) +3. **Per scope** (`defaults` / `root` / `by_type` — most-specific-wins) +4. **Per operator** (`~/.polyphony/` overlays `.polyphony-config/`) + +The operator's trajectory: start cautious → observe outcomes → tune specific axes rightward as confidence in specific domains grows. Not linear. Not global. Not automatic. + +**Two trust axes the current schema collapses** (worth distinguishing, even if today's policy posture is a single value): + +- **Engine trust** — confidence that polyphony's deterministic plumbing fires the right decision kind at the right time. Largely domain-independent; grows once if the engine works. Tuned today via **policy** (`.polyphony-config/policy.yaml`). +- **Agent trust** — confidence that the LLM's output on *this kind of work* is reliable enough to delegate. Strongly domain-dependent; may regress when agents change, work types shift, or familiarity with the domain changes. Tuned today via **guidance** (`.polyphony-config/guidance/`, addendums, tool addenda). + +A single `manual` / `warning` / `auto` posture collapses both. The operator's mental model is probably two-axis even if the schema is one-axis; future operator-tuning surfaces should respect the distinction (see *deferred* below). + +**Attestation surfaces are immune.** Trust ramp grows polyphony's license to decide on the operator's behalf about things polyphony *can do* — not its ability to act on the real world. (Designated-actor attestation, deferred per §5.2, would be the only path to genuine attestation trust-ramp.) + +**Locked properties of operator-tuning surfaces** (vs. today's mechanism): + +The four properties below describe what every operator-tuning surface — policy, guidance, future intuitive UIs — must remain. Today's YAML/prompt-file mechanism is *one implementation* of these properties, not the only acceptable one: + +- **Operator-owned.** Polyphony never mutates an operator's policy or guidance without explicit operator action. +- **Explicit.** No silent shifts. Schema changes are documented; defaults applied to ungiven values are commented in the artifact. +- **Legible.** The operator can always see their current state in plain text. +- **Reversible.** Any posture or guidance change is trivially revertible. No "learned" defaults that resist reversion. + +**Mechanism today is in flux.** Policy lives in YAML; guidance lives in prompt files. Both are functional but unsatisfying. The properties above describe what the surfaces *must remain*; the surfaces themselves are anticipated to evolve. + +**Non-goals.** + +- **Auto-escalating policy postures.** Polyphony never automatically suggests "you should switch to `auto`" without explicit operator request. +- **Trust as a single global dial.** "Trust level 1–10" oversimplifies; trust is per-domain. +- **Hiding operator-tuning surfaces from operators.** Operators must always be able to see and edit their current state. (Mechanism may evolve; visibility must not regress.) +- **Silent schema evolution.** Schema changes are explicit; defaults applied to ungiven values are documented in artifact comments. +- **"Learned" defaults that resist reversion.** Any operator-set posture must be trivially reversible. + +**Deferred — not foreclosed.** + +- **More intuitive operator-tuning experience.** Policy management today is YAML editing; agent-prompt management today is prompt-file editing in `guidance/`. Both are functional but unsatisfying. A more intuitive surface — UI, guided editor, suggested presets, telemetry-informed dashboards — is anticipated. Open question: which axes (policy / guidance / process config / profile) belong to one unified surface vs. independent surfaces. Not built today; not designed against. (Detailed shape of the agent-prompt surface specifically lives in §4.3.) +- **Trust telemetry that informs (not auto-suggests).** "This kind of surface fired N times last month; you accepted 95%." Data to help operators tune; doesn't tune for them. Plausible enabler for the intuitive-tuning-surface deferred above. +- **Preset libraries beyond fast-track.** Named presets (`cautious`, `balanced`, `experimental`) mapping to common trajectories. +- **`polyphony policy suggest`** — a verb that recommends posture shifts based on observed outcomes. Explicit operator invocation; never automatic. + +**Generates the invariants** (additions to §7): + +- Operator-tuning surfaces are **operator-owned, explicit, legible, and reversible**. The *mechanism* by which the operator interacts with them may evolve; these four properties must not. +- Trust ramp is per-domain; **no global trust scalar exists in the model**. +- Default policy ships cautious; ramp-up requires explicit operator action. +- **Engine trust and agent trust are distinct axes** even though current policy schema collapses them; future operator-tuning surfaces should respect the distinction. +- Polyphony never mutates an operator's policy or guidance without explicit operator action. + +**Connections to other sections:** + +- **§5.4 time scale:** Wall-clock budget per run is a function of policy posture × operator availability. Higher trust → fewer surfaces → shorter runs. +- **§5.5 failure & recovery:** Trust ramp is reversible. After a bad outcome under `auto`, the operator may revert to `warning` — the engine must make this trivial. +- **§5.6 observability:** Trust grows via observation. Better observability between surfaces → faster, better-informed trust ramp. + +### 5.4 Time scale (locked) + +**Polyphony is *time-scale agnostic* by design. Runs scale with work-item complexity, not toward a single target.** + +A run's wall-clock time is the sum of three terms: + +1. **Intrinsic work latency** — agent work time + platform/CI latency + real-world attestation time. +2. **Operator response latency** — wall-clock waiting for the operator to answer surfaces (§5.2). +3. **Polyphony overhead** — the engine's own decisions, verb dispatches, state reads. + +Current dogfood runs span minutes (a single Task) to multiple days (an Epic with deep decomposition and multiple attestations); polyphony's design holds across that range *without changing shape*. + +**Guidance — three illustrative tiers (descriptive, not contractual).** These tiers are *not* categories the engine treats differently; they exist to make "agnostic" concrete and to give downstream sections (§5.5, §5.6, §5.7) shared vocabulary when reasoning about operator habits at different scales: + +| Tier | Typical scale | Operator habit | What it stresses in the design | +|---|---|---|---| +| **Tiny** | Minutes to ~1 hour. Single Task, no decomposition, no actionable facet. | Operator may sit through it. | Real-time terminal visibility; fast surface turnaround. | +| **Medium** | Hours to a day. Story-scale work, a few surfaces, maybe one attestation. | Operator checks in periodically. | Persistence across operator absence; surfaces survive the operator stepping away. | +| **Large** | Days. Epic-scale work, deep decomposition, multiple attestations. | Asynchronous engagement is the norm. | Notification (§5.6), multi-run dashboards (§5.7), trust ramp (§5.3) all become load-bearing. | + +A messy-attestation Task can span days; an all-`auto` Epic can complete in an hour. The tiers describe the *range* polyphony must handle; the engine itself doesn't branch on them. + +**Three properties the engine must hold:** + +- **Polyphony overhead is negligible** relative to (1) and (2). The engine's own decisions are measured in seconds; agents in minutes; operators in minutes-to-hours-to-days. +- **Time-scale agnosticism.** No design choice may break at either extreme of the range. The same engine, the same workflows, the same policy model serve every tier. +- **No engine-imposed kill timers.** Operator response latency is bounded only by the operator's choices. Runs do not abandon for "taking too long." + +**Why agnosticism, not tier-optimization.** Optimizing for a specific tier (e.g., "we are a tool for hours-long runs") would let us bake in assumptions — operator is at the terminal; a surface fires and resolves within minutes; a run that's been open for a day is a bug — that immediately break for the other tiers. Locking agnosticism rejects those assumptions up front. The §5.2 surface model already accommodates the range: clock time is the currency at every tier. + +**Why no kill timers.** Operator silence is *slow*, not *failed*. Polyphony already has surrender surfaces (§5.2 — cap-hit gates, remediation caps) that fire when the *engine* has exhausted its decision authority. Adding a second timeout layer that fires when the *operator* has exhausted polyphony's patience would conflate two distinct failure modes and let the engine abandon work the operator hasn't yet decided to abandon. The cap-hit surface already exists; we don't need a *second* timeout. + +**iii-D as the asymptote.** The deferred unattended-CI audience (§2 iii-D) is the asymptotic case of operator response latency = infinity. Polyphony already handles "operator takes a long time" because it's time-scale agnostic. iii-D requires answering "what does polyphony decide when the operator *never* responds?" — a *policy* question (auto-decide rules at scale), not a *time-scale* question. Sharpens why iii-D is a separate deferral. + +**Non-goals.** + +- **Time-bounded runs / kill timeouts.** Polyphony does not abandon a run because it has "taken too long." Long is a feature, not a bug. +- **Reliance on operator presence at the terminal.** Surfaces must work even if the operator is away (consistent with §5.1 non-goal). +- **A single target time scale.** Polyphony is not "designed for X-minute runs." The engine is time-scale agnostic. +- **Pretending operator response latency is bounded.** Operators take arbitrarily long; the engine accommodates. + +**Deferred — not foreclosed.** + +- **Configurable max-run-duration check-in *surface*.** "This run has been open for N days; want to abandon, pause, or continue?" A surface (operator decides), not a kill timer. Plausible if long-running runs become common and operators want a nudge. Distinct from a kill timer because the operator answers; the engine does not auto-abandon. +- **Time-aware notification cadence** (belongs in §5.6). Tiny runs notify in real-time; medium-run surfaces notify at the surface moment; large-run surfaces may batch into digests. Flagged here so §5.6 picks it up. +- **Run scheduling.** "Don't fire surfaces between midnight and 7am operator local time." Plausible operator preference; not designed today. + +**Generates the invariants** (additions to §7): + +- Polyphony's wall clock per run = intrinsic work latency + operator response latency + polyphony overhead. **Polyphony overhead is negligible** relative to the other two terms. +- **Polyphony is time-scale agnostic.** Same engine handles minutes-long and weeks-long runs. +- **Operator response latency is bounded only by the operator's choices**, never by engine timeouts. +- **Run termination is via operator action or a §5.2 surrender surface**, never via a timer. + +**Connections to other sections:** + +- **§5.5 failure & recovery:** Long runs need cheap interruption recovery. A 3-day run dying on day 2 must be trivially resumable. +- **§5.6 observability:** At larger time scales the operator can't watch the run. Observability is how they know what's happening *between* surfaces. +- **§5.7 multi-run concurrency:** Long runs make multi-run inherent. With several roots in flight, "what needs my attention?" becomes a hard requirement. + +### 5.5 Failure & recovery (locked) + +Recovery is not magic. Polyphony commits to **legible, operator-mediated recovery over the surface it controls**, and is honest about the surface it does not. + +**The state surface, partitioned.** + +- **Engine-controlled state surface.** PRs, worktrees, branches, the run manifest, the per-root run-started-at watermark. Polyphony owns these; recovery primitives target them. +- **Out-of-reach state surface.** Uncommitted operator edits in worktrees; twig's per-worktree `.twig/config`; manually-curated work-item fields/tags/state; PRs that the platform has already completed or closed. Polyphony cannot or will not silently rewrite these; operator awareness is required. + +The split is load-bearing. Reset is total over the first surface and respectful of the second. + +**Failure categories — descriptive, not contractual.** + +Useful guidance for shaping recovery behavior: + +1. **Infrastructure failure.** Transient: network timeouts, ADO rate limits, conductor process crash. The engine retries; chains halt-on-step-failure; idempotent verbs make retry safe. No surface unless retries are exhausted. +2. **State drift.** The engine's view diverges from observable reality: a branch was deleted out-of-band, a PR was completed manually, the manifest references a merge group that no longer corresponds to live branches. Surfaces as a surrender (per §5.2) — polyphony lacks the authority to choose between repair, accept-divergence, or reset. +3. **Work failure.** Agents produced an unsatisfactory result; an evidence check failed; a PR review surfaced a blocking concern. Routes through the normal SDLC pathway (remediation, restack, abandon) — recovery is the same machinery as forward progress, not a separate code path. + +**Recovery primitives.** + +- **Observable-state re-entry rebuilds the worklist.** Every batch, `polyphony.yaml` calls `polyphony state next-ready` against the EdgeGraph; the next batch is whatever is ready *now*, given current PR/branch/manifest reality. This is what makes long-running runs survivable across interruptions, conductor restarts, and operator absence (per §5.4). It is the worklist that is re-derived — not the whole engine state. +- **The manifest is mixed-role and authoritative for what it owns.** `MergeGroups` + `TopologyHash`, `PlanGenerations`, `RetiredMergeGroupIds`, and `HumanApprovals` are authoritative state the engine cannot reconstruct from git/ADO alone. `Rebases` and `MergedPlanPrs` are log-shaped (audit / idempotency ledger). Recovery presupposes the manifest is intact; corruption is a different failure mode and out of scope. +- **Reset is the universal escape hatch over the engine-controlled surface.** `reset-root@polyphony` is operator-mediated end-to-end: dry-run preview → confirmation gate showing per-leg counts (PRs to abandon, worktrees to remove, branches to delete, manifest action, watermark target) → execute. The chain halts-on-step-failure; verbs are idempotent on retry. Reset is always available; it is never auto-invoked. +- **Surgical fixes happen via the same verbs that move work forward.** A single failed PR is remediated via the normal feature-PR-remediation pathway, not a recovery-specific code path. This keeps recovery cheap when reset would be heavy-handed. +- **Operator-visible artifacts are never silently auto-modified during recovery.** Reset deletes only via operator-confirmed gate; remediation comments on PRs are explicit and attributed; the manifest is mutated only by named verbs the operator can audit. + +**The residual gap (honest about what reset does not handle).** + +`reset-root` covers the engine-controlled surface cleanly. The operator still must, today, handle the four residual classes themselves: stash dirty worktree edits before reset will proceed; restore `.twig/config` when twig has rewritten it; verify work-item state baseline if they hand-edited tags/fields between runs; accept that platform-completed PRs cannot be un-merged (the run-started-at watermark hides them from observers, which is the design). The `polyphony-dogfood-recovery` skill catalogues these gotchas as a runbook. + +This residual operator load is the live UX-maturity gap. Closing it is **deferred, not foreclosed.** + +**Deferred — not foreclosed.** + +- **Reduce manifest to a log.** Derive topology, plan generations, retirements, and approvals from git refs / PR descriptions / ADO tags so the manifest could be regenerated from observable reality. Would make recovery near-magical but is a material redesign of the authoritative surface; explicitly out of scope today. +- **Expand reset to cover more of the residual surface.** Auto-stash + restore for dirty worktrees; `.twig/config` reconciliation; structured work-item-state diff & restore prompts. Plausible incremental UX wins. +- **Surface-mediated recovery.** When reset hits a residual gotcha (e.g., dirty worktree), surface a structured remediation surface (per §5.2) rather than printing a CLI error the operator has to interpret. +- **Recovery diagnostics surface** (`polyphony run diagnose` or equivalent) — single command that surfaces "what is in this run's state surface, what is anomalous, what would reset do." Sharpens the operator's ability to choose between surgical fix, reset, and abandon. + +--- + +### 5.6–5.8 Open questions + +Queued for follow-up grilling sessions, in dependency order: + +1. **§5.6 Observability.** How does the operator know what polyphony is doing without watching it? (Distinct from §5.1: §5.1 is about engagement *at* surfaced moments; §5.6 is about awareness *between* surfaced moments. Notification-style non-blocking surfaces likely live here. Trust ramp depends on observability — see §5.3. Time-aware notification cadence lives here — see §5.4. Recovery diagnostics surface from §5.5 also lands here.) +2. **§5.7 Multi-run concurrency.** Operator with several roots in flight — what surface tells them which needs attention? (Made inherent by larger time scales per §5.4.) +3. **§5.8 Success signal.** How do we know polyphony is winning? + +--- + +## 6. Non-goals (consolidated, locked so far) + +- **Generalize polyphony to non-SDLC work.** SDLC is the domain. Facet vocabulary is SDLC-shaped. +- **Absorb conductor's domain.** No polyphony-native workflow runtime; no polyphony-native gate execution; no polyphony-native agent process supervision. Grow conductor instead. +- **Absorb twig's domain.** No ADO REST in the engine; no second cache; no bypass writes for work-item state. Grow twig instead. +- **Become a hosted multi-tenant service** (iii-C, deferred, not foreclosed). +- **Run unattended in CI without a live operator** (iii-D, deferred, not foreclosed). +- **Be a continuous-presence-required chat surface.** Operator is not expected to be present moment-to-moment. (See §5.1.) +- **Force deep engagement as the default at surfaced moments.** Rubber-stamp is a first-class path; force-read is rare and policy-justified. (See §5.1.) +- **Surface every boundary-crossing action.** Crossing a process boundary does not by itself justify a surface; latency cost is real even for "small" interruptions. (See §5.2.) +- **Treat all gates as `manual` by default.** `manual` is the most expensive posture; reserve it for the highest-leverage inflection/surrender/attestation surfaces. (See §5.2.) +- **Auto-satisfy attestation surfaces on the operator's behalf.** Suppressing an attestation gate means the actionable facet is unsatisfied, not that polyphony satisfied it. (See §5.2.) +- **Auto-escalate, auto-suggest, or auto-claw-back policy postures.** Trust ramp is the operator's trajectory; polyphony makes it legible but never moves the operator along it without explicit action. (See §5.3.) +- **Treat trust as a single global dial.** Trust is per-domain; no global trust scalar exists in the model. (See §5.3.) +- **Mutate an operator's policy or guidance.** Operator-tuning surfaces are operator-owned. Polyphony never edits them without explicit operator action. (See §5.3.) +- **Allow "learned" defaults that resist reversion.** Any operator-set posture must be trivially reversible. (See §5.3.) +- **Impose kill timers on runs.** Runs do not abandon for "taking too long." Long is a feature, not a bug. Operator silence is slow, not failure. (See §5.4.) +- **Rely on operator presence at the terminal.** Surfaces must work even if the operator is away. (See §5.1, §5.4.) +- **Optimize for a single time-scale tier.** Polyphony is time-scale agnostic; the same engine serves minutes-long and weeks-long runs. (See §5.4.) +- **Pretend reset is total.** Reset covers the engine-controlled state surface (PRs, worktrees, branches, manifest, watermark). Uncommitted operator edits, `.twig/config` drift, manually-curated work-item state, and platform-completed PRs are outside reset's reach by design or by platform limitation. Operator awareness of these residuals is required today; closing that gap is deferred, not foreclosed. (See §5.5.) +- **Auto-invoke recovery.** Reset is never automatic. Surgical fix verbs run under normal operator-mediated flow. Engine-side retries are bounded to transient infrastructure failures; anything requiring a decision surfaces. (See §5.5.) +- **Silently auto-modify operator-visible artifacts during recovery.** Reset gates on confirmation; remediation comments are explicit and attributed; the manifest is mutated only by named, auditable verbs. (See §5.5.) +- **Hold ADO credentials for work-item operations** — twig holds them. *Carve-out: polyphony holds ADO creds for PR operations today; see §4.2.* +- **Preserve engine substitutability** as a feature. Conductor-as-engine is a strategic commitment; engine-agnostic verb vocabulary is hygiene, not portability. + +--- + +## 7. Load-bearing invariants (consolidated, locked so far) + +- **Engine is platform-agnostic.** `src/Polyphony/{Routing,Configuration,Policy}/` holds no platform-specific types. Promotable to lint. +- **No work-item-type names anywhere in routing logic.** Type names live only in `process-config.yaml`, loaded at runtime. (Authoritative: [`docs/glossary.md`](glossary.md) "Type-agnosticism rule".) +- **No conductor types in the polyphony engine.** Verbs are callable standalone. +- **No ADO REST in the polyphony engine.** ADO transport lives only in the infrastructure layer, only for bounded carve-outs. +- **Workflow YAML and CLI release together.** Single version stamp; no cross-repo skew. +- **Grow upstream tools (conductor, twig); do not absorb their domains.** Default response when tempted is "file an upstream issue." +- **Polyphony interprets work-item state; it never originates state.** ADO is the system of record. +- **Polyphony decides; twig executes ADO work-item writes.** Verb names reflect this (decide-verbs, not execute-verbs). +- **`.polyphony-config/` is team-shared; `~/.polyphony/` is per-operator.** Boundary supports iii-B (team adoption as pattern) without committing to iii-C (hosted service). +- **Every operator-facing surface is dual-mode.** A 2-second rubber-stamp must be possible *and* a deeper-engagement entry point must be present. Information design at the surface is load-bearing, not cosmetic. (See §5.1.) +- **Surfacing behavior is policy-driven; defaults ship safe.** Operators tune surfacing thresholds, rubber-stamp eligibility, and force-read requirements through `.polyphony-config/` and `~/.polyphony/`; the engine ships defaults that err on the side of surfacing. (See §5.1.) +- **Surfaces are inflection, surrender, or attestation moments — never every state transition.** The three categories cover every justified surface; no other category exists. (See §5.2.) +- **Every gate in workflow YAML declares a decision kind scopable by policy.** The `manual` / `warning` / `auto` posture model (plus kind-specific variants — `auto_proceed`, `auto_fail`, `auto_restart`, `skip`) is the load-bearing surface mechanism. Workflow authors do not hardcode surface posture. (See §5.2.) +- **Facet vocabulary determines surface-category eligibility.** plannable / decomposable → inflection; implementable → inflection + surrender; actionable → attestation. Steps inside a facet cannot generate surface categories the facet does not own. (See §5.2.) +- **Attestation surfaces are constitutionally human-in-the-loop.** Policy may suppress them, but suppression means the actionable facet is unsatisfied — never that polyphony satisfied it itself. (See §5.2.) +- **Operator-tuning surfaces are operator-owned, explicit, legible, and reversible.** Policy, guidance, and any future intuitive-tuning surface must hold all four properties. The mechanism (YAML today; possibly UIs / guided editors later) may evolve; these properties may not. (See §5.3.) +- **Trust ramp is per-domain; no global trust scalar exists in the model.** (See §5.3.) +- **Engine trust and agent trust are distinct axes.** Today's policy posture collapses them into a single value; future operator-tuning surfaces should respect the distinction. (See §5.3.) +- **Polyphony never mutates an operator's policy or guidance without explicit operator action.** (See §5.3.) +- **Polyphony is time-scale agnostic.** Same engine handles minutes-long and weeks-long runs without changing shape. Polyphony overhead is negligible relative to intrinsic work latency and operator response latency. (See §5.4.) +- **Operator response latency is bounded only by the operator's choices, never by engine timeouts.** Run termination is via operator action or a §5.2 surrender surface; never via a timer. (See §5.4.) +- **Observable-state re-entry rebuilds the worklist, not the whole engine state.** Every batch is re-derived from `polyphony state next-ready` + EdgeGraph against current git/ADO/manifest reality. This is the load-bearing recovery primitive that makes long runs survivable across interruptions. (See §5.5.) +- **The manifest is authoritative for topology, plan generations, retirements, and approvals.** These cannot be reconstructed from git/ADO alone. `Rebases` and `MergedPlanPrs` are log-shaped (audit / idempotency); the rest is authoritative state recovery presupposes. (See §5.5.) +- **Reset is always available, always operator-mediated, never auto-invoked.** Preview → confirmation gate → execute is the only path; verbs are idempotent on retry; the chain halts-on-step-failure. (See §5.5.) +- **Surgical fixes use the same verbs that move work forward.** Recovery is not a separate code path from normal SDLC progression — it is the same machinery applied in a remediation direction. (See §5.5.) + +--- + +## 8. Provenance + +This document is built incrementally through grilling sessions using the `grill-with-docs` skill. Each section is locked only after explicit question-and-answer agreement. The session that locked each section is recorded in git history; the open-questions list (§4.3, §4.4, §5.6–§5.8) and the deferred-decision list (§4.2 ADO PR direction; §5.1 operator-initiated dialogue; §5.2 batching / notification / designated-actor attestation; §5.3 intuitive operator-tuning experience / trust telemetry / preset libraries / `policy suggest` verb; §5.4 max-run-duration check-in surface / time-aware notification cadence / run scheduling; §5.5 reduce-manifest-to-a-log / expand reset coverage / surface-mediated recovery / recovery diagnostics surface) are the live agenda for subsequent sessions. + +When this document and [`docs/glossary.md`](glossary.md) diverge: glossary wins for terminology, north star wins for direction. Update both together. diff --git a/scripts/Poll-PrStateDelta.Tests.ps1 b/scripts/Poll-PrStateDelta.Tests.ps1 new file mode 100644 index 0000000..fa92c71 --- /dev/null +++ b/scripts/Poll-PrStateDelta.Tests.ps1 @@ -0,0 +1,639 @@ +#requires -Version 7.0 + +<# +.SYNOPSIS + Pester tests for scripts/Poll-PrStateDelta.ps1. + +.DESCRIPTION + Tests are structured around the contract defined in Poll-PrStateDelta.ps1. + Rather than dot-sourcing the script (which has a top-level param block and + auto-executes), each test invokes it as a subprocess via `pwsh -File ...`, + captures stdout (JSON), stderr, and the exit code. + + CLI stubs (gh, az) are installed per-test onto a PATH-isolated directory + using the same Windows .cmd shim pattern as Resolve-GhIdentity.Tests.ps1. + + Coverage: + - Initial observation: watermark missing → records state, returns immediately + - Reaction kinds: pr_merged, pr_closed, new_commit, new_review_* (all three), + new_comment, ci_status_changed + - Precedence: multiple deltas in one poll → highest precedence returned + - Timeout: no delta within budget → reaction_kind: timeout, exit 0 + - Bad input: missing platform, bad URL → exit 2 + - Auth failure: gh exits 401 → exit 3 + - PR not found: gh exits 404 → exit 4 + - Network failure: gh hangs → exit 5 + + NOTE: Tests are written but NOT executed as part of authoring (no live + gh/az credentials or real PRs in this environment). Run with: + pwsh -Command "Invoke-Pester scripts/Poll-PrStateDelta.Tests.ps1 -Output Detailed" +#> + +BeforeAll { + $script:ScriptPath = Join-Path $PSScriptRoot 'Poll-PrStateDelta.ps1' + + # ── Stub installers ─────────────────────────────────────────────────── + + function Install-CliStub { + param( + [Parameter(Mandatory)][string]$StubDir, + [Parameter(Mandatory)][string]$ExeName, + [Parameter(Mandatory)][string]$ScriptBody + ) + $ps1Path = Join-Path $StubDir "$ExeName-impl.ps1" + Set-Content -Path $ps1Path -Value $ScriptBody -Encoding utf8 + + if ($IsWindows) { + $cmdPath = Join-Path $StubDir "$ExeName.cmd" + Set-Content -Path $cmdPath -Value "@pwsh -NoProfile -ExecutionPolicy Bypass -File `"$ps1Path`" %*" -Encoding ascii + } else { + $shPath = Join-Path $StubDir $ExeName + Set-Content -Path $shPath -Value "#!/usr/bin/env bash`npwsh -NoProfile -File `"$ps1Path`" `"`$@`"" -Encoding utf8 + chmod +x $shPath + } + } + + # ── Script invocation helper ───────────────────────────────────────── + + function Invoke-PollScript { + param( + [string]$Platform = 'github', + [string]$PrUrl = 'https://github.com/org/repo/pull/42', + [string]$WatermarkPath, + [int]$TimeoutSeconds = 5, + [int]$PollIntervalSeconds = 1, + [string]$StubDir, + [hashtable]$ExtraEnv = @{} + ) + + $psi = [System.Diagnostics.ProcessStartInfo]::new() + $psi.FileName = (Get-Command pwsh).Source + [void]$psi.ArgumentList.Add('-NoProfile') + [void]$psi.ArgumentList.Add('-ExecutionPolicy') + [void]$psi.ArgumentList.Add('Bypass') + [void]$psi.ArgumentList.Add('-File') + [void]$psi.ArgumentList.Add($script:ScriptPath) + [void]$psi.ArgumentList.Add('-Platform') + [void]$psi.ArgumentList.Add($Platform) + [void]$psi.ArgumentList.Add('-PrUrl') + [void]$psi.ArgumentList.Add($PrUrl) + [void]$psi.ArgumentList.Add('-WatermarkPath') + [void]$psi.ArgumentList.Add($WatermarkPath) + [void]$psi.ArgumentList.Add('-TimeoutSeconds') + [void]$psi.ArgumentList.Add([string]$TimeoutSeconds) + [void]$psi.ArgumentList.Add('-PollIntervalSeconds') + [void]$psi.ArgumentList.Add([string]$PollIntervalSeconds) + + $psi.RedirectStandardOutput = $true + $psi.RedirectStandardError = $true + $psi.UseShellExecute = $false + $psi.CreateNoWindow = $true + + # Inject stub dir at front of PATH + $sep = [System.IO.Path]::PathSeparator + $psi.Environment['PATH'] = if ($StubDir) { "$StubDir$sep$env:PATH" } else { $env:PATH } + + foreach ($k in $ExtraEnv.Keys) { $psi.Environment[$k] = $ExtraEnv[$k] } + + $proc = [System.Diagnostics.Process]::new() + $proc.StartInfo = $psi + [void]$proc.Start() + + $stdoutTask = $proc.StandardOutput.ReadToEndAsync() + $stderrTask = $proc.StandardError.ReadToEndAsync() + + [void]$proc.WaitForExit(30000) # hard 30s wall-clock guard + + return [pscustomobject]@{ + ExitCode = $proc.ExitCode + Stdout = $stdoutTask.GetAwaiter().GetResult().Trim() + Stderr = $stderrTask.GetAwaiter().GetResult().Trim() + } + } + + # ── GitHub stub builders ───────────────────────────────────────────── + + function Build-GhStub { + param( + [string]$PrState = 'open', + [bool]$Merged = $false, + [string]$HeadSha = 'abc1234', + [string]$ReviewsJson = '[]', + [string]$CommentsJson = '[]', + [string]$CheckRunsJson = '{"check_runs":[]}' + ) + return @" +`$argLine = `$args -join ' ' +if (`$argLine -like 'api repos/*/pulls/* --jq {*') { + Write-Output '{"state":"$PrState","merged":$($Merged.ToString().ToLower()),"head_sha":"$HeadSha"}' + exit 0 +} +if (`$argLine -like 'api repos/*/pulls/*/reviews --jq *') { + Write-Output '$ReviewsJson' + exit 0 +} +if (`$argLine -like 'api repos/*/issues/*/comments --jq *') { + Write-Output '$CommentsJson' + exit 0 +} +if (`$argLine -like 'api repos/*/commits/*/check-runs --jq *') { + `$json = '$CheckRunsJson' + `$data = `$json | ConvertFrom-Json + `$conclusions = `$data.check_runs | ForEach-Object { `$_.conclusion } | ConvertTo-Json -Compress + if (-not `$conclusions) { `$conclusions = '[]' } + Write-Output `$conclusions + exit 0 +} +[Console]::Error.WriteLine("Unhandled gh args: `$argLine") +exit 1 +"@ + } +} + +# ── Test workspace cleanup ──────────────────────────────────────────────────── + +Describe 'Poll-PrStateDelta' { + + BeforeEach { + $script:testDir = Join-Path ([System.IO.Path]::GetTempPath()) ` + "pps-test-$([guid]::NewGuid().ToString('N').Substring(0,8))" + New-Item -ItemType Directory -Path $script:testDir -Force | Out-Null + + $script:watermarkFile = Join-Path $script:testDir 'watermark.json' + $script:stubDir = Join-Path $script:testDir 'stubs' + New-Item -ItemType Directory -Path $script:stubDir -Force | Out-Null + } + + AfterEach { + Remove-Item -Path $script:testDir -Recurse -Force -ErrorAction SilentlyContinue + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Initial observation (watermark missing)' { + + It 'Returns reaction_kind=initial_observation and exit 0 when watermark absent' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'sha111' -ReviewsJson '[]' -CommentsJson '[]' + ) + + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir + + $r.ExitCode | Should -Be 0 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'initial_observation' + $out.platform | Should -Be 'github' + $out.pr_url | Should -Be 'https://github.com/org/repo/pull/42' + $out.new_watermark.last_commit_sha | Should -Be 'sha111' + } + + It 'Writes watermark file on initial observation' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'sha111' + ) + + Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir | Out-Null + + Test-Path $script:watermarkFile | Should -BeTrue + $wm = Get-Content $script:watermarkFile -Raw | ConvertFrom-Json + $wm.last_commit_sha | Should -Be 'sha111' + } + + It 'Watermark contains null for last_merged_state when PR is open' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -PrState 'open' -Merged $false -HeadSha 'sha0' + ) + Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir | Out-Null + $wm = Get-Content $script:watermarkFile -Raw | ConvertFrom-Json + $wm.last_merged_state | Should -BeNullOrEmpty + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Reaction kinds — GitHub' { + + BeforeEach { + # Lay down a base watermark (open PR, sha 'base000') + $baseWatermark = [ordered]@{ + last_merged_state = $null + last_commit_sha = 'base000' + last_review_ids = @() + last_comment_ids = @() + last_ci_conclusion = $null + } + $baseWatermark | ConvertTo-Json | Set-Content -Path $script:watermarkFile -Encoding utf8NoBOM + } + + It 'Returns pr_merged when PR transitions to merged state' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -PrState 'closed' -Merged $true -HeadSha 'base000' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $r.ExitCode | Should -Be 0 + $out.reaction_kind | Should -Be 'pr_merged' + } + + It 'Returns pr_closed when PR is closed without merging' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -PrState 'closed' -Merged $false -HeadSha 'base000' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $r.ExitCode | Should -Be 0 + $out.reaction_kind | Should -Be 'pr_closed' + } + + It 'Returns new_commit when HEAD SHA changes' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'newsha1' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'new_commit' + $out.delta_details.new_sha | Should -Be 'newsha1' + $out.delta_details.prev_sha | Should -Be 'base000' + } + + It 'Returns new_review_changes_requested when CHANGES_REQUESTED review appears' { + $reviewsJson = '[{"id":"101","state":"CHANGES_REQUESTED"}]' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -ReviewsJson $reviewsJson -HeadSha 'base000' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'new_review_changes_requested' + } + + It 'Returns new_review_approved when APPROVED review appears' { + $reviewsJson = '[{"id":"102","state":"APPROVED"}]' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -ReviewsJson $reviewsJson -HeadSha 'base000' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'new_review_approved' + } + + It 'Returns new_review_commented when COMMENTED review appears' { + $reviewsJson = '[{"id":"103","state":"COMMENTED"}]' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -ReviewsJson $reviewsJson -HeadSha 'base000' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'new_review_commented' + } + + It 'Returns new_comment when a new comment ID appears' { + $commentsJson = '["9001"]' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -CommentsJson $commentsJson -HeadSha 'base000' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'new_comment' + } + + It 'Returns ci_status_changed when CI conclusion changes from null to SUCCESS' { + $checkRunsJson = '{"check_runs":[{"conclusion":"success"}]}' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -CheckRunsJson $checkRunsJson -HeadSha 'base000' + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'ci_status_changed' + } + + It 'Does not trigger ci_status_changed when CI unchanged (still null)' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'base000' -CheckRunsJson '{"check_runs":[]}' + ) + # With no delta, poll will run until timeout + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir ` + -TimeoutSeconds 2 -PollIntervalSeconds 1 + $out = $r.Stdout | ConvertFrom-Json + $r.ExitCode | Should -Be 0 + $out.reaction_kind | Should -Be 'timeout' + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Precedence' { + + BeforeEach { + $baseWatermark = [ordered]@{ + last_merged_state = $null + last_commit_sha = 'base000' + last_review_ids = @() + last_comment_ids = @() + last_ci_conclusion = $null + } + $baseWatermark | ConvertTo-Json | Set-Content -Path $script:watermarkFile -Encoding utf8NoBOM + } + + It 'Returns new_commit (not new_comment) when both occur in same poll' { + # New commit (sha changed) + new comment — new_commit wins + $commentsJson = '["8888"]' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'newsha9' -CommentsJson $commentsJson + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'new_commit' + } + + It 'Bundles lower-precedence deltas into also_observed' { + $commentsJson = '["8888"]' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'newsha9' -CommentsJson $commentsJson + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 10 + $out = $r.Stdout | ConvertFrom-Json + $out.delta_details.also_observed | Should -Contain 'new_comment' + } + + It 'Returns pr_merged even when new_commit also present' { + # PR merged + SHA technically different — pr_merged wins + $checkRunsJson = '{"check_runs":[{"conclusion":"success"}]}' + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -PrState 'closed' -Merged $true -HeadSha 'newsha9' -CheckRunsJson $checkRunsJson + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir -TimeoutSeconds 30 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'pr_merged' + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Timeout path' { + + It 'Returns reaction_kind=timeout and exit 0 when no delta within budget' { + # Watermark already in sync with API — nothing changes + $baseWatermark = [ordered]@{ + last_merged_state = $null + last_commit_sha = 'stablesha' + last_review_ids = @() + last_comment_ids = @() + last_ci_conclusion = $null + } + $baseWatermark | ConvertTo-Json | Set-Content -Path $script:watermarkFile -Encoding utf8NoBOM + + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'stablesha' + ) + + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir ` + -TimeoutSeconds 3 -PollIntervalSeconds 1 + $out = $r.Stdout | ConvertFrom-Json + + $r.ExitCode | Should -Be 0 + $out.reaction_kind | Should -Be 'timeout' + $out.delta_details.poll_count | Should -BeGreaterThan 0 + } + + It 'Preserves original watermark content after timeout (no write)' { + $baseWatermark = [ordered]@{ + last_merged_state = $null + last_commit_sha = 'stablesha' + last_review_ids = @() + last_comment_ids = @() + last_ci_conclusion = $null + } + $baseWatermark | ConvertTo-Json | Set-Content -Path $script:watermarkFile -Encoding utf8NoBOM + + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'stablesha' + ) + + Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir ` + -TimeoutSeconds 2 -PollIntervalSeconds 1 | Out-Null + + # Watermark SHA should still be stablesha (no reaction = no rewrite) + $wm = Get-Content $script:watermarkFile -Raw | ConvertFrom-Json + $wm.last_commit_sha | Should -Be 'stablesha' + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Bad input — exit 2' { + + It 'Exits 2 for an unparseable GitHub URL' { + $r = Invoke-PollScript ` + -PrUrl 'https://not-a-valid-pr-url/foo' ` + -WatermarkPath $script:watermarkFile + $r.ExitCode | Should -Be 2 + } + + It 'Exits 2 for an unparseable ADO URL' { + $r = Invoke-PollScript ` + -Platform 'ado' ` + -PrUrl 'https://dev.azure.com/bad-url' ` + -WatermarkPath $script:watermarkFile + $r.ExitCode | Should -Be 2 + } + + It 'Exits 2 for TimeoutSeconds = 0' { + $r = Invoke-PollScript ` + -WatermarkPath $script:watermarkFile ` + -TimeoutSeconds 0 + $r.ExitCode | Should -Be 2 + } + + It 'Exits 2 for PollIntervalSeconds = -1' { + $r = Invoke-PollScript ` + -WatermarkPath $script:watermarkFile ` + -PollIntervalSeconds (-1) + $r.ExitCode | Should -Be 2 + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Auth failure — exit 3' { + + It 'Exits 3 when gh returns 401 Unauthorized' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody @' +$argLine = $args -join ' ' +if ($argLine -like 'api *') { + [Console]::Error.WriteLine('HTTP 401: Bad credentials - authentication required') + exit 1 +} +exit 1 +'@ + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir + $r.ExitCode | Should -Be 3 + } + + It 'Writes CONDUCTOR_ERROR_OUT with kind=auth_failure on exit 3' { + $errFile = Join-Path $script:testDir 'conductor-error.json' + + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody @' +$argLine = $args -join ' ' +if ($argLine -like 'api *') { + [Console]::Error.WriteLine('unauthorized - token invalid') + exit 1 +} +'@ + Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir ` + -ExtraEnv @{ CONDUCTOR_ERROR_OUT = $errFile } | Out-Null + + Test-Path $errFile | Should -BeTrue + $err = Get-Content $errFile -Raw | ConvertFrom-Json + $err.kind | Should -Be 'auth_failure' + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'PR not found — exit 4' { + + It 'Exits 4 when gh returns 404 Not Found' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody @' +$argLine = $args -join ' ' +if ($argLine -like 'api *') { + [Console]::Error.WriteLine('HTTP 404: Not Found - no pull request at that URL') + exit 1 +} +exit 1 +'@ + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir + $r.ExitCode | Should -Be 4 + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Network / rate-limit failure — exit 5' { + + It 'Exits 5 when gh returns 429 rate-limit' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody @' +$argLine = $args -join ' ' +if ($argLine -like 'api *') { + [Console]::Error.WriteLine('HTTP 429: API rate limit exceeded') + exit 1 +} +exit 1 +'@ + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir + $r.ExitCode | Should -Be 5 + } + + It 'Exits 5 when gh hangs beyond its call timeout' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody @' +$argLine = $args -join ' ' +if ($argLine -like 'api *') { + Start-Sleep -Seconds 90 # will be killed by Invoke-CliCaptured timeout + Write-Output '{}' + exit 0 +} +exit 1 +'@ + # We cannot easily test the internal 60s CLI timeout here in unit tests + # without a very long wait. This test exists for CI documentation only + # — skip it in fast test runs. + Set-ItResult -Skipped -Because 'Requires 60s+ internal CLI timeout — run in dedicated slow-test suite' + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Output shape invariants' { + + It 'Always emits observed_at_utc in ISO 8601 format' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir + $out = $r.Stdout | ConvertFrom-Json + # ConvertFrom-Json may auto-parse the ISO string into a DateTime; normalise before asserting. + $utc = if ($out.observed_at_utc -is [datetime]) { + $out.observed_at_utc.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') + } else { [string]$out.observed_at_utc } + $utc | Should -Match '^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$' + } + + It 'Always echoes pr_url and platform in output' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub + ) + $r = Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir + $out = $r.Stdout | ConvertFrom-Json + $out.pr_url | Should -Be 'https://github.com/org/repo/pull/42' + $out.platform | Should -Be 'github' + } + + It 'Watermark file is UTF-8 NoBOM' { + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub + ) + Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir | Out-Null + + $bytes = [System.IO.File]::ReadAllBytes($script:watermarkFile) + # UTF-8 BOM is EF BB BF — assert no BOM + if ($bytes.Count -ge 3) { + ($bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF) | + Should -BeFalse -Because 'watermark file must be UTF-8 NoBOM' + } + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'ADO platform — initial observation' { + + It 'Returns initial_observation for ADO URL (az stub)' { + Install-CliStub -StubDir $script:stubDir -ExeName 'az' -ScriptBody @' +$argLine = $args -join ' ' +if ($argLine -like 'repos pr show *') { + @{ + status = 'active' + lastMergeSourceCommit = @{ commitId = 'adosha1' } + reviewers = @() + } | ConvertTo-Json -Depth 5 + exit 0 +} +if ($argLine -like 'repos pr thread list *') { + Write-Output '[]' + exit 0 +} +if ($argLine -like 'repos pr policy list *') { + Write-Output '[]' + exit 0 +} +[Console]::Error.WriteLine("Unhandled az args: $argLine") +exit 1 +'@ + $r = Invoke-PollScript ` + -Platform 'ado' ` + -PrUrl 'https://dev.azure.com/myorg/myproject/_git/myrepo/pullrequest/99' ` + -WatermarkPath $script:watermarkFile ` + -StubDir $script:stubDir + + $r.ExitCode | Should -Be 0 + $out = $r.Stdout | ConvertFrom-Json + $out.reaction_kind | Should -Be 'initial_observation' + $out.platform | Should -Be 'ado' + $out.new_watermark.last_commit_sha | Should -Be 'adosha1' + } + } + + # ───────────────────────────────────────────────────────────────────────── + Context 'Watermark written on reaction detection' { + + It 'Updates the watermark file after a reaction is detected' { + $baseWatermark = [ordered]@{ + last_merged_state = $null + last_commit_sha = 'oldsha' + last_review_ids = @() + last_comment_ids = @() + last_ci_conclusion = $null + } + $baseWatermark | ConvertTo-Json | Set-Content -Path $script:watermarkFile -Encoding utf8NoBOM + + Install-CliStub -StubDir $script:stubDir -ExeName 'gh' -ScriptBody ( + Build-GhStub -HeadSha 'newsha' + ) + + Invoke-PollScript -WatermarkPath $script:watermarkFile -StubDir $script:stubDir ` + -TimeoutSeconds 10 | Out-Null + + $wm = Get-Content $script:watermarkFile -Raw | ConvertFrom-Json + $wm.last_commit_sha | Should -Be 'newsha' + } + } +} diff --git a/scripts/Poll-PrStateDelta.ps1 b/scripts/Poll-PrStateDelta.ps1 new file mode 100644 index 0000000..a2aaf12 --- /dev/null +++ b/scripts/Poll-PrStateDelta.ps1 @@ -0,0 +1,693 @@ +#!/usr/bin/env pwsh +#requires -Version 7.0 + +<# +.SYNOPSIS + Polls a PR (GitHub or ADO) for state changes since a recorded watermark. + Returns when a reaction is detected OR when the timeout expires. + +.PARAMETER Platform + Required. One of: github, ado. + +.PARAMETER PrUrl + Required. Full URL to the PR. Used for both API access and rendering CTA links. + +.PARAMETER WatermarkPath + Optional. File path. Read at start to load last-seen state; written on completion + with new state. If the file does not exist, treat as "first observation" — record + current state and return immediately with reaction_kind: "initial_observation". + When omitted or empty, a temp-file path is derived automatically from the platform + and PR coordinates (suitable for most workflows). + +.PARAMETER TimeoutSeconds + Optional. Default 86400 (24h). Max time to wait for a reaction. + +.PARAMETER PollIntervalSeconds + Optional. Default 30. Time between polls. Jitter of ±10% is applied to avoid + thundering-herd against the PR API. + +.OUTPUTS + JSON on stdout (single object): + { + "reaction_kind": "initial_observation" | "pr_merged" | "pr_closed" | + "new_review_approved" | "new_review_changes_requested" | + "new_review_commented" | "new_commit" | "new_comment" | + "ci_status_changed" | "timeout", + "new_watermark": { ...platform-specific state snapshot... }, + "delta_details": { ...kind-specific payload... }, + "pr_url": "", + "platform": "", + "observed_at_utc": "2026-05-29T16:22:12Z" + } + + Stderr: human-readable progress/warnings (one line per poll). + +.EXIT CODES + 0 reaction detected OR timeout reached cleanly (route on reaction_kind) + 2 invalid arguments (missing required, unparseable URL) + 3 auth/permission failure (gh / az auth lapsed) + 4 PR not found at URL + 5 network or API rate-limit failure after retries +#> + +[CmdletBinding()] +param( + [Parameter(Mandatory)][ValidateSet('github', 'ado')][string]$Platform, + [Parameter(Mandatory)][string]$PrUrl, + [string]$WatermarkPath = '', # Optional; auto-derived from PR coords when empty + [int]$TimeoutSeconds = 86400, + [int]$PollIntervalSeconds = 30 +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +# ── Exit code constants ────────────────────────────────────────────────────── +$EXIT_SUCCESS = 0 +$EXIT_BAD_ARGS = 2 +$EXIT_AUTH = 3 +$EXIT_NOT_FOUND = 4 +$EXIT_NETWORK = 5 + +# ── Reaction precedence (lower index = higher priority) ───────────────────── +$REACTION_PRECEDENCE = [string[]]@( + 'pr_merged', 'pr_closed', + 'new_commit', + 'new_review_changes_requested', + 'new_review_approved', + 'new_review_commented', + 'new_comment', + 'ci_status_changed' +) + +# ── Helpers ────────────────────────────────────────────────────────────────── + +function Write-Stderr { + param([string]$Message) + [Console]::Error.WriteLine($Message) +} + +function Invoke-CliCaptured { + [CmdletBinding()] + param( + [Parameter(Mandatory)][string]$Exe, + [Parameter(Mandatory)][string[]]$Arguments, + [int]$TimeoutSeconds = 60 + ) + + $cmd = Get-Command $Exe -ErrorAction SilentlyContinue + if (-not $cmd) { + return [pscustomobject]@{ + Ok = $false + ExitCode = -1 + Stdout = '' + Stderr = "$Exe not found on PATH" + TimedOut = $false + } + } + + $psi = [System.Diagnostics.ProcessStartInfo]::new() + $psi.FileName = $cmd.Source + foreach ($a in $Arguments) { [void]$psi.ArgumentList.Add($a) } + $psi.RedirectStandardOutput = $true + $psi.RedirectStandardError = $true + $psi.UseShellExecute = $false + $psi.CreateNoWindow = $true + + $proc = [System.Diagnostics.Process]::new() + $proc.StartInfo = $psi + [void]$proc.Start() + + $stdoutTask = $proc.StandardOutput.ReadToEndAsync() + $stderrTask = $proc.StandardError.ReadToEndAsync() + + if (-not $proc.WaitForExit($TimeoutSeconds * 1000)) { + try { $proc.Kill($true) } catch { } + return [pscustomobject]@{ + Ok = $false + ExitCode = -1 + Stdout = '' + Stderr = "$Exe $($Arguments -join ' ') timed out after ${TimeoutSeconds}s" + TimedOut = $true + } + } + + return [pscustomobject]@{ + Ok = ($proc.ExitCode -eq 0) + ExitCode = $proc.ExitCode + Stdout = $stdoutTask.GetAwaiter().GetResult().Trim() + Stderr = $stderrTask.GetAwaiter().GetResult().Trim() + TimedOut = $false + } +} + +# Throws with a typed prefix so the top-level catch can map to exit codes. +function Invoke-AssertCli { + param( + [Parameter(Mandatory)]$Result, + [Parameter(Mandatory)][string]$Context + ) + if ($Result.Ok) { return } + + $combined = "$($Result.Stderr) $($Result.Stdout)".Trim() + + if ($Result.TimedOut) { + throw "NETWORK:$Context timed out. $combined" + } + if ($Result.ExitCode -eq -1 -and $combined -match 'not found on PATH') { + throw "NETWORK:$Context — $combined" + } + if ($combined -match '(?i)(401|unauthorized|authentication|credentials|auth\s+fail|token.*invalid|invalid.*token)') { + throw "AUTH:$Context failed (exit $($Result.ExitCode)): $combined" + } + if ($combined -match '(?i)(404|not\s+found|does\s+not\s+exist|no\s+pull\s+request)') { + throw "NOTFOUND:$Context failed (exit $($Result.ExitCode)): $combined" + } + if ($combined -match '(?i)(429|rate.?limit|too\s+many\s+requests)') { + throw "NETWORK:$Context rate-limited (exit $($Result.ExitCode)): $combined" + } + throw "NETWORK:$Context failed (exit $($Result.ExitCode)): $combined" +} + +function Resolve-GitHubCoords { + param([Parameter(Mandatory)][string]$Url) + if ($Url -match '^https://github\.com/([^/]+)/([^/]+)/pull/(\d+)') { + return [pscustomobject]@{ + Owner = $Matches[1] + Repo = $Matches[2] + Number = [int]$Matches[3] + } + } + throw "Cannot parse GitHub PR URL (expected https://github.com/{owner}/{repo}/pull/{n}): $Url" +} + +function Resolve-AdoCoords { + param([Parameter(Mandatory)][string]$Url) + # https://dev.azure.com/{org}/{project}/_git/{repo}/pullrequest/{id} + if ($Url -match '^https://dev\.azure\.com/([^/]+)/([^/?]+)/_git/([^/]+)/pullrequest/(\d+)') { + return [pscustomobject]@{ + Org = $Matches[1] + Project = [System.Uri]::UnescapeDataString($Matches[2]) + Repo = $Matches[3] + Id = [int]$Matches[4] + } + } + # https://{org}.visualstudio.com/{project}/_git/{repo}/pullrequest/{id} + if ($Url -match '^https://([^.]+)\.visualstudio\.com/([^/?]+)/_git/([^/]+)/pullrequest/(\d+)') { + return [pscustomobject]@{ + Org = $Matches[1] + Project = [System.Uri]::UnescapeDataString($Matches[2]) + Repo = $Matches[3] + Id = [int]$Matches[4] + } + } + throw "Cannot parse ADO PR URL (expected https://dev.azure.com/{org}/{project}/_git/{repo}/pullrequest/{n}): $Url" +} + +function Get-GitHubSnapshot { + param([Parameter(Mandatory)]$Coords) + + $base = "repos/$($Coords.Owner)/$($Coords.Repo)" + $n = $Coords.Number + + # PR core state + $prResult = Invoke-CliCaptured -Exe 'gh' -Arguments @( + 'api', "$base/pulls/$n", + '--jq', '{state:.state, merged:.merged, head_sha:.head.sha}' + ) + Invoke-AssertCli -Result $prResult -Context "gh api $base/pulls/$n" + $prData = $prResult.Stdout | ConvertFrom-Json + + # Reviews + $revResult = Invoke-CliCaptured -Exe 'gh' -Arguments @( + 'api', "$base/pulls/$n/reviews", + '--jq', '[.[] | {id:(.id|tostring), state:.state}]' + ) + Invoke-AssertCli -Result $revResult -Context "gh api $base/pulls/$n/reviews" + $reviews = @($revResult.Stdout | ConvertFrom-Json) + + # Issue comments (general PR comments, not review comments) + $cmtResult = Invoke-CliCaptured -Exe 'gh' -Arguments @( + 'api', "$base/issues/$n/comments", + '--jq', '[.[] | .id | tostring]' + ) + Invoke-AssertCli -Result $cmtResult -Context "gh api $base/issues/$n/comments" + $commentIds = @($cmtResult.Stdout | ConvertFrom-Json) + + # Check runs on HEAD commit (best-effort — no error if CI not configured) + $ciConclusion = $null + if ($prData.head_sha) { + $ciResult = Invoke-CliCaptured -Exe 'gh' -Arguments @( + 'api', "$base/commits/$($prData.head_sha)/check-runs", + '--jq', '[.check_runs[] | .conclusion]' + ) + if ($ciResult.Ok -and $ciResult.Stdout -ne '[]' -and $ciResult.Stdout -ne '') { + $conclusions = @($ciResult.Stdout | ConvertFrom-Json) + $ciConclusion = if ($conclusions -contains 'failure' -or $conclusions -contains 'timed_out') { + 'FAILURE' + } elseif ($conclusions | Where-Object { $_ -eq $null }) { + 'PENDING' + } else { + 'SUCCESS' + } + } + } + + return [pscustomobject]@{ + State = $prData.state + Merged = [bool]$prData.merged + HeadSha = $prData.head_sha + Reviews = $reviews + CommentIds = $commentIds + CiConclusion = $ciConclusion + } +} + +function Get-AdoSnapshot { + param([Parameter(Mandatory)]$Coords) + + $orgUrl = "https://dev.azure.com/$($Coords.Org)" + $commonArgs = @( + '--id', [string]$Coords.Id, + '--org', $orgUrl, + '--project', $Coords.Project, + '--detect', 'false', + '--output', 'json' + ) + + # PR core state + reviewers + $prResult = Invoke-CliCaptured -Exe 'az' -Arguments (@('repos', 'pr', 'show') + $commonArgs) + Invoke-AssertCli -Result $prResult -Context "az repos pr show --id $($Coords.Id)" + $prData = $prResult.Stdout | ConvertFrom-Json + + # Discussion threads + $threadIds = @() + $threadResult = Invoke-CliCaptured -Exe 'az' -Arguments (@('repos', 'pr', 'thread', 'list') + $commonArgs) + if ($threadResult.Ok -and $threadResult.Stdout -ne '' -and $threadResult.Stdout -ne '[]') { + $threads = @($threadResult.Stdout | ConvertFrom-Json) + $threadIds = @($threads | ForEach-Object { [string]$_.id }) + } + + # Build status via policy evaluations (best-effort) + $buildStatus = $null + $policyResult = Invoke-CliCaptured -Exe 'az' -Arguments (@('repos', 'pr', 'policy', 'list') + $commonArgs) + if ($policyResult.Ok -and $policyResult.Stdout -ne '' -and $policyResult.Stdout -ne '[]') { + $policies = @($policyResult.Stdout | ConvertFrom-Json) + $buildPols = @($policies | Where-Object { $_.configuration.type.displayName -like '*Build*' }) + if ($buildPols.Count -gt 0) { + $policyStatuses = @($buildPols | ForEach-Object { $_.status }) + $buildStatus = if ($policyStatuses -contains 'rejected') { 'failed' } + elseif ($policyStatuses -contains 'running' -or $policyStatuses -contains 'queued') { 'inProgress' } + elseif ($policyStatuses -contains 'approved') { 'succeeded' } + else { $null } + } + } + + return [pscustomobject]@{ + Status = $prData.status # active / completed / abandoned + HeadSha = $prData.lastMergeSourceCommit.commitId + Reviewers = @($prData.reviewers) + ThreadIds = $threadIds + BuildStatus = $buildStatus + } +} + +function New-GitHubWatermark { + param([Parameter(Mandatory)]$Snapshot) + return [ordered]@{ + last_merged_state = if ($Snapshot.Merged) { 'MERGED' } + elseif ($Snapshot.State -eq 'closed') { 'CLOSED' } + else { $null } + last_commit_sha = $Snapshot.HeadSha + last_review_ids = @($Snapshot.Reviews | ForEach-Object { $_.id }) + last_comment_ids = @($Snapshot.CommentIds) + last_ci_conclusion = $Snapshot.CiConclusion + } +} + +function New-AdoWatermark { + param([Parameter(Mandatory)]$Snapshot) + $revMap = [ordered]@{} + foreach ($r in $Snapshot.Reviewers) { + $revMap[$r.uniqueName] = [int]$r.vote + } + return [ordered]@{ + last_status = $Snapshot.Status + last_commit_sha = $Snapshot.HeadSha + last_review_revs = $revMap + last_thread_ids = @($Snapshot.ThreadIds) + last_build_status = $Snapshot.BuildStatus + } +} + +function Find-GitHubDeltas { + param( + [Parameter(Mandatory)]$Watermark, + [Parameter(Mandatory)]$Snapshot + ) + + $deltas = [System.Collections.Generic.List[hashtable]]::new() + + # pr_merged / pr_closed (terminal — check first) + if ($Snapshot.Merged -and $Watermark.last_merged_state -ne 'MERGED') { + $deltas.Add(@{ kind = 'pr_merged'; details = @{ state = 'MERGED' } }) + } elseif ($Snapshot.State -eq 'closed' -and -not $Snapshot.Merged -and $Watermark.last_merged_state -ne 'CLOSED') { + $deltas.Add(@{ kind = 'pr_closed'; details = @{ state = 'CLOSED' } }) + } + + # new_commit + if ($Snapshot.HeadSha -and $Snapshot.HeadSha -ne $Watermark.last_commit_sha) { + $deltas.Add(@{ kind = 'new_commit'; details = @{ new_sha = $Snapshot.HeadSha; prev_sha = $Watermark.last_commit_sha } }) + } + + # new reviews (by ID) + $seenIds = @($Watermark.last_review_ids | ForEach-Object { [string]$_ }) + $newReviews = @($Snapshot.Reviews | Where-Object { [string]$_.id -notin $seenIds }) + + $crReviews = @($newReviews | Where-Object { $_.state -eq 'CHANGES_REQUESTED' }) + $apReviews = @($newReviews | Where-Object { $_.state -eq 'APPROVED' }) + $cmReviews = @($newReviews | Where-Object { $_.state -eq 'COMMENTED' }) + + if ($crReviews.Count -gt 0) { + $deltas.Add(@{ kind = 'new_review_changes_requested'; details = @{ review_ids = @($crReviews | ForEach-Object { $_.id }) } }) + } + if ($apReviews.Count -gt 0) { + $deltas.Add(@{ kind = 'new_review_approved'; details = @{ review_ids = @($apReviews | ForEach-Object { $_.id }) } }) + } + if ($cmReviews.Count -gt 0) { + $deltas.Add(@{ kind = 'new_review_commented'; details = @{ review_ids = @($cmReviews | ForEach-Object { $_.id }) } }) + } + + # new_comment + $seenCommentIds = @($Watermark.last_comment_ids | ForEach-Object { [string]$_ }) + $newComments = @($Snapshot.CommentIds | Where-Object { $_ -notin $seenCommentIds }) + if ($newComments.Count -gt 0) { + $deltas.Add(@{ kind = 'new_comment'; details = @{ comment_ids = $newComments } }) + } + + # ci_status_changed + if ($Snapshot.CiConclusion -and $Snapshot.CiConclusion -ne $Watermark.last_ci_conclusion) { + $deltas.Add(@{ kind = 'ci_status_changed'; details = @{ prev = $Watermark.last_ci_conclusion; new = $Snapshot.CiConclusion } }) + } + + return $deltas +} + +function Find-AdoDeltas { + param( + [Parameter(Mandatory)]$Watermark, + [Parameter(Mandatory)]$Snapshot + ) + + $deltas = [System.Collections.Generic.List[hashtable]]::new() + + # pr_merged / pr_closed + if ($Snapshot.Status -eq 'completed' -and $Watermark.last_status -ne 'completed') { + $deltas.Add(@{ kind = 'pr_merged'; details = @{ status = 'completed' } }) + } elseif ($Snapshot.Status -eq 'abandoned' -and $Watermark.last_status -ne 'abandoned') { + $deltas.Add(@{ kind = 'pr_closed'; details = @{ status = 'abandoned' } }) + } + + # new_commit + if ($Snapshot.HeadSha -and $Snapshot.HeadSha -ne $Watermark.last_commit_sha) { + $deltas.Add(@{ kind = 'new_commit'; details = @{ new_sha = $Snapshot.HeadSha; prev_sha = $Watermark.last_commit_sha } }) + } + + # review vote changes + $prevRevs = $Watermark.last_review_revs + $crReviewers = [System.Collections.Generic.List[string]]::new() + $apReviewers = [System.Collections.Generic.List[string]]::new() + $cmReviewers = [System.Collections.Generic.List[string]]::new() + + foreach ($reviewer in $Snapshot.Reviewers) { + $email = $reviewer.uniqueName + $vote = [int]$reviewer.vote + $prevVote = 0 + if ($prevRevs) { + $prevProp = $prevRevs.PSObject.Properties[$email] + if ($prevProp) { $prevVote = [int]$prevProp.Value } + } + if ($vote -ne $prevVote) { + if ($vote -le -10) { $crReviewers.Add($email) } + elseif ($vote -ge 10) { $apReviewers.Add($email) } + elseif ($vote -ne 0) { $cmReviewers.Add($email) } + } + } + + if ($crReviewers.Count -gt 0) { + $deltas.Add(@{ kind = 'new_review_changes_requested'; details = @{ reviewers = @($crReviewers) } }) + } + if ($apReviewers.Count -gt 0) { + $deltas.Add(@{ kind = 'new_review_approved'; details = @{ reviewers = @($apReviewers) } }) + } + if ($cmReviewers.Count -gt 0) { + $deltas.Add(@{ kind = 'new_review_commented'; details = @{ reviewers = @($cmReviewers) } }) + } + + # new_comment (new thread IDs) + $seenThreadIds = @($Watermark.last_thread_ids | ForEach-Object { [string]$_ }) + $newThreadIds = @($Snapshot.ThreadIds | Where-Object { $_ -notin $seenThreadIds }) + if ($newThreadIds.Count -gt 0) { + $deltas.Add(@{ kind = 'new_comment'; details = @{ thread_ids = $newThreadIds } }) + } + + # ci_status_changed + if ($Snapshot.BuildStatus -and $Snapshot.BuildStatus -ne $Watermark.last_build_status) { + $deltas.Add(@{ kind = 'ci_status_changed'; details = @{ prev = $Watermark.last_build_status; new = $Snapshot.BuildStatus } }) + } + + return $deltas +} + +function Select-HighestPrecedenceDelta { + param([Parameter(Mandatory)]$Deltas) + + $sorted = @($Deltas | Sort-Object -Property { + $idx = [Array]::IndexOf($REACTION_PRECEDENCE, $_.kind) + if ($idx -lt 0) { 999 } else { $idx } + }) + + $primary = $sorted[0] + $alsoKinds = @($sorted | Select-Object -Skip 1 | ForEach-Object { $_.kind }) + + if ($alsoKinds.Count -gt 0) { + $primary.details['also_observed'] = $alsoKinds + } + + return $primary +} + +function Write-WatermarkAtomic { + param( + [Parameter(Mandatory)][string]$Path, + [Parameter(Mandatory)][object]$Watermark + ) + $absPath = [System.IO.Path]::GetFullPath($Path) + $json = $Watermark | ConvertTo-Json -Depth 10 + $tmpPath = $absPath + '.tmp.' + [System.IO.Path]::GetRandomFileName().Replace('.', '') + [System.IO.File]::WriteAllText($tmpPath, $json, [System.Text.UTF8Encoding]::new($false)) + Move-Item -LiteralPath $tmpPath -Destination $absPath -Force +} + +function Read-WatermarkFile { + param([Parameter(Mandatory)][string]$Path) + $absPath = [System.IO.Path]::GetFullPath($Path) + $json = [System.IO.File]::ReadAllText($absPath, [System.Text.UTF8Encoding]::new($false)) + return $json | ConvertFrom-Json +} + +function Get-JitteredIntervalMs { + param( + [Parameter(Mandatory)][int]$Seconds, + [double]$JitterFraction = 0.1 + ) + $baseMs = $Seconds * 1000 + $jitterMs = [int]($baseMs * $JitterFraction) + $rng = [System.Random]::new() + return $baseMs + $rng.Next(-$jitterMs, $jitterMs + 1) +} + +function Write-ErrorAndEnvelope { + param( + [Parameter(Mandatory)][int]$Code, + [Parameter(Mandatory)][string]$Message + ) + $kindMap = @{ + $EXIT_BAD_ARGS = 'invalid_args' + $EXIT_AUTH = 'auth_failure' + $EXIT_NOT_FOUND = 'pr_not_found' + $EXIT_NETWORK = 'network_failure' + } + $kind = $kindMap[$Code] + if (-not $kind) { $kind = 'unknown_failure' } + + Write-Stderr "[Poll-PrStateDelta] ERROR ($kind): $Message" + + $errOut = $env:CONDUCTOR_ERROR_OUT + if ($errOut) { + $envelope = [ordered]@{ + kind = $kind + message = $Message + details = @{ exit_code = $Code } + } + $envelope | ConvertTo-Json -Compress | + Set-Content -LiteralPath $errOut -Encoding utf8NoBOM + } +} + +# ── Argument validation ────────────────────────────────────────────────────── + +if ([string]::IsNullOrWhiteSpace($PrUrl)) { + Write-ErrorAndEnvelope -Code $EXIT_BAD_ARGS -Message "PrUrl is required and must be non-empty." + exit $EXIT_BAD_ARGS +} +if ($TimeoutSeconds -le 0) { + Write-ErrorAndEnvelope -Code $EXIT_BAD_ARGS -Message "TimeoutSeconds must be a positive integer." + exit $EXIT_BAD_ARGS +} +if ($PollIntervalSeconds -le 0) { + Write-ErrorAndEnvelope -Code $EXIT_BAD_ARGS -Message "PollIntervalSeconds must be a positive integer." + exit $EXIT_BAD_ARGS +} + +$coords = $null +try { + $coords = if ($Platform -eq 'github') { + Resolve-GitHubCoords -Url $PrUrl + } else { + Resolve-AdoCoords -Url $PrUrl + } +} catch { + Write-ErrorAndEnvelope -Code $EXIT_BAD_ARGS -Message "Invalid PrUrl: $($_.Exception.Message)" + exit $EXIT_BAD_ARGS +} + +# Derive WatermarkPath from PR coords when not supplied by caller. +if ([string]::IsNullOrWhiteSpace($WatermarkPath)) { + $pathKey = if ($Platform -eq 'github') { + "github-$($coords.Owner)-$($coords.Repo)-$($coords.Number)" + } else { + "ado-$($coords.Org)-$($coords.Id)" + } + $pathKey = $pathKey -replace '[/\\:*?"<>|]', '_' + $WatermarkPath = Join-Path ([System.IO.Path]::GetTempPath()) "conductor-pr-delta-$pathKey.json" + Write-Stderr "[Poll-PrStateDelta] WatermarkPath not supplied; using '$WatermarkPath'" +} + +# ── Main polling logic ─────────────────────────────────────────────────────── + +$startTime = [System.DateTimeOffset]::UtcNow + +try { + + # ── Initial observation ─────────────────────────────────────────────────── + if (-not (Test-Path -LiteralPath $WatermarkPath)) { + Write-Stderr "[Poll-PrStateDelta] No watermark at '$WatermarkPath' — recording initial state." + + $snapshot = if ($Platform -eq 'github') { + Get-GitHubSnapshot -Coords $coords + } else { + Get-AdoSnapshot -Coords $coords + } + + $watermark = if ($Platform -eq 'github') { + New-GitHubWatermark -Snapshot $snapshot + } else { + New-AdoWatermark -Snapshot $snapshot + } + + Write-WatermarkAtomic -Path $WatermarkPath -Watermark $watermark + + [ordered]@{ + reaction_kind = 'initial_observation' + new_watermark = $watermark + delta_details = @{} + pr_url = $PrUrl + platform = $Platform + observed_at_utc = [System.DateTimeOffset]::UtcNow.ToString('yyyy-MM-ddTHH:mm:ssZ') + } | ConvertTo-Json -Depth 10 + exit $EXIT_SUCCESS + } + + # ── Load existing watermark ─────────────────────────────────────────────── + $watermark = Read-WatermarkFile -Path $WatermarkPath + + # ── Poll loop ───────────────────────────────────────────────────────────── + $pollCount = 0 + + while ($true) { + $elapsed = ([System.DateTimeOffset]::UtcNow - $startTime).TotalSeconds + + if ($elapsed -ge $TimeoutSeconds) { + Write-Stderr "[Poll-PrStateDelta] Timeout after $([int]$elapsed)s — no reaction detected (polls: $pollCount)." + + [ordered]@{ + reaction_kind = 'timeout' + new_watermark = $watermark + delta_details = @{ elapsed_seconds = [int]$elapsed; poll_count = $pollCount } + pr_url = $PrUrl + platform = $Platform + observed_at_utc = [System.DateTimeOffset]::UtcNow.ToString('yyyy-MM-ddTHH:mm:ssZ') + } | ConvertTo-Json -Depth 10 + exit $EXIT_SUCCESS + } + + $pollCount++ + Write-Stderr "[Poll-PrStateDelta] Poll #$pollCount at $([int]$elapsed)s elapsed (budget: ${TimeoutSeconds}s)." + + $snapshot = if ($Platform -eq 'github') { + Get-GitHubSnapshot -Coords $coords + } else { + Get-AdoSnapshot -Coords $coords + } + + $deltas = @(if ($Platform -eq 'github') { + Find-GitHubDeltas -Watermark $watermark -Snapshot $snapshot + } else { + Find-AdoDeltas -Watermark $watermark -Snapshot $snapshot + }) + + if ($deltas.Count -gt 0) { + $primary = Select-HighestPrecedenceDelta -Deltas $deltas + + $newWatermark = if ($Platform -eq 'github') { + New-GitHubWatermark -Snapshot $snapshot + } else { + New-AdoWatermark -Snapshot $snapshot + } + + Write-WatermarkAtomic -Path $WatermarkPath -Watermark $newWatermark + Write-Stderr "[Poll-PrStateDelta] Reaction detected: $($primary.kind)" + + [ordered]@{ + reaction_kind = $primary.kind + new_watermark = $newWatermark + delta_details = $primary.details + pr_url = $PrUrl + platform = $Platform + observed_at_utc = [System.DateTimeOffset]::UtcNow.ToString('yyyy-MM-ddTHH:mm:ssZ') + } | ConvertTo-Json -Depth 10 + exit $EXIT_SUCCESS + } + + $remaining = $TimeoutSeconds - $elapsed + $sleepMs = [long][Math]::Min( + (Get-JitteredIntervalMs -Seconds $PollIntervalSeconds), + $remaining * 1000 + ) + + Write-Stderr "[Poll-PrStateDelta] No delta. Sleeping ${sleepMs}ms ($([int]$remaining)s remaining)." + Start-Sleep -Milliseconds $sleepMs + } + +} catch { + $msg = $_.Exception.Message + + $exitCode = if ($msg -match '^AUTH:') { $EXIT_AUTH } + elseif ($msg -match '^NOTFOUND:') { $EXIT_NOT_FOUND } + elseif ($msg -match '^NETWORK:') { $EXIT_NETWORK } + else { $EXIT_NETWORK } + + Write-ErrorAndEnvelope -Code $exitCode -Message $msg + exit $exitCode +}