From 703a6c3a5885e5a983359f7a56856ea11e55adb7 Mon Sep 17 00:00:00 2001 From: Daniel Green Date: Sun, 24 May 2026 23:16:49 -0700 Subject: [PATCH 1/3] fix(reset): stamp polyphony:run-started-at watermark in state phase The watermark tag formatter PolyphonyTags.RunStartedAt(DateTimeOffset) was called only in tests; production never wrote the tag. Per docs/decisions/run-reset.md the watermark is supposed to be stamped by reset (originally polyphony reset state, collapsed into polyphony reset root by AB#3308). The pattern->projection rewrite preserved the delete half of the state phase (ProjectionResetCatalog.cs:24, AdoWorkItemTagDeleter.cs:74-77) but dropped the stamp half. As a result, every detect-state / state next-ready call saw runStartedAt=null, the `mergedAt <= watermark` filter never fired, and any root with a prior merged plan PR was stuck in merged_unseeded forever. Introduces IRunWatermarkStamper / RunWatermarkStamper(ITwigClient) and wires it into ProjectionResetExecutor as the action of the state phase: when Execute && !SkipState, after the per-resource delete loop body, stamp a fresh polyphony:run-started-at= tag (mirroring AdoWorkItemTagDeleter's sync-show-patch-sync pattern, defensively stripping any pre-existing watermark tags first). Fail-loud posture: twig exceptions propagate to the executor's outer catch, which records `state` in stepsFailed. Symmetric to PlanObserver.ReadRunStartedAtAsync's fail-closed read posture. Tests (9 added): - RunWatermarkStamperTests: no-prior, replaces, dedups duplicates, enforces sync-show-patch-sync order, throws when twig.show returns null. - ProjectionResetExecutorTests: Execute mode stamps once; dry-run doesn't stamp; SkipState doesn't stamp; stamper throw marks state phase failed. Dogfood verification: cloudvault root 62365430, reset --execute --allow-unjournaled stamps polyphony:run-started-at=2026-05-25T... and plan detect-state flips from merged_unseeded to not_started. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../PolyphonyServiceRegistration.cs | 1 + .../Journal/Reset/ProjectionResetExecutor.cs | 18 +- .../Journal/Reset/RunWatermarkStamper.cs | 66 +++++++ .../ResetRootProjectionCommandTests.cs | 9 +- .../Reset/ProjectionResetExecutorTests.cs | 162 +++++++++++++++++- .../Journal/Reset/RunWatermarkStamperTests.cs | 136 +++++++++++++++ 6 files changed, 387 insertions(+), 5 deletions(-) create mode 100644 src/Polyphony/Journal/Reset/RunWatermarkStamper.cs create mode 100644 tests/Polyphony.Tests/Journal/Reset/RunWatermarkStamperTests.cs diff --git a/src/Polyphony/Infrastructure/PolyphonyServiceRegistration.cs b/src/Polyphony/Infrastructure/PolyphonyServiceRegistration.cs index c379da1..c858770 100644 --- a/src/Polyphony/Infrastructure/PolyphonyServiceRegistration.cs +++ b/src/Polyphony/Infrastructure/PolyphonyServiceRegistration.cs @@ -58,6 +58,7 @@ public static IServiceCollection AddPolyphonyServices( services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); + services.AddSingleton(); services.AddSingleton(); // Journal drift observers. Every ResourceKind is explicitly accounted for here: diff --git a/src/Polyphony/Journal/Reset/ProjectionResetExecutor.cs b/src/Polyphony/Journal/Reset/ProjectionResetExecutor.cs index f6325bc..56c0e9e 100644 --- a/src/Polyphony/Journal/Reset/ProjectionResetExecutor.cs +++ b/src/Polyphony/Journal/Reset/ProjectionResetExecutor.cs @@ -9,7 +9,8 @@ public sealed class ProjectionResetExecutor( JournalDriftAnalyzer driftAnalyzer, ProjectionResetCoverageAnalyzer coverageAnalyzer, ProjectionResetPlanner planner, - IEnumerable deleters) + IEnumerable deleters, + IRunWatermarkStamper watermarkStamper) { private readonly IJournalStore _store = store; private readonly JournalDriftAnalyzer _driftAnalyzer = driftAnalyzer; @@ -17,6 +18,7 @@ public sealed class ProjectionResetExecutor( private readonly ProjectionResetPlanner _planner = planner; private readonly IReadOnlyDictionary _deleters = deleters .ToDictionary(deleter => deleter.Kind, StringComparer.Ordinal); + private readonly IRunWatermarkStamper _watermarkStamper = watermarkStamper; public async Task ExecuteAsync( int rootId, @@ -103,6 +105,20 @@ public async Task ExecuteAsync( { CleanupEmptyWorktreeRoots(phaseTargets.Select(target => target.Resource.Id)); } + + // State phase: after deleting any journal-observed + // watermark targets, stamp a fresh watermark so the + // next dispatch's observers can discriminate prior-run + // PRs from current-run PRs. See + // docs/decisions/run-reset.md — reset is the sole + // writer of polyphony:run-started-at. A failure here + // propagates to the outer catch, which records "state" + // in stepsFailed (state is the last phase, so the + // post-loop dispatch finds it correctly). + if (string.Equals(phase.Name, "state", StringComparison.Ordinal)) + { + await _watermarkStamper.StampAsync(rootId, DateTimeOffset.UtcNow, ct).ConfigureAwait(false); + } } stepsCompleted.Add(phase.Name); diff --git a/src/Polyphony/Journal/Reset/RunWatermarkStamper.cs b/src/Polyphony/Journal/Reset/RunWatermarkStamper.cs new file mode 100644 index 0000000..46bee49 --- /dev/null +++ b/src/Polyphony/Journal/Reset/RunWatermarkStamper.cs @@ -0,0 +1,66 @@ +using Polyphony.Infrastructure.Processes; +using Polyphony.Tagging; + +namespace Polyphony.Journal.Reset; + +/// +/// Stamps the per-root run watermark tag +/// (polyphony:run-started-at=<UTC>) on a root work item. +/// +/// The watermark is the signal observers consult to discriminate +/// current-run PRs from prior-run PRs (see +/// docs/decisions/run-reset.md and +/// ). +/// polyphony reset root is the sole writer — the state phase of +/// the projection executor invokes this stamper after the per-resource +/// delete loop completes, so the new watermark always supersedes any +/// pre-existing one in the same transaction. +/// +/// Fail-loud: twig errors propagate up to the executor's outer +/// catch, which marks the state phase as failed. This is symmetric to +/// PlanObserver.ReadRunStartedAtAsync's fail-closed read posture — +/// the watermark is load-bearing, so a silent stamp failure must not be +/// indistinguishable from a successful stamp. +/// +public interface IRunWatermarkStamper +{ + Task StampAsync(int rootId, DateTimeOffset utcNow, CancellationToken ct = default); +} + +public sealed class RunWatermarkStamper(ITwigClient twig) : IRunWatermarkStamper +{ + private readonly ITwigClient _twig = twig; + + public async Task StampAsync(int rootId, DateTimeOffset utcNow, CancellationToken ct = default) + { + // Mirror AdoWorkItemTagDeleter: pre-read sync so PatchFieldsAsync + // does not clobber tags written to ADO since the last cache refresh. + await _twig.SyncAsync(ct).ConfigureAwait(false); + + var item = await _twig.ShowAsync(rootId, ct).ConfigureAwait(false) + ?? throw new InvalidOperationException( + $"twig show returned null for root work item {rootId}; cannot stamp run-started-at watermark."); + + var raw = item["tags"]?.GetValue() + ?? item["fields"]?["System.Tags"]?.GetValue(); + var tags = TagSet.Parse(raw); + + // Strip any pre-existing watermark tags (including duplicates that + // could arise from operator edits or prior reset bugs). The + // projection delete phase only removes journal-observed watermark + // resources; this defensive strip covers the unjournaled / manual + // case so the post-stamp state is exactly one watermark tag. + var stripped = tags + .Where(tag => tag.StartsWith(PolyphonyTags.RunStartedAtPrefix + "=", StringComparison.Ordinal)) + .ToArray() + .Aggregate(tags, static (current, existing) => current.Remove(existing)); + + var stamped = stripped.Add(PolyphonyTags.RunStartedAt(utcNow)); + + await _twig.PatchFieldsAsync( + rootId, + new Dictionary { ["System.Tags"] = stamped.Format() }, + ct).ConfigureAwait(false); + await _twig.SyncAsync(ct).ConfigureAwait(false); + } +} diff --git a/tests/Polyphony.Tests/Commands/ResetRootProjectionCommandTests.cs b/tests/Polyphony.Tests/Commands/ResetRootProjectionCommandTests.cs index 53014a0..3d67581 100644 --- a/tests/Polyphony.Tests/Commands/ResetRootProjectionCommandTests.cs +++ b/tests/Polyphony.Tests/Commands/ResetRootProjectionCommandTests.cs @@ -54,7 +54,8 @@ public async Task ResetRoot_UsesProjectionExecutor() new JournalDriftAnalyzer([observer]), new ProjectionResetCoverageAnalyzer([observer], [deleter]), new ProjectionResetPlanner(), - [deleter]); + [deleter], + new NoOpRunWatermarkStamper()); var command = CreateCommand(executor); var (exit, output) = await CaptureConsoleAsync(() => command.ResetRoot(root: 100, execute: false)); @@ -137,4 +138,10 @@ public Task DeleteAsync(ProjectedResourceState resource, return Task.FromResult(new ResourceDeleteOutcome { Success = true, Deleted = true }); } } + + private sealed class NoOpRunWatermarkStamper : IRunWatermarkStamper + { + public Task StampAsync(int rootId, DateTimeOffset utcNow, CancellationToken ct = default) + => Task.CompletedTask; + } } diff --git a/tests/Polyphony.Tests/Journal/Reset/ProjectionResetExecutorTests.cs b/tests/Polyphony.Tests/Journal/Reset/ProjectionResetExecutorTests.cs index d3959dc..3f26e2d 100644 --- a/tests/Polyphony.Tests/Journal/Reset/ProjectionResetExecutorTests.cs +++ b/tests/Polyphony.Tests/Journal/Reset/ProjectionResetExecutorTests.cs @@ -23,7 +23,8 @@ public async Task ExecuteAsync_IncompleteCoverageWithoutAllowUnjournaled_FailsCo new JournalDriftAnalyzer([]), new ProjectionResetCoverageAnalyzer([], []), new ProjectionResetPlanner(), - []); + [], + new FakeRunWatermarkStamper()); var result = await executor.ExecuteAsync(100, new ProjectionResetExecutionOptions { Execute = true }, CancellationToken.None); @@ -58,7 +59,8 @@ public async Task ExecuteAsync_MutatedTargetWithoutForce_BlocksDeletion() new JournalDriftAnalyzer([observer]), new ProjectionResetCoverageAnalyzer([observer], [deleter]), new ProjectionResetPlanner(), - [deleter]); + [deleter], + new FakeRunWatermarkStamper()); var result = await executor.ExecuteAsync(100, new ProjectionResetExecutionOptions { Execute = true }, CancellationToken.None); @@ -94,7 +96,8 @@ public async Task ExecuteAsync_ExecuteMode_ReobservesAndReportsSuccessfulDeletio new JournalDriftAnalyzer([observer]), new ProjectionResetCoverageAnalyzer([observer], [deleter]), new ProjectionResetPlanner(), - [deleter]); + [deleter], + new FakeRunWatermarkStamper()); var result = await executor.ExecuteAsync(100, new ProjectionResetExecutionOptions { Execute = true }, CancellationToken.None); @@ -107,6 +110,141 @@ public async Task ExecuteAsync_ExecuteMode_ReobservesAndReportsSuccessfulDeletio world.Exists.ShouldBeFalse(); } + [Fact] + public async Task ExecuteAsync_ExecuteMode_StampsWatermarkExactlyOnce() + { + var world = new MutableBranchWorld(exists: true, matchesExpected: true, actualState: "abc123"); + var observer = new MutableBranchObserver(world); + var deleter = new MutableBranchDeleter(world); + var stamper = new FakeRunWatermarkStamper(); + JournalEntry[] entries = + [ + Entry( + id: 1, + startedAt: 1_000, + action: "branch_ensure_feature", + effects: + [ + Effect(ResourceKind.GitBranch, "feature/100", ResourceIntent.EnsurePresent, ResourceMutation.CreatedNow), + ]), + ]; + var executor = new ProjectionResetExecutor( + new FakeJournalStore(entries), + new JournalDriftAnalyzer([observer]), + new ProjectionResetCoverageAnalyzer([observer], [deleter]), + new ProjectionResetPlanner(), + [deleter], + stamper); + + var result = await executor.ExecuteAsync(100, new ProjectionResetExecutionOptions { Execute = true }, CancellationToken.None); + + result.Success.ShouldBeTrue(); + result.StepsCompleted.ShouldContain("state"); + stamper.StampCount.ShouldBe(1); + stamper.StampedRootIds.ShouldBe([100]); + } + + [Fact] + public async Task ExecuteAsync_DryRun_DoesNotStampWatermark() + { + var world = new MutableBranchWorld(exists: true, matchesExpected: true, actualState: "abc123"); + var observer = new MutableBranchObserver(world); + var deleter = new MutableBranchDeleter(world); + var stamper = new FakeRunWatermarkStamper(); + JournalEntry[] entries = + [ + Entry( + id: 1, + startedAt: 1_000, + action: "branch_ensure_feature", + effects: + [ + Effect(ResourceKind.GitBranch, "feature/100", ResourceIntent.EnsurePresent, ResourceMutation.CreatedNow), + ]), + ]; + var executor = new ProjectionResetExecutor( + new FakeJournalStore(entries), + new JournalDriftAnalyzer([observer]), + new ProjectionResetCoverageAnalyzer([observer], [deleter]), + new ProjectionResetPlanner(), + [deleter], + stamper); + + var result = await executor.ExecuteAsync(100, new ProjectionResetExecutionOptions { Execute = false }, CancellationToken.None); + + result.Success.ShouldBeTrue(); + stamper.StampCount.ShouldBe(0); + } + + [Fact] + public async Task ExecuteAsync_SkipState_DoesNotStampWatermark() + { + var world = new MutableBranchWorld(exists: true, matchesExpected: true, actualState: "abc123"); + var observer = new MutableBranchObserver(world); + var deleter = new MutableBranchDeleter(world); + var stamper = new FakeRunWatermarkStamper(); + JournalEntry[] entries = + [ + Entry( + id: 1, + startedAt: 1_000, + action: "branch_ensure_feature", + effects: + [ + Effect(ResourceKind.GitBranch, "feature/100", ResourceIntent.EnsurePresent, ResourceMutation.CreatedNow), + ]), + ]; + var executor = new ProjectionResetExecutor( + new FakeJournalStore(entries), + new JournalDriftAnalyzer([observer]), + new ProjectionResetCoverageAnalyzer([observer], [deleter]), + new ProjectionResetPlanner(), + [deleter], + stamper); + + var result = await executor.ExecuteAsync(100, new ProjectionResetExecutionOptions { Execute = true, SkipState = true }, CancellationToken.None); + + result.Success.ShouldBeTrue(); + result.StateSkipped.ShouldBeTrue(); + result.StepsCompleted.ShouldNotContain("state"); + stamper.StampCount.ShouldBe(0); + } + + [Fact] + public async Task ExecuteAsync_StamperThrows_MarksStatePhaseFailed() + { + var world = new MutableBranchWorld(exists: true, matchesExpected: true, actualState: "abc123"); + var observer = new MutableBranchObserver(world); + var deleter = new MutableBranchDeleter(world); + var stamper = new FakeRunWatermarkStamper { ThrowOnStamp = new InvalidOperationException("twig stamp boom") }; + JournalEntry[] entries = + [ + Entry( + id: 1, + startedAt: 1_000, + action: "branch_ensure_feature", + effects: + [ + Effect(ResourceKind.GitBranch, "feature/100", ResourceIntent.EnsurePresent, ResourceMutation.CreatedNow), + ]), + ]; + var executor = new ProjectionResetExecutor( + new FakeJournalStore(entries), + new JournalDriftAnalyzer([observer]), + new ProjectionResetCoverageAnalyzer([observer], [deleter]), + new ProjectionResetPlanner(), + [deleter], + stamper); + + var result = await executor.ExecuteAsync(100, new ProjectionResetExecutionOptions { Execute = true }, CancellationToken.None); + + result.Success.ShouldBeFalse(); + result.StepsCompleted.ShouldNotContain("state"); + result.StepsFailed.ShouldContain("state"); + result.Error.ShouldNotBeNull(); + result.Error.ShouldContain("twig stamp boom"); + } + private static JournalEntry Entry(long id, long startedAt, string action, IReadOnlyList? effects = null) => new() { @@ -147,6 +285,24 @@ private sealed class FakeJournalStore(IReadOnlyList entries) : IJo public Task ExportAsync(string destinationPath, CancellationToken ct) => throw new NotSupportedException(); } + private sealed class FakeRunWatermarkStamper : IRunWatermarkStamper + { + public int StampCount { get; private set; } + public List StampedRootIds { get; } = []; + public Exception? ThrowOnStamp { get; set; } + + public Task StampAsync(int rootId, DateTimeOffset utcNow, CancellationToken ct = default) + { + StampCount++; + StampedRootIds.Add(rootId); + if (ThrowOnStamp is not null) + { + throw ThrowOnStamp; + } + return Task.CompletedTask; + } + } + private sealed class MutableBranchWorld(bool exists, bool matchesExpected, string actualState) { public bool Exists { get; set; } = exists; diff --git a/tests/Polyphony.Tests/Journal/Reset/RunWatermarkStamperTests.cs b/tests/Polyphony.Tests/Journal/Reset/RunWatermarkStamperTests.cs new file mode 100644 index 0000000..638884a --- /dev/null +++ b/tests/Polyphony.Tests/Journal/Reset/RunWatermarkStamperTests.cs @@ -0,0 +1,136 @@ +using System.Text.Json.Nodes; +using Polyphony.Infrastructure.Processes; +using Polyphony.Journal.Reset; +using Polyphony.Tagging; +using Shouldly; +using Xunit; + +namespace Polyphony.Tests.Journal.Reset; + +public sealed class RunWatermarkStamperTests +{ + [Fact] + public async Task StampAsync_NoPriorWatermark_AddsTagAndPreservesOthers() + { + var twig = new FakeTwigClient(initialTags: "polyphony:root; project:cloudvault"); + var stamper = new RunWatermarkStamper(twig); + var now = new DateTimeOffset(2026, 5, 25, 6, 0, 0, TimeSpan.Zero); + + await stamper.StampAsync(rootId: 100, now, CancellationToken.None); + + var written = twig.LastWrittenTagsField.ShouldNotBeNull(); + var tags = TagSet.Parse(written); + tags.ShouldContain("polyphony:root"); + tags.ShouldContain("project:cloudvault"); + tags.Count(tag => tag.StartsWith(PolyphonyTags.RunStartedAtPrefix + "=", StringComparison.Ordinal)) + .ShouldBe(1); + tags.ShouldContain(PolyphonyTags.RunStartedAt(now)); + } + + [Fact] + public async Task StampAsync_ExistingWatermark_IsReplaced() + { + var older = new DateTimeOffset(2026, 5, 1, 0, 0, 0, TimeSpan.Zero); + var twig = new FakeTwigClient(initialTags: $"polyphony:root; {PolyphonyTags.RunStartedAt(older)}"); + var stamper = new RunWatermarkStamper(twig); + var now = new DateTimeOffset(2026, 5, 25, 6, 0, 0, TimeSpan.Zero); + + await stamper.StampAsync(rootId: 100, now, CancellationToken.None); + + var tags = TagSet.Parse(twig.LastWrittenTagsField); + tags.ShouldContain("polyphony:root"); + tags.Count(tag => tag.StartsWith(PolyphonyTags.RunStartedAtPrefix + "=", StringComparison.Ordinal)) + .ShouldBe(1); + tags.ShouldContain(PolyphonyTags.RunStartedAt(now)); + tags.ShouldNotContain(PolyphonyTags.RunStartedAt(older)); + } + + [Fact] + public async Task StampAsync_DuplicateWatermarks_AllStripped() + { + var a = new DateTimeOffset(2026, 5, 1, 0, 0, 0, TimeSpan.Zero); + var b = new DateTimeOffset(2026, 5, 10, 0, 0, 0, TimeSpan.Zero); + var twig = new FakeTwigClient( + initialTags: $"polyphony:root; {PolyphonyTags.RunStartedAt(a)}; {PolyphonyTags.RunStartedAt(b)}"); + var stamper = new RunWatermarkStamper(twig); + var now = new DateTimeOffset(2026, 5, 25, 6, 0, 0, TimeSpan.Zero); + + await stamper.StampAsync(rootId: 100, now, CancellationToken.None); + + var tags = TagSet.Parse(twig.LastWrittenTagsField); + tags.Count(tag => tag.StartsWith(PolyphonyTags.RunStartedAtPrefix + "=", StringComparison.Ordinal)) + .ShouldBe(1); + tags.ShouldContain(PolyphonyTags.RunStartedAt(now)); + } + + [Fact] + public async Task StampAsync_SyncsBeforeReadAndAfterWrite() + { + var twig = new FakeTwigClient(initialTags: "polyphony:root"); + var stamper = new RunWatermarkStamper(twig); + + await stamper.StampAsync(rootId: 100, DateTimeOffset.UtcNow, CancellationToken.None); + + twig.CallLog.ShouldBe(["sync", "show:100", "patch:100", "sync"]); + } + + [Fact] + public async Task StampAsync_TwigShowReturnsNull_Throws() + { + var twig = new FakeTwigClient(initialTags: null) { ShowReturnsNull = true }; + var stamper = new RunWatermarkStamper(twig); + + var ex = await Should.ThrowAsync( + () => stamper.StampAsync(rootId: 100, DateTimeOffset.UtcNow, CancellationToken.None)); + ex.Message.ShouldContain("100"); + } + + private sealed class FakeTwigClient(string? initialTags) : ITwigClient + { + public List CallLog { get; } = []; + public bool ShowReturnsNull { get; set; } + public string? LastWrittenTagsField { get; private set; } + + private string? _tagsField = initialTags; + + public Task SyncAsync(CancellationToken ct = default) + { + CallLog.Add("sync"); + return Task.CompletedTask; + } + + public Task ShowAsync(int workItemId, CancellationToken ct = default) + { + CallLog.Add($"show:{workItemId}"); + if (ShowReturnsNull) + { + return Task.FromResult(null); + } + var node = new JsonObject + { + ["id"] = workItemId, + ["tags"] = _tagsField, + }; + return Task.FromResult(node); + } + + public Task PatchFieldsAsync(int workItemId, IReadOnlyDictionary fields, CancellationToken ct = default) + { + CallLog.Add($"patch:{workItemId}"); + if (fields.TryGetValue("System.Tags", out var tags)) + { + LastWrittenTagsField = tags; + _tagsField = tags; + } + return Task.CompletedTask; + } + + public Task GetVersionAsync(CancellationToken ct = default) => throw new NotSupportedException(); + public Task ShowTreeAsync(int workItemId, CancellationToken ct = default) => throw new NotSupportedException(); + public Task TreeAsync(int depth, CancellationToken ct = default) => throw new NotSupportedException(); + public Task SetActiveAsync(int workItemId, CancellationToken ct = default) => throw new NotSupportedException(); + public Task SetStateAsync(string stateName, CancellationToken ct = default) => throw new NotSupportedException(); + public Task CreateChildAsync(int parentId, string type, string title, string description, CancellationToken ct = default) => throw new NotSupportedException(); + public Task GetConfigValueAsync(string key, CancellationToken ct = default) => throw new NotSupportedException(); + } +} From 1d4a10529cfb424637414a2a053990621eaf1d8c Mon Sep 17 00:00:00 2001 From: Daniel Green Date: Mon, 25 May 2026 09:51:34 -0700 Subject: [PATCH 2/3] pr_feedback_analyzer: classify by marker, not author identity Polyphony PRs are agent-authored under the operator's PAT, so the PR's author_identity is the human operator. The prior rule 'comment author == PR author identity -> exclude' silently dropped the operator's own review threads -- exactly the human-in-the-loop intervention the loop must respect. Marker presence (PrCommentMarker) is the deterministic signal for bot-vs-human under a shared identity; identity-equality is not. Removes the identity-exclusion rule from the analyzer prompt in all three workflows (plan-level, github-pr, ado-pr) and from their load-bearing header comments. Vote rules no longer use the 'non-author reviewer' qualifier -- self-approval / self-changes-requested is a platform branch-protection concern, not the analyzer's. ADR: docs/decisions/pr-feedback-identity-classification.md Validation: all four lints (sentiment-loop-consistency, github-pr, ado-pr, plan-level) and all 76 Pester tests across their .Tests.ps1 files pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .conductor/registry/workflows/ado-pr.yaml | 39 ++++-- .conductor/registry/workflows/github-pr.yaml | 41 +++--- .conductor/registry/workflows/plan-level.yaml | 35 +++-- .../pr-feedback-identity-classification.md | 128 ++++++++++++++++++ 4 files changed, 197 insertions(+), 46 deletions(-) create mode 100644 docs/decisions/pr-feedback-identity-classification.md diff --git a/.conductor/registry/workflows/ado-pr.yaml b/.conductor/registry/workflows/ado-pr.yaml index 4ed1968..be0e167 100644 --- a/.conductor/registry/workflows/ado-pr.yaml +++ b/.conductor/registry/workflows/ado-pr.yaml @@ -618,9 +618,14 @@ agents: # marker → bot reviewer (probably pr_initial_reviewer). Only # treat items under the `**Blocking concerns**` section as # negative; ignore everything else in the bot's body. - # 2. Comment author matches the PR author → exclude (PR author - # narrating their own work doesn't count as feedback to themselves). - # 3. Otherwise → human reviewer. Use natural-language judgement. + # 2. Otherwise → human reviewer. Use natural-language judgement, + # regardless of whether the comment author matches the PR + # author identity. Polyphony PRs are agent-authored under the + # operator's PAT, so author-identity equality is NOT a valid + # proxy for bot-vs-human — the marker is. Self-approval / + # self-waiting-for-author is a platform branch-protection + # concern, not the analyzer's. See + # docs/decisions/pr-feedback-identity-classification.md. # # Output digest contract: `feedback_digest` is a stable hash of # (comment ids + thread ids + vote tuples + body sha) of everything @@ -690,24 +695,28 @@ agents: marker-posting agent). Only treat items under the `**Blocking concerns**` section as negative; ignore everything else in the bot's body. - 2. Else if `author == PR author identity` → **author self-comment**. - Exclude entirely (the PR author narrating their own work is - not feedback to themselves). - 3. Else → **human reviewer**. Use natural-language judgement of + 2. Else → **human reviewer**. Use natural-language judgement of the comment's sentiment and any embedded code-review concerns. + **Do NOT compare `author` against the PR author identity** — + polyphony PRs are agent-authored under the operator's PAT, so + the operator's own comments on the PR are valid human-in-the- + loop interventions and must be treated as feedback. For reviewer **votes** on ADO: a `-5` (waiting for author) vote - counts as negative feedback. A `+5` (approved with suggestions) - vote ALONE does not — but if it's paired with unresolved actionable - threads, the threads still win and you should treat the unresolved - threads as negative feedback. A `+10` or `-10` was already terminated - by the deterministic classifier; if you see one here it means - mixed signals — defer to the threads / comments to decide. + counts as negative feedback regardless of who cast it (including + the PR author themselves — self-vote enforcement is a platform + branch-protection concern, not yours). A `+5` (approved with + suggestions) vote ALONE does not — but if it's paired with + unresolved actionable threads, the threads still win and you + should treat the unresolved threads as negative feedback. A + `+10` or `-10` was already terminated by the deterministic + classifier; if you see one here it means mixed signals — defer + to the threads / comments to decide. Unresolved threads (status active / pending — anything that is NOT resolved / fixed / closed / by-design / won-fix) that contain at - least one in-scope (non-author) comment count as negative feedback - even if the comment body is short. + least one non-bot comment count as negative feedback even if the + comment body is short. ## Output diff --git a/.conductor/registry/workflows/github-pr.yaml b/.conductor/registry/workflows/github-pr.yaml index e35b0bf..ea04263 100644 --- a/.conductor/registry/workflows/github-pr.yaml +++ b/.conductor/registry/workflows/github-pr.yaml @@ -491,9 +491,14 @@ agents: # marker → bot reviewer (probably pr_initial_reviewer). Only # treat items under the `**Blocking concerns**` section as # negative; ignore everything else in the bot's body. - # 2. Comment author matches the PR author → exclude (PR author - # narrating their own work doesn't count as feedback to themselves). - # 3. Otherwise → human reviewer. Use natural-language judgement. + # 2. Otherwise → human reviewer. Use natural-language judgement, + # regardless of whether the comment author matches the PR + # author identity. Polyphony PRs are agent-authored under the + # operator's PAT, so author-identity equality is NOT a valid + # proxy for bot-vs-human — the marker is. Self-approval / + # self-changes-requested is a platform branch-protection + # concern, not the analyzer's. See + # docs/decisions/pr-feedback-identity-classification.md. # # Output digest contract: `feedback_digest` is a stable hash of # (comment ids + thread ids + vote tuples + body sha) of everything @@ -563,23 +568,25 @@ agents: marker-posting agent). Only treat items under the `**Blocking concerns**` section as negative; ignore everything else in the bot's body. - 2. Else if `author == PR author identity` → **author self-comment**. - Exclude entirely (the PR author narrating their own work is - not feedback to themselves). - 3. Else → **human reviewer**. Use natural-language judgement of + 2. Else → **human reviewer**. Use natural-language judgement of the comment's sentiment and any embedded code-review concerns. - - For reviewer **votes**: a `changes_requested` vote from a non- - author reviewer counts as negative feedback regardless of any - explanatory comment. An `approved` vote with no unresolved - threads was already terminated by the deterministic classifier — - if you see it here it means the classifier returned `none` due - to mixed signals (e.g. approved AND unresolved threads), so the - threads still win. + **Do NOT compare `author` against the PR author identity** — + polyphony PRs are agent-authored under the operator's PAT, so + the operator's own comments on the PR are valid human-in-the- + loop interventions and must be treated as feedback. + + For reviewer **votes**: a `changes_requested` vote counts as + negative feedback regardless of who cast it (including the PR + author themselves — self-approval / self-changes-requested is + a platform branch-protection concern, not yours). An `approved` + vote with no unresolved threads was already terminated by the + deterministic classifier — if you see it here it means the + classifier returned `none` due to mixed signals (e.g. approved + AND unresolved threads), so the threads still win. Unresolved threads (`is_resolved == false && is_outdated != true`) - that contain at least one in-scope (non-author) comment count as - negative feedback even if the comment body is short. + that contain at least one non-bot comment count as negative + feedback even if the comment body is short. ## Output diff --git a/.conductor/registry/workflows/plan-level.yaml b/.conductor/registry/workflows/plan-level.yaml index 910caf0..7dc929a 100644 --- a/.conductor/registry/workflows/plan-level.yaml +++ b/.conductor/registry/workflows/plan-level.yaml @@ -1616,9 +1616,13 @@ agents: # Identity policy (load-bearing — encoded in the prompt): # 1. Comment with a `` # marker → bot reviewer. INCLUDE the comment's content. - # 2. Comment author matches the PR author → exclude (PR author - # narrating their own work doesn't count as feedback to itself). - # 3. Otherwise → human reviewer. INCLUDE. + # 2. Otherwise → human reviewer. INCLUDE, regardless of whether + # the comment author matches the PR author identity. Polyphony + # PRs are agent-authored under the operator's PAT, so + # author-identity equality is NOT a valid proxy for bot-vs- + # human — the marker is. Self-approval / self-changes-requested + # is a platform branch-protection concern, not the analyzer's. + # See docs/decisions/pr-feedback-identity-classification.md. # # Severity policy (also load-bearing): # - For polyphony's own bot reviewers (e.g. plan_reviewer), only @@ -1700,20 +1704,23 @@ agents: posting agent). Only treat items under the `**Blocking concerns**` section as negative; ignore everything else in the bot's body. - 2. Else if `author == PR author identity` → **author self-comment**. - Exclude entirely (the PR author narrating their own work is - not feedback to themselves). - 3. Else → **human reviewer**. Use natural-language judgement of + 2. Else → **human reviewer**. Use natural-language judgement of the comment's sentiment and any embedded code-review concerns. - - For reviewer **votes**: a `changes_requested` vote from a non- - author reviewer counts as negative feedback regardless of any - explanatory comment. A `rejected` vote was already terminated by - the deterministic classifier — you should not see it here. + **Do NOT compare `author` against the PR author identity** — + polyphony PRs are agent-authored under the operator's PAT, so + the operator's own comments on the PR are valid human-in-the- + loop interventions and must be treated as feedback. + + For reviewer **votes**: a `changes_requested` vote counts as + negative feedback regardless of who cast it (including the PR + author themselves — self-vote enforcement is a platform branch- + protection concern, not yours). A `rejected` vote was already + terminated by the deterministic classifier — you should not see + it here. Unresolved threads (`is_resolved == false && is_outdated != true`) - that contain at least one in-scope (non-author) comment count as - negative feedback even if the comment body is short. + that contain at least one non-bot comment count as negative + feedback even if the comment body is short. ## Output diff --git a/docs/decisions/pr-feedback-identity-classification.md b/docs/decisions/pr-feedback-identity-classification.md new file mode 100644 index 0000000..4c16f30 --- /dev/null +++ b/docs/decisions/pr-feedback-identity-classification.md @@ -0,0 +1,128 @@ +# PR Feedback Identity Classification — Marker, Not Author Identity + +> **Status:** Accepted (2026-05-25). +> **Affects:** `.conductor/registry/workflows/{plan-level,github-pr,ado-pr}.yaml` +> — specifically the `pr_feedback_analyzer` prompt in each. +> **Supersedes:** the prior "identity policy" rule that excluded any +> comment whose author matched the PR author identity. + +## Context + +Three workflows (`plan-level.yaml`, `github-pr.yaml`, `ado-pr.yaml`) run a +sentiment-driven review loop centered on a `pr_feedback_analyzer` agent. +The analyzer reads the poll envelope (comments, threads, votes) and +returns `has_negative_feedback` to drive the revise/merge/abort routing. + +Polyphony's PRs are almost always **agent-authored under the operator's +PAT** — the coder agent (and the marker-posting reviewer agents) push +through the same token the human operator uses. As a result, the PR +`author_identity` on the platform API is the **human operator**, not +the agent that actually drove the commits or posted the comments. + +To disambiguate bot-from-human under a shared identity, polyphony emits a +deterministic HTML marker on every machine-posted comment: + +```html + +``` + +The parser lives at `src/Polyphony/Commands/PrCommentMarker.cs` and is +called out explicitly in its summary as existing for "the common ADO case +where `plan_reviewer_poster_ado` shares the operator's PAT". + +### The bug this ADR addresses + +Through 2026-05, the analyzer prompt carried a three-tier classifier: + +1. Has `polyphony:agent-comment` marker → **bot**. +2. Else if `author == PR author identity` → **author self-comment**, exclude. +3. Else → **human reviewer**. + +Rule 2 is wrong. It excludes the human operator's own comments on +agent-authored PRs — exactly the highest-priority signal in the system, +the **human-in-the-loop intervention**. An incident on 2026-05-25 +surfaced this: an analyzer run reported `has_negative_feedback: false` +while three review threads from the operator identity were silently +dropped under rule 2. + +The stated rationale ("the PR author narrating their own work doesn't +count as feedback to themselves") conflates two distinct cases: + +- The **agent** narrating its own progress — yes, noise, but the marker + already identifies these as bot comments. +- The **human operator** intervening on an agent-authored PR — this is + authoritative feedback that must drive the revise loop. + +Identity-equality is not a valid proxy for "narrator vs. intervener" +when the agent and the operator share an identity. The marker is. + +## Decision + +The analyzer classifies comments **by marker presence, not by author +identity**. The full rule, applied uniformly to comments AND votes: + +1. **Marker present** → bot. Classify by the marker's `agent` + attribute. Apply existing bot logic (e.g. the + `**Blocking concerns**` section convention). +2. **Marker absent** → human. Apply natural-language judgement, regardless + of whether the author identity matches the PR author identity. + +There is **no identity comparison** anywhere in the rule. A `+10` / +`approved` vote from the operator on their own PR counts as positive +feedback. A `-5` / `changes_requested` vote from the operator counts as +negative feedback. A free-form comment from the operator is judged on +its content like any other human comment. + +### Why no carve-out for votes + +A previous draft of this ADR kept identity-exclusion for the **votes** +case on the grounds that "self-approval is platform-blocked anyway". +That carve-out was rejected because: + +- Self-approval is only platform-blocked at the **final merge to main** + in default branch-protection configurations. Earlier-stage approvals + (intermediate gates, draft reviews, ADO `+5` "approved with + suggestions" on a non-protected branch) are not universally blocked. +- Whether self-approval is permitted at any given stage is a + **policy/branch-protection concern enforced by the platform**, not a + concern of the polyphony feedback analyzer. If the platform blocks the + merge for policy reasons, the platform surfaces that downstream + (`mergeable_state`, merge-time refusal). The analyzer's job is only to + classify the **feedback content**, not to predict or enforce platform + merge policy. + +Mixing platform-policy reasoning into the analyzer was the original +mistake and the carve-out would have replicated it in miniature. + +## Consequences + +- **Human operator interventions on agent-authored PRs now drive the + revise loop.** This is the intended human-in-the-loop affordance and + the load-bearing reason for the change. +- **Self-approval is honored as positive signal at the analyzer layer.** + Whether the resulting merge actually proceeds is the platform's call + via branch protection / approval-required policy. Operators who want + to forbid self-approval should configure that in branch protection, + not rely on the analyzer to silently filter it out. +- **The `author_identity` field stays in the rendered prompt context** + for diagnostic value (it appears in the "Context — Poll envelope" + block), but the analyzer no longer compares against it. Future + versions may drop the field entirely; keeping it for now eases + triage of analyzer reasoning traces. +- **The structural consistency lint + (`lint-sentiment-loop-consistency.ps1`) is unaffected** — it pins + agent shape, output schema, and route presence, not prompt text. +- **The marker parser + (`src/Polyphony/Commands/PrCommentMarker.cs`) becomes more + load-bearing.** Any future bot poster that fails to inject the marker + will have its comments classified as human feedback. The existing + Pester tests around the marker remain the safety net. + +## Non-goals + +- This ADR does not change which votes are **terminal** (handled by the + deterministic classifier upstream of the analyzer). The terminal + classifier's identity rules — if any — are out of scope and continue + to be evaluated on their own merits. +- This ADR does not change marker injection by polyphony's bot posters; + it only changes how the analyzer interprets the absence of a marker. From 34cbbb3f1bb4de96c9af34d0170b038d5dabda2e Mon Sep 17 00:00:00 2001 From: Daniel Green Date: Fri, 29 May 2026 10:47:17 -0700 Subject: [PATCH 3/3] docs(adr): seed manifest as durable per-root state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ADR `seed-manifest-as-durable-state.md` capturing the design synthesized from the four-lens debate (Bach/Mahler/Mozart/Sibelius/Beethoven) and greenlit by Daniel on 2026-05-29. Key decisions encoded: - Seed manifest at `.polyphony/state/{rootId}/seed-manifest.json` with atomic writes (`.tmp` + rename). - `polyphony:plan-child-id` marker as the canonical reconciliation primitive (no title hashing). - Rich `plan_generation` linkage `{id, parent, cause, created_at}` with manifest items carrying `introduced_in` for orphan detection across renegotiation. - Script-internal retry (3 attempts on exit codes 3 and 5; code 1 permanent) — `max_attempts` is not available on `type: script` conductor nodes. - New `seeding_blocked` terminal, distinct from `workflow_abandoned` (which is reserved for operator-volitional abandonment). - GitHub explicitly out of scope; ADO-only execution. - Verb evolution on `plan write-plan` and `plan seed-children` — no new verb names. Open follow-up questions documented in §11 of the ADR. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../seed-manifest-as-durable-state.md | 689 ++++++++++++++++++ 1 file changed, 689 insertions(+) create mode 100644 docs/decisions/seed-manifest-as-durable-state.md diff --git a/docs/decisions/seed-manifest-as-durable-state.md b/docs/decisions/seed-manifest-as-durable-state.md new file mode 100644 index 0000000..4b8ca1a --- /dev/null +++ b/docs/decisions/seed-manifest-as-durable-state.md @@ -0,0 +1,689 @@ +# ADR: Seed Manifest as Durable State + +**Status:** Accepted +**Date:** 2026-05-29 +**Authors:** Bach (Architect) +**Synthesized from:** four-lens debate — Bach (architecture), Mahler (conductor engine), +Mozart (verb error circuit), Sibelius (platform reality); Beethoven (terminal semantics) +**Implements:** durable seeding state for PR #535 (manifest-aware seeder rebase) +**Related ADRs:** `polyphony-verb-error-boundary.md`, `gate-compression-pattern.md`, +`domain-signal-envelope.md` + +--- + +## Context + +`polyphony plan seed-children` creates ADO work items serially via +`twig.CreateChildAsync`. Each call is a separate REST round-trip with no +transactional guarantee. A partial seed — three of five children created before a +network drop — leaves ADO and the workflow in an indeterminate state: the system +cannot distinguish "planner never intended this child" from "seeder failed before +getting to it." + +The current recovery is to re-run the root item dispatch from scratch. Without a +durable record of the planner's intent, re-entry has two bad options: trust ADO +(which may be incomplete) or re-run the planner (which may produce a different plan +under changed conditions, silently generating duplicate children). + +### Three specific problems + +**1. Partial-seed ambiguity.** On restart, the seeder cannot tell which children it +already created vs which it still needs to create. Re-running blind risks creating +duplicate children when the network recovers. + +**2. Corrupt-state forward motion.** Without a reconciliation gate, a partially +seeded root can advance to implementation (via `polyphony state next-ready`) while +children are missing. The directive is unambiguous: "don't let corrupt state just +move forward." + +**3. Renegotiation orphan accumulation.** When the architect replans, old children +remain in ADO with their `polyphony:plan-child-id` markers. Without tracking which +generation introduced each child, the seeder cannot distinguish valid children from +prior-generation orphans. Unchecked, renegotiation + re-seed creates duplicates. + +### What the ADO tree alone cannot tell you + +The ADO work item tree is the "actual state" — what was created. It cannot answer: +- "Was child X intended by the current plan, or by a prior plan that has since been + superseded?" +- "Which children should exist but don't yet?" +- "Was the seeder interrupted, or did it finish with a genuinely different scope?" + +The seed manifest is the "desired state" — the planner's declared intent, frozen at +planning time. It is the missing half of the reconciliation pair. + +### What the conductor engine does NOT provide + +Conductor's `CheckpointManager` stores to `$TMPDIR` keyed to workflow name + +timestamp. It is crash recovery for a single run, not durable cross-run state. It is +gone on reboot. There is no built-in mechanism for "this artifact survives across +separate runs of the same root." + +Conductor's `max_attempts` / `RetryPolicy` apply only to provider-backed agent nodes, +not `type: script` nodes (confirmed against `schema.py` line 449, 843). There is no +`on_exhaust` field in the schema. Script-internal retry loops are the idiomatic and +fully supported pattern. + +--- + +## Decision + +The **seed manifest** is a JSON file written by `polyphony plan write-plan` and +consumed by `polyphony plan seed-children`. It records the planner's declared intent +for a root item's children — what should exist in ADO after seeding is complete — and +is patched in place as seeding succeeds. It is the reconciliation anchor for both +retry runs and renegotiation passes. + +### Three founding principles + +1. **Desired state, not observed state.** The manifest records what the planner + declared. ADO records what was actually created. The seeder closes the gap between + them. + +2. **Replace on renegotiation, don't patch.** A new plan generation produces a new + manifest wholesale. The prior manifest is not migrated. The new generation starts + from scratch with full awareness of current ADO state. + +3. **Filesystem is the entire persistence layer.** One JSON file per root item, in + the repo worktree. Atomic writes via `.tmp` + rename. Survives process restarts and + reboots. No database, no conductor-native artifact store. + +--- + +## Schema + +### Manifest file path + +``` +.polyphony/state/{rootId}/seed-manifest.json +``` + +where `{rootId}` is the polyphony root item identifier (e.g. `AB#1234`), matching the +directory layout established for per-root run state. + +### Write discipline + +Writes are **atomic**: write to `.polyphony/state/{rootId}/seed-manifest.json.tmp`, +then rename to the final path. A `.tmp` file that persists on startup signals a +failed prior write; the seeder treats this as "no valid manifest" and routes back to +the `write-plan` step rather than attempting to read a corrupt file. + +### Top-level schema + +```json +{ + "plan_generation": { + "id": "gen-1", + "parent": null, + "cause": "initial", + "created_at": "2026-05-29T10:16:22-07:00" + }, + "created_at": "2026-05-29T10:16:22-07:00", + "root_id": "AB#1234", + "items": [ + { + "child_id": "task-1", + "type": "Task", + "title": "Implement input validation", + "parent_id": "AB#1234", + "facets": ["actionable"], + "introduced_in": "gen-1", + "ado_id": 5678 + }, + { + "child_id": "task-2", + "type": "Task", + "title": "Write unit tests for validation", + "parent_id": "AB#1234", + "facets": ["actionable"], + "introduced_in": "gen-1", + "ado_id": null + } + ] +} +``` + +### Field definitions — top level + +| Field | Type | Required | Description | +|---|---|---|---| +| `plan_generation` | object | yes | Generation descriptor for this manifest. See sub-schema below. | +| `created_at` | string | yes | ISO 8601 datetime when this manifest was written. Records the planning decision time, not seeding completion time. | +| `root_id` | string | yes | Polyphony root item identifier (e.g. `"AB#1234"`). Redundant with file path; present for standalone readability and integrity checks. | +| `items` | array | yes | One entry per planned child. Treated as a set by the seeder — order is not significant. | + +### Field definitions — `plan_generation` + +| Field | Type | Constraints | Description | +|---|---|---|---| +| `id` | string | non-empty, stable within root | Generation identifier. Convention: monotonic `gen-N` starting at `gen-1`. Assigned by the planner verb. | +| `parent` | string \| null | null for first generation | The `id` of the prior generation this one supersedes. Forms a linked chain. | +| `cause` | string | see Cause Vocabulary | Why this generation was created. | +| `created_at` | string | ISO 8601 | When this generation was declared. | + +**Cause vocabulary:** + +| Value | Meaning | +|---|---| +| `initial` | First planning pass for this root item | +| `architect_replan` | Architect agent produced a revised plan | +| `manual_edit` | Operator edited the plan outside the normal workflow | +| `recovery` | Seeding failed; plan re-declared to enable a clean retry | + +### Field definitions — `items[]` + +| Field | Type | Constraints | Description | +|---|---|---|---| +| `child_id` | string | non-empty, stable within a generation | The `polyphony:plan-child-id` marker value. Canonical cross-run identity for this planned child. | +| `type` | string | valid ADO work item type for target project | e.g. `"Task"`, `"User Story"`, `"Bug"`. Must match the process template of the ADO project. | +| `title` | string | non-empty | Planned display title. May diverge from the ADO title if a human renames post-seeding; the marker is authoritative. | +| `parent_id` | string | non-empty | ADO work item ID of the intended parent (e.g. `"AB#1234"`). | +| `facets` | string[] | non-empty | Polyphony facets assigned to this child (e.g. `["actionable"]`). | +| `introduced_in` | string | matches a `plan_generation.id` | Which generation declared this item. Used for orphan detection on renegotiation. | +| `ado_id` | integer \| null | null until seeded | ADO work item ID assigned on creation. Patched by the seeder on each successful creation. Null means "not yet seeded." | + +--- + +## The Reconciliation Primitive + +### Why `polyphony:plan-child-id`, not title hash + +Sibelius confirmed that the `` marker is +embedded atomically in the ADO work item description by `twig.CreateChildAsync`. If +the item exists in ADO, the marker is there — the description is assembled before the +`twig new` call and the marker is included in that assembly. The marker survives +renames. The existing `BuildIndexes` method already uses it for same-run +reconciliation. `twig show-tree` returns children with description fields and +`BuildIndexes` parses the markers. + +The seed manifest records what `child_id` values were planned. The seeder's existing +marker-lookup handles cross-run reconciliation without a new comparison primitive. +This is a reuse of tested code, not a new surface. + +**Title hashing is explicitly rejected.** Humans rename titles post-seeding; +architects wordsmith during replanning. A hash primitive produces false "unmatched" +results on legitimate renames and requires a normalization function that becomes a +hidden compatibility surface. The marker is stronger and already present. + +--- + +## Lifecycle + +``` +1. polyphony plan write-plan + → architect agent runs, declares children + → plans/{id}.children.json written (existing behavior) + → seed manifest written to .polyphony/state/{rootId}/seed-manifest.json (atomic) + → manifest has all items with ado_id: null + +2. polyphony plan seed-children (first run or any subsequent restart) + → startup guard (see below) + → reads manifest + → queries ADO via twig show-tree + BuildIndexes marker lookup + → reconciliation pass: match manifest items to ADO children by marker + → for matched items: patch ado_id in manifest (atomic write after each patch) + → for unmatched items (gaps): seed via twig.CreateChildAsync, patch ado_id on success + → if all items converged → stamp polyphony:planned on parent → exit 0 + → if retry budget exhausted on gaps → emit seeding.infra_exhausted → exit non-zero + +3. Renegotiation (architect replan) + → polyphony plan write-plan runs again + → new manifest written, plan_generation bumped (new id, parent = prior id) + → prior manifest REPLACED (not patched) + → seeder called against new manifest detects prior-gen children as orphan candidates +``` + +### Startup guard + +On `polyphony plan seed-children` invocation, before any ADO calls: + +1. Does `.polyphony/state/{rootId}/seed-manifest.json` exist? + — No → exit 2 (config error). Workflow routes to `write-plan`. +2. Does `.polyphony/state/{rootId}/seed-manifest.json.tmp` exist? + — Yes → prior atomic write failed. Exit 2. Workflow routes to `write-plan`. +3. Manifest parses successfully? + — No → exit 2. Workflow routes to `write-plan`. +4. `root_id` in manifest matches the `--root-id` argument? + — No → exit 2 (config error — wrong manifest for this root). +5. All guards pass → proceed to reconciliation. + +The seeder never enters the seeding loop with a corrupt, absent, or mismatched +manifest. This is the "don't let corrupt state just move forward" guard. + +--- + +## Reconciliation Algorithm + +``` +function reconcile(manifest, adoChildren): + currentGenId = manifest.plan_generation.id + gaps = [] + + for item in manifest.items: + adoMatch = adoChildren.find(c => c.marker == item.child_id) + if adoMatch: + item.ado_id = adoMatch.ado_id // already seeded — patch in memory + else: + gaps.append(item) // not found in ADO — needs seeding + + return gaps +``` + +``` +function seedGaps(gaps, manifest, maxAttempts=3): + for item in gaps: + attempts = 0 + seeded = false + + while attempts < maxAttempts and not seeded: + result = twig.CreateChildAsync(item.parent_id, item.type, item.title) + + if result.success: + item.ado_id = result.ado_id + manifest.writeAtomic() // patch manifest on disk after each success + seeded = true + + elif result.exitCode == 1: // unclassified crash — no retry + raise PermanentSeedError(item, result) + + elif result.exitCode in [3, 5]: // twig unavailable or ADO unreachable + attempts++ + if attempts >= maxAttempts: + raise InfraExhaustedError(item, result) + sleep(backoff(attempts)) + + else: // all other non-zero codes — no retry + raise PermanentSeedError(item, result) + + return manifest +``` + +The manifest is patched atomically (`.tmp` + rename) after **each** successful item +creation — not at the end of the batch. If the seeder is killed mid-batch, the next +restart picks up from the last successfully persisted state. + +### Retry exit code mapping + +The exit code catalogue is authoritative from `polyphony-verb-error-boundary.md` +(Mozart's corrected version — the squad brief had wrong codes): + +| Exit code | Meaning | Retry posture | +|---|---|---| +| 0 | Domain outcome (success or named domain failure) | n/a | +| 1 | Unclassified crash | **No retry — permanent** | +| 2 | Config / manifest error | **No retry — permanent** | +| 3 | Twig CLI unavailable | **Retry up to 3 attempts** | +| 4 | Git operation failure | No retry for seeding context | +| 5 | ADO platform unreachable (includes 401/403 via twig) | **Retry up to 3 attempts** | + +Auth failures (ADO 401/403) surface as code 5. Auth failure and network failure share +the same retry posture — both are transient infra states, not structural errors. There +is no separate auth code. + +A validation failure from ADO (invalid type name, title over 255 chars, invalid area +path) surfaces as a domain outcome — exit 0, error in stdout JSON. The seeder does +NOT retry these: the item cannot be created until the plan is corrected. The seeder +surfaces the rejection clearly, and the workflow treats this as a planning error +requiring a renegotiation pass. + +### On retry exhaustion + +The seeder emits a typed error envelope to `CONDUCTOR_ERROR_OUT`: + +```json +{ + "kind": "seeding.infra_exhausted", + "message": "Failed to seed 2 of 5 planned children after 3 attempts: ADO platform unreachable", + "details": { + "root_id": "AB#1234", + "plan_generation": "gen-1", + "exhausted_items": ["task-4", "task-5"], + "last_exit_code": 5, + "attempts": 3 + } +} +``` + +The seeder then exits non-zero. The conductor workflow routes on +`seeding.infra_exhausted` to the `seeding_blocked` terminal. + +--- + +## Renegotiation Linkage + +### The generation chain + +Each manifest carries a `plan_generation` object that forms a linked list of planning +decisions: + +``` +gen-1 { parent: null, cause: "initial", created_at: "..." } + ↓ +gen-2 { parent: "gen-1", cause: "architect_replan", created_at: "..." } + ↓ +gen-3 { parent: "gen-2", cause: "recovery", created_at: "..." } +``` + +This chain answers "why did the plan change?" for any generation without inspecting +git history or workflow logs. + +Each manifest item carries `introduced_in: "gen-N"`, recording which generation +declared it. When generations are monotonically numbered (`gen-1`, `gen-2`, ...), +orphan detection is a single set-membership test. + +### Orphan detection + +When seeding under the current manifest, any ADO child whose `polyphony:plan-child-id` +marker is NOT in the current manifest's `items` list is an orphan — created for a +prior generation that has since been superseded. + +``` +function detectOrphans(manifest, adoChildren): + currentIds = Set(item.child_id for item in manifest.items) + orphans = [] + for child in adoChildren: + if child.marker is not null and child.marker not in currentIds: + orphans.append({ + "ado_id": child.ado_id, + "marker": child.marker, + "title": child.title + }) + return orphans +``` + +Orphans are surfaced in the seeder's stdout output. **The seeder does not delete +them.** Polyphony never deletes work items. Orphan disposition — close, relabel, +archive — is an operator or twig tooling concern. + +### Generation ID assignment + +The planner verb assigns generation IDs. Convention: monotonic `gen-N` within a root +item, starting at `gen-1`. The seeder reads generation IDs; it never assigns them. + +If the manifest is being created for the first time (no prior manifest for this root), +the planner assigns `gen-1` with `parent: null, cause: "initial"`. If a prior manifest +exists, the planner reads the prior `plan_generation.id`, increments, and sets +`parent` accordingly. + +--- + +## Verb Evolution + +Two existing verbs evolve. No new verbs are introduced. + +### `polyphony plan write-plan` (gains manifest write) + +**Current behavior:** Writes `plans/plan-{id}.children.json` to the plans directory. +**New behavior:** Additionally writes the seed manifest to +`.polyphony/state/{rootId}/seed-manifest.json` (atomic) in the same verb invocation, +immediately after the planner agent produces output. If the manifest write fails (disk +full, permissions), the verb exits non-zero and does NOT emit a successful result — +partial output is not acceptable here. + +The verb's stdout JSON gains a field: + +```json +{ + "status": "written", + "plan_path": "plans/plan-AB#1234.children.json", + "manifest_path": ".polyphony/state/AB#1234/seed-manifest.json", + "plan_generation": "gen-1", + "item_count": 5 +} +``` + +### `polyphony plan seed-children` (gains manifest-aware reconciliation) + +**Current behavior:** Calls `twig.CreateChildAsync` for all planned children; emits +`PlanSeedChildrenResult`. +**New behavior:** Reads manifest → startup guard → reconcile against ADO by marker → +seed only gaps → patch `ado_id` atomically on each success → surface orphans → emit +structured result. + +The verb's stdout JSON gains fields: + +```json +{ + "status": "seeded", + "plan_generation": "gen-1", + "seeded_items": [5678, 5679, 5680], + "reused_items": [5681], + "orphan_count": 1, + "orphans": [ + { "ado_id": 4200, "marker": "task-0", "title": "Prior gen task" } + ], + "error_count": 0 +} +``` + +The existing invariant is unchanged: `polyphony:planned` is stamped on the parent only +when `error_count == 0`. A partial-error seed never advances the root. + +--- + +## The `seeding_blocked` Terminal + +### Why not `workflow_abandoned` + +Beethoven's analysis is definitive: every route into `workflow_abandoned` is an +operator-volitional act requiring a human to click a button. It means "operator gave +up." Exhausted infrastructure retries are not volitional — the platform was +unreachable, the seeder hit its limit, and stopped. These are semantically distinct +and must not share a terminal: + +| Terminal | Semantics | How it's reached | +|---|---|---| +| `workflow_abandoned` | Operator made a conscious decision to stop | Human gate: click "Abandon" | +| `seeding_blocked` | Infrastructure failed to converge within retry budget | Automatic: `seeding.infra_exhausted` error kind | + +If infra exhaustion routes to `workflow_abandoned`, the batch aggregator cannot +distinguish "operator gave up intentionally" from "ADO was down when we tried to seed." +Re-trigger logic, SLO accounting, and incident response depend on this distinction. + +### `seeding_blocked` semantics + +`seeding_blocked` means: the seeder attempted to converge the ADO tree to the manifest +and could not complete due to an infrastructure failure that did not resolve within the +retry budget. The work item is **not abandoned** — it is **blocked**. The operator can +re-trigger when infrastructure recovers. The root item remains actionable without +requiring a judgment call. + +### YAML wiring sketch + +```yaml +- name: seed_children + type: script + script: | + polyphony plan seed-children --root-id "{{ workflow.input.root_id }}" + on_error: + seeding.infra_exhausted: seeding_blocked + routes: + - condition: "{{ agent.output.status == 'seeded' }}" + to: post_seeding_step + +- name: seeding_blocked + type: terminal + output: + status: seeding_blocked + root_id: "{{ workflow.input.root_id }}" + plan_generation: "{{ seed_children.output.plan_generation }}" + reason: infra_exhausted +``` + +The `on_error:` handler matches on the `kind` field in `CONDUCTOR_ERROR_OUT`. No new +conductor primitives are needed. + +--- + +## Relationship to Other ADRs + +### `polyphony-verb-error-boundary.md` + +The seeder uses the exit code taxonomy exactly as defined in that ADR. Mozart's +correction is adopted: code 5 is "ADO platform unreachable" (covers auth and network); +there is no separate auth code. Code 1 (unclassified crash) is permanent — no retry. +The invariant "infrastructure failures exit non-zero AND write `CONDUCTOR_ERROR_OUT`" +applies to the seeder; `seeding.infra_exhausted` is the error kind the seeder emits on +exhaustion. + +### `gate-compression-pattern.md` + +PR #535's gate removal is the direct precondition for this ADR. The prior design +placed a `human_gate` between `write-plan` and `seed-children` that acted as a +"did the seed succeed?" confirmation checkpoint. That gate is replaced by the +manifest + reconciliation + `seeding_blocked` pattern. The seeder becomes re-entrant +and self-verifying; the judgment gate is eliminated because correct seeding is now +machine-verifiable, not requiring human confirmation. The gate-compression ADR's +rule holds: a gate compresses when resolution is detectable by a poll/script. The seed +manifest makes that detection possible for the first time. + +### `domain-signal-envelope.md` + +When `polyphony plan seed-children` exits 0 (fully converged), the workflow emits a +seeding-complete domain signal. That signal's `details` field SHOULD carry the +reconciliation delta — how many items were newly seeded vs reused, the orphan count, +and the plan generation — so the batch aggregator and platespinner have visibility into +whether seeding was a clean first run or a partial-recovery continuation: + +```json +{ + "kind": "seeding_complete", + "severity": "info", + "title": "Seeding complete — AB#1234", + "message": "5 children seeded (3 new, 2 reused). Generation gen-2.", + "details": { + "root_id": "AB#1234", + "plan_generation": "gen-2", + "seeded_new": 3, + "reused": 2, + "orphan_count": 1 + } +} +``` + +--- + +## Alternatives Considered + +**Type + parent + title-normalized hash as reconciliation primitive (Bach's prior +proposal):** Rejected in favor of the existing `polyphony:plan-child-id` marker. +Sibelius confirmed the marker is already embedded atomically, already parsed by +`BuildIndexes`, and already used for same-run reconciliation. Title normalization +introduces a new compatibility surface — subtle changes to the normalization function +would silently break cross-run equivalence. The marker is a stronger, tested primitive. +The hash approach is over-engineering a solved problem. + +**Monotonic integer `plan_generation` counter:** Rejected. Daniel explicitly asked for +richer linkage: a chain that records parent generation and cause, not just an ordinal. +The integer loses the "why did this change?" context that is valuable for debugging +renegotiation loops and orphan accumulation. + +**Conductor checkpoint as cross-run state:** Rejected. Mahler confirmed +`CheckpointManager` stores to `$TMPDIR` keyed to workflow name + timestamp. It is +gone on reboot; it is not keyed to root item ID; it is not designed for durable +cross-run state. The filesystem path in the repo worktree is the correct answer. + +**Script-external retry via conductor `on_error: { max_attempts: 3 }`:** Rejected. +Mahler confirmed this syntax does not exist for `type: script` nodes. `RetryPolicy` +and `max_attempts` apply only to provider-backed agent nodes. Script-internal retry +loops are the idiomatic pattern for transient infra failures in seeder-style scripts. + +**Route infra exhaustion to `workflow_abandoned`:** Rejected. Beethoven's analysis +establishes that `workflow_abandoned` is entirely operator-volitional — four routes, +all requiring a human gate click. Automatic infra exhaustion is semantically different. +Conflating them corrupts observability and breaks re-trigger logic. + +**Add a `platform` field for GitHub/ADO branching:** Rejected. Seeding is +ADO-shaped (work item hierarchy, parent-child links, description markers). GitHub +Issues has no structural equivalent. A platform abstraction at this layer would produce +an abstraction that satisfies neither. GitHub seeding, if ever needed, is a separate +implementation — not an extension of this schema. + +--- + +## Consequences + +### Positive + +- Restarts are safe: the seeder seeds only gaps, never re-creates items that already + exist in ADO +- Corrupt partial seeds cannot advance to implementation — the startup guard ensures + the manifest is valid before any ADO queries +- Renegotiation orphans are visible rather than silently accumulating +- The `seeding_blocked` terminal gives the batch aggregator a precise, observable + signal that distinguishes infra failure from operator abandonment +- The plan generation chain provides a lightweight audit trail of replanning decisions + without requiring git log access + +### Negative + +- `polyphony plan write-plan` gains a new output artifact; callers that rely only on + `plans/*.children.json` are unaffected, but implementations must now maintain the + manifest write as part of the verb contract +- Every seeder invocation now begins with a `twig show-tree` query even for items that + have already been seeded — this is an additional ADO round-trip per restart. For + roots with large child counts and frequent restarts, this may add latency +- Orphan detection produces output that must be triaged by an operator or downstream + tooling; there is no automated disposition + +### Neutral + +- The `.polyphony/state/` directory is the per-root run state home. The manifest is + the first resident. Other future per-root artifacts (e.g. seeding error logs) follow + the same convention +- Roots seeded before this ADR shipped have no manifest. On first re-run they receive + a fresh manifest from `write-plan`. There is no migration path from + "no manifest" to a populated one — the planner always runs before the seeder + +--- + +## Invariants + +1. **No manifest, no seeding.** `polyphony plan seed-children` exits 2 if no valid + manifest exists for the specified root. The workflow must call `write-plan` first. +2. **Atomic writes only.** All manifest writes — creation and per-item `ado_id` + patches — use `.tmp` + rename. No partial file is ever the canonical manifest. +3. **Seeder never deletes.** Detection of orphaned ADO children is surfaced as output. + The seeder takes no delete action. +4. **`polyphony:planned` only on full convergence.** The parent tag is stamped only + when `error_count == 0` after all gaps are resolved. This is the hard guard against + corrupt-state advancement. +5. **Generation IDs are assigned by the planner, read by the seeder.** The seeder + patches `ado_id` fields only; it never assigns or modifies generation metadata. +6. **GitHub is out of scope.** No `platform` field, no GitHub execution path, no + abstraction layer. ADO-only. + +--- + +## Open Questions + +These are genuine open questions, not blocking decisions. + +**1. Orphan disposition workflow.** The seeder surfaces orphans in output but takes no +action. For roots with frequent renegotiation, orphan accumulation in ADO may become +operational noise. Should polyphony emit a domain signal for each orphan batch, +allowing an operator to triage? Or is this a twig CLI concern (`twig orphan-report`)? +Wagner and Sibelius to propose. + +**2. `seeding_blocked` re-trigger path.** When a root is in `seeding_blocked`, the +re-trigger should skip `write-plan` (the manifest is valid) and go straight to +`seed-children`. This requires a re-entry discriminator in `root-item-dispatch.yaml` +— a check for manifest presence before routing. Wagner to design the YAML re-entry +branch. + +**3. Manifest schema version field.** The current schema has no explicit `schema_version` +at the top level. If the schema gains new required fields, existing manifests will fail +the startup guard's parse check silently or with a confusing error. Consider adding +`schema_version: 1` to enable explicit forward-compatibility checks. Low-priority +until a first schema change is imminent. + +**4. Seeder idempotency on `ado_id` patches across renegotiation.** When the new +generation carries forward items whose `child_id` values are unchanged (same work, +new plan), the new manifest starts with `ado_id: null` and the seeder's reconciliation +pass repopulates them from ADO. This is correct but means one `twig show-tree` query +per renegotiation regardless of what changed. Acceptable at current renegotiation +frequencies; revisit if architects replan frequently on large root items. + +**5. Seeding-complete domain signal kind vocabulary.** The `seeding_complete` kind used +in the domain signal example above is not yet registered in the `domain-signal-envelope` +ADR's kind vocabulary. Add it on next revision of that ADR.