From 7075592c726d6e33cf5558be334c607711dc34d3 Mon Sep 17 00:00:00 2001 From: trick77 Date: Mon, 18 May 2026 21:18:23 +0200 Subject: [PATCH] =?UTF-8?q?Parse=20pr:modified=20draft=E2=86=92ready=20fli?= =?UTF-8?q?ps=20as=20synthetic=20pr:ready=5Ffor=5Freview?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Emit pr:ready_for_review when previousDraft=true & draft=false - Skip title/description/target-only pr:modified variants - Attribute actor as author for the synthetic event - Add test fixtures and comprehensive parser tests - Update docs (README, setup guide, onboarding script) --- README.md | 59 ++++++++- docs/setup-bitbucket-webhook.md | 14 ++- scripts/bitbucket_onboarding.py | 9 +- src/riptide_collector/parsers_bitbucket.py | 49 ++++++-- .../bitbucket_pr_modified_draft_to_ready.json | 44 +++++++ .../bitbucket_pr_modified_title_only.json | 44 +++++++ tests/test_parsers_bitbucket.py | 115 ++++++++++++++++++ 7 files changed, 323 insertions(+), 11 deletions(-) create mode 100644 tests/fixtures/bitbucket_pr_modified_draft_to_ready.json create mode 100644 tests/fixtures/bitbucket_pr_modified_title_only.json diff --git a/README.md b/README.md index 6c55e7f..06f6440 100644 --- a/README.md +++ b/README.md @@ -63,12 +63,69 @@ the data captured in v1. | **Deployment frequency** | `COUNT(*)` of `argocd_events` per `app_name` / `team` / time window where `operation_phase = 'Succeeded' AND environment = 'prod'`. Drop the `environment` filter (or slice by it) for staging visibility. | | **Lead time for changes** | For each merged PR, `MIN(bitbucket_events.occurred_at)` for the PR (first commit) → `argocd_events.occurred_at` of the prod deploy that carries the same `commit_sha` and `environment = 'prod'`. Joined via the SHA. Stratify by `bitbucket_events.change_type` (feature / hotfix / bugfix / …) to see hotfix lead time vs. feature lead time separately. | | **PR cycle time** | `pullrequest:fulfilled.occurred_at − pullrequest:created.occurred_at` per PR id. | -| **Time to first review** *(DX Core 4 "code review pickup time")* | `MIN(occurred_at) WHERE event_type IN ('pr:comment:added', 'pr:reviewer:approved', 'pr:reviewer:unapproved', 'pr:reviewer:needs_work', 'pr:reviewer:updated') AND author != AND NOT is_automated` per PR, minus the matching `pr:opened` row. The five-event union covers every reviewer touch Bitbucket DC emits — silent approvals, retracted approvals, "needs work" flips, and bare reviewer-status changes; `NOT is_automated` strips bot comments (noergler / Renovate / etc.) — every review-time bot must have its handle in the `automation` config block, otherwise its instant comment drives the metric toward zero. | +| **Time to first review** *(DX Core 4 "code review pickup time")* | Two-part computation per PR — see the SQL block below the table. Clock-start = `COALESCE(pr:ready_for_review, pr:opened)`: PRs opened ready start at `pr:opened`; PRs opened as drafts start at the synthetic `pr:ready_for_review` (emitted by the parser when a `pr:modified` payload carries `previousDraft=true, draft=false`). Engagement = first reviewer touch (`pr:comment:added`, `pr:reviewer:approved`, `pr:reviewer:unapproved`, `pr:reviewer:needs_work`, `pr:reviewer:updated`) where `author != pr_opener AND NOT is_automated AND occurred_at >= clock-start`. The five-event reviewer union covers every touch Bitbucket DC emits (silent approvals, retracted approvals, "needs work" flips, bare reviewer-status changes); the `occurred_at >= clock-start` guard drops early-feedback comments solicited during the draft phase, which would otherwise produce negative pickup times. `NOT is_automated` strips bot comments (noergler / Renovate / etc.) — every review-time bot must have its handle in the `automation` config block, otherwise its instant comment drives the metric toward zero. | | **Build success rate** | `pipeline_events` with `phase = 'COMPLETED'` grouped by `status`. Slice by `source` to compare Jenkins vs Tekton, by `pipeline_name` / `team` for ownership. | | **Build duration** | `pipeline_events.duration_seconds` (a Postgres `GENERATED ALWAYS AS (finished_at − started_at)` column). | | **Deploy success rate** | `argocd_events` with `operation_phase IN ('Succeeded', 'Failed')` aggregated, filtered to `environment = 'prod'` for the prod-only view. | | **Deploy duration** | `argocd_events.duration_seconds` (generated column). Filter by `environment = 'prod'` for production-only timing. | +#### Pickup-time query + +The collector emits a synthetic `pr:ready_for_review` row when a `pr:modified` +payload carries a draft→ready flip (`previousDraft=true, draft=false`); other +`pr:modified` variants — title / description / target-branch changes — are +dropped at parse time. The raw `eventKey` survives on `payload.eventKey` for +traceability. With that in place, the metric is one CTE: + +```sql +WITH pickup_start AS ( + SELECT + repo_full_name, + pr_id, + COALESCE( + MIN(occurred_at) FILTER (WHERE event_type = 'pr:ready_for_review'), + MIN(occurred_at) FILTER ( + WHERE event_type = 'pr:opened' + AND COALESCE(payload->'pullRequest'->>'draft', 'false') = 'false' + ) + ) AS clock_start, + MAX(author) FILTER (WHERE event_type = 'pr:opened') AS pr_opener + FROM bitbucket_events + GROUP BY repo_full_name, pr_id +) +SELECT + e.repo_full_name, + e.pr_id, + MIN(e.occurred_at) - ps.clock_start AS pickup_interval +FROM bitbucket_events e +JOIN pickup_start ps USING (repo_full_name, pr_id) +WHERE ps.clock_start IS NOT NULL + AND e.event_type IN ( + 'pr:comment:added', + 'pr:reviewer:approved', + 'pr:reviewer:unapproved', + 'pr:reviewer:needs_work', + 'pr:reviewer:updated' + ) + AND e.author IS DISTINCT FROM ps.pr_opener + AND NOT e.is_automated + AND e.occurred_at >= ps.clock_start +GROUP BY e.repo_full_name, e.pr_id, ps.clock_start; +``` + +The `occurred_at >= ps.clock_start` filter is load-bearing: a reviewer can +comment on a draft PR (typically when the author solicits early feedback), and +without this guard the engagement timestamp could land before the ready signal +and produce a negative interval. Both "early feedback in draft" and "the act of +flipping the switch" are intentionally excluded from the metric — pickup time +measures reviewer engagement *after the PR is ready*, nothing else. + +The `draft = false` guard inside the `pr:opened` branch of the `COALESCE` is the +same rule applied to the opposite tail: a PR opened as a draft that never gets +flipped to ready has no clock-start, so it's excluded from the metric entirely. +Early-feedback comments on a never-ready draft don't inflate the numerator, +because there's no clock-start to subtract from in the first place. + ### Quality / process signals from Bitbucket | Metric | How it's computed | diff --git a/docs/setup-bitbucket-webhook.md b/docs/setup-bitbucket-webhook.md index eb00e88..aeb3ad9 100644 --- a/docs/setup-bitbucket-webhook.md +++ b/docs/setup-bitbucket-webhook.md @@ -47,10 +47,20 @@ The script is idempotent (rerun after edits), supports `--dry-run` and value). BBS uses this to sign each delivery. 7. **Triggers** — select: - Repository: **Push** - - Pull request: **Created**, **Updated**, **Approved**, **Merged**, - **Declined**, **Comment created** + - Pull request: **Opened**, **Source branch updated**, **Modified**, + **Approved**, **Unapproved**, **Needs work**, **Reviewer updated**, + **Merged**, **Deleted**, **Comment added** 8. **Save**. +The **Modified** trigger is required for DX Core 4 pickup-time accuracy on +PRs opened as drafts: BBS emits `pr:modified` on title / description / +target / draft changes, and the parser keeps only the draft→ready flips +(re-typed as a synthetic `pr:ready_for_review` row) — that's the +clock-start signal for the pickup metric documented in the README. The +**Reviewer updated** trigger covers silent reviewer-status changes +(adding / removing reviewers without a comment) that also feed the +pickup metric. + Do **not** use the `Custom headers` field for auth and do **not** populate the top-level `credentials` block via REST — BBS DC silently drops `credentials.password` on REST POST/PUT. diff --git a/scripts/bitbucket_onboarding.py b/scripts/bitbucket_onboarding.py index da8c924..0e2a83d 100644 --- a/scripts/bitbucket_onboarding.py +++ b/scripts/bitbucket_onboarding.py @@ -68,10 +68,17 @@ # action of any kind. A single signal (e.g. only `pr:comment:added`) is # not enough — reviewers often approve silently without typing anything, # and a comment-only signal both misses those and inflates the metric -# with bot comments. The four reviewer-activity events here together +# with bot comments. The five reviewer-activity events here together # cover the workflows BBS DC reviewers actually use. +# +# `pr:modified` is the pickup-time **start** signal, not engagement: BBS +# emits it on title / description / target-branch / draft-status changes. +# The parser keeps only the draft→ready flips (re-typed as a synthetic +# `pr:ready_for_review`) so the pickup clock starts when the PR actually +# becomes reviewable, not when it was opened as a draft. REQUIRED_WEBHOOK_EVENTS: tuple[str, ...] = ( "pr:opened", + "pr:modified", "pr:from_ref_updated", "pr:comment:added", "pr:reviewer:approved", diff --git a/src/riptide_collector/parsers_bitbucket.py b/src/riptide_collector/parsers_bitbucket.py index 331c83b..040c42e 100644 --- a/src/riptide_collector/parsers_bitbucket.py +++ b/src/riptide_collector/parsers_bitbucket.py @@ -16,10 +16,13 @@ parse_change_type, ) -# Events where the meaningful actor is the reviewer/commenter, not the -# PR author. Used to power the DX Core 4 "code review pickup time" metric: -# MIN(occurred_at WHERE event_type IN these AND author != pr_opener) -# answers "when did someone other than the PR author engage with the PR". +# Author selection only: events where `actor` is the meaningful "who did +# this" — the reviewer / commenter — and `pullRequest.author` is the PR +# opener. The parser uses this set to skip the pr.author lookup so the +# row attributes the action to the human who performed it. The DX Core 4 +# pickup-time metric WHERE clause is documented in README; it overlaps +# with this set but isn't derived from it (e.g. `pr:ready_for_review` +# uses the same actor-as-author rule but is a clock-start, not engagement). _REVIEWER_ACTIVITY_EVENTS = frozenset( { "pr:comment:added", @@ -30,6 +33,15 @@ } ) +# Synthetic event_type emitted by the parser when a `pr:modified` payload +# carries a draft→ready flip (`previousDraft: true` + `pullRequest.draft: false`). +# Re-typing at parse time keeps downstream metric queries trivial — they can +# look for a row instead of digging into `payload->'previousDraft'`. The raw +# eventKey is preserved on `payload.eventKey`. This is the pickup-clock +# START signal for PRs that were opened as drafts; see the pickup-time +# section in README for the COALESCE(opened, ready_for_review) pattern. +_SYNTHETIC_READY_FOR_REVIEW = "pr:ready_for_review" + @dataclass(frozen=True) class BitbucketEventDraft: @@ -160,6 +172,24 @@ def extract_event( title = pr.get("title") if isinstance(pr.get("title"), str) else None description = pr.get("description") if isinstance(pr.get("description"), str) else None + # `pr:modified` fires on title / description / target / draft changes. + # Only the draft→ready flip feeds a metric we track (DX Core 4 pickup + # time start signal); other variants carry no signal worth a DB row. + # Re-type the flip as `pr:ready_for_review` so downstream queries don't + # have to dig into `previousDraft`; skip the rest as no-ops. + if event_type == "pr:modified": + previous_draft = body.get("previousDraft") + current_draft = pr.get("draft") + if previous_draft is True and current_draft is False: + event_type = _SYNTHETIC_READY_FOR_REVIEW + else: + return BitbucketSkip( + reason="pr:modified without draft→ready flip", + delivery_id=delivery_id, + event_type="pr:modified", + repo_full_name=lower(raw_repo_full_name), + ) + branch_name: str | None = None commit_sha: str | None = None author: str | None = None @@ -169,8 +199,13 @@ def extract_event( # as the meaningful "who did this" — different from pr.author who # opened the PR. We need that to attribute the "first review pickup" # signal (DX Core 4) to the right user and to filter out the - # PR-author-self-comment and bot-comment noise. - is_reviewer_activity = event_type in _REVIEWER_ACTIVITY_EVENTS + # PR-author-self-comment and bot-comment noise. The synthetic + # `pr:ready_for_review` follows the same rule: the actor is whoever + # flipped the switch (often the PR author, sometimes a maintainer), + # which may differ from `pullRequest.author`. + is_actor_authored = ( + event_type in _REVIEWER_ACTIVITY_EVENTS or event_type == _SYNTHETIC_READY_FOR_REVIEW + ) if pr: from_ref = _as_dict(pr.get("fromRef")) @@ -180,7 +215,7 @@ def extract_event( branch_name = display_id if isinstance(latest_commit, str): commit_sha = latest_commit - if not is_reviewer_activity: + if not is_actor_authored: author_user = _as_dict(_as_dict(pr.get("author")).get("user")) author = _user_handle(author_user) # PR-side revert detection: the title is the only signal we have diff --git a/tests/fixtures/bitbucket_pr_modified_draft_to_ready.json b/tests/fixtures/bitbucket_pr_modified_draft_to_ready.json new file mode 100644 index 0000000..2a67a54 --- /dev/null +++ b/tests/fixtures/bitbucket_pr_modified_draft_to_ready.json @@ -0,0 +1,44 @@ +{ + "eventKey": "pr:modified", + "date": "2026-04-28T10:03:00+0000", + "actor": { + "name": "alice", + "displayName": "Alice Example", + "slug": "alice" + }, + "pullRequest": { + "id": 42, + "title": "ABC-123: Add payment retry", + "description": "Fixes ABC-123 and PROJ-9", + "state": "OPEN", + "draft": false, + "fromRef": { + "id": "refs/heads/feature/ABC-123-retries", + "displayId": "feature/ABC-123-retries", + "latestCommit": "abc1234567890abc1234567890abc1234567890a", + "repository": { + "slug": "payments-api", + "project": {"key": "ACME"} + } + }, + "toRef": { + "id": "refs/heads/master", + "displayId": "master", + "repository": { + "slug": "payments-api", + "project": {"key": "ACME"} + } + }, + "author": { + "user": { + "name": "alice", + "displayName": "Alice Example", + "slug": "alice" + } + } + }, + "previousTitle": "ABC-123: Add payment retry", + "previousDescription": "Fixes ABC-123 and PROJ-9", + "previousTarget": null, + "previousDraft": true +} diff --git a/tests/fixtures/bitbucket_pr_modified_title_only.json b/tests/fixtures/bitbucket_pr_modified_title_only.json new file mode 100644 index 0000000..7767e4f --- /dev/null +++ b/tests/fixtures/bitbucket_pr_modified_title_only.json @@ -0,0 +1,44 @@ +{ + "eventKey": "pr:modified", + "date": "2026-04-28T10:04:00+0000", + "actor": { + "name": "alice", + "displayName": "Alice Example", + "slug": "alice" + }, + "pullRequest": { + "id": 42, + "title": "ABC-123: Add payment retry (typo fix)", + "description": "Fixes ABC-123 and PROJ-9", + "state": "OPEN", + "draft": false, + "fromRef": { + "id": "refs/heads/feature/ABC-123-retries", + "displayId": "feature/ABC-123-retries", + "latestCommit": "abc1234567890abc1234567890abc1234567890a", + "repository": { + "slug": "payments-api", + "project": {"key": "ACME"} + } + }, + "toRef": { + "id": "refs/heads/master", + "displayId": "master", + "repository": { + "slug": "payments-api", + "project": {"key": "ACME"} + } + }, + "author": { + "user": { + "name": "alice", + "displayName": "Alice Example", + "slug": "alice" + } + } + }, + "previousTitle": "ABC-123: Add payment retry", + "previousDescription": "Fixes ABC-123 and PROJ-9", + "previousTarget": null, + "previousDraft": false +} diff --git a/tests/test_parsers_bitbucket.py b/tests/test_parsers_bitbucket.py index 8d504a2..1b13e2b 100644 --- a/tests/test_parsers_bitbucket.py +++ b/tests/test_parsers_bitbucket.py @@ -57,6 +57,121 @@ def test_revert_pr_title_marks_is_revert(self) -> None: assert result.is_revert is True +class TestExtractPrModified: + def test_draft_to_ready_flip_emits_synthetic_ready_for_review(self) -> None: + # Given — pr:modified payload with previousDraft=true, draft=false. + # This is the DX Core 4 pickup-time start signal for PRs that were + # opened as drafts. + body = _load("bitbucket_pr_modified_draft_to_ready.json") + + # When + result = extract_event( + body, + x_event_key="pr:modified", + x_request_uuid="req-modified-flip", + x_hook_uuid=None, + ) + + # Then — re-typed to the synthetic event so downstream queries + # don't have to dig into previousDraft on the raw payload. + assert isinstance(result, BitbucketEventDraft) + assert result.event_type == "pr:ready_for_review" + assert result.pr_id == 42 + assert result.repo_full_name == "acme/payments-api" + # Raw payload preserves the original eventKey for traceability. + assert result.payload["eventKey"] == "pr:modified" + assert result.payload["previousDraft"] is True + + def test_draft_to_ready_uses_actor_as_author(self) -> None: + # Given — a maintainer (not the PR author) flips the draft switch. + # Pickup-time attribution should follow whoever performed the + # action, same rule as reviewer-activity events. + body = _load("bitbucket_pr_modified_draft_to_ready.json") + body["actor"] = {"name": "carol", "displayName": "Carol Maintainer", "slug": "carol"} + + # When + result = extract_event( + body, x_event_key="pr:modified", x_request_uuid="r", x_hook_uuid=None + ) + + # Then + assert isinstance(result, BitbucketEventDraft) + assert result.event_type == "pr:ready_for_review" + # carol flipped it, even though alice originally opened the PR. + assert result.author == "carol" + + def test_title_only_modify_returns_skip(self) -> None: + # Given — pr:modified with previousDraft=false (i.e. just a title / + # description / target change). Carries no metric we track today, + # so the parser drops it rather than bloating the table. + body = _load("bitbucket_pr_modified_title_only.json") + + # When + result = extract_event( + body, x_event_key="pr:modified", x_request_uuid="r", x_hook_uuid=None + ) + + # Then + assert isinstance(result, BitbucketSkip) + assert result.reason == "pr:modified without draft→ready flip" + # event_type on the skip reflects what BBS sent, not the synthetic + # rename (we never emitted a synthetic for this delivery). + assert result.event_type == "pr:modified" + assert result.repo_full_name == "acme/payments-api" + + def test_ready_to_draft_returns_skip(self) -> None: + # Given — the reverse flip (ready → draft). Not a pickup signal; + # if we tracked "PR rework rate" later this would be useful, but + # for v1 it carries no metric so we skip it. + body = _load("bitbucket_pr_modified_draft_to_ready.json") + body["previousDraft"] = False + body["pullRequest"]["draft"] = True + + # When + result = extract_event( + body, x_event_key="pr:modified", x_request_uuid="r", x_hook_uuid=None + ) + + # Then + assert isinstance(result, BitbucketSkip) + assert result.reason == "pr:modified without draft→ready flip" + + def test_pr_opened_as_draft_passes_through_unchanged(self) -> None: + # Regression guard: an opened-as-draft PR must still produce a + # regular pr:opened row — the pickup-time SQL inspects + # payload->'pullRequest'->>'draft' on that row to decide whether + # it counts as a clock-start. If the parser ever started filtering + # or re-typing opened-as-draft, the SQL would silently break. + body = _load("bitbucket_pr_merged.json") + body["eventKey"] = "pr:opened" + body["pullRequest"]["state"] = "OPEN" + body["pullRequest"]["draft"] = True + + # When + result = extract_event(body, x_event_key="pr:opened", x_request_uuid="r", x_hook_uuid=None) + + # Then + assert isinstance(result, BitbucketEventDraft) + assert result.event_type == "pr:opened" + assert result.payload["pullRequest"]["draft"] is True + + def test_modified_without_draft_fields_returns_skip(self) -> None: + # Given — a BBS DC version (or payload) without draft tracking at + # all: no previousDraft, no pullRequest.draft. The parser must + # not emit a synthetic ready-for-review row from absence-of-data. + body = _load("bitbucket_pr_modified_title_only.json") + del body["previousDraft"] + del body["pullRequest"]["draft"] + + # When + result = extract_event( + body, x_event_key="pr:modified", x_request_uuid="r", x_hook_uuid=None + ) + + # Then + assert isinstance(result, BitbucketSkip) + + class TestExtractRefsChanged: def test_branch_push_extracts_to_hash_and_branch(self) -> None: # Given