From 1f51279b825f059520bcb9d807c9b0e00374f1b2 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:36:59 +0200 Subject: [PATCH 01/11] Move code review prompt templates to Markdown files --- bugbug/tools/code_review/prompts.py | 85 +++++-------------- .../code_review/prompts/first_message.md | 21 +++++ bugbug/tools/code_review/prompts/system.md | 51 +++++++++++ 3 files changed, 93 insertions(+), 64 deletions(-) create mode 100644 bugbug/tools/code_review/prompts/first_message.md create mode 100644 bugbug/tools/code_review/prompts/system.md diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py index bf4073d474..dffdca43fe 100644 --- a/bugbug/tools/code_review/prompts.py +++ b/bugbug/tools/code_review/prompts.py @@ -5,80 +5,37 @@ """Prompt templates for code review agent.""" -from bugbug.tools.code_review.data_types import Skill - -TARGET_SOFTWARE: str | None = None - - -SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code. - -## Instructions +from pathlib import Path -Follow this systematic approach to review the patch: - -**Step 1: Analyze the Changes** -- Understand what the patch is trying to accomplish -- Use the patch summary for context, but focus primarily on what you can see in the actual diff -- Identify the intent and structure of the changes - -**Step 2: Identify Issues** -- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards -- Focus ONLY on new or changed lines (lines that begin with `+`) -- Never comment on unmodified code -- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns - -**Step 3: Verify and Assess Confidence** -- Use available tools when you need to verify concerns or gather additional context -- Only include comments where you are at least 80% confident the issue is valid -- When uncertain about an issue, use available tools to verify before commenting -- Do not suggest issues you cannot verify with available context +from bugbug.tools.code_review.data_types import Skill -**Step 4: Sort and Order Comments** -- Sort comments by descending confidence and importance -- Start with issues you are certain are valid and that are most critical -- Assign each comment a numeric order starting at 1 +_PROMPTS_DIR = Path(__file__).parent / "prompts" -**Step 5: Write Clear, Constructive Comments** -- Use direct, declarative language - state the problem definitively, then suggest the fix -- Keep comments short and specific -- Use directive language: "Fix", "Remove", "Change", "Add" -- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to" -- Focus strictly on code-related concerns +TARGET_SOFTWARE: str | None = None -## What NOT to Include +_SYSTEM_PROMPT_RAW = (_PROMPTS_DIR / "system.md").read_text() -Do not write comments that: -- Refer to unmodified code (lines without a `+` prefix) -- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...") -- Provide praise or restate obvious facts -- Focus on testing concerns -- Point out issues that are already handled in the visible code -- Suggest problems based on assumptions without verifying the context -- Flag style preferences without clear coding standard violations +_CONTEXT_TOOLS_AGENT = """\ + - `expand_context` to read surrounding code in the patched files + - `find_function_definition` to look up callers or definitions\ """ +_CONTEXT_TOOLS_LOCAL = """\ + - `Read` and `Grep` to inspect files in the local checkout + - `Bash` with `searchfox-cli` to query Searchfox for definitions, callers, and cross-references\ +""" -FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch: - - -{patch_summarization} - - - -Here are examples of good code review comments to guide your style and approach: - - -{comment_examples} -{approved_examples} - - +SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format( + target_software="{target_software}", + context_tools=_CONTEXT_TOOLS_AGENT, +) -Here is the patch you need to review: +LOCAL_SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format( + target_software="{target_software}", + context_tools=_CONTEXT_TOOLS_LOCAL, +) - -{patch} - -""" +FIRST_MESSAGE_TEMPLATE = (_PROMPTS_DIR / "first_message.md").read_text() TEMPLATE_COMMENT_EXAMPLE = """Patch example {example_number}: diff --git a/bugbug/tools/code_review/prompts/first_message.md b/bugbug/tools/code_review/prompts/first_message.md new file mode 100644 index 0000000000..848897fb69 --- /dev/null +++ b/bugbug/tools/code_review/prompts/first_message.md @@ -0,0 +1,21 @@ +Here is a summary of the patch: + + +{patch_summarization} + +{external_context} + + +Here are examples of good code review comments to guide your style and approach: + + +{comment_examples} +{approved_examples} + + + +Here is the patch you need to review: + + +{patch} + diff --git a/bugbug/tools/code_review/prompts/system.md b/bugbug/tools/code_review/prompts/system.md new file mode 100644 index 0000000000..674b4000a2 --- /dev/null +++ b/bugbug/tools/code_review/prompts/system.md @@ -0,0 +1,51 @@ +## Instructions + +Follow this systematic approach to review the patch: + +**Step 1: Analyze the Changes** + +- Read the commit message +- Understand what the patch is trying to accomplish +- Identify the intent and structure of the changes; if what the patch does doesn't match the commit message, this always warrants a review comment — mismatches make the history hard to understand and bisect + +**Step 2: Identify Issues** + +- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards +- Focus ONLY on new or changed lines (lines that begin with `+`) +- Never comment on unmodified code +- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns + +**Step 3: Verify and Assess Confidence** + +- Use available tools when you need to verify concerns or gather additional context: + {context_tools} + - `mozilla__get_phabricator_revision` to fetch related Phabricator revisions — open and closed, including their review comments; particularly useful for other revisions in the same stack + - `mozilla__get_bugzilla_bug` to fetch the associated Bugzilla bug and its history + - `mozilla__read_fx_doc_section` to read sections from the Firefox Source Tree Documentation (firefox-source-docs.mozilla.org) — useful for architecture and API documentation relevant to the changed code +- Significant tool usage is expected to perform a high-quality review, much like a human reviewer would +- When uncertain about an issue, use available tools to verify before commenting +- Do not suggest issues you cannot verify with available context + +**Step 4: Sort and Order Comments** + +- Sort comments by descending confidence and importance +- Start with issues you are certain are valid and that are most critical +- Assign each comment a numeric order starting at 1 + +**Step 5: Write Clear, Constructive Comments** + +- Use direct, declarative language - state the problem definitively, then suggest the fix +- Keep comments short and specific +- Use directive language: "Fix", "Remove", "Change", "Add" +- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to" +- Focus strictly on code-related concerns + +## What NOT to Include + +Do not write comments that: + +- Refer to unmodified code (lines without a `+` prefix) +- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...") +- Provide praise or restate obvious facts +- Suggest problems based on assumptions without verifying the context +- Flag style preferences without clear coding standard violations From 64ea247b9ede000177f7b9988182b964bb3ea906 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 27 May 2026 14:58:50 +0000 Subject: [PATCH 02/11] Wire Mozilla MCP server into the agent Adds the moz.tools MCP server to the Anthropic client so the agent can call mozilla__ tools during review. URL is overridable via BUGBUG_MOZ_MCP_URL. --- bugbug/tools/code_review/agent.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index 794dc6194d..b21c8bf563 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -160,6 +160,15 @@ def create(cls, **kwargs): max_tokens=40_000, temperature=None, thinking={"type": "enabled", "budget_tokens": 10_000}, + mcp_servers=[ + { + "type": "url", + "url": os.getenv( + "BUGBUG_MOZ_MCP_URL", "https://mcp-dev.moz.tools/mcp" + ), + "name": "mozilla", + } + ], ) if "patch_summarizer" not in kwargs: From 0f4fdeaa11076772a23b6e4f89dbe6d8623a3cd9 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 27 May 2026 15:00:18 +0000 Subject: [PATCH 03/11] Add LocalPatch and local diff review via diff= parameter LocalPatch wraps a raw unified diff string as a first-class Patch so it can flow through the review pipeline without a Phabricator backing. It accepts an optional commit_message and parses from it: the patch title (first line), description (remaining lines), bug ID (Bug NNN), and Differential Revision URL, and date if present in the format-patch header. On the MCP server side, patch_review now accepts optional diff= and commit_message= parameters alongside patch_url=, and a new patch_review_tool exposes the same logic as an @mcp.tool(). A shared _patch_review_impl handles both cases, using a lighter CodeReviewTool instance (no DB, no suggestion filtering) for local diffs. LOCAL_SYSTEM_PROMPT_TEMPLATE uses searchfox-cli and local agent tools instead of the agent-specific expand_context and searchfox tools. --- bugbug/tools/code_review/data_types.py | 87 +++++++++++++++++++++++++ services/mcp/src/bugbug_mcp/server.py | 88 ++++++++++++++++++++------ 2 files changed, 155 insertions(+), 20 deletions(-) diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py index d7ecd6938d..944ce9004e 100644 --- a/bugbug/tools/code_review/data_types.py +++ b/bugbug/tools/code_review/data_types.py @@ -1,10 +1,12 @@ import re +from datetime import datetime import httpx from pydantic import BaseModel, Field, PrivateAttr from bugbug.tools.core.connection import get_http_client from bugbug.tools.core.data_types import InlineComment +from bugbug.tools.core.platforms.base import Patch class GeneratedReviewComment(BaseModel): @@ -82,3 +84,88 @@ async def load(self) -> str: self._cached_body = _strip_frontmatter(response.text) return self._cached_body + + +_BUG_RE = re.compile(r"[Bb]ug\s+(\d+)") +_DIFFERENTIAL_RE = re.compile(r"Differential Revision:\s*(https?://\S+)") +_DATE_RE = re.compile(r"^Date:\s+(.+)$", re.MULTILINE) + + +class LocalPatch(Patch): + """A patch from a local diff string, not backed by any platform.""" + + def __init__( + self, + diff: str, + commit_message: str = "", + ) -> None: + self._diff = diff + self._commit_message = commit_message + lines = commit_message.splitlines() + self._title = lines[0].strip() if lines else "Local patch" + self._description = "\n".join(lines[1:]).strip() if len(lines) > 1 else "" + m = _BUG_RE.search(commit_message) + self._bug_id: int | None = int(m.group(1)) if m else None + m2 = _DIFFERENTIAL_RE.search(commit_message) + self._url = m2.group(1) if m2 else "" + m3 = _DATE_RE.search(commit_message) + if m3: + from email.utils import parsedate_to_datetime + try: + self._date = parsedate_to_datetime(m3.group(1)) + except Exception: + self._date = datetime.now() + else: + self._date = datetime.now() + + @property + def patch_id(self) -> str: + return "local" + + @property + def raw_diff(self) -> str: + return self._diff + + @property + def date_created(self) -> datetime: + return self._date + + @property + def has_bug(self) -> bool: + return self._bug_id is not None + + @property + def bug_id(self) -> int | None: + return self._bug_id + + @property + def bug_title(self) -> str: + return "" + + @property + def patch_title(self) -> str: + return self._title + + @property + def patch_description(self) -> str: + return self._description + + @property + def patch_url(self) -> str: + return self._url + + def is_accessible(self) -> bool: + return True + + def is_public(self) -> bool: + return True + + async def get_new_file(self, file_path: str) -> str: + raise FileNotFoundError( + f"No repository checkout available for '{file_path}' — use local tools to inspect it directly." + ) + + async def get_old_file(self, file_path: str) -> str: + raise FileNotFoundError( + f"No repository checkout available for '{file_path}' — use local tools to inspect it directly." + ) diff --git a/services/mcp/src/bugbug_mcp/server.py b/services/mcp/src/bugbug_mcp/server.py index a5179ee7ff..7cc99d09d5 100644 --- a/services/mcp/src/bugbug_mcp/server.py +++ b/services/mcp/src/bugbug_mcp/server.py @@ -1,6 +1,7 @@ """MCP server for Firefox Development.""" import functools +import logging import os from pathlib import Path from typing import Annotated @@ -12,7 +13,11 @@ from fastmcp.resources import FileResource from pydantic import Field -from bugbug.tools.code_review.prompts import SYSTEM_PROMPT_TEMPLATE +from bugbug.tools.code_review.data_types import LocalPatch +from bugbug.tools.code_review.prompts import ( + LOCAL_SYSTEM_PROMPT_TEMPLATE, + SYSTEM_PROMPT_TEMPLATE, +) from bugbug.tools.core.platforms.bugzilla import SanitizedBug from bugbug.tools.core.platforms.phabricator import ( PhabricatorPatch, @@ -20,6 +25,7 @@ ) mcp = FastMCP("Firefox Development MCP Server") +logger = logging.getLogger(__name__) @functools.cache @@ -29,32 +35,74 @@ def get_code_review_tool(): return CodeReviewTool.create() -@mcp.prompt() -async def patch_review( - patch_url: str = Field(description="URL to the Phabricator patch to review."), +@functools.cache +def get_local_code_review_tool(): + """Minimal tool for local diff prompt assembly — no LLM calls on the server.""" + from bugbug.tools.code_review.agent import CodeReviewTool + + return CodeReviewTool.create( + review_comments_db=None, + suggestion_filterer=None, + ) + + +async def _patch_review_impl( + patch_url: str | None, + diff: str | None, + commit_message: str | None, ) -> str: - """Review a code patch from Phabricator.""" - parsed_url = urlparse(patch_url) - if ( - parsed_url.netloc == "phabricator.services.mozilla.com" - and parsed_url.path.startswith("/D") - ): - revision_id = int(parsed_url.path[2:]) + if diff: + patch = LocalPatch(diff, commit_message=commit_message or "") + elif patch_url: + parsed_url = urlparse(patch_url) + if ( + parsed_url.netloc == "phabricator.services.mozilla.com" + and parsed_url.path.startswith("/D") + ): + revision_id = int(parsed_url.path[2:]) + else: + raise ValueError(f"Unsupported patch URL: {patch_url}") + patch = PhabricatorPatch(revision_id=revision_id) else: - raise ValueError(f"Unsupported patch URL: {patch_url}") + raise ValueError("Provide either patch_url or diff.") - patch = PhabricatorPatch(revision_id=revision_id) - - tool = get_code_review_tool() - system_prompt = SYSTEM_PROMPT_TEMPLATE.format( - target_software=tool.target_software, - ) - patch_summary = tool.patch_summarizer.run(patch) + tool = get_local_code_review_tool() if diff else get_code_review_tool() + prompt_template = LOCAL_SYSTEM_PROMPT_TEMPLATE if diff else SYSTEM_PROMPT_TEMPLATE + system_prompt = prompt_template.format(target_software=tool.target_software) + patch_summary = "" if diff else tool.patch_summarizer.run(patch) initial_prompt = tool.generate_initial_prompt(patch, patch_summary) - return system_prompt + "\n\n" + initial_prompt +@mcp.prompt() +async def patch_review( + patch_url: str | None = Field( + default=None, + description="URL to the Phabricator patch to review.", + ), + diff: str | None = Field( + default=None, + description="Raw unified diff to review (for local patches not yet on Phabricator).", + ), + commit_message: str | None = Field( + default=None, + description="Commit message for the local diff (optional, used to extract bug ID, title, and Differential Revision URL).", + ), +) -> str: + """Review a code patch from Phabricator or a raw local diff.""" + return await _patch_review_impl(patch_url, diff, commit_message) + + +@mcp.tool() +async def patch_review_tool( + patch_url: str | None = None, + diff: str | None = None, + commit_message: str | None = None, +) -> str: + """Build the patch review prompt. Use diff= for local diffs, patch_url= for Phabricator.""" + return await _patch_review_impl(patch_url, diff, commit_message) + + @mcp.resource( uri="bugzilla://bug/{bug_id}", name="Bugzilla Bug Content", From 5fded856e34425af59e078149cdf7f7ecc98f317 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Fri, 29 May 2026 17:46:30 +0200 Subject: [PATCH 04/11] Expose Phabricator review metadata in MCP --- bugbug/tools/core/platforms/phabricator.py | 137 +++++++++++++++++++- tests/test_phabricator_trusted_filtering.py | 84 ++++++++++++ 2 files changed, 220 insertions(+), 1 deletion(-) diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 7ea7526b3a..2e14ed3e6f 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -145,6 +145,42 @@ def _get_users_info_batch(user_phids: set[str]) -> dict[str, dict]: return _get_users_info_batch_with_retry(user_phids) +def _get_project_info_batch(project_phids: set[str]) -> dict[str, dict]: + if not project_phids: + return {} + + phabricator = get_phabricator_client() + projects_response = phabricator.request( + "project.search", + constraints={"phids": list(project_phids)}, + ) + + result = {} + for project_data in projects_response.get("data", []): + phid = project_data["phid"] + fields = project_data.get("fields", {}) + result[phid] = { + "name": fields.get("name", phid), + "slug": fields.get("slug"), + } + return result + + +def _get_repository_info(repository_phid: str | None) -> dict | None: + if not repository_phid: + return None + + phabricator = get_phabricator_client() + repositories_response = phabricator.request( + "diffusion.repository.search", + constraints={"phids": [repository_phid]}, + ) + repositories = repositories_response.get("data", []) + if not repositories: + return None + return repositories[0] + + def _sanitize_comments(comments: list, users_info: dict[str, dict]) -> tuple[list, int]: """Sanitize comments by filtering untrusted content. @@ -512,6 +548,19 @@ def author_phid(self) -> str: def diff_author_phid(self) -> str: return self._diff_metadata["authorPHID"] + @cached_property + def _revision_search_metadata(self) -> dict: + phabricator = get_phabricator_client() + response = phabricator.request( + "differential.revision.search", + constraints={"ids": [self.revision_id]}, + attachments={"reviewers": True}, + ) + revisions = response.get("data", []) + if not revisions: + return {} + return revisions[0] + @property def stack_graph(self) -> dict: return self._revision_metadata["fields"].get("stackGraph", {}) @@ -560,11 +609,45 @@ def _all_comments(self) -> list: @cached_property def _users_info(self) -> dict[str, dict]: - user_phids = {c.author_phid for c in self._all_comments} | {self.author_phid} + user_phids = ( + {c.author_phid for c in self._all_comments} + | {self.author_phid} + | self.reviewer_user_phids + ) if self.author_phid != self.diff_author_phid: user_phids.add(self.diff_author_phid) return _get_users_info_batch(user_phids) + @cached_property + def _project_info(self) -> dict[str, dict]: + return _get_project_info_batch(self.reviewer_project_phids) + + @cached_property + def reviewers(self) -> list[dict]: + if not self._revision_metadata["fields"].get("repositoryPHID"): + return [] + return ( + self._revision_search_metadata.get("attachments", {}) + .get("reviewers", {}) + .get("reviewers", []) + ) + + @cached_property + def reviewer_user_phids(self) -> set[str]: + return { + reviewer["reviewerPHID"] + for reviewer in self.reviewers + if reviewer.get("reviewerPHID", "").startswith("PHID-USER-") + } + + @cached_property + def reviewer_project_phids(self) -> set[str]: + return { + reviewer["reviewerPHID"] + for reviewer in self.reviewers + if reviewer.get("reviewerPHID", "").startswith("PHID-PROJ-") + } + def _format_user_display(self, user_phid: str) -> str: info = self._users_info.get(user_phid, {}) email = info.get("email", "Unknown") @@ -573,6 +656,49 @@ def _format_user_display(self, user_phid: str) -> str: return f"{real_name} ({email})" return email + def _format_reviewer_display(self, reviewer: dict) -> str: + reviewer_phid = reviewer["reviewerPHID"] + if reviewer_phid.startswith("PHID-USER-"): + name = self._format_user_display(reviewer_phid) + elif reviewer_phid.startswith("PHID-PROJ-"): + project = self._project_info.get(reviewer_phid, {}) + slug = project.get("slug") + name = slug or project.get("name", reviewer_phid) + else: + name = reviewer_phid + + status = reviewer.get("status") + if reviewer.get("isBlocking"): + status = "blocking" + if status: + return f"{name} [{status}]" + return name + + @property + def reviewer_summary(self) -> str | None: + if not self.reviewers: + return None + return ", ".join(self._format_reviewer_display(r) for r in self.reviewers) + + @cached_property + def repository_info(self) -> dict | None: + return _get_repository_info( + self._revision_metadata["fields"].get("repositoryPHID") + ) + + @property + def target_repository(self) -> str | None: + if not self.repository_info: + return None + fields = self.repository_info.get("fields", {}) + return fields.get("shortName") or fields.get("name") + + @property + def target_repository_default_branch(self) -> str | None: + if not self.repository_info: + return None + return self.repository_info.get("fields", {}).get("defaultBranch") + @property def author(self) -> str: return self._format_user_display(self.author_phid) @@ -633,6 +759,15 @@ def to_md(self) -> str: md_lines.append(f"- **URI**: {self.revision_uri}") md_lines.append(f"- **Revision Author**: {self.author}") md_lines.append(f"- **Status**: {self.revision_status}") + if self.reviewer_summary: + md_lines.append(f"- **Reviewers**: {self.reviewer_summary}") + if self.target_repository: + repository_line = self.target_repository + if self.target_repository_default_branch: + repository_line += ( + f" (default branch: {self.target_repository_default_branch})" + ) + md_lines.append(f"- **Target Repository**: {repository_line}") md_lines.append(f"- **Created**: {self.date_created.strftime(date_format)}") md_lines.append(f"- **Modified**: {self.date_modified.strftime(date_format)}") bug_id = self._revision_metadata["fields"].get("bugzilla.bug-id") or "N/A" diff --git a/tests/test_phabricator_trusted_filtering.py b/tests/test_phabricator_trusted_filtering.py index 4f2d6e32bc..41c425b330 100644 --- a/tests/test_phabricator_trusted_filtering.py +++ b/tests/test_phabricator_trusted_filtering.py @@ -708,6 +708,90 @@ def mock_load_revision(rev_phid=None, rev_id=None): assert REDACTED_TITLE not in md_output +def test_to_md_includes_reviewers_and_target_repository(): + mock_revision = { + "id": 12345, + "phid": "PHID-DREV-test123", + "fields": { + "title": "This is the revision title", + "authorPHID": "PHID-USER-author", + "status": {"name": "Needs Review"}, + "uri": "https://phabricator.services.mozilla.com/D12345", + "bugzilla.bug-id": "123456", + "summary": "", + "testPlan": "", + "stackGraph": {}, + "repositoryPHID": "PHID-REPO-firefoxbeta", + }, + } + mock_revision_search = { + "attachments": { + "reviewers": { + "reviewers": [ + { + "reviewerPHID": "PHID-USER-reviewer", + "status": "blocking", + "isBlocking": True, + } + ] + } + } + } + mock_repository_info = { + "fields": { + "shortName": "firefox-beta", + "defaultBranch": "beta", + } + } + mock_diff = { + "id": 54321, + "dateCreated": 1704110400, + "dateModified": 1704110400, + "baseRevision": "abc123", + "authorPHID": "PHID-USER-author", + } + mock_users_info = { + "PHID-USER-author": { + "email": "author@example.com", + "is_trusted": True, + "is_trusted_bot": False, + "real_name": "Patch Author", + }, + "PHID-USER-reviewer": { + "email": "reviewer@example.com", + "is_trusted": True, + "is_trusted_bot": False, + "real_name": "Patch Reviewer", + }, + } + + with ( + patch.object(SanitizedPhabricatorPatch, "_revision_metadata", mock_revision), + patch.object( + SanitizedPhabricatorPatch, + "_revision_search_metadata", + mock_revision_search, + ), + patch.object( + SanitizedPhabricatorPatch, "repository_info", mock_repository_info + ), + patch.object(SanitizedPhabricatorPatch, "_diff_metadata", mock_diff), + patch.object(SanitizedPhabricatorPatch, "get_comments", return_value=[]), + patch( + "bugbug.tools.core.platforms.phabricator._get_users_info_batch", + return_value=mock_users_info, + ), + patch.object(SanitizedPhabricatorPatch, "raw_diff", "diff content"), + ): + patch_obj = SanitizedPhabricatorPatch(diff_id=54321) + markdown = patch_obj.to_md() + + assert ( + "- **Reviewers**: Patch Reviewer (reviewer@example.com) [blocking]" in markdown + ) + assert "- **Target Repository**: firefox-beta (default branch: beta)" in markdown + + # Subsequent test rely on having an API key present, and perform testing against # the live phabricator instance. They can be helpful to validate changes # locally, but aren't run in CI. From 868301733a71c23ab7f1fca3f235dbe6ee75e659 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:45:47 +0200 Subject: [PATCH 05/11] Add reusable external content loading --- bugbug/tools/code_review/data_types.py | 42 ++++++++++++++++++++++++++ tests/test_code_review.py | 18 ++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py index 944ce9004e..3b2c48e470 100644 --- a/bugbug/tools/code_review/data_types.py +++ b/bugbug/tools/code_review/data_types.py @@ -2,6 +2,7 @@ from datetime import datetime import httpx +import tenacity from pydantic import BaseModel, Field, PrivateAttr from bugbug.tools.core.connection import get_http_client @@ -169,3 +170,44 @@ async def get_old_file(self, file_path: str) -> str: raise FileNotFoundError( f"No repository checkout available for '{file_path}' — use local tools to inspect it directly." ) + + +class ExternalContentLoadError(Exception): + """Raised when an ExternalContent body cannot be loaded.""" + + +@tenacity.retry( + stop=tenacity.stop_after_attempt(3), + wait=tenacity.wait_exponential(multiplier=1, min=1), + retry=tenacity.retry_if_exception_type(httpx.TransportError), + reraise=True, +) +async def _fetch_url(url: str) -> httpx.Response: + response = await get_http_client().get(url, timeout=30) + response.raise_for_status() + return response + + +class ExternalContent(BaseModel): + """An external file fetched and injected as context for the review.""" + + name: str = Field(description="A unique identifier for this content item.") + url: str = Field(description="HTTPS URL of the file to fetch.") + description: str = Field(description="Short description of what this content provides.") + + _cached_body: str | None = PrivateAttr(default=None) + + async def load(self) -> str: + """Return the content body, fetching and caching it on first use.""" + if self._cached_body is not None: + return self._cached_body + + try: + response = await _fetch_url(self.url) + except httpx.HTTPError as e: + raise ExternalContentLoadError( + f"Could not load content '{self.name}' from {self.url}" + ) from e + + self._cached_body = _strip_frontmatter(response.text) + return self._cached_body diff --git a/tests/test_code_review.py b/tests/test_code_review.py index 584824544d..c28ce11205 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -9,7 +9,7 @@ from unidiff import PatchSet from bugbug.tools.code_review import data_types, langchain_tools -from bugbug.tools.code_review.data_types import Skill, _strip_frontmatter +from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool from bugbug.tools.code_review.utils import find_comment_scope from bugbug.tools.core.platforms.patch_apply import ( @@ -480,3 +480,19 @@ def test_fetch_file_skips_revision_when_none(): result = asyncio.run(_fetch_file("f.txt", None, client, patch)) assert result == "latest content" client.get_file_at_revision.assert_not_called() + + +@pytest.mark.asyncio +async def test_external_content_load_caches(): + item = ExternalContent( + name="a", + url="https://example.com/a.md", + description="example", + ) + client = _mock_client_returning("---\nname: a\n---\nbody\n") + with patch.object(data_types, "get_http_client", return_value=client): + first = await item.load() + second = await item.load() + assert first == "body\n" + assert second == "body\n" + assert client.get.await_count == 1 From be04049d240e46c44b4993d38f3c49d947f7a094 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:46:41 +0200 Subject: [PATCH 06/11] Add review context schema validation --- .../code_review/review_context_schema.py | 622 ++++++++++++++++++ pyproject.toml | 1 + tests/test_code_review.py | 101 +++ 3 files changed, 724 insertions(+) create mode 100644 bugbug/tools/code_review/review_context_schema.py diff --git a/bugbug/tools/code_review/review_context_schema.py b/bugbug/tools/code_review/review_context_schema.py new file mode 100644 index 0000000000..ceba192a0c --- /dev/null +++ b/bugbug/tools/code_review/review_context_schema.py @@ -0,0 +1,622 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Schema and validator for code review external context rules. + +This module intentionally depends only on the Python standard library so it can +also be copied into target repositories and used from local tests. +""" + +import argparse +import sys +from collections.abc import Callable +from dataclasses import dataclass, field +from pathlib import Path +from typing import Literal, TypeAlias + +try: + import tomllib +except ImportError: + import tomli as tomllib # type: ignore + + +class ReviewContextValidationError(ValueError): + """Raised when review-context.toml has valid TOML but invalid schema.""" + + +@dataclass(frozen=True) +class GithubPolicy: + allowed_repos: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class Policy: + github: GithubPolicy = field(default_factory=GithubPolicy) + + +@dataclass(frozen=True) +class FilePredicate: + include: list[str] = field(default_factory=list) + exclude: list[str] = field(default_factory=list) + ext: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class BugzillaPredicate: + product: list[str] = field(default_factory=list) + component: list[str] = field(default_factory=list) + keywords: list[str] = field(default_factory=list) + severity: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class ReviewPredicate: + author: list[str] = field(default_factory=list) + reviewer: list[str] = field(default_factory=list) + blocking_reviewer: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class PatchPredicate: + repository: list[str] = field(default_factory=list) + is_backport: bool | None = None + + +@dataclass(frozen=True) +class Definitions: + files: dict[str, FilePredicate] = field(default_factory=dict) + bugzilla: dict[str, BugzillaPredicate] = field(default_factory=dict) + review: dict[str, ReviewPredicate] = field(default_factory=dict) + patch: dict[str, PatchPredicate] = field(default_factory=dict) + + +@dataclass(frozen=True) +class AllPredicate: + children: list["Predicate"] + + +@dataclass(frozen=True) +class AnyPredicate: + children: list["Predicate"] + + +@dataclass(frozen=True) +class NotPredicate: + child: "Predicate" + + +@dataclass(frozen=True) +class AnyFilePredicate: + predicate: FilePredicate + + +@dataclass(frozen=True) +class AllFilesPredicate: + predicate: FilePredicate + + +Predicate: TypeAlias = ( + AllPredicate + | AnyPredicate + | NotPredicate + | AnyFilePredicate + | AllFilesPredicate + | BugzillaPredicate + | ReviewPredicate + | PatchPredicate +) + + +@dataclass(frozen=True) +class LoadFileAction: + type: Literal["file"] + path: str + repo: str | None = None + branch: str | None = None + kind: str | None = None + + +@dataclass(frozen=True) +class FetchRevisionAction: + type: Literal["fetch_revision"] + revision: str | None = None + repo: str | None = None + hash: str | None = None + kind: str | None = None + + +RuleAction: TypeAlias = LoadFileAction | FetchRevisionAction + + +@dataclass(frozen=True) +class Rule: + name: str + when: Predicate + load: list[RuleAction] + description: str | None = None + owners: list[str] = field(default_factory=list) + priority: int = 0 + + +@dataclass(frozen=True) +class ReviewContextConfig: + version: int + policy: Policy = field(default_factory=Policy) + definitions: Definitions = field(default_factory=Definitions) + rules: list[Rule] = field(default_factory=list) + + +def _reject_unknown_keys(path: str, value: dict, allowed: set[str]) -> None: + unknown = sorted(set(value) - allowed) + if unknown: + raise ReviewContextValidationError( + f"{path}: unknown field(s): {', '.join(unknown)}" + ) + + +def _require_table(value: object, path: str) -> dict: + if not isinstance(value, dict): + raise ReviewContextValidationError(f"{path}: expected table") + return value + + +def _require_string(value: object, path: str) -> str: + if not isinstance(value, str): + raise ReviewContextValidationError(f"{path}: expected string") + return value + + +def _optional_string(value: object, path: str) -> str | None: + if value is None: + return None + return _require_string(value, path) + + +def _string_list(value: object, path: str) -> list[str]: + if value is None: + return [] + if not isinstance(value, list): + raise ReviewContextValidationError(f"{path}: expected list of strings") + for index, item in enumerate(value): + _require_string(item, f"{path}[{index}]") + return value + + +def _required_non_empty_string_list(value: object, path: str) -> list[str]: + items = _string_list(value, path) + if not items: + raise ReviewContextValidationError(f"{path}: expected non-empty list") + return items + + +def _optional_bool(value: object, path: str) -> bool | None: + if value is None: + return None + if not isinstance(value, bool): + raise ReviewContextValidationError(f"{path}: expected boolean") + return value + + +def _require_int(value: object, path: str) -> int: + if not isinstance(value, int) or isinstance(value, bool): + raise ReviewContextValidationError(f"{path}: expected integer") + return value + + +def _normalise_repo(repo: str) -> str: + return repo.lower() + + +def _parse_github_policy(value: object, path: str) -> GithubPolicy: + table = _require_table(value, path) + _reject_unknown_keys(path, table, {"allowed_repos"}) + return GithubPolicy( + allowed_repos=[ + _normalise_repo(repo) + for repo in _string_list( + table.get("allowed_repos"), f"{path}.allowed_repos" + ) + ] + ) + + +def _parse_policy(value: object | None) -> Policy: + if value is None: + return Policy() + table = _require_table(value, "policy") + _reject_unknown_keys("policy", table, {"github"}) + return Policy(github=_parse_github_policy(table.get("github", {}), "policy.github")) + + +def _parse_file_predicate(value: object, path: str) -> FilePredicate: + table = _require_table(value, path) + _reject_unknown_keys(path, table, {"include", "exclude", "ext", "ref"}) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = FilePredicate( + include=_string_list(table.get("include"), f"{path}.include"), + exclude=_string_list(table.get("exclude"), f"{path}.exclude"), + ext=_string_list(table.get("ext"), f"{path}.ext"), + ) + if not predicate.include and not predicate.exclude and not predicate.ext: + raise ReviewContextValidationError(f"{path}: expected include, exclude, or ext") + return predicate + + +def _parse_bugzilla_predicate(value: object, path: str) -> BugzillaPredicate: + table = _require_table(value, path) + _reject_unknown_keys( + path, table, {"product", "component", "keywords", "severity", "ref"} + ) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = BugzillaPredicate( + product=_string_list(table.get("product"), f"{path}.product"), + component=_string_list(table.get("component"), f"{path}.component"), + keywords=_string_list(table.get("keywords"), f"{path}.keywords"), + severity=_string_list(table.get("severity"), f"{path}.severity"), + ) + if ( + not predicate.product + and not predicate.component + and not predicate.keywords + and not predicate.severity + ): + raise ReviewContextValidationError(f"{path}: expected at least one field") + return predicate + + +def _parse_review_predicate(value: object, path: str) -> ReviewPredicate: + table = _require_table(value, path) + _reject_unknown_keys( + path, table, {"author", "reviewer", "blocking_reviewer", "ref"} + ) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = ReviewPredicate( + author=_string_list(table.get("author"), f"{path}.author"), + reviewer=_string_list(table.get("reviewer"), f"{path}.reviewer"), + blocking_reviewer=_string_list( + table.get("blocking_reviewer"), f"{path}.blocking_reviewer" + ), + ) + if ( + not predicate.author + and not predicate.reviewer + and not predicate.blocking_reviewer + ): + raise ReviewContextValidationError(f"{path}: expected at least one field") + return predicate + + +def _parse_patch_predicate(value: object, path: str) -> PatchPredicate: + table = _require_table(value, path) + _reject_unknown_keys(path, table, {"repository", "is_backport", "ref"}) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = PatchPredicate( + repository=_string_list(table.get("repository"), f"{path}.repository"), + is_backport=_optional_bool(table.get("is_backport"), f"{path}.is_backport"), + ) + if not predicate.repository and predicate.is_backport is None: + raise ReviewContextValidationError(f"{path}: expected at least one field") + return predicate + + +def _parse_definitions(value: object | None) -> Definitions: + if value is None: + return Definitions() + table = _require_table(value, "definitions") + _reject_unknown_keys("definitions", table, {"files", "bugzilla", "review", "patch"}) + return Definitions( + files={ + name: _parse_file_predicate(definition, f"definitions.files.{name}") + for name, definition in _require_table( + table.get("files", {}), "definitions.files" + ).items() + }, + bugzilla={ + name: _parse_bugzilla_predicate(definition, f"definitions.bugzilla.{name}") + for name, definition in _require_table( + table.get("bugzilla", {}), "definitions.bugzilla" + ).items() + }, + review={ + name: _parse_review_predicate(definition, f"definitions.review.{name}") + for name, definition in _require_table( + table.get("review", {}), "definitions.review" + ).items() + }, + patch={ + name: _parse_patch_predicate(definition, f"definitions.patch.{name}") + for name, definition in _require_table( + table.get("patch", {}), "definitions.patch" + ).items() + }, + ) + + +def _resolve_ref(ref: object, namespace: str, definitions: Definitions, path: str): + ref_name = _require_string(ref, path) + prefix = f"{namespace}." + if not ref_name.startswith(prefix): + raise ReviewContextValidationError( + f"{path}: expected {namespace}.* ref, got {ref_name!r}" + ) + name = ref_name[len(prefix) :] + mapping = getattr(definitions, namespace) + try: + return mapping[name] + except KeyError: + raise ReviewContextValidationError( + f"{path}: unknown ref {ref_name!r}" + ) from None + + +def _parse_file_predicate_ref( + value: object, path: str, definitions: Definitions +) -> FilePredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "files", definitions, f"{path}.ref") + return _parse_file_predicate(value, path) + + +def _parse_bugzilla_predicate_ref( + value: object, path: str, definitions: Definitions +) -> BugzillaPredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "bugzilla", definitions, f"{path}.ref") + return _parse_bugzilla_predicate(value, path) + + +def _parse_review_predicate_ref( + value: object, path: str, definitions: Definitions +) -> ReviewPredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "review", definitions, f"{path}.ref") + return _parse_review_predicate(value, path) + + +def _parse_patch_predicate_ref( + value: object, path: str, definitions: Definitions +) -> PatchPredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "patch", definitions, f"{path}.ref") + return _parse_patch_predicate(value, path) + + +def _parse_all_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AllPredicate(_parse_predicate_list(value, path, definitions)) + + +def _parse_any_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AnyPredicate(_parse_predicate_list(value, path, definitions)) + + +def _parse_not_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return NotPredicate(_parse_predicate(value, path, definitions)) + + +def _parse_any_file_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AnyFilePredicate(_parse_file_predicate_ref(value, path, definitions)) + + +def _parse_all_files_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AllFilesPredicate(_parse_file_predicate_ref(value, path, definitions)) + + +PredicateParser: TypeAlias = Callable[[object, str, Definitions], Predicate] + +_PREDICATE_PARSERS: dict[str, PredicateParser] = { + "all": _parse_all_predicate, + "any": _parse_any_predicate, + "not": _parse_not_predicate, + "any_file": _parse_any_file_predicate, + "all_files": _parse_all_files_predicate, + "bugzilla": _parse_bugzilla_predicate_ref, + "review": _parse_review_predicate_ref, + "patch": _parse_patch_predicate_ref, +} + + +def _parse_predicate(value: object, path: str, definitions: Definitions) -> Predicate: + """Parse one predicate node. + + Each TOML predicate object must have exactly one key. Dispatching through a + table keeps the grammar visible and makes adding a predicate a one-line + change plus a parser function. + """ + table = _require_table(value, path) + keys = set(table) + _reject_unknown_keys(path, table, set(_PREDICATE_PARSERS)) + if len(keys) != 1: + raise ReviewContextValidationError(f"{path}: expected exactly one predicate") + key = next(iter(keys)) + return _PREDICATE_PARSERS[key](table[key], f"{path}.{key}", definitions) + + +def _parse_predicate_list( + value: object, path: str, definitions: Definitions +) -> list[Predicate]: + if not isinstance(value, list) or not value: + raise ReviewContextValidationError(f"{path}: expected non-empty list") + return [ + _parse_predicate(child, f"{path}[{index}]", definitions) + for index, child in enumerate(value) + ] + + +def _parse_action(value: object, path: str) -> RuleAction: + """Parse one load action. + + Load actions are intentionally small: validate the schema here, and leave + trust policy and network behavior to the runtime loader. + """ + table = _require_table(value, path) + action_type = table.get("type") + if action_type == "file": + _reject_unknown_keys(path, table, {"type", "path", "repo", "branch", "kind"}) + if "path" not in table: + raise ReviewContextValidationError(f"{path}.path: required for file") + return LoadFileAction( + type="file", + path=_require_string(table["path"], f"{path}.path"), + repo=_optional_string(table.get("repo"), f"{path}.repo"), + branch=_optional_string(table.get("branch"), f"{path}.branch"), + kind=_optional_string(table.get("kind"), f"{path}.kind"), + ) + if action_type == "fetch_revision": + _reject_unknown_keys(path, table, {"type", "revision", "repo", "hash", "kind"}) + revision = _optional_string(table.get("revision"), f"{path}.revision") + repo = _optional_string(table.get("repo"), f"{path}.repo") + commit_hash = _optional_string(table.get("hash"), f"{path}.hash") + if not revision and not (repo and commit_hash): + raise ReviewContextValidationError( + f"{path}: fetch_revision requires revision or repo+hash" + ) + return FetchRevisionAction( + type="fetch_revision", + revision=revision, + repo=repo, + hash=commit_hash, + kind=_optional_string(table.get("kind"), f"{path}.kind"), + ) + if action_type is None: + raise ReviewContextValidationError(f"{path}.type: required") + raise ReviewContextValidationError( + f"{path}.type: unknown action type {action_type!r}" + ) + + +def _parse_rule(value: object, index: int, definitions: Definitions) -> Rule: + path = f"rules[{index}]" + table = _require_table(value, path) + _reject_unknown_keys( + path, + table, + {"name", "description", "owners", "priority", "when", "load"}, + ) + if "name" not in table: + raise ReviewContextValidationError(f"{path}.name: required") + if "when" not in table: + raise ReviewContextValidationError(f"{path}.when: required") + if "load" not in table: + raise ReviewContextValidationError(f"{path}.load: required") + load_value = table["load"] + if not isinstance(load_value, list) or not load_value: + raise ReviewContextValidationError(f"{path}.load: expected non-empty list") + return Rule( + name=_require_string(table["name"], f"{path}.name"), + description=_optional_string(table.get("description"), f"{path}.description"), + owners=_string_list(table.get("owners"), f"{path}.owners"), + priority=_require_int(table.get("priority", 0), f"{path}.priority"), + when=_parse_predicate(table["when"], f"{path}.when", definitions), + load=[ + _parse_action(action, f"{path}.load[{action_index}]") + for action_index, action in enumerate(load_value) + ], + ) + + +def parse_review_context_data(data: dict) -> ReviewContextConfig: + """Validate parsed review-context.toml data.""" + _reject_unknown_keys("", data, {"version", "policy", "definitions", "rules"}) + version = data.get("version") + if version != 1: + raise ReviewContextValidationError("version: expected 1") + policy = _parse_policy(data.get("policy")) + definitions = _parse_definitions(data.get("definitions")) + rules_value = data.get("rules", []) + if not isinstance(rules_value, list): + raise ReviewContextValidationError("rules: expected list of rule tables") + return ReviewContextConfig( + version=version, + policy=policy, + definitions=definitions, + rules=[ + _parse_rule(rule, index, definitions) + for index, rule in enumerate(rules_value) + ], + ) + + +def parse_review_context_toml(text: str) -> ReviewContextConfig: + """Parse and validate review-context.toml content.""" + return parse_review_context_data(tomllib.loads(text)) + + +def validate_review_context_file(path: str | Path) -> ReviewContextConfig: + """Parse and validate a review-context.toml file.""" + return parse_review_context_toml(Path(path).read_text()) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Validate review-context.toml") + parser.add_argument( + "path", + nargs="?", + default="review-context.toml", + help="Path to review-context.toml (default: %(default)s)", + ) + args = parser.parse_args(argv) + + try: + config = validate_review_context_file(args.path) + except (OSError, tomllib.TOMLDecodeError, ReviewContextValidationError) as exc: + print(f"{args.path}: invalid: {exc}", file=sys.stderr) + return 1 + + print(f"{args.path}: valid ({len(config.rules)} rule(s))") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index abf83759b1..d740c53fb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,6 +116,7 @@ bugbug-data-github = "scripts.github_issue_retriever:main" bugbug-fixed-comments = "scripts.inline_comments_data_collection:main" bugbug-ci-failures-retriever = "scripts.retrieve_ci_failures:main" bugbug-try-pushes-retriever = "scripts.retrieve_try_pushes:main" +bugbug-validate-review-context = "bugbug.tools.code_review.review_context_schema:main" [tool.hatch.version] path = "VERSION" diff --git a/tests/test_code_review.py b/tests/test_code_review.py index c28ce11205..1bdd2fa4e5 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -11,6 +11,13 @@ from bugbug.tools.code_review import data_types, langchain_tools from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool +from bugbug.tools.code_review.review_context_schema import ( + ReviewContextValidationError, + parse_review_context_toml, +) +from bugbug.tools.code_review.review_context_schema import ( + main as validate_review_context_main, +) from bugbug.tools.code_review.utils import find_comment_scope from bugbug.tools.core.platforms.patch_apply import ( apply_patched_file, @@ -496,3 +503,97 @@ async def test_external_content_load_caches(): assert first == "body\n" assert second == "body\n" assert client.get.await_count == 1 + + +_RULES_TOML = """ +version = 1 + +[[rules]] +name = "Audio/Video C++" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } +load = [ + { type = "file", path = ".claude/skills/dom-media.md" }, +] + +[[rules]] +name = "WebIDL" +when = { any_file = { ext = [".webidl"] } } +load = [ + { type = "file", path = ".claude/skills/webidl.md", repo = "mozilla-firefox/firefox" }, +] + +[[rules]] +name = "Any JS" +when = { any_file = { ext = [".js"] } } +load = [ + { type = "file", path = ".claude/skills/js-style.md" }, +] + +[[rules]] +name = "Bugzilla component only" +when = { bugzilla = { component = ["Core::DOM: Web Audio"] } } +load = [ + { type = "file", path = ".claude/skills/dom-audio.md" }, +] +""" + + +def test_parse_review_context_toml_rejects_missing_when(): + toml = """ +version = 1 +[[rules]] +name = "Broken" +load = [{ type = "file", path = "x.md" }] +""" + with pytest.raises(ReviewContextValidationError, match="rules\\[0\\].when"): + parse_review_context_toml(toml) + + +def test_parse_review_context_toml_rejects_unknown_action_type(): + toml = """ +version = 1 +[[rules]] +name = "Broken" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "url", path = "x.md" }] +""" + with pytest.raises(ReviewContextValidationError, match="unknown action type"): + parse_review_context_toml(toml) + + +def test_parse_review_context_toml_rejects_unknown_rule_field(): + toml = """ +version = 1 +[[rules]] +name = "Broken" +bogus = true +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "x.md" }] +""" + with pytest.raises(ReviewContextValidationError, match="unknown field"): + parse_review_context_toml(toml) + + +def test_validate_review_context_main(tmp_path, capsys): + review_context_path = tmp_path / "review-context.toml" + review_context_path.write_text(_RULES_TOML) + + assert validate_review_context_main([str(review_context_path)]) == 0 + captured = capsys.readouterr() + assert "valid" in captured.out + + +def test_validate_review_context_main_failure(tmp_path, capsys): + review_context_path = tmp_path / "review-context.toml" + review_context_path.write_text( + """ +version = 1 +[[rules]] +name = "Broken" +load = [{ type = "file", path = "x.md" }] +""" + ) + + assert validate_review_context_main([str(review_context_path)]) == 1 + captured = capsys.readouterr() + assert "invalid" in captured.err From 7175003934e5108e6a7ddfe0334b640bd6605faa Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:50:03 +0200 Subject: [PATCH 07/11] Load GitHub file review context from rules --- bugbug/tools/code_review/review_context.py | 454 +++++++++++++++++++++ tests/test_code_review.py | 385 ++++++++++++++++- 2 files changed, 838 insertions(+), 1 deletion(-) create mode 100644 bugbug/tools/code_review/review_context.py diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py new file mode 100644 index 0000000000..907d2a73fe --- /dev/null +++ b/bugbug/tools/code_review/review_context.py @@ -0,0 +1,454 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Rule-based external content loading for code review. + +Fetches review-context.toml from a GitHub repository, matches changed files +against the rules, and pre-loads the referenced content. +""" + +import fnmatch +import hashlib +import json +import time +from dataclasses import dataclass +from logging import getLogger +from typing import Literal + +import httpx +from pydantic import BaseModel +from unidiff import PatchSet + +from bugbug.tools.code_review.data_types import ( + ExternalContent, + ExternalContentLoadError, + _fetch_url, +) +from bugbug.tools.code_review.review_context_schema import ( + AllFilesPredicate, + AllPredicate, + AnyFilePredicate, + AnyPredicate, + FilePredicate, + LoadFileAction, + NotPredicate, + Predicate, + ReviewContextConfig, + ReviewContextValidationError, + Rule, + RuleAction, + parse_review_context_toml, + tomllib, +) + +logger = getLogger(__name__) + +_review_context_cache: dict[ + tuple[str, str, str], tuple[float, "ReviewContextConfig"] +] = {} +_REVIEW_CONTEXT_CACHE_TTL = 300 # seconds +DEFAULT_REVIEW_CONTEXT_PATH = "review-context.toml" + + +@dataclass +class MatchedAction: + action: RuleAction + matched_rules: list[str] + + +class ExternalContentItem(BaseModel): + name: str + body: str + source_type: Literal["github_file", "phabricator_revision", "github_commit"] + source: str + action: dict + matched_rules: list[str] + trusted: bool = True + trust_reason: str + bytes: int + sha256: str + + @classmethod + def create( + cls, + *, + name: str, + body: str, + source_type: Literal["github_file", "phabricator_revision", "github_commit"], + source: str, + action: RuleAction, + matched_rules: list[str], + trust_reason: str, + ) -> "ExternalContentItem": + encoded = body.encode() + return cls( + name=name, + body=body, + source_type=source_type, + source=source, + action=_action_to_dict(action), + matched_rules=matched_rules, + trust_reason=trust_reason, + bytes=len(encoded), + sha256=hashlib.sha256(encoded).hexdigest(), + ) + + def manifest(self) -> dict: + return self.model_dump(exclude={"body"}) + + +def _github_raw_url(repo: str, branch: str, path: str) -> str: + return ( + f"https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/" + f"{path.lstrip('/')}" + ) + + +def _normalise_repo(repo: str) -> str: + return repo.lower() + + +def github_repo_allowed( + repo: str, review_context_repo: str, allowed_repos: set[str] +) -> bool: + repo = _normalise_repo(repo) + review_context_repo = _normalise_repo(review_context_repo) + if repo == review_context_repo: + return True + return any( + repo.startswith(allowed_repo) + if allowed_repo.endswith("/") + else repo == allowed_repo + for allowed_repo in allowed_repos + ) + + +def _action_to_dict(action: RuleAction) -> dict: + return {key: value for key, value in action.__dict__.items() if value is not None} + + +def parse_diff_files(diff: str) -> set[str]: + """Return the set of file paths added or modified by the diff.""" + try: + return {f.path for f in PatchSet.from_string(diff) if not f.is_removed_file} + except Exception: + files = set() + for line in diff.splitlines(): + if line.startswith("+++ b/"): + files.add(line[6:].strip()) + return files + + +def rule_matches( + rule: Rule, changed_files: set[str], bug_component: str | None = None +) -> bool: + return predicate_matches(rule.when, changed_files, bug_component) + + +def _normalise_path(path: str) -> str: + path = path.replace("\\", "/") + if path.startswith("./"): + return path[2:] + return path + + +def _file_matches(predicate: FilePredicate, path: str) -> bool: + path = _normalise_path(path) + if predicate.include and not any( + fnmatch.fnmatchcase(path, pattern) for pattern in predicate.include + ): + return False + if predicate.exclude and any( + fnmatch.fnmatchcase(path, pattern) for pattern in predicate.exclude + ): + return False + if predicate.ext and not any(path.endswith(ext) for ext in predicate.ext): + return False + return True + + + +def _unsupported_metadata_predicate() -> bool: + return False + + +def predicate_matches( + predicate: Predicate, + changed_files: set[str], + bug_component: str | None = None, +) -> bool: + match predicate: + case AllPredicate(children=children): + return all( + predicate_matches(child, changed_files, bug_component) + for child in children + ) + case AnyPredicate(children=children): + return any( + predicate_matches(child, changed_files, bug_component) + for child in children + ) + case NotPredicate(child=child): + return not predicate_matches(child, changed_files, bug_component) + case AnyFilePredicate(predicate=file_predicate): + return any(_file_matches(file_predicate, f) for f in changed_files) + case AllFilesPredicate(predicate=file_predicate): + return bool(changed_files) and all( + _file_matches(file_predicate, f) for f in changed_files + ) + case _: + return _unsupported_metadata_predicate() + raise TypeError(f"Unknown predicate type: {type(predicate).__name__}") + + +def _action_key(action: RuleAction) -> tuple: + if isinstance(action, LoadFileAction): + return ( + "file", + action.repo or "", + action.branch or "", + action.path, + ) + raise ValueError(f"Unknown action type: {action.type!r}") + + +def _action_to_content( + action: LoadFileAction, + default_repo: str, + default_branch: str, + allowed_repos: set[str], +) -> ExternalContent: + """Resolve a file action to an ExternalContent instance. + + Actions without an explicit repo load from the same GitHub repo and branch + as the rules file. Cross-repo references use their explicit repo and + optional branch, defaulting to main. + """ + path = action.path + repo = action.repo or default_repo + branch = action.branch or (default_branch if repo == default_repo else "main") + if not github_repo_allowed(repo, default_repo, allowed_repos): + raise ValueError(f"GitHub repo is not allowed for review context: {repo}") + url = _github_raw_url(repo, branch, path) + return ExternalContent(name=path, url=url, description=path) + + +def _merge_rules(base: list[Rule], extra_toml: str) -> list[Rule]: + """Merge extra rules into the base list. + + Rules in extra_toml whose name matches an existing rule replace that rule + in-place. Rules with new names are appended. Rules without a name are always + appended. Returns a new list; base is not mutated. + """ + rules = list(base) + extra = parse_review_context_toml(extra_toml).rules + if not extra: + return rules + index = {r.name: i for i, r in enumerate(rules) if r.name} + for rule in extra: + name = rule.name + if name and name in index: + rules[index[name]] = rule + else: + rules.append(rule) + return rules + + +def collect_actions( + diff: str, + config: ReviewContextConfig, + bug_component: str | None = None, + extra_context_toml: str | None = None, +) -> list[MatchedAction]: + """Return deduplicated actions matched from config against the diff. + + Merges extra_context_toml into config before matching. Evaluates each rule + against the changed files (and optional bug component). Actions are + deduplicated across rules so the same file is never fetched twice. Pure — + no I/O. + """ + if extra_context_toml: + config = ReviewContextConfig( + version=config.version, + policy=config.policy, + definitions=config.definitions, + rules=_merge_rules(config.rules, extra_context_toml), + ) + + changed_files = parse_diff_files(diff) + logger.debug( + "Matching rules against %d changed file(s): %s", + len(changed_files), + changed_files, + ) + + actions_by_key: dict[tuple, MatchedAction] = {} + actions: list[MatchedAction] = [] + + ordered_rules = sorted( + enumerate(config.rules), key=lambda item: (-item[1].priority, item[0]) + ) + for _, rule in ordered_rules: + if not rule_matches(rule, changed_files, bug_component): + continue + rule_name = rule.name + new_actions = [] + for action in rule.load: + key = _action_key(action) + if key in actions_by_key: + actions_by_key[key].matched_rules.append(rule_name) + continue + matched_action = MatchedAction(action=action, matched_rules=[rule_name]) + actions_by_key[key] = matched_action + actions.append(matched_action) + new_actions.append(action) + if new_actions: + logger.debug( + "Rule %r matched: %d action(s) queued", + rule_name, + len(new_actions), + ) + + logger.debug("Total actions to execute: %d", len(actions)) + return actions + + +async def execute_actions( + actions: list[MatchedAction], + default_repo: str, + default_branch: str, + allowed_repos: set[str], + content_overrides: dict[str, str] | None = None, +) -> list[ExternalContentItem]: + """Execute a list of actions and return loaded external content. + + file actions resolve to an ExternalContent URL and fetch the file content. + content_overrides bypasses the network fetch for matching names, allowing + callers to inject test content. + """ + results: list[ExternalContentItem] = [] + for matched_action in actions: + action = matched_action.action + if not isinstance(action, LoadFileAction): + logger.error("Unsupported review context action %s", _action_to_dict(action)) + continue + try: + item = _action_to_content( + action, default_repo, default_branch, allowed_repos + ) + if content_overrides and item.name in content_overrides: + body = content_overrides[item.name] + else: + body = await item.load() + results.append( + ExternalContentItem.create( + name=item.name, + body=body, + source_type="github_file", + source=item.url, + action=action, + matched_rules=matched_action.matched_rules, + trust_reason="github_repo_content", + ) + ) + except (ValueError, ExternalContentLoadError): + logger.error( + "Failed to load content for action %s", _action_to_dict(action) + ) + return results + + +async def _fetch_review_context( + repo: str, branch: str, review_context_path: str +) -> ReviewContextConfig: + """Fetch and parse review-context.toml, with a short in-process TTL cache.""" + now = time.time() + cache_key = (repo, branch, review_context_path) + if cache_key in _review_context_cache: + ts, config = _review_context_cache[cache_key] + if now - ts < _REVIEW_CONTEXT_CACHE_TTL: + logger.debug( + "Using cached review context from %s@%s:%s (age %.0fs)", + repo, + branch, + review_context_path, + now - ts, + ) + return config + review_context_url = _github_raw_url(repo, branch, review_context_path) + response = await _fetch_url(review_context_url) + config = parse_review_context_toml(response.text) + _review_context_cache[cache_key] = (now, config) + return config + + +async def load_external_content_for_diff( + diff: str, + review_context_repo: str, + review_context_branch: str = "main", + review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH, + extra_context_toml: str | None = None, + content_overrides: dict[str, str] | None = None, +) -> list[ExternalContentItem]: + """Fetch review-context.toml from GitHub, match against diff, return content. + + Retries the rules fetch on transient errors (via _fetch_url). The parsed + config is cached in-process with a short TTL to avoid redundant fetches + across back-to-back reviews. + """ + try: + config = await _fetch_review_context( + review_context_repo, review_context_branch, review_context_path + ) + except httpx.HTTPError: + logger.error( + "Could not fetch review context from %s@%s:%s", + review_context_repo, + review_context_branch, + review_context_path, + ) + return [] + except (tomllib.TOMLDecodeError, ReviewContextValidationError): + logger.exception( + "Could not parse review context from %s@%s:%s", + review_context_repo, + review_context_branch, + review_context_path, + ) + return [] + + try: + actions = collect_actions(diff, config, extra_context_toml=extra_context_toml) + except (tomllib.TOMLDecodeError, ReviewContextValidationError): + logger.exception("Could not parse extra review context") + return [] + + return await execute_actions( + actions, + review_context_repo, + review_context_branch, + set(config.policy.github.allowed_repos), + content_overrides, + ) + + +def external_content_manifest(content_items: list[ExternalContentItem]) -> list[dict]: + return [item.manifest() for item in content_items] + + +def format_external_content(content_items: list[ExternalContentItem]) -> str: + manifest = json.dumps(external_content_manifest(content_items), indent=2) + content = "\n\n".join( + f'\n{item.body.strip()}\n' + for item in content_items + ) + return ( + "\n\n\n" + f"{manifest}\n" + "" + "\n\n\n" + f"{content}\n" + "" + ) diff --git a/tests/test_code_review.py b/tests/test_code_review.py index 1bdd2fa4e5..79dc589a0f 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -8,11 +8,23 @@ import pytest from unidiff import PatchSet -from bugbug.tools.code_review import data_types, langchain_tools +from bugbug.tools.code_review import data_types, langchain_tools, review_context from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool +from bugbug.tools.code_review.review_context import ( + _merge_rules, + external_content_manifest, + format_external_content, + github_repo_allowed, + load_external_content_for_diff, + parse_diff_files, + rule_matches, +) from bugbug.tools.code_review.review_context_schema import ( + AnyFilePredicate, + FilePredicate, ReviewContextValidationError, + Rule, parse_review_context_toml, ) from bugbug.tools.code_review.review_context_schema import ( @@ -537,6 +549,65 @@ async def test_external_content_load_caches(): ] """ +_DIFF_MEDIA = """\ +diff --git a/dom/media/Foo.cpp b/dom/media/Foo.cpp +--- a/dom/media/Foo.cpp ++++ b/dom/media/Foo.cpp +@@ -1,1 +1,1 @@ +-old ++new +""" + +_DIFF_WEBIDL = """\ +diff --git a/dom/webidl/Foo.webidl b/dom/webidl/Foo.webidl +--- a/dom/webidl/Foo.webidl ++++ b/dom/webidl/Foo.webidl +@@ -1,1 +1,1 @@ +-old ++new +""" + + +@pytest.fixture(autouse=True) +def clear_review_context_cache(): + review_context._review_context_cache.clear() + yield + review_context._review_context_cache.clear() + + +def test_parse_diff_files(): + files = parse_diff_files(_DIFF_MEDIA) + assert files == {"dom/media/Foo.cpp"} + + +def test_github_repo_allowed_policy(): + assert github_repo_allowed("mozilla/cubeb", "example/repo", {"mozilla/"}) + assert github_repo_allowed("whatwg/html", "example/repo", {"whatwg/html"}) + assert github_repo_allowed("example/repo", "example/repo", set()) + assert not github_repo_allowed("mozilla/cubeb", "example/repo", set()) + assert not github_repo_allowed("whatwg/html-tests", "example/repo", {"whatwg/html"}) + + +def test_rule_matches_extension_and_path(): + rule = Rule( + name="test", + when=AnyFilePredicate(FilePredicate(include=["dom/media/**"], ext=[".cpp"])), + load=[], + ) + assert rule_matches(rule, {"dom/media/Foo.cpp"}) + assert not rule_matches(rule, {"dom/canvas/Foo.cpp"}) + assert not rule_matches(rule, {"dom/media/Foo.js"}) + + +def test_rule_matches_extension_only(): + rule = Rule( + name="test", + when=AnyFilePredicate(FilePredicate(ext=[".webidl"])), + load=[], + ) + assert rule_matches(rule, {"dom/webidl/Foo.webidl"}) + assert not rule_matches(rule, {"dom/webidl/Foo.js"}) + def test_parse_review_context_toml_rejects_missing_when(): toml = """ @@ -597,3 +668,315 @@ def test_validate_review_context_main_failure(tmp_path, capsys): assert validate_review_context_main([str(review_context_path)]) == 1 captured = capsys.readouterr() assert "invalid" in captured.err + + +@pytest.mark.asyncio +async def test_load_external_content_for_diff_file_load(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "---\nname: dom-media\n---\nMedia review guidelines\n" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, review_context_branch="release" + ) + + assert len(results) == 1 + assert results[0].name == ".claude/skills/dom-media.md" + assert results[0].body == "Media review guidelines\n" + assert results[0].source_type == "github_file" + assert results[0].matched_rules == ["Audio/Video C++"] + client.get.assert_any_await( + "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/review-context.toml", + timeout=30, + ) + client.get.assert_any_await( + "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/.claude/skills/dom-media.md", + timeout=30, + ) + + +@pytest.mark.asyncio +async def test_load_external_content_for_diff_no_match(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + + diff = """diff --git a/README.txt b/README.txt\n--- a/README.txt\n+++ b/README.txt\n@@ -1 +1 @@\n-a\n+b\n""" + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff(diff, review_context_repo) + + assert results == [] + + +@pytest.mark.asyncio +async def test_load_external_content_for_diff_rules_fetch_failure(): + client = MagicMock() + client.get = AsyncMock(side_effect=httpx.ConnectError("boom")) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, "mozilla-firefox/firefox" + ) + + assert results == [] + + +@pytest.mark.asyncio +async def test_load_external_content_deduplicates_actions(): + rules = """ +version = 1 + +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = ".claude/skills/cpp.md" }] + +[[rules]] +name = "B" +when = { any_file = { include = ["dom/media/**"] } } +load = [{ type = "file", path = ".claude/skills/cpp.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "C++ guidelines" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo + ) + + assert len(results) == 1 + assert results[0].matched_rules == ["A", "B"] + assert client.get.await_count == 2 + + +@pytest.mark.asyncio +async def test_load_external_content_orders_by_priority(): + rules = """ +version = 1 + +[[rules]] +name = "Low" +priority = 0 +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = ".claude/skills/low.md" }] + +[[rules]] +name = "High" +priority = 10 +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = ".claude/skills/high.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + low_response = MagicMock() + low_response.text = "low" + low_response.raise_for_status = MagicMock() + high_response = MagicMock() + high_response.text = "high" + high_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, high_response, low_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + ) + + assert [r.name for r in results] == [ + ".claude/skills/high.md", + ".claude/skills/low.md", + ] + + +@pytest.mark.asyncio +async def test_load_external_content_rejects_disallowed_github_repo(): + rules = """ +version = 1 +[[rules]] +name = "Cross repo" +when = { any_file = { ext = [".webidl"] } } +load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_WEBIDL, + review_context_repo, + ) + + assert results == [] + assert client.get.await_count == 1 + + +@pytest.mark.asyncio +async def test_load_external_content_allows_policy_prefix_repo(): + rules = """ +version = 1 +[policy.github] +allowed_repos = ["mozilla/"] +[[rules]] +name = "Cross repo" +when = { any_file = { ext = [".webidl"] } } +load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "cubeb guidelines" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_WEBIDL, + review_context_repo, + ) + + assert len(results) == 1 + assert results[0].source.endswith("/mozilla/cubeb/refs/heads/main/REVIEWING.md") + + +def test_merge_rules_appends_new(): + base = parse_review_context_toml( + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "a.md" }] +""" + ) + merged = _merge_rules( + base.rules, + """ +version = 1 +[[rules]] +name = "B" +when = { any_file = { ext = [".js"] } } +load = [{ type = "file", path = "b.md" }] +""", + ) + assert [rule.name for rule in merged] == ["A", "B"] + + +def test_merge_rules_replaces_by_name(): + base = parse_review_context_toml( + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "a.md" }] +""" + ) + merged = _merge_rules( + base.rules, + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".js"] } } +load = [{ type = "file", path = "replacement.md" }] +""", + ) + assert len(merged) == 1 + assert merged[0].load[0].path == "replacement.md" + + +def test_merge_rules_empty_extra(): + base = parse_review_context_toml( + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "a.md" }] +""" + ) + merged = _merge_rules(base.rules, "version = 1\n") + assert merged == base.rules + + +@pytest.mark.asyncio +async def test_content_override_used_instead_of_fetch(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + overrides = {".claude/skills/dom-media.md": "Override body"} + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) + + assert len(results) == 1 + assert results[0].body == "Override body" + assert client.get.await_count == 1 + + +@pytest.mark.asyncio +async def test_external_content_manifest_and_prompt_body(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + overrides = {".claude/skills/dom-media.md": "Guideline body"} + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) + + manifest = external_content_manifest(results) + assert manifest == [ + { + "name": ".claude/skills/dom-media.md", + "source_type": "github_file", + "source": "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/main/.claude/skills/dom-media.md", + "action": {"type": "file", "path": ".claude/skills/dom-media.md"}, + "matched_rules": ["Audio/Video C++"], + "trusted": True, + "trust_reason": "github_repo_content", + "bytes": len("Guideline body".encode()), + "sha256": results[0].sha256, + } + ] + prompt = format_external_content(results) + assert "" in prompt + assert "" in prompt + assert '' in prompt + assert "Guideline body" in prompt From 8828b03d7c3cc53617e73afbd8b5dc18ea74ae2d Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:51:15 +0200 Subject: [PATCH 08/11] Support metadata and revision review context rules --- bugbug/tools/code_review/review_context.py | 155 +++++++++++++++++++-- tests/test_code_review.py | 130 +++++++++++++++++ 2 files changed, 276 insertions(+), 9 deletions(-) diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py index 907d2a73fe..39791c70a1 100644 --- a/bugbug/tools/code_review/review_context.py +++ b/bugbug/tools/code_review/review_context.py @@ -9,9 +9,11 @@ against the rules, and pre-loads the referenced content. """ +import asyncio import fnmatch import hashlib import json +import re import time from dataclasses import dataclass from logging import getLogger @@ -31,20 +33,27 @@ AllPredicate, AnyFilePredicate, AnyPredicate, + BugzillaPredicate, + FetchRevisionAction, FilePredicate, LoadFileAction, NotPredicate, + PatchPredicate, Predicate, ReviewContextConfig, ReviewContextValidationError, + ReviewPredicate, Rule, RuleAction, parse_review_context_toml, tomllib, ) +from bugbug.tools.core.connection import get_http_client logger = getLogger(__name__) +_PHAB_RE = re.compile(r"D(\d+)$") + _review_context_cache: dict[ tuple[str, str, str], tuple[float, "ReviewContextConfig"] ] = {} @@ -141,6 +150,29 @@ def parse_diff_files(diff: str) -> set[str]: return files +async def get_bug_component(bug_id: int) -> str | None: + """Return 'Product::Component' for the given Bugzilla bug, or None on failure.""" + from libmozdata.bugzilla import Bugzilla + + def _fetch() -> str | None: + bugs: list[dict] = [] + Bugzilla( + bug_id, + include_fields=["product", "component"], + bughandler=lambda bug, data: data.append(bug), + bugdata=bugs, + ).get_data().wait() + if not bugs: + return None + return f"{bugs[0]['product']}::{bugs[0]['component']}" + + try: + return await asyncio.get_event_loop().run_in_executor(None, _fetch) + except Exception: + logger.warning("Could not fetch component for bug %s", bug_id) + return None + + def rule_matches( rule: Rule, changed_files: set[str], bug_component: str | None = None ) -> bool: @@ -169,8 +201,21 @@ def _file_matches(predicate: FilePredicate, path: str) -> bool: return True +def _bugzilla_matches( + predicate: BugzillaPredicate, bug_component: str | None = None +) -> bool: + if predicate.product or predicate.keywords or predicate.severity: + return False + if predicate.component: + return bug_component is not None and bug_component in predicate.component + return False -def _unsupported_metadata_predicate() -> bool: + +def _review_matches(predicate: ReviewPredicate) -> bool: + return False + + +def _patch_matches(predicate: PatchPredicate) -> bool: return False @@ -198,8 +243,12 @@ def predicate_matches( return bool(changed_files) and all( _file_matches(file_predicate, f) for f in changed_files ) - case _: - return _unsupported_metadata_predicate() + case BugzillaPredicate(): + return _bugzilla_matches(predicate, bug_component) + case ReviewPredicate(): + return _review_matches(predicate) + case PatchPredicate(): + return _patch_matches(predicate) raise TypeError(f"Unknown predicate type: {type(predicate).__name__}") @@ -211,6 +260,13 @@ def _action_key(action: RuleAction) -> tuple: action.branch or "", action.path, ) + if isinstance(action, FetchRevisionAction): + return ( + "fetch_revision", + action.revision or "", + action.repo or "", + action.hash or "", + ) raise ValueError(f"Unknown action type: {action.type!r}") @@ -235,6 +291,68 @@ def _action_to_content( return ExternalContent(name=path, url=url, description=path) +async def _fetch_revision( + action: FetchRevisionAction, +) -> tuple[str, str, Literal["phabricator_revision", "github_commit"], str, str] | None: + """Fetch the diff of another revision, either from Phabricator or GitHub.""" + revision_str = action.revision or "" + repo = action.repo or "" + commit_hash = action.hash or "" + + if revision_str: + m = _PHAB_RE.match(revision_str.strip()) + if not m: + logger.warning("fetch_revision: cannot parse revision %r", revision_str) + return None + rev_num = int(m.group(1)) + try: + from bugbug.tools.core.platforms.phabricator import get_phabricator_client + + def _load(): + phab = get_phabricator_client() + revision = phab.load_revision(rev_id=rev_num) + diff_id = revision["fields"]["diffID"] + return phab.load_raw_diff(diff_id) + + content = await asyncio.get_event_loop().run_in_executor(None, _load) + name = f"Revision D{rev_num}" + return ( + name, + content, + "phabricator_revision", + name, + "phabricator_revision", + ) + except Exception: + logger.warning("fetch_revision: failed to load Phabricator D%s", rev_num) + return None + + if repo and commit_hash: + try: + response = await get_http_client().get( + f"https://api.github.com/repos/{repo}/commits/{commit_hash}", + headers={"Accept": "application/vnd.github.diff"}, + timeout=30, + ) + response.raise_for_status() + source = f"{repo}@{commit_hash}" + return ( + f"{repo}@{commit_hash[:12]}", + response.text, + "github_commit", + source, + "github_repo_content", + ) + except httpx.HTTPError: + logger.warning("fetch_revision: failed to load %s@%s", repo, commit_hash) + return None + + logger.warning( + "fetch_revision: action has neither revision nor repo+hash: %s", action + ) + return None + + def _merge_rules(base: list[Rule], extra_toml: str) -> list[Rule]: """Merge extra rules into the base list. @@ -324,15 +442,29 @@ async def execute_actions( ) -> list[ExternalContentItem]: """Execute a list of actions and return loaded external content. - file actions resolve to an ExternalContent URL and fetch the file content. - content_overrides bypasses the network fetch for matching names, allowing - callers to inject test content. + fetch_revision actions fetch the raw diff of a Phabricator revision or + GitHub commit. file actions resolve to an ExternalContent URL and + fetch the file content. content_overrides bypasses the network fetch for + matching names, allowing callers to inject test content. """ results: list[ExternalContentItem] = [] for matched_action in actions: action = matched_action.action - if not isinstance(action, LoadFileAction): - logger.error("Unsupported review context action %s", _action_to_dict(action)) + if isinstance(action, FetchRevisionAction): + result = await _fetch_revision(action) + if result: + name, body, source_type, source, trust_reason = result + results.append( + ExternalContentItem.create( + name=name, + body=body, + source_type=source_type, + source=source, + action=action, + matched_rules=matched_action.matched_rules, + trust_reason=trust_reason, + ) + ) continue try: item = _action_to_content( @@ -389,6 +521,7 @@ async def load_external_content_for_diff( review_context_repo: str, review_context_branch: str = "main", review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH, + bug_id: int | None = None, extra_context_toml: str | None = None, content_overrides: dict[str, str] | None = None, ) -> list[ExternalContentItem]: @@ -419,8 +552,12 @@ async def load_external_content_for_diff( ) return [] + bug_component: str | None = None + if bug_id is not None: + bug_component = await get_bug_component(bug_id) + try: - actions = collect_actions(diff, config, extra_context_toml=extra_context_toml) + actions = collect_actions(diff, config, bug_component, extra_context_toml) except (tomllib.TOMLDecodeError, ReviewContextValidationError): logger.exception("Could not parse extra review context") return [] diff --git a/tests/test_code_review.py b/tests/test_code_review.py index 79dc589a0f..d87d506d9f 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -15,6 +15,7 @@ _merge_rules, external_content_manifest, format_external_content, + get_bug_component, github_repo_allowed, load_external_content_for_diff, parse_diff_files, @@ -609,6 +610,35 @@ def test_rule_matches_extension_only(): assert not rule_matches(rule, {"dom/webidl/Foo.js"}) +def test_rule_bugzilla_component_fails_closed_without_component(): + config = parse_review_context_toml(_RULES_TOML) + rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") + assert not rule_matches(rule, set(), bug_component=None) + + +def test_rule_bugzilla_component_matches(): + config = parse_review_context_toml(_RULES_TOML) + rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") + assert rule_matches(rule, set(), bug_component="Core::DOM: Web Audio") + assert not rule_matches(rule, set(), bug_component="Core::Layout") + + +@pytest.mark.asyncio +async def test_get_bug_component(): + with patch("libmozdata.bugzilla.Bugzilla") as bugzilla_cls: + instance = MagicMock() + bugzilla_cls.return_value = instance + + def get_data(): + handler = bugzilla_cls.call_args.kwargs["bughandler"] + data = bugzilla_cls.call_args.kwargs["bugdata"] + handler({"product": "Core", "component": "DOM: Web Audio"}, data) + return MagicMock(wait=MagicMock()) + + instance.get_data.side_effect = get_data + assert await get_bug_component(123) == "Core::DOM: Web Audio" + + def test_parse_review_context_toml_rejects_missing_when(): toml = """ version = 1 @@ -980,3 +1010,103 @@ async def test_external_content_manifest_and_prompt_body(): assert "" in prompt assert '' in prompt assert "Guideline body" in prompt + + +@pytest.mark.asyncio +async def test_extra_context_toml_appended(): + review_context_repo = "mozilla-firefox/firefox" + base_rules = """ +version = 1 +[[rules]] +name = "Base" +when = { any_file = { ext = [".js"] } } +load = [{ type = "file", path = "base.md" }] +""" + extra_rules = """ +version = 1 +[[rules]] +name = "Extra" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "extra.md" }] +""" + rules_response = MagicMock() + rules_response.text = base_rules + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "extra" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + extra_context_toml=extra_rules, + ) + + assert [result.name for result in results] == ["extra.md"] + + +@pytest.mark.asyncio +async def test_extra_context_toml_replaces_by_name(): + review_context_repo = "mozilla-firefox/firefox" + extra_rules = """ +version = 1 +[[rules]] +name = "Audio/Video C++" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "replacement.md" }] +""" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "replacement" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + extra_context_toml=extra_rules, + ) + + assert [result.name for result in results] == ["replacement.md"] + + +@pytest.mark.asyncio +async def test_load_external_content_fetches_phabricator_revision(): + rules = """ +version = 1 +[[rules]] +name = "Revision" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "fetch_revision", revision = "D123" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + + phabricator = MagicMock() + phabricator.load_revision.return_value = {"fields": {"diffID": 456}} + phabricator.load_raw_diff.return_value = "diff content" + + with ( + patch.object(data_types, "get_http_client", return_value=client), + patch( + "bugbug.tools.core.platforms.phabricator.get_phabricator_client", + return_value=phabricator, + ), + ): + results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo) + + assert len(results) == 1 + assert results[0].name == "Revision D123" + assert results[0].body == "diff content" + assert results[0].source_type == "phabricator_revision" From c2bfec2eccc46ca93f1c8aee56abd885b7f62329 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:51:33 +0200 Subject: [PATCH 09/11] Inject review context into code review prompts --- bugbug/tools/code_review/agent.py | 49 +++++++++++++-- services/mcp/src/bugbug_mcp/server.py | 90 ++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 8 deletions(-) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index b21c8bf563..d2006c1502 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -191,25 +191,30 @@ def create(cls, **kwargs): def count_tokens(self, text): return len(self._tokenizer.encode(text)) - def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str: + def generate_initial_prompt( + self, patch: Patch, patch_summary: str, external_context: str = "" + ) -> str: created_before = patch.date_created if self.is_experiment_env else None return FIRST_MESSAGE_TEMPLATE.format( patch=format_patch_set(patch.patch_set), patch_summarization=patch_summary, + external_context=external_context, comment_examples=self._get_comment_examples(patch, created_before), approved_examples=self._get_generated_examples(patch, created_before), ) async def generate_review_comments( - self, patch: Patch, patch_summary: str + self, patch: Patch, patch_summary: str, external_context: str = "" ) -> list[GeneratedReviewComment]: try: async for chunk in self.agent.astream( { "messages": [ HumanMessage( - self.generate_initial_prompt(patch, patch_summary) + self.generate_initial_prompt( + patch, patch_summary, external_context + ) ), ] }, @@ -223,14 +228,47 @@ async def generate_review_comments( return result["structured_response"].comments - async def run(self, patch: Patch) -> CodeReviewToolResponse: + async def run( + self, + patch: Patch, + review_context_repo: Optional[str] = None, + review_context_branch: str = "main", + extra_context_toml: Optional[str] = None, + content_overrides: Optional[dict[str, str]] = None, + ) -> CodeReviewToolResponse: if self.count_tokens(patch.raw_diff) > 21000: raise LargeDiffError("The diff is too large") patch_summary = self.patch_summarizer.run(patch) + external_context = "" + external_content_manifest = [] + if review_context_repo: + from bugbug.tools.code_review.review_context import ( + external_content_manifest as build_external_content_manifest, + ) + from bugbug.tools.code_review.review_context import ( + format_external_content, + load_external_content_for_diff, + ) + + bug_id = getattr(patch, "bug_id", None) + content_items = await load_external_content_for_diff( + patch.raw_diff, + review_context_repo, + review_context_branch=review_context_branch, + bug_id=bug_id, + extra_context_toml=extra_context_toml, + content_overrides=content_overrides, + ) + if content_items: + external_context = format_external_content(content_items) + external_content_manifest = build_external_content_manifest( + content_items + ) + unfiltered_suggestions = await self.generate_review_comments( - patch, patch_summary + patch, patch_summary, external_context ) if not unfiltered_suggestions: logger.info("No suggestions were generated") @@ -247,6 +285,7 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse: details={ "model": self._agent_model_name, "num_unfiltered_suggestions": len(unfiltered_suggestions), + "external_content": external_content_manifest, }, ) diff --git a/services/mcp/src/bugbug_mcp/server.py b/services/mcp/src/bugbug_mcp/server.py index 7cc99d09d5..8a842352d1 100644 --- a/services/mcp/src/bugbug_mcp/server.py +++ b/services/mcp/src/bugbug_mcp/server.py @@ -1,6 +1,7 @@ """MCP server for Firefox Development.""" import functools +import json import logging import os from pathlib import Path @@ -18,6 +19,10 @@ LOCAL_SYSTEM_PROMPT_TEMPLATE, SYSTEM_PROMPT_TEMPLATE, ) +from bugbug.tools.code_review.review_context import ( + format_external_content, + load_external_content_for_diff, +) from bugbug.tools.core.platforms.bugzilla import SanitizedBug from bugbug.tools.core.platforms.phabricator import ( PhabricatorPatch, @@ -50,6 +55,10 @@ async def _patch_review_impl( patch_url: str | None, diff: str | None, commit_message: str | None, + review_context_repo: str | None, + review_context_branch: str, + extra_context_toml: str | None, + content_overrides: str | None, ) -> str: if diff: patch = LocalPatch(diff, commit_message=commit_message or "") @@ -70,7 +79,33 @@ async def _patch_review_impl( prompt_template = LOCAL_SYSTEM_PROMPT_TEMPLATE if diff else SYSTEM_PROMPT_TEMPLATE system_prompt = prompt_template.format(target_software=tool.target_software) patch_summary = "" if diff else tool.patch_summarizer.run(patch) - initial_prompt = tool.generate_initial_prompt(patch, patch_summary) + + parsed_overrides: dict[str, str] | None = None + if content_overrides: + try: + parsed_overrides = json.loads(content_overrides) + except json.JSONDecodeError: + pass + + external_context = "" + if review_context_repo: + bug_id = getattr(patch, "bug_id", None) if patch.has_bug else None + content_items = await load_external_content_for_diff( + patch.raw_diff, + review_context_repo, + review_context_branch=review_context_branch or "main", + bug_id=bug_id, + extra_context_toml=( + extra_context_toml if extra_context_toml != "None" else None + ), + content_overrides=parsed_overrides, + ) + if content_items: + external_context = format_external_content(content_items) + + initial_prompt = tool.generate_initial_prompt( + patch, patch_summary, external_context + ) return system_prompt + "\n\n" + initial_prompt @@ -88,9 +123,46 @@ async def patch_review( default=None, description="Commit message for the local diff (optional, used to extract bug ID, title, and Differential Revision URL).", ), + review_context_repo: str | None = Field( + default=None, + description=( + "GitHub repository containing review-context.toml, in org/project form. " + "When omitted, no rule-based context is loaded." + ), + ), + review_context_branch: str = Field( + default="main", + description="GitHub branch to read review-context.toml and same-repo content from.", + ), + extra_context_toml: str | None = Field( + default=None, + description=( + "TOML content to merge into the fetched review-context.toml. " + "Rules are matched by name: same name replaces, new name appends. " + "Use this to test new or modified rules before landing." + ), + ), + content_overrides: str | None = Field( + default=None, + description=( + "JSON object mapping content name (file path) to body text. Overrides " + "the fetched content for matching items, or provides content for new " + "items referenced by extra_context_toml. Use this to test changes before " + "landing. Example: " + '{".claude/skills/dom-audio.md": "My guidelines."}' + ), + ), ) -> str: """Review a code patch from Phabricator or a raw local diff.""" - return await _patch_review_impl(patch_url, diff, commit_message) + return await _patch_review_impl( + patch_url, + diff, + commit_message, + review_context_repo, + review_context_branch, + extra_context_toml, + content_overrides, + ) @mcp.tool() @@ -98,9 +170,21 @@ async def patch_review_tool( patch_url: str | None = None, diff: str | None = None, commit_message: str | None = None, + review_context_repo: str | None = None, + review_context_branch: str = "main", + extra_context_toml: str | None = None, + content_overrides: str | None = None, ) -> str: """Build the patch review prompt. Use diff= for local diffs, patch_url= for Phabricator.""" - return await _patch_review_impl(patch_url, diff, commit_message) + return await _patch_review_impl( + patch_url, + diff, + commit_message, + review_context_repo, + review_context_branch, + extra_context_toml, + content_overrides, + ) @mcp.resource( From a2fee60123825bdf1c0be890b4db5789e0dafd6b Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:52:06 +0200 Subject: [PATCH 10/11] Document review context rules --- docs/code-review-context-example.toml | 232 +++++++++++ docs/code-review-skills.md | 395 +++++++++++++++++++ tests/test_code_review.py | 548 +++++++++++++++----------- 3 files changed, 946 insertions(+), 229 deletions(-) create mode 100644 docs/code-review-context-example.toml create mode 100644 docs/code-review-skills.md diff --git a/docs/code-review-context-example.toml b/docs/code-review-context-example.toml new file mode 100644 index 0000000000..b6083d87d1 --- /dev/null +++ b/docs/code-review-context-example.toml @@ -0,0 +1,232 @@ +# Proposed review-context.toml external content rules. +# +# This is a design sketch for the next rule language, not the format currently +# accepted by the loader. +# +# This file is intentionally verbose. It shows the major language features and +# comments on what each one does. + +# Version is required so the parser can reject files written for a future +# incompatible format. +version = 1 + +# GitHub loads are restricted. The current review-context repo is always +# allowed. Entries ending in `/` allow every repo under that organization; +# entries in `org/repo` form allow a single repo. +[policy.github] +allowed_repos = [ + "mozilla/", + "web-platform-tests/wpt", + "whatwg/html", +] + +# Definitions are reusable predicate fragments. They are expanded structurally, +# as if the referenced table had been written inline. The parser should reject +# unknown refs, cycles, and refs to the wrong predicate kind. +[definitions.files.media_impl] +include = ["dom/media/**"] +exclude = [ + "dom/media/test/**", + "dom/media/gtest/**", + "dom/media/webaudio/test/**", + "dom/media/**/Generated*.cpp", +] +ext = [".cpp", ".h", ".mm"] + +[definitions.files.frontend_ui] +include = [ + "browser/components/**", + "browser/base/content/**", + "toolkit/components/**", + "toolkit/content/**", +] +exclude = [ + "browser/components/**/test/**", + "toolkit/components/**/test/**", + "**/*.snap", +] +ext = [".js", ".mjs", ".sys.mjs", ".html", ".xhtml", ".css", ".scss"] + +[definitions.files.docs] +ext = [".md", ".rst"] + +# Metadata predicates can be reusable too. A predicate can only reference a +# definition from its own namespace: `bugzilla` refs `definitions.bugzilla.*`, +# `review` refs `definitions.review.*`, and `patch` refs +# `definitions.patch.*`. +[definitions.bugzilla.dom_api] +component = ["Core::DOM: Core & HTML", "Core::DOM: Events"] + +[definitions.review.frontend_reviewers] +# `reviewer` matches both blocking and non-blocking reviewers. +reviewer = ["frontend-reviewers"] + +[definitions.review.dom_reviewers] +reviewer = ["dom-reviewers", "webidl-reviewers"] + +[definitions.review.blocking_media_reviewers] +blocking_reviewer = ["padenot", "media-reviewers"] + +[definitions.patch.backport_repositories] +is_backport = true +repository = ["firefox-beta", "firefox-release", "firefox-esr"] + +# `priority` is optional. It defaults to 0 and only controls ordering among +# loaded external content blocks. Higher priority loads first. Ties preserve +# declaration order. All external content is injected before the patch diff. +[[rules]] +name = "Media: implementation" +description = "Load media architecture and realtime-safety guidance for C++ media changes." +owners = ["media-reviewers"] +priority = 80 + +# `all` means every child predicate must match. +# `any_file` means at least one changed file must match the file predicate. +# The file predicate uses the reusable definition above. +when = { all = [ + { any_file = { ref = "files.media_impl" } }, +] } + +# `kind` is an optional free-form audit/prompt hint. Load failures are +# non-fatal: the loader logs an error and continues with remaining loads. +load = [ + { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" }, + { type = "file", repo = "mozilla/cubeb", branch = "main", path = "docs/realtime-programming.md", kind = "policy" }, +] + + +[[rules]] +name = "Firefox frontend: JS, HTML, CSS" +description = "Load frontend conventions for browser chrome, toolkit UI, and component styles." +owners = ["frontend-reviewers"] +priority = 60 + +# `any` means at least one child predicate must match. +# This rule fires for frontend UI files when a frontend reviewer/group is +# assigned. +when = { all = [ + { any_file = { ref = "files.frontend_ui" } }, + { review = { ref = "review.frontend_reviewers" } }, +] } + +load = [ + { type = "file", path = ".claude/review/firefox-frontend.md", kind = "style-guide" }, + { type = "file", path = "browser/docs/frontend/performance.md", kind = "performance" }, +] + + +[[rules]] +name = "DOM API implementation" +description = "Load WebIDL and DOM binding guidance for changes that expose or implement web APIs." +owners = ["dom-reviewers"] +priority = 90 + +# Boolean composition can be nested. This rule fires for: +# - any WebIDL file under dom/, +# - C++/header changes under dom/ when Bugzilla says this is a DOM bug, or +# - DOM binding implementation files when a DOM/WebIDL reviewer is assigned. +when = { any = [ + { any_file = { include = ["dom/**"], ext = [".webidl"] } }, + { all = [ + { any_file = { include = ["dom/**"], ext = [".cpp", ".h"] } }, + { bugzilla = { ref = "bugzilla.dom_api" } }, + ] }, + { all = [ + { any_file = { include = ["dom/bindings/**"], ext = [".cpp", ".h", ".py"] } }, + { review = { ref = "review.dom_reviewers" } }, + ] }, +] } + +load = [ + { type = "file", path = ".claude/review/dom-api.md", kind = "api-contract" }, + { type = "file", path = "dom/docs/webidl-bindings.md", kind = "reference" }, +] + + +[[rules]] +name = "Docs-only change" +description = "Load writing and API documentation guidance when every changed file is documentation." +owners = ["docs-reviewers"] + +# `all_files` means every changed file must match the file predicate. This is +# useful for patch-shape rules such as docs-only, tests-only, or generated-only. +# This rule intentionally omits priority; it defaults to 0. +when = { all_files = { ref = "files.docs" } } + +load = [ + { type = "file", path = ".claude/review/docs.md", kind = "style-guide" }, +] + + +[[rules]] +name = "Security-sensitive DOM changes" +description = "Load security guidance for non-test security-relevant DOM changes." +owners = ["sec-reviewers", "dom-reviewers"] +priority = 100 + +# `not` negates its child predicate. This rule fires when some DOM security +# file changed, but the patch is not test-only. +when = { all = [ + { any_file = { include = ["dom/security/**"], ext = [".cpp", ".h", ".webidl"] } }, + { not = { all_files = { include = ["dom/security/test/**"] } } }, +] } + +load = [ + { type = "file", path = ".claude/review/security-sensitive.md", kind = "policy" }, +] + + +[[rules]] +name = "Backport risk checklist" +description = "Load release-branch guidance for patches targeting a backport repository or branch." +owners = ["release-managers"] +priority = 70 + +# `patch.repository` and `patch.is_backport` come from trusted patch metadata. +# For GitHub entrypoints, repository is org/project. For Phabricator reviews, +# repository is the target Phabricator repository exposed by the Mozilla MCP. +when = { all = [ + { patch = { ref = "patch.backport_repositories" } }, +] } + +load = [ + { type = "file", path = ".claude/review/backport-risk.md", kind = "release-policy" }, +] + + +[[rules]] +name = "Blocking reviewer guidance" +description = "Load reviewer-specific expectations when a blocking media reviewer is assigned." +owners = ["media-reviewers"] +priority = 85 + +# Review identities are Phabricator emails or review groups. +# `blocking_reviewer` narrows to reviewers whose approval is required before +# landing. Those reviewers also match ordinary `reviewer` predicates. +when = { all = [ + { any_file = { ref = "files.media_impl" } }, + { review = { ref = "review.blocking_media_reviewers" } }, +] } + +# Dedupe is global per review. If another matched rule already loaded +# media-architecture.md, this action records this rule in the manifest but does +# not fetch or inject the same source a second time. +load = [ + { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" }, + { type = "file", path = ".claude/review/media-reviewer-checklist.md", kind = "checklist" }, +] + + +[[rules]] +name = "Related revision context" +description = "Load another reviewed diff when a patch depends on behavior from a known revision." +owners = ["media-reviewers"] + +# `fetch_revision` injects a trusted diff as context. Phabricator revisions are +# fetched through the platform client/MCP path; GitHub commits use repo+hash. +when = { any_file = { include = ["dom/media/mediacontrol/**"] } } + +load = [ + { type = "fetch_revision", revision = "D303229", kind = "related-diff" }, + { type = "fetch_revision", repo = "mozilla/cubeb", hash = "0123456789abcdef0123456789abcdef01234567", kind = "related-diff" }, +] diff --git a/docs/code-review-skills.md b/docs/code-review-skills.md new file mode 100644 index 0000000000..cabbf3ab9b --- /dev/null +++ b/docs/code-review-skills.md @@ -0,0 +1,395 @@ +# Code Review Skills + +The code review agent can load external knowledge at review time, injecting it +as context before the LLM sees the patch. This lets domain experts encode their +knowledge once, in a form the agent uses automatically whenever it reviews +relevant code. + +## How it works + +When the agent reviews a patch, it: + +1. Fetches `review-context.toml` from the root of a GitHub repository passed to + the review entrypoint as `review_context_repo` (and optionally `review_context_branch`). +2. Parses the changed files from the diff. +3. Matches each rule against the changed files (and optionally the patch's + associated Bugzilla component, fetched from BMO). +4. For each matched rule, fetches the referenced skill or documentation files. +5. Injects the content as `` blocks inside an `` + section in the review prompt, before the patch. + +Pass the GitHub repository containing `review-context.toml` to `patch_review`, +using `org/project` syntax. `review_context_branch` defaults to `main`; pass another +branch when reviewing a backport or another release branch. + +``` +/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \ + review_context_repo="mozilla-firefox/firefox" \ + review_context_branch="main" +``` + +--- + +## For bugbug developers + +### Key files + +| File | Purpose | +| --------------------------------------------------- | ------------------------------------------------------------------ | +| `bugbug/tools/code_review/review_context_schema.py` | Stdlib-only rule schema and validator CLI | +| `bugbug/tools/code_review/review_context.py` | Rule engine: parses rules, matches files, loads external content | +| `bugbug/tools/code_review/data_types.py` | `Skill` model with async HTTP fetch and frontmatter stripping | +| `bugbug/tools/code_review/agent.py` | Calls `load_external_content_for_diff` in `run()`, formats context | +| `bugbug/tools/code_review/prompts/system.md` | System prompt template | +| `bugbug/tools/code_review/prompts/first_message.md` | First user message template (contains `{external_context}` slot) | + +### Prompt templates + +Both `system.md` and `first_message.md` are plain Markdown files loaded at +import time. Edit them directly — no Python changes needed. They use Python +`str.format()` placeholders (`{target_software}`, `{patch}`, +`{external_context}`, etc.). + +The `{external_context}` slot in `first_message.md` is filled by +`format_external_content()` from `review_context.py` when rules match. It is an +empty string otherwise, so no blank section appears in the prompt. + +Loaded content is preceded by an `` block. The +manifest excludes the body text and records each item's name, source, source +type, matched rules, trust reason, byte count, and SHA-256 digest. The same +manifest is also returned in `CodeReviewToolResponse.details["external_content"]`. + +All currently injected external content is trusted: + +- `file` content is fetched from a human-reviewed GitHub repository. +- `fetch_revision` content comes from configured review platforms. +- GitHub `file` loads are restricted by the repository allow-list in + `review-context.toml`; the repository containing the rules file is always + allowed. + +### Caching + +The parsed rules file is cached in-process for a short TTL, keyed by +`review_context_repo`, `review_context_branch`, and rules path. External content +is fetched for each review so the audit manifest reflects exactly what was +injected for that review. The shared `httpx.AsyncClient` from +`get_http_client()` handles connection reuse. + +### Metadata predicates + +Implemented via `libmozdata.Bugzilla` with `include_fields=["product", "component"]`. +The patch's associated bug ID is read from `patch.bug_id` (available on +`PhabricatorPatch`; other patch types pass `None`, which fails closed). The +component string compared against the rule is `"Product::Component"`. + +The schema accepts `bugzilla`, `review`, and `patch` predicates. Runtime +matching currently implements `bugzilla.component`; `bugzilla.product`, +`bugzilla.keywords`, `bugzilla.severity`, `review.*`, and `patch.*` are parsed +and validated but fail closed until trusted metadata is wired into the matcher. + +### `fetch_revision` + +Fetches the raw diff of another revision for use as context. Supports: + +- **Phabricator**: `{ type = "fetch_revision", revision = "D12345" }` — calls + the Phabricator API via `get_phabricator_client()`. +- **GitHub**: `{ type = "fetch_revision", repo = "org/repo", hash = "abc123" }` + — uses the GitHub API with `Accept: application/vnd.github.diff`. + +--- + +## For Mozilla developers: authoring skills and rules + +### Overview + +You add two things to your repository: + +- One or more **skill or documentation files** — Markdown or RST documents + with expert knowledge, guidelines, or relevant specs. These can be existing + docs already in the tree, or new files written specifically as review guides. +- Entries in **`review-context.toml`** at the repo root — rules that say which + files to load when certain code paths are touched. + +The review agent picks these up automatically on the next review of a matching +patch. + +### Skill file format + +Skill files are Markdown (`.md`); documentation files referenced as context can +also be RST (`.rst`). All formats accept optional YAML frontmatter. The +frontmatter is stripped before injection; only the body reaches the LLM. See an +[existing example on Searchfox](https://searchfox.org/mozilla-central/source/.claude/skills/firefox-desktop-frontend/SKILL.md). + +```markdown +--- +name: dom-audio +description: Web Audio API review guidance +--- + +## Web Audio review checklist + +- `AudioContext` must not be created on a background thread. +- `AudioNode` lifetimes are graph-managed; do not hold raw pointers. +- Prefer `MediaStreamTrack` over raw PCM where the spec allows it. +``` + +Conventional location for dedicated skill files: + +``` +.claude/skills//SKILL.md +``` + +### `review-context.toml` format + +`review-context.toml` is parsed as TOML and then validated against the rule +schema before any rule is matched. Unknown rule fields, unknown action types, +and malformed `fetch_revision` actions are rejected. + +You can validate a rules file without running a review: + +``` +bugbug-validate-review-context review-context.toml +``` + +The validator implementation lives in +`bugbug/tools/code_review/review_context_schema.py` and intentionally uses only the +Python standard library. A target repository can either run the bugbug CLI or +copy that single file into its own tests to validate `review-context.toml`. +Complete TOML examples in this document are marked as `toml review-context` and +validated by bugbug's test suite to keep the docs and parser from drifting. +For a complete validated rules file, see `docs/code-review-context-example.toml`. + +```toml review-context +version = 1 + +[[rules]] +name = "DOM: Web Audio C++" +when = { any_file = { include = ["dom/media/webaudio/**"], ext = [".cpp", ".h"] } } +load = [ + { type = "file", path = ".claude/skills/dom-audio/SKILL.md" }, +] +``` + +Multiple rules can fire for a single patch. Actions are deduplicated across +rules, so the same file is never fetched twice. + +All loaded external content is injected before the patch diff. `priority` is +optional and only affects ordering among loaded external content blocks: +matched rules are ordered by descending priority, defaulting to `0`; ties keep +declaration order. Load entries keep their order within a rule. If the same +resolved source is requested more than once, it is loaded once at its first +ordered position and the audit manifest records every matched rule that +requested it. + +Load failures are non-fatal. The loader logs an error and continues with the +remaining loads. + +### Rule matching + +Each rule has a required `when` predicate. Boolean predicates compose matching: +`all` requires every child to match, `any` requires at least one child to match, +and `not` negates its child. + +Path matching is defined over repository-relative paths from the patch, not +over local VCS checkout paths. Paths are normalized to POSIX-style separators +(`/`) with no leading `./`. File predicates use Python `fnmatch.fnmatchcase`, so +matching is case-sensitive on every platform, including Windows and macOS. That +keeps rule behavior tied to Git paths rather than host filesystem casing. + +The supported glob syntax is Python `fnmatchcase`: `*`, `?`, and character +classes such as `[ch]` and `[!0-9]`. In Python `fnmatchcase`, `*` can match +`/`, so patterns like `dom/media/**` match everything below that prefix. + +File quantifiers are explicit: + +```toml +# At least one changed file must match all predicates in the object. +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } + +# Every changed file must match all predicates in the object. +when = { all_files = { ext = [".md", ".rst"] } } +``` + +`any_file` is the normal implementation-code case. `all_files` is for +patch-shape rules such as docs-only, tests-only, or generated-only changes. + +### Reusable predicates + +Named predicate definitions let you reuse path sets or metadata groups across +rules. Definitions are referenced explicitly with `ref` and expanded +structurally, as if the referenced table had been written inline. + +```toml review-context +version = 1 + +[definitions.files.media_impl] +include = ["dom/media/**"] +exclude = ["dom/media/test/**", "dom/media/gtest/**"] +ext = [".cpp", ".h", ".mm"] + +[[rules]] +name = "Media: implementation" +when = { any_file = { ref = "files.media_impl" } } +load = [ + { type = "file", path = ".claude/skills/dom-media/SKILL.md" }, +] +``` + +Definitions are validated like inline predicates. References are namespace +checked: `files.*` refs are valid only in file predicates, `bugzilla.*` only in +Bugzilla predicates, `review.*` only in review predicates, and `patch.*` only +in patch predicates. Unknown references and references combined with inline +fields are schema errors. + +### Metadata trigger fields + +File predicates are separate from external metadata predicates. The parser +validates these field names and their value types. At runtime, only +`bugzilla.component` currently affects matching; the other fields are accepted +by the schema for forward-compatible rule files but fail closed. + +| Namespace | Field | Type | Runtime behavior | +| ---------- | ------------------- | --------------- | -------------------------------------------------------------- | +| `bugzilla` | `component` | list of strings | Matches exact `Product::Component` from Bugzilla | +| `bugzilla` | `product` | list of strings | Parsed and validated; currently fails closed | +| `bugzilla` | `keywords` | list of strings | Parsed and validated; currently fails closed | +| `bugzilla` | `severity` | list of strings | Parsed and validated; currently fails closed | +| `review` | `author` | list of strings | Parsed and validated; currently fails closed | +| `review` | `reviewer` | list of strings | Parsed and validated; currently fails closed | +| `review` | `blocking_reviewer` | list of strings | Parsed and validated; currently fails closed | +| `patch` | `repository` | list of strings | Parsed and validated; currently fails closed | +| `patch` | `is_backport` | boolean | Parsed and validated; currently fails closed | + +When review metadata matching is wired up, `reviewer` should match both +blocking and non-blocking reviewers. Use `blocking_reviewer` only when the rule +specifically needs a reviewer whose approval is required before landing. + +Example — load a skill when C++ or HTML files change under `dom/media/` (which +catches both implementation files and their tests): + +```toml review-context +version = 1 + +[[rules]] +name = "DOM: Media C++ and tests" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h", ".html"] } } +load = [ + { type = "file", path = ".claude/skills/dom-media/SKILL.md" }, +] +``` + +### Action types + +#### `file` — fetch a file from a GitHub repository + +```toml +# File from the same repo and branch as review-context.toml (default) +{ type = "file", path = ".claude/skills/dom-audio/SKILL.md" } + +# File from another repo (e.g., cubeb's real-time programming guidelines) +{ type = "file", path = ".claude/skills/real-time-programming/SKILL.md", + repo = "mozilla/cubeb" } + +# Explicit branch +{ type = "file", path = ".claude/skills/dom-audio/SKILL.md", + repo = "mozilla-firefox/firefox", branch = "main" } +``` + +Constructs a raw GitHub URL: +`https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/{path}`. +When `repo` is omitted, the repo is the `review_context_repo` passed to the review +entrypoint. Same-repo content uses `review_context_branch`; cross-repo content defaults +to `main` unless the action specifies `branch`. + +Any Markdown or RST file in the repository can be referenced — skill files, +architecture docs, READMEs, spec notes, etc. + +`kind` is optional on `file` and `fetch_revision` actions. It is a free-form +audit hint recorded in the action object in the external content manifest; the +loader does not interpret it. + +#### GitHub repository allow-list + +GitHub loads are restricted to keep review context auditable: + +- The repository containing `review-context.toml` is always allowed. +- Entries ending in `/`, such as `mozilla/`, allow every repo under that + organization. +- Entries in `org/repo` form allow a single repo. + +```toml +[policy.github] +allowed_repos = [ + "mozilla/", + "web-platform-tests/wpt", + "whatwg/html", +] +``` + +Repository names are normalized to lowercase before checking the policy. If a +`load` action references a disallowed repository, the loader does not fetch it. +The failure is non-fatal: it logs an error, skips that load, and continues with +the remaining loads. Skipped loads are not included in the external content +manifest because no content was injected. + +#### `fetch_revision` — diff of another revision as context + +Injects the raw diff of a related revision — useful for providing context about +a stack parent or a related patch. + +```toml +# Phabricator revision +{ type = "fetch_revision", revision = "D12345" } + +# GitHub commit (use a git commit hash — Firefox moved from Mercurial to GitHub) +{ type = "fetch_revision", repo = "padenot/cubeb", hash = "abc123def456" } +``` + +### Testing your rules and skills + +Pass your work-in-progress rules and skill files directly to `patch_review` +via the MCP. Rules are merged by name into the fetched `review-context.toml` +(replacing a rule with the same name, or appending if new). Skill content is +injected directly, bypassing the network fetch for matching names. + +From Claude Code, trigger a review with your local files: + +``` +/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \ + review_context_repo="mozilla-firefox/firefox" \ + extra_context_toml="$(cat review-context.toml)" \ + content_overrides='{".claude/skills/dom-audio/SKILL.md": "skill content here"}' +``` + +`content_overrides` is a JSON object (as a string). For multi-line skill files, +build the JSON separately: + +```bash +python3 -c "import json,sys; print(json.dumps({'.claude/skills/dom-audio/SKILL.md': open('.claude/skills/dom-audio/SKILL.md').read()}))" +``` + +Then paste the output as the `content_overrides` value. + +This simulates the state as if your changes had already landed — no push +required. If a skill is missing, verify the rule conditions match the patch's +changed files (`any_file` / `all_files` against the actual `+++ b/` paths in +the diff). + +### Gotchas + +**Rules are evaluated against the post-patch file list.** Paths in file predicates +should match the `+++ b/` filenames in the diff. + +**Glob patterns use `fnmatchcase` semantics.** `dom/media/**` matches +`dom/media/foo/bar.cpp`. + +**Frontmatter is stripped automatically.** You can put metadata (name, +description, owner) in frontmatter without it polluting the injected context. + +**Load failures are non-fatal.** The review proceeds without that content, and +the failure is logged at ERROR level and captured by Sentry. Check the URL +manually if content you expect is not appearing. + +**Bugzilla component rules require the patch to have an associated bug.** On +non-Phabricator patches or patches without a bug link, Bugzilla component rules +never match. diff --git a/tests/test_code_review.py b/tests/test_code_review.py index d87d506d9f..260e291177 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -1,6 +1,8 @@ import asyncio import logging import os +import re +from pathlib import Path from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock, patch @@ -9,7 +11,11 @@ from unidiff import PatchSet from bugbug.tools.code_review import data_types, langchain_tools, review_context -from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter +from bugbug.tools.code_review.data_types import ( + ExternalContent, + Skill, + _strip_frontmatter, +) from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool from bugbug.tools.code_review.review_context import ( _merge_rules, @@ -19,14 +25,15 @@ github_repo_allowed, load_external_content_for_diff, parse_diff_files, + parse_review_context_toml, rule_matches, ) from bugbug.tools.code_review.review_context_schema import ( AnyFilePredicate, + BugzillaPredicate, FilePredicate, ReviewContextValidationError, Rule, - parse_review_context_toml, ) from bugbug.tools.code_review.review_context_schema import ( main as validate_review_context_main, @@ -63,6 +70,17 @@ async def fetch(path): return fetch +def extract_review_context_examples(markdown: str) -> list[tuple[int, str]]: + examples = [] + fence_re = re.compile(r"^```(?P[^\n]*)\n(?P.*?)^```", re.M | re.S) + for match in fence_re.finditer(markdown): + info = match.group("info").split() + if info == ["toml", "review-context"]: + line = markdown.count("\n", 0, match.start()) + 1 + examples.append((line, match.group("body"))) + return examples + + # --------------------------------------------------------------------------- # strip_diff_prefix # --------------------------------------------------------------------------- @@ -502,6 +520,13 @@ def test_fetch_file_skips_revision_when_none(): client.get_file_at_revision.assert_not_called() +@pytest.fixture(autouse=True) +def clear_review_context_cache(): + review_context._review_context_cache.clear() + yield + review_context._review_context_cache.clear() + + @pytest.mark.asyncio async def test_external_content_load_caches(): item = ExternalContent( @@ -569,13 +594,6 @@ async def test_external_content_load_caches(): """ -@pytest.fixture(autouse=True) -def clear_review_context_cache(): - review_context._review_context_cache.clear() - yield - review_context._review_context_cache.clear() - - def test_parse_diff_files(): files = parse_diff_files(_DIFF_MEDIA) assert files == {"dom/media/Foo.cpp"} @@ -607,81 +625,79 @@ def test_rule_matches_extension_only(): load=[], ) assert rule_matches(rule, {"dom/webidl/Foo.webidl"}) - assert not rule_matches(rule, {"dom/webidl/Foo.js"}) - - -def test_rule_bugzilla_component_fails_closed_without_component(): - config = parse_review_context_toml(_RULES_TOML) - rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") - assert not rule_matches(rule, set(), bug_component=None) - - -def test_rule_bugzilla_component_matches(): - config = parse_review_context_toml(_RULES_TOML) - rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") - assert rule_matches(rule, set(), bug_component="Core::DOM: Web Audio") - assert not rule_matches(rule, set(), bug_component="Core::Layout") - - -@pytest.mark.asyncio -async def test_get_bug_component(): - with patch("libmozdata.bugzilla.Bugzilla") as bugzilla_cls: - instance = MagicMock() - bugzilla_cls.return_value = instance - - def get_data(): - handler = bugzilla_cls.call_args.kwargs["bughandler"] - data = bugzilla_cls.call_args.kwargs["bugdata"] - handler({"product": "Core", "component": "DOM: Web Audio"}, data) - return MagicMock(wait=MagicMock()) - - instance.get_data.side_effect = get_data - assert await get_bug_component(123) == "Core::DOM: Web Audio" + assert not rule_matches(rule, {"dom/media/Foo.cpp"}) def test_parse_review_context_toml_rejects_missing_when(): toml = """ version = 1 + [[rules]] -name = "Broken" -load = [{ type = "file", path = "x.md" }] +name = "Bad rule" +load = [{ type = "file", path = "skills/guide.md" }] """ - with pytest.raises(ReviewContextValidationError, match="rules\\[0\\].when"): + with pytest.raises(ReviewContextValidationError): parse_review_context_toml(toml) def test_parse_review_context_toml_rejects_unknown_action_type(): toml = """ version = 1 + [[rules]] -name = "Broken" +name = "Bad rule" when = { any_file = { ext = [".cpp"] } } -load = [{ type = "url", path = "x.md" }] +load = [{ type = "unknown", path = "skills/guide.md" }] """ - with pytest.raises(ReviewContextValidationError, match="unknown action type"): + with pytest.raises(ReviewContextValidationError): parse_review_context_toml(toml) def test_parse_review_context_toml_rejects_unknown_rule_field(): toml = """ version = 1 + [[rules]] -name = "Broken" -bogus = true +name = "Bad rule" when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = "x.md" }] +match_unknown = ["x"] +load = [{ type = "file", path = "skills/guide.md" }] """ - with pytest.raises(ReviewContextValidationError, match="unknown field"): + with pytest.raises(ReviewContextValidationError): parse_review_context_toml(toml) +def test_parse_review_context_example_file(): + repo_root = Path(__file__).resolve().parent.parent + example = (repo_root / "docs/code-review-context-example.toml").read_text() + config = parse_review_context_toml(example) + assert config.version == 1 + assert len(config.rules) >= 1 + assert "whatwg/html" in config.policy.github.allowed_repos + + +def test_parse_review_context_examples_from_docs(): + repo_root = Path(__file__).resolve().parent.parent + docs = (repo_root / "docs/code-review-skills.md").read_text() + examples = extract_review_context_examples(docs) + assert examples + + for line, example in examples: + try: + config = parse_review_context_toml(example) + except ReviewContextValidationError as exc: + pytest.fail(f"docs/code-review-skills.md:{line}: {exc}") + assert config.rules, f"docs/code-review-skills.md:{line}: expected rules" + + def test_validate_review_context_main(tmp_path, capsys): review_context_path = tmp_path / "review-context.toml" review_context_path.write_text(_RULES_TOML) assert validate_review_context_main([str(review_context_path)]) == 0 + captured = capsys.readouterr() - assert "valid" in captured.out + assert "valid (4 rule(s))" in captured.out def test_validate_review_context_main_failure(tmp_path, capsys): @@ -689,63 +705,120 @@ def test_validate_review_context_main_failure(tmp_path, capsys): review_context_path.write_text( """ version = 1 + [[rules]] -name = "Broken" -load = [{ type = "file", path = "x.md" }] +name = "Bad rule" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "unknown" }] """ ) assert validate_review_context_main([str(review_context_path)]) == 1 + captured = capsys.readouterr() assert "invalid" in captured.err + assert "unknown action type" in captured.err + + +def test_rule_bugzilla_component_fails_closed_without_component(): + rule = Rule( + name="test", + when=BugzillaPredicate(component=["Core::DOM: Web Audio"]), + load=[], + ) + assert not rule_matches(rule, {"dom/media/Foo.cpp"}, bug_component=None) + + +def test_rule_bugzilla_component_matches(): + rule = Rule( + name="test", + when=BugzillaPredicate(component=["Core::DOM: Web Audio"]), + load=[], + ) + assert rule_matches( + rule, {"dom/media/Foo.cpp"}, bug_component="Core::DOM: Web Audio" + ) + assert not rule_matches( + rule, {"dom/media/Foo.cpp"}, bug_component="Core::Networking" + ) + + +@pytest.mark.asyncio +async def test_get_bug_component(): + def fake_bugzilla(bug_id, include_fields, bughandler, bugdata): + bughandler({"product": "Core", "component": "DOM: Web Audio"}, bugdata) + mock = MagicMock() + mock.get_data.return_value.wait = MagicMock() + return mock + + with patch("libmozdata.bugzilla.Bugzilla", side_effect=fake_bugzilla): + component = await get_bug_component(12345) + + assert component == "Core::DOM: Web Audio" @pytest.mark.asyncio async def test_load_external_content_for_diff_file_load(): review_context_repo = "mozilla-firefox/firefox" + content_body = "---\nname: dom-media\n---\nAudio guidelines.\n" + rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + content_response = MagicMock() - content_response.text = "---\nname: dom-media\n---\nMedia review guidelines\n" + content_response.text = content_body content_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(side_effect=[rules_response, content_response]) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, review_context_repo, review_context_branch="release" - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, review_context_branch="release" + ) assert len(results) == 1 - assert results[0].name == ".claude/skills/dom-media.md" - assert results[0].body == "Media review guidelines\n" - assert results[0].source_type == "github_file" - assert results[0].matched_rules == ["Audio/Video C++"] - client.get.assert_any_await( - "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/review-context.toml", - timeout=30, + item = results[0] + assert item.name == ".claude/skills/dom-media.md" + assert item.body == "Audio guidelines.\n" + assert item.source_type == "github_file" + assert item.trusted + assert item.trust_reason == "github_repo_content" + assert item.matched_rules == ["Audio/Video C++"] + assert item.bytes == len("Audio guidelines.\n".encode()) + assert item.sha256 + assert client.get.await_args_list[0].args[0] == ( + "https://raw.githubusercontent.com/" + "mozilla-firefox/firefox/refs/heads/release/review-context.toml" ) - client.get.assert_any_await( - "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/.claude/skills/dom-media.md", - timeout=30, + assert client.get.await_args_list[1].args[0] == ( + "https://raw.githubusercontent.com/" + "mozilla-firefox/firefox/refs/heads/release/" + ".claude/skills/dom-media.md" ) @pytest.mark.asyncio async def test_load_external_content_for_diff_no_match(): review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) - diff = """diff --git a/README.txt b/README.txt\n--- a/README.txt\n+++ b/README.txt\n@@ -1 +1 @@\n-a\n+b\n""" + diff = "diff --git a/build/Makefile b/build/Makefile\n+++ b/build/Makefile\n+new\n" + with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff(diff, review_context_repo) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff(diff, review_context_repo) assert results == [] + assert client.get.await_count == 1 @pytest.mark.asyncio @@ -754,226 +827,254 @@ async def test_load_external_content_for_diff_rules_fetch_failure(): client.get = AsyncMock(side_effect=httpx.ConnectError("boom")) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, "mozilla-firefox/firefox" - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, "mozilla-firefox/firefox" + ) assert results == [] @pytest.mark.asyncio async def test_load_external_content_deduplicates_actions(): - rules = """ + toml = """ version = 1 [[rules]] -name = "A" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = ".claude/skills/cpp.md" }] +name = "Rule A" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp"] } } +load = [{ type = "file", path = "skills/guide.md" }] [[rules]] -name = "B" -when = { any_file = { include = ["dom/media/**"] } } -load = [{ type = "file", path = ".claude/skills/cpp.md" }] +name = "Rule B" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "skills/guide.md" }] """ review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() + content_response = MagicMock() - content_response.text = "C++ guidelines" + content_response.text = "body" content_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(side_effect=[rules_response, content_response]) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, review_context_repo - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo + ) assert len(results) == 1 - assert results[0].matched_rules == ["A", "B"] + assert results[0].matched_rules == ["Rule A", "Rule B"] assert client.get.await_count == 2 @pytest.mark.asyncio async def test_load_external_content_orders_by_priority(): - rules = """ + toml = """ version = 1 [[rules]] name = "Low" -priority = 0 +priority = 1 when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = ".claude/skills/low.md" }] +load = [{ type = "file", path = "skills/low.md" }] [[rules]] name = "High" priority = 10 when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = ".claude/skills/high.md" }] +load = [{ type = "file", path = "skills/high.md" }] """ review_context_repo = "mozilla-firefox/firefox" rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() - low_response = MagicMock() - low_response.text = "low" - low_response.raise_for_status = MagicMock() - high_response = MagicMock() - high_response.text = "high" - high_response.raise_for_status = MagicMock() + client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, high_response, low_response]) + client.get = AsyncMock(return_value=rules_response) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, - review_context_repo, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + content_overrides={ + "skills/low.md": "low\n", + "skills/high.md": "high\n", + }, + ) - assert [r.name for r in results] == [ - ".claude/skills/high.md", - ".claude/skills/low.md", - ] + assert [item.name for item in results] == ["skills/high.md", "skills/low.md"] @pytest.mark.asyncio async def test_load_external_content_rejects_disallowed_github_repo(): - rules = """ + toml = """ version = 1 + [[rules]] -name = "Cross repo" -when = { any_file = { ext = [".webidl"] } } -load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +name = "External" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", repo = "whatwg/html", path = "review.md" }] """ review_context_repo = "mozilla-firefox/firefox" rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_WEBIDL, - review_context_repo, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + content_overrides={"review.md": "external\n"}, + ) assert results == [] - assert client.get.await_count == 1 @pytest.mark.asyncio async def test_load_external_content_allows_policy_prefix_repo(): - rules = """ + toml = """ version = 1 + [policy.github] allowed_repos = ["mozilla/"] + [[rules]] -name = "Cross repo" -when = { any_file = { ext = [".webidl"] } } -load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +name = "External" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", repo = "mozilla/cubeb", path = "review.md" }] """ review_context_repo = "mozilla-firefox/firefox" rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() - content_response = MagicMock() - content_response.text = "cubeb guidelines" - content_response.raise_for_status = MagicMock() + client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, content_response]) + client.get = AsyncMock(return_value=rules_response) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_WEBIDL, - review_context_repo, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + content_overrides={"review.md": "external\n"}, + ) - assert len(results) == 1 - assert results[0].source.endswith("/mozilla/cubeb/refs/heads/main/REVIEWING.md") + assert [item.name for item in results] == ["review.md"] + + +# --- _merge_rules --- def test_merge_rules_appends_new(): base = parse_review_context_toml( """ version = 1 + [[rules]] name = "A" when = { any_file = { ext = [".cpp"] } } load = [{ type = "file", path = "a.md" }] """ - ) - merged = _merge_rules( - base.rules, - """ + ).rules + extra = """ version = 1 + [[rules]] name = "B" when = { any_file = { ext = [".js"] } } load = [{ type = "file", path = "b.md" }] -""", - ) - assert [rule.name for rule in merged] == ["A", "B"] +""" + merged = _merge_rules(base, extra) + assert len(merged) == 2 + assert merged[1].name == "B" def test_merge_rules_replaces_by_name(): base = parse_review_context_toml( """ version = 1 + [[rules]] name = "A" when = { any_file = { ext = [".cpp"] } } load = [{ type = "file", path = "a.md" }] """ - ) - merged = _merge_rules( - base.rules, - """ + ).rules + extra = """ version = 1 + [[rules]] name = "A" -when = { any_file = { ext = [".js"] } } -load = [{ type = "file", path = "replacement.md" }] -""", - ) +when = { any_file = { ext = [".cpp", ".h"] } } +load = [{ type = "file", path = "a.md" }] +""" + merged = _merge_rules(base, extra) assert len(merged) == 1 - assert merged[0].load[0].path == "replacement.md" + assert merged[0].when.predicate.ext == [".cpp", ".h"] def test_merge_rules_empty_extra(): base = parse_review_context_toml( """ version = 1 + [[rules]] name = "A" when = { any_file = { ext = [".cpp"] } } load = [{ type = "file", path = "a.md" }] """ - ) - merged = _merge_rules(base.rules, "version = 1\n") - assert merged == base.rules + ).rules + merged = _merge_rules(base, "version = 1\n") + assert len(merged) == 1 + assert merged[0].name == "A" + assert merged[0].when.predicate.ext == [".cpp"] + assert len(merged[0].load) == 1 + + merged = _merge_rules(base, "version = 1\n# comment\n") + assert len(merged) == 1 + assert merged[0].name == "A" + assert merged[0].when.predicate.ext == [".cpp"] + assert len(merged[0].load) == 1 + + +# --- content_overrides --- @pytest.mark.asyncio async def test_content_override_used_instead_of_fetch(): review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) - overrides = {".claude/skills/dom-media.md": "Override body"} + + overrides = {".claude/skills/dom-media.md": "Overridden content.\n"} with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, review_context_repo, content_overrides=overrides - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) assert len(results) == 1 - assert results[0].body == "Override body" - assert client.get.await_count == 1 + assert results[0].name == ".claude/skills/dom-media.md" + assert results[0].body == "Overridden content.\n" + assert client.get.await_count == 1 # only the rules fetch, not the content @pytest.mark.asyncio @@ -982,131 +1083,120 @@ async def test_external_content_manifest_and_prompt_body(): rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) - overrides = {".claude/skills/dom-media.md": "Guideline body"} + + overrides = {".claude/skills/dom-media.md": "Audio guidelines.\n"} with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, review_context_repo, content_overrides=overrides - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) manifest = external_content_manifest(results) assert manifest == [ { "name": ".claude/skills/dom-media.md", "source_type": "github_file", - "source": "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/main/.claude/skills/dom-media.md", - "action": {"type": "file", "path": ".claude/skills/dom-media.md"}, + "source": ( + "https://raw.githubusercontent.com/mozilla-firefox/firefox/" + "refs/heads/main/.claude/skills/dom-media.md" + ), + "action": { + "type": "file", + "path": ".claude/skills/dom-media.md", + }, "matched_rules": ["Audio/Video C++"], "trusted": True, "trust_reason": "github_repo_content", - "bytes": len("Guideline body".encode()), + "bytes": len("Audio guidelines.\n".encode()), "sha256": results[0].sha256, } ] - prompt = format_external_content(results) - assert "" in prompt - assert "" in prompt - assert '' in prompt - assert "Guideline body" in prompt + + prompt_content = format_external_content(results) + assert "" in prompt_content + assert "" in prompt_content + assert "Audio guidelines." in prompt_content @pytest.mark.asyncio async def test_extra_context_toml_appended(): review_context_repo = "mozilla-firefox/firefox" - base_rules = """ + extra = """ version = 1 + [[rules]] -name = "Base" +name = "Extra JS rule" when = { any_file = { ext = [".js"] } } -load = [{ type = "file", path = "base.md" }] +load = [{ type = "file", path = ".claude/skills/js.md" }] """ - extra_rules = """ + diff_js = "diff --git a/Foo.js b/Foo.js\n+++ b/Foo.js\n+new\n" + + # Use a base TOML without any .js rules so only the extra rule fires. + base_toml = """ version = 1 + [[rules]] -name = "Extra" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = "extra.md" }] +name = "Audio/Video C++" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } +load = [{ type = "file", path = ".claude/skills/dom-media.md" }] """ rules_response = MagicMock() - rules_response.text = base_rules + rules_response.text = base_toml rules_response.raise_for_status = MagicMock() - content_response = MagicMock() - content_response.text = "extra" - content_response.raise_for_status = MagicMock() + client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, content_response]) + client.get = AsyncMock(return_value=rules_response) + + overrides = {".claude/skills/js.md": "JS guidelines.\n"} with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, - review_context_repo, - extra_context_toml=extra_rules, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + diff_js, + review_context_repo, + extra_context_toml=extra, + content_overrides=overrides, + ) - assert [result.name for result in results] == ["extra.md"] + assert len(results) == 1 + assert results[0].name == ".claude/skills/js.md" + assert results[0].body == "JS guidelines.\n" @pytest.mark.asyncio async def test_extra_context_toml_replaces_by_name(): review_context_repo = "mozilla-firefox/firefox" - extra_rules = """ + # Replace the "Audio/Video C++" rule with a version that loads different content + extra = """ version = 1 + [[rules]] name = "Audio/Video C++" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = "replacement.md" }] +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } +load = [{ type = "file", path = ".claude/skills/dom-media-v2.md" }] """ rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() - content_response = MagicMock() - content_response.text = "replacement" - content_response.raise_for_status = MagicMock() - client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, content_response]) - - with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, - review_context_repo, - extra_context_toml=extra_rules, - ) - - assert [result.name for result in results] == ["replacement.md"] - -@pytest.mark.asyncio -async def test_load_external_content_fetches_phabricator_revision(): - rules = """ -version = 1 -[[rules]] -name = "Revision" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "fetch_revision", revision = "D123" }] -""" - review_context_repo = "mozilla-firefox/firefox" - rules_response = MagicMock() - rules_response.text = rules - rules_response.raise_for_status = MagicMock() client = MagicMock() client.get = AsyncMock(return_value=rules_response) - phabricator = MagicMock() - phabricator.load_revision.return_value = {"fields": {"diffID": 456}} - phabricator.load_raw_diff.return_value = "diff content" + overrides = {".claude/skills/dom-media-v2.md": "Updated guidelines.\n"} - with ( - patch.object(data_types, "get_http_client", return_value=client), - patch( - "bugbug.tools.core.platforms.phabricator.get_phabricator_client", - return_value=phabricator, - ), - ): - results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo) + with patch.object(data_types, "get_http_client", return_value=client): + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + extra_context_toml=extra, + content_overrides=overrides, + ) assert len(results) == 1 - assert results[0].name == "Revision D123" - assert results[0].body == "diff content" - assert results[0].source_type == "phabricator_revision" + assert results[0].name == ".claude/skills/dom-media-v2.md" + assert results[0].body == "Updated guidelines.\n" From cab6ec7e73be35d71c59f3631fd1f0c217cd8c6a Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:52:11 +0200 Subject: [PATCH 11/11] Preserve original version file state --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index c51ab6c126..aeee537b4e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.638 +0.0.637