diff --git a/VERSION b/VERSION
index c51ab6c126..aeee537b4e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.0.638
+0.0.637
diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
index 794dc6194d..d2006c1502 100644
--- a/bugbug/tools/code_review/agent.py
+++ b/bugbug/tools/code_review/agent.py
@@ -160,6 +160,15 @@ def create(cls, **kwargs):
max_tokens=40_000,
temperature=None,
thinking={"type": "enabled", "budget_tokens": 10_000},
+ mcp_servers=[
+ {
+ "type": "url",
+ "url": os.getenv(
+ "BUGBUG_MOZ_MCP_URL", "https://mcp-dev.moz.tools/mcp"
+ ),
+ "name": "mozilla",
+ }
+ ],
)
if "patch_summarizer" not in kwargs:
@@ -182,25 +191,30 @@ def create(cls, **kwargs):
def count_tokens(self, text):
return len(self._tokenizer.encode(text))
- def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str:
+ def generate_initial_prompt(
+ self, patch: Patch, patch_summary: str, external_context: str = ""
+ ) -> str:
created_before = patch.date_created if self.is_experiment_env else None
return FIRST_MESSAGE_TEMPLATE.format(
patch=format_patch_set(patch.patch_set),
patch_summarization=patch_summary,
+ external_context=external_context,
comment_examples=self._get_comment_examples(patch, created_before),
approved_examples=self._get_generated_examples(patch, created_before),
)
async def generate_review_comments(
- self, patch: Patch, patch_summary: str
+ self, patch: Patch, patch_summary: str, external_context: str = ""
) -> list[GeneratedReviewComment]:
try:
async for chunk in self.agent.astream(
{
"messages": [
HumanMessage(
- self.generate_initial_prompt(patch, patch_summary)
+ self.generate_initial_prompt(
+ patch, patch_summary, external_context
+ )
),
]
},
@@ -214,14 +228,47 @@ async def generate_review_comments(
return result["structured_response"].comments
- async def run(self, patch: Patch) -> CodeReviewToolResponse:
+ async def run(
+ self,
+ patch: Patch,
+ review_context_repo: Optional[str] = None,
+ review_context_branch: str = "main",
+ extra_context_toml: Optional[str] = None,
+ content_overrides: Optional[dict[str, str]] = None,
+ ) -> CodeReviewToolResponse:
if self.count_tokens(patch.raw_diff) > 21000:
raise LargeDiffError("The diff is too large")
patch_summary = self.patch_summarizer.run(patch)
+ external_context = ""
+ external_content_manifest = []
+ if review_context_repo:
+ from bugbug.tools.code_review.review_context import (
+ external_content_manifest as build_external_content_manifest,
+ )
+ from bugbug.tools.code_review.review_context import (
+ format_external_content,
+ load_external_content_for_diff,
+ )
+
+ bug_id = getattr(patch, "bug_id", None)
+ content_items = await load_external_content_for_diff(
+ patch.raw_diff,
+ review_context_repo,
+ review_context_branch=review_context_branch,
+ bug_id=bug_id,
+ extra_context_toml=extra_context_toml,
+ content_overrides=content_overrides,
+ )
+ if content_items:
+ external_context = format_external_content(content_items)
+ external_content_manifest = build_external_content_manifest(
+ content_items
+ )
+
unfiltered_suggestions = await self.generate_review_comments(
- patch, patch_summary
+ patch, patch_summary, external_context
)
if not unfiltered_suggestions:
logger.info("No suggestions were generated")
@@ -238,6 +285,7 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
details={
"model": self._agent_model_name,
"num_unfiltered_suggestions": len(unfiltered_suggestions),
+ "external_content": external_content_manifest,
},
)
diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py
index d7ecd6938d..3b2c48e470 100644
--- a/bugbug/tools/code_review/data_types.py
+++ b/bugbug/tools/code_review/data_types.py
@@ -1,10 +1,13 @@
import re
+from datetime import datetime
import httpx
+import tenacity
from pydantic import BaseModel, Field, PrivateAttr
from bugbug.tools.core.connection import get_http_client
from bugbug.tools.core.data_types import InlineComment
+from bugbug.tools.core.platforms.base import Patch
class GeneratedReviewComment(BaseModel):
@@ -82,3 +85,129 @@ async def load(self) -> str:
self._cached_body = _strip_frontmatter(response.text)
return self._cached_body
+
+
+_BUG_RE = re.compile(r"[Bb]ug\s+(\d+)")
+_DIFFERENTIAL_RE = re.compile(r"Differential Revision:\s*(https?://\S+)")
+_DATE_RE = re.compile(r"^Date:\s+(.+)$", re.MULTILINE)
+
+
+class LocalPatch(Patch):
+ """A patch from a local diff string, not backed by any platform."""
+
+ def __init__(
+ self,
+ diff: str,
+ commit_message: str = "",
+ ) -> None:
+ self._diff = diff
+ self._commit_message = commit_message
+ lines = commit_message.splitlines()
+ self._title = lines[0].strip() if lines else "Local patch"
+ self._description = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
+ m = _BUG_RE.search(commit_message)
+ self._bug_id: int | None = int(m.group(1)) if m else None
+ m2 = _DIFFERENTIAL_RE.search(commit_message)
+ self._url = m2.group(1) if m2 else ""
+ m3 = _DATE_RE.search(commit_message)
+ if m3:
+ from email.utils import parsedate_to_datetime
+ try:
+ self._date = parsedate_to_datetime(m3.group(1))
+ except Exception:
+ self._date = datetime.now()
+ else:
+ self._date = datetime.now()
+
+ @property
+ def patch_id(self) -> str:
+ return "local"
+
+ @property
+ def raw_diff(self) -> str:
+ return self._diff
+
+ @property
+ def date_created(self) -> datetime:
+ return self._date
+
+ @property
+ def has_bug(self) -> bool:
+ return self._bug_id is not None
+
+ @property
+ def bug_id(self) -> int | None:
+ return self._bug_id
+
+ @property
+ def bug_title(self) -> str:
+ return ""
+
+ @property
+ def patch_title(self) -> str:
+ return self._title
+
+ @property
+ def patch_description(self) -> str:
+ return self._description
+
+ @property
+ def patch_url(self) -> str:
+ return self._url
+
+ def is_accessible(self) -> bool:
+ return True
+
+ def is_public(self) -> bool:
+ return True
+
+ async def get_new_file(self, file_path: str) -> str:
+ raise FileNotFoundError(
+ f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
+ )
+
+ async def get_old_file(self, file_path: str) -> str:
+ raise FileNotFoundError(
+ f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
+ )
+
+
+class ExternalContentLoadError(Exception):
+ """Raised when an ExternalContent body cannot be loaded."""
+
+
+@tenacity.retry(
+ stop=tenacity.stop_after_attempt(3),
+ wait=tenacity.wait_exponential(multiplier=1, min=1),
+ retry=tenacity.retry_if_exception_type(httpx.TransportError),
+ reraise=True,
+)
+async def _fetch_url(url: str) -> httpx.Response:
+ response = await get_http_client().get(url, timeout=30)
+ response.raise_for_status()
+ return response
+
+
+class ExternalContent(BaseModel):
+ """An external file fetched and injected as context for the review."""
+
+ name: str = Field(description="A unique identifier for this content item.")
+ url: str = Field(description="HTTPS URL of the file to fetch.")
+ description: str = Field(description="Short description of what this content provides.")
+
+ _cached_body: str | None = PrivateAttr(default=None)
+
+ async def load(self) -> str:
+ """Return the content body, fetching and caching it on first use."""
+ if self._cached_body is not None:
+ return self._cached_body
+
+ try:
+ response = await _fetch_url(self.url)
+ except httpx.HTTPError as e:
+ raise ExternalContentLoadError(
+ f"Could not load content '{self.name}' from {self.url}"
+ ) from e
+
+ self._cached_body = _strip_frontmatter(response.text)
+ return self._cached_body
diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py
index bf4073d474..dffdca43fe 100644
--- a/bugbug/tools/code_review/prompts.py
+++ b/bugbug/tools/code_review/prompts.py
@@ -5,80 +5,37 @@
"""Prompt templates for code review agent."""
-from bugbug.tools.code_review.data_types import Skill
-
-TARGET_SOFTWARE: str | None = None
-
-
-SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code.
-
-## Instructions
+from pathlib import Path
-Follow this systematic approach to review the patch:
-
-**Step 1: Analyze the Changes**
-- Understand what the patch is trying to accomplish
-- Use the patch summary for context, but focus primarily on what you can see in the actual diff
-- Identify the intent and structure of the changes
-
-**Step 2: Identify Issues**
-- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
-- Focus ONLY on new or changed lines (lines that begin with `+`)
-- Never comment on unmodified code
-- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
-
-**Step 3: Verify and Assess Confidence**
-- Use available tools when you need to verify concerns or gather additional context
-- Only include comments where you are at least 80% confident the issue is valid
-- When uncertain about an issue, use available tools to verify before commenting
-- Do not suggest issues you cannot verify with available context
+from bugbug.tools.code_review.data_types import Skill
-**Step 4: Sort and Order Comments**
-- Sort comments by descending confidence and importance
-- Start with issues you are certain are valid and that are most critical
-- Assign each comment a numeric order starting at 1
+_PROMPTS_DIR = Path(__file__).parent / "prompts"
-**Step 5: Write Clear, Constructive Comments**
-- Use direct, declarative language - state the problem definitively, then suggest the fix
-- Keep comments short and specific
-- Use directive language: "Fix", "Remove", "Change", "Add"
-- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
-- Focus strictly on code-related concerns
+TARGET_SOFTWARE: str | None = None
-## What NOT to Include
+_SYSTEM_PROMPT_RAW = (_PROMPTS_DIR / "system.md").read_text()
-Do not write comments that:
-- Refer to unmodified code (lines without a `+` prefix)
-- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
-- Provide praise or restate obvious facts
-- Focus on testing concerns
-- Point out issues that are already handled in the visible code
-- Suggest problems based on assumptions without verifying the context
-- Flag style preferences without clear coding standard violations
+_CONTEXT_TOOLS_AGENT = """\
+ - `expand_context` to read surrounding code in the patched files
+ - `find_function_definition` to look up callers or definitions\
"""
+_CONTEXT_TOOLS_LOCAL = """\
+ - `Read` and `Grep` to inspect files in the local checkout
+ - `Bash` with `searchfox-cli` to query Searchfox for definitions, callers, and cross-references\
+"""
-FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:
-
-
-{patch_summarization}
-
-
-
-Here are examples of good code review comments to guide your style and approach:
-
-
-{comment_examples}
-{approved_examples}
-
-
+SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
+ target_software="{target_software}",
+ context_tools=_CONTEXT_TOOLS_AGENT,
+)
-Here is the patch you need to review:
+LOCAL_SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
+ target_software="{target_software}",
+ context_tools=_CONTEXT_TOOLS_LOCAL,
+)
-
-{patch}
-
-"""
+FIRST_MESSAGE_TEMPLATE = (_PROMPTS_DIR / "first_message.md").read_text()
TEMPLATE_COMMENT_EXAMPLE = """Patch example {example_number}:
diff --git a/bugbug/tools/code_review/prompts/first_message.md b/bugbug/tools/code_review/prompts/first_message.md
new file mode 100644
index 0000000000..848897fb69
--- /dev/null
+++ b/bugbug/tools/code_review/prompts/first_message.md
@@ -0,0 +1,21 @@
+Here is a summary of the patch:
+
+
+{patch_summarization}
+
+{external_context}
+
+
+Here are examples of good code review comments to guide your style and approach:
+
+
+{comment_examples}
+{approved_examples}
+
+
+
+Here is the patch you need to review:
+
+
+{patch}
+
diff --git a/bugbug/tools/code_review/prompts/system.md b/bugbug/tools/code_review/prompts/system.md
new file mode 100644
index 0000000000..674b4000a2
--- /dev/null
+++ b/bugbug/tools/code_review/prompts/system.md
@@ -0,0 +1,51 @@
+## Instructions
+
+Follow this systematic approach to review the patch:
+
+**Step 1: Analyze the Changes**
+
+- Read the commit message
+- Understand what the patch is trying to accomplish
+- Identify the intent and structure of the changes; if what the patch does doesn't match the commit message, this always warrants a review comment — mismatches make the history hard to understand and bisect
+
+**Step 2: Identify Issues**
+
+- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
+- Focus ONLY on new or changed lines (lines that begin with `+`)
+- Never comment on unmodified code
+- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
+
+**Step 3: Verify and Assess Confidence**
+
+- Use available tools when you need to verify concerns or gather additional context:
+ {context_tools}
+ - `mozilla__get_phabricator_revision` to fetch related Phabricator revisions — open and closed, including their review comments; particularly useful for other revisions in the same stack
+ - `mozilla__get_bugzilla_bug` to fetch the associated Bugzilla bug and its history
+ - `mozilla__read_fx_doc_section` to read sections from the Firefox Source Tree Documentation (firefox-source-docs.mozilla.org) — useful for architecture and API documentation relevant to the changed code
+- Significant tool usage is expected to perform a high-quality review, much like a human reviewer would
+- When uncertain about an issue, use available tools to verify before commenting
+- Do not suggest issues you cannot verify with available context
+
+**Step 4: Sort and Order Comments**
+
+- Sort comments by descending confidence and importance
+- Start with issues you are certain are valid and that are most critical
+- Assign each comment a numeric order starting at 1
+
+**Step 5: Write Clear, Constructive Comments**
+
+- Use direct, declarative language - state the problem definitively, then suggest the fix
+- Keep comments short and specific
+- Use directive language: "Fix", "Remove", "Change", "Add"
+- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
+- Focus strictly on code-related concerns
+
+## What NOT to Include
+
+Do not write comments that:
+
+- Refer to unmodified code (lines without a `+` prefix)
+- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
+- Provide praise or restate obvious facts
+- Suggest problems based on assumptions without verifying the context
+- Flag style preferences without clear coding standard violations
diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py
new file mode 100644
index 0000000000..39791c70a1
--- /dev/null
+++ b/bugbug/tools/code_review/review_context.py
@@ -0,0 +1,591 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Rule-based external content loading for code review.
+
+Fetches review-context.toml from a GitHub repository, matches changed files
+against the rules, and pre-loads the referenced content.
+"""
+
+import asyncio
+import fnmatch
+import hashlib
+import json
+import re
+import time
+from dataclasses import dataclass
+from logging import getLogger
+from typing import Literal
+
+import httpx
+from pydantic import BaseModel
+from unidiff import PatchSet
+
+from bugbug.tools.code_review.data_types import (
+ ExternalContent,
+ ExternalContentLoadError,
+ _fetch_url,
+)
+from bugbug.tools.code_review.review_context_schema import (
+ AllFilesPredicate,
+ AllPredicate,
+ AnyFilePredicate,
+ AnyPredicate,
+ BugzillaPredicate,
+ FetchRevisionAction,
+ FilePredicate,
+ LoadFileAction,
+ NotPredicate,
+ PatchPredicate,
+ Predicate,
+ ReviewContextConfig,
+ ReviewContextValidationError,
+ ReviewPredicate,
+ Rule,
+ RuleAction,
+ parse_review_context_toml,
+ tomllib,
+)
+from bugbug.tools.core.connection import get_http_client
+
+logger = getLogger(__name__)
+
+_PHAB_RE = re.compile(r"D(\d+)$")
+
+_review_context_cache: dict[
+ tuple[str, str, str], tuple[float, "ReviewContextConfig"]
+] = {}
+_REVIEW_CONTEXT_CACHE_TTL = 300 # seconds
+DEFAULT_REVIEW_CONTEXT_PATH = "review-context.toml"
+
+
+@dataclass
+class MatchedAction:
+ action: RuleAction
+ matched_rules: list[str]
+
+
+class ExternalContentItem(BaseModel):
+ name: str
+ body: str
+ source_type: Literal["github_file", "phabricator_revision", "github_commit"]
+ source: str
+ action: dict
+ matched_rules: list[str]
+ trusted: bool = True
+ trust_reason: str
+ bytes: int
+ sha256: str
+
+ @classmethod
+ def create(
+ cls,
+ *,
+ name: str,
+ body: str,
+ source_type: Literal["github_file", "phabricator_revision", "github_commit"],
+ source: str,
+ action: RuleAction,
+ matched_rules: list[str],
+ trust_reason: str,
+ ) -> "ExternalContentItem":
+ encoded = body.encode()
+ return cls(
+ name=name,
+ body=body,
+ source_type=source_type,
+ source=source,
+ action=_action_to_dict(action),
+ matched_rules=matched_rules,
+ trust_reason=trust_reason,
+ bytes=len(encoded),
+ sha256=hashlib.sha256(encoded).hexdigest(),
+ )
+
+ def manifest(self) -> dict:
+ return self.model_dump(exclude={"body"})
+
+
+def _github_raw_url(repo: str, branch: str, path: str) -> str:
+ return (
+ f"https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/"
+ f"{path.lstrip('/')}"
+ )
+
+
+def _normalise_repo(repo: str) -> str:
+ return repo.lower()
+
+
+def github_repo_allowed(
+ repo: str, review_context_repo: str, allowed_repos: set[str]
+) -> bool:
+ repo = _normalise_repo(repo)
+ review_context_repo = _normalise_repo(review_context_repo)
+ if repo == review_context_repo:
+ return True
+ return any(
+ repo.startswith(allowed_repo)
+ if allowed_repo.endswith("/")
+ else repo == allowed_repo
+ for allowed_repo in allowed_repos
+ )
+
+
+def _action_to_dict(action: RuleAction) -> dict:
+ return {key: value for key, value in action.__dict__.items() if value is not None}
+
+
+def parse_diff_files(diff: str) -> set[str]:
+ """Return the set of file paths added or modified by the diff."""
+ try:
+ return {f.path for f in PatchSet.from_string(diff) if not f.is_removed_file}
+ except Exception:
+ files = set()
+ for line in diff.splitlines():
+ if line.startswith("+++ b/"):
+ files.add(line[6:].strip())
+ return files
+
+
+async def get_bug_component(bug_id: int) -> str | None:
+ """Return 'Product::Component' for the given Bugzilla bug, or None on failure."""
+ from libmozdata.bugzilla import Bugzilla
+
+ def _fetch() -> str | None:
+ bugs: list[dict] = []
+ Bugzilla(
+ bug_id,
+ include_fields=["product", "component"],
+ bughandler=lambda bug, data: data.append(bug),
+ bugdata=bugs,
+ ).get_data().wait()
+ if not bugs:
+ return None
+ return f"{bugs[0]['product']}::{bugs[0]['component']}"
+
+ try:
+ return await asyncio.get_event_loop().run_in_executor(None, _fetch)
+ except Exception:
+ logger.warning("Could not fetch component for bug %s", bug_id)
+ return None
+
+
+def rule_matches(
+ rule: Rule, changed_files: set[str], bug_component: str | None = None
+) -> bool:
+ return predicate_matches(rule.when, changed_files, bug_component)
+
+
+def _normalise_path(path: str) -> str:
+ path = path.replace("\\", "/")
+ if path.startswith("./"):
+ return path[2:]
+ return path
+
+
+def _file_matches(predicate: FilePredicate, path: str) -> bool:
+ path = _normalise_path(path)
+ if predicate.include and not any(
+ fnmatch.fnmatchcase(path, pattern) for pattern in predicate.include
+ ):
+ return False
+ if predicate.exclude and any(
+ fnmatch.fnmatchcase(path, pattern) for pattern in predicate.exclude
+ ):
+ return False
+ if predicate.ext and not any(path.endswith(ext) for ext in predicate.ext):
+ return False
+ return True
+
+
+def _bugzilla_matches(
+ predicate: BugzillaPredicate, bug_component: str | None = None
+) -> bool:
+ if predicate.product or predicate.keywords or predicate.severity:
+ return False
+ if predicate.component:
+ return bug_component is not None and bug_component in predicate.component
+ return False
+
+
+def _review_matches(predicate: ReviewPredicate) -> bool:
+ return False
+
+
+def _patch_matches(predicate: PatchPredicate) -> bool:
+ return False
+
+
+def predicate_matches(
+ predicate: Predicate,
+ changed_files: set[str],
+ bug_component: str | None = None,
+) -> bool:
+ match predicate:
+ case AllPredicate(children=children):
+ return all(
+ predicate_matches(child, changed_files, bug_component)
+ for child in children
+ )
+ case AnyPredicate(children=children):
+ return any(
+ predicate_matches(child, changed_files, bug_component)
+ for child in children
+ )
+ case NotPredicate(child=child):
+ return not predicate_matches(child, changed_files, bug_component)
+ case AnyFilePredicate(predicate=file_predicate):
+ return any(_file_matches(file_predicate, f) for f in changed_files)
+ case AllFilesPredicate(predicate=file_predicate):
+ return bool(changed_files) and all(
+ _file_matches(file_predicate, f) for f in changed_files
+ )
+ case BugzillaPredicate():
+ return _bugzilla_matches(predicate, bug_component)
+ case ReviewPredicate():
+ return _review_matches(predicate)
+ case PatchPredicate():
+ return _patch_matches(predicate)
+ raise TypeError(f"Unknown predicate type: {type(predicate).__name__}")
+
+
+def _action_key(action: RuleAction) -> tuple:
+ if isinstance(action, LoadFileAction):
+ return (
+ "file",
+ action.repo or "",
+ action.branch or "",
+ action.path,
+ )
+ if isinstance(action, FetchRevisionAction):
+ return (
+ "fetch_revision",
+ action.revision or "",
+ action.repo or "",
+ action.hash or "",
+ )
+ raise ValueError(f"Unknown action type: {action.type!r}")
+
+
+def _action_to_content(
+ action: LoadFileAction,
+ default_repo: str,
+ default_branch: str,
+ allowed_repos: set[str],
+) -> ExternalContent:
+ """Resolve a file action to an ExternalContent instance.
+
+ Actions without an explicit repo load from the same GitHub repo and branch
+ as the rules file. Cross-repo references use their explicit repo and
+ optional branch, defaulting to main.
+ """
+ path = action.path
+ repo = action.repo or default_repo
+ branch = action.branch or (default_branch if repo == default_repo else "main")
+ if not github_repo_allowed(repo, default_repo, allowed_repos):
+ raise ValueError(f"GitHub repo is not allowed for review context: {repo}")
+ url = _github_raw_url(repo, branch, path)
+ return ExternalContent(name=path, url=url, description=path)
+
+
+async def _fetch_revision(
+ action: FetchRevisionAction,
+) -> tuple[str, str, Literal["phabricator_revision", "github_commit"], str, str] | None:
+ """Fetch the diff of another revision, either from Phabricator or GitHub."""
+ revision_str = action.revision or ""
+ repo = action.repo or ""
+ commit_hash = action.hash or ""
+
+ if revision_str:
+ m = _PHAB_RE.match(revision_str.strip())
+ if not m:
+ logger.warning("fetch_revision: cannot parse revision %r", revision_str)
+ return None
+ rev_num = int(m.group(1))
+ try:
+ from bugbug.tools.core.platforms.phabricator import get_phabricator_client
+
+ def _load():
+ phab = get_phabricator_client()
+ revision = phab.load_revision(rev_id=rev_num)
+ diff_id = revision["fields"]["diffID"]
+ return phab.load_raw_diff(diff_id)
+
+ content = await asyncio.get_event_loop().run_in_executor(None, _load)
+ name = f"Revision D{rev_num}"
+ return (
+ name,
+ content,
+ "phabricator_revision",
+ name,
+ "phabricator_revision",
+ )
+ except Exception:
+ logger.warning("fetch_revision: failed to load Phabricator D%s", rev_num)
+ return None
+
+ if repo and commit_hash:
+ try:
+ response = await get_http_client().get(
+ f"https://api.github.com/repos/{repo}/commits/{commit_hash}",
+ headers={"Accept": "application/vnd.github.diff"},
+ timeout=30,
+ )
+ response.raise_for_status()
+ source = f"{repo}@{commit_hash}"
+ return (
+ f"{repo}@{commit_hash[:12]}",
+ response.text,
+ "github_commit",
+ source,
+ "github_repo_content",
+ )
+ except httpx.HTTPError:
+ logger.warning("fetch_revision: failed to load %s@%s", repo, commit_hash)
+ return None
+
+ logger.warning(
+ "fetch_revision: action has neither revision nor repo+hash: %s", action
+ )
+ return None
+
+
+def _merge_rules(base: list[Rule], extra_toml: str) -> list[Rule]:
+ """Merge extra rules into the base list.
+
+ Rules in extra_toml whose name matches an existing rule replace that rule
+ in-place. Rules with new names are appended. Rules without a name are always
+ appended. Returns a new list; base is not mutated.
+ """
+ rules = list(base)
+ extra = parse_review_context_toml(extra_toml).rules
+ if not extra:
+ return rules
+ index = {r.name: i for i, r in enumerate(rules) if r.name}
+ for rule in extra:
+ name = rule.name
+ if name and name in index:
+ rules[index[name]] = rule
+ else:
+ rules.append(rule)
+ return rules
+
+
+def collect_actions(
+ diff: str,
+ config: ReviewContextConfig,
+ bug_component: str | None = None,
+ extra_context_toml: str | None = None,
+) -> list[MatchedAction]:
+ """Return deduplicated actions matched from config against the diff.
+
+ Merges extra_context_toml into config before matching. Evaluates each rule
+ against the changed files (and optional bug component). Actions are
+ deduplicated across rules so the same file is never fetched twice. Pure —
+ no I/O.
+ """
+ if extra_context_toml:
+ config = ReviewContextConfig(
+ version=config.version,
+ policy=config.policy,
+ definitions=config.definitions,
+ rules=_merge_rules(config.rules, extra_context_toml),
+ )
+
+ changed_files = parse_diff_files(diff)
+ logger.debug(
+ "Matching rules against %d changed file(s): %s",
+ len(changed_files),
+ changed_files,
+ )
+
+ actions_by_key: dict[tuple, MatchedAction] = {}
+ actions: list[MatchedAction] = []
+
+ ordered_rules = sorted(
+ enumerate(config.rules), key=lambda item: (-item[1].priority, item[0])
+ )
+ for _, rule in ordered_rules:
+ if not rule_matches(rule, changed_files, bug_component):
+ continue
+ rule_name = rule.name
+ new_actions = []
+ for action in rule.load:
+ key = _action_key(action)
+ if key in actions_by_key:
+ actions_by_key[key].matched_rules.append(rule_name)
+ continue
+ matched_action = MatchedAction(action=action, matched_rules=[rule_name])
+ actions_by_key[key] = matched_action
+ actions.append(matched_action)
+ new_actions.append(action)
+ if new_actions:
+ logger.debug(
+ "Rule %r matched: %d action(s) queued",
+ rule_name,
+ len(new_actions),
+ )
+
+ logger.debug("Total actions to execute: %d", len(actions))
+ return actions
+
+
+async def execute_actions(
+ actions: list[MatchedAction],
+ default_repo: str,
+ default_branch: str,
+ allowed_repos: set[str],
+ content_overrides: dict[str, str] | None = None,
+) -> list[ExternalContentItem]:
+ """Execute a list of actions and return loaded external content.
+
+ fetch_revision actions fetch the raw diff of a Phabricator revision or
+ GitHub commit. file actions resolve to an ExternalContent URL and
+ fetch the file content. content_overrides bypasses the network fetch for
+ matching names, allowing callers to inject test content.
+ """
+ results: list[ExternalContentItem] = []
+ for matched_action in actions:
+ action = matched_action.action
+ if isinstance(action, FetchRevisionAction):
+ result = await _fetch_revision(action)
+ if result:
+ name, body, source_type, source, trust_reason = result
+ results.append(
+ ExternalContentItem.create(
+ name=name,
+ body=body,
+ source_type=source_type,
+ source=source,
+ action=action,
+ matched_rules=matched_action.matched_rules,
+ trust_reason=trust_reason,
+ )
+ )
+ continue
+ try:
+ item = _action_to_content(
+ action, default_repo, default_branch, allowed_repos
+ )
+ if content_overrides and item.name in content_overrides:
+ body = content_overrides[item.name]
+ else:
+ body = await item.load()
+ results.append(
+ ExternalContentItem.create(
+ name=item.name,
+ body=body,
+ source_type="github_file",
+ source=item.url,
+ action=action,
+ matched_rules=matched_action.matched_rules,
+ trust_reason="github_repo_content",
+ )
+ )
+ except (ValueError, ExternalContentLoadError):
+ logger.error(
+ "Failed to load content for action %s", _action_to_dict(action)
+ )
+ return results
+
+
+async def _fetch_review_context(
+ repo: str, branch: str, review_context_path: str
+) -> ReviewContextConfig:
+ """Fetch and parse review-context.toml, with a short in-process TTL cache."""
+ now = time.time()
+ cache_key = (repo, branch, review_context_path)
+ if cache_key in _review_context_cache:
+ ts, config = _review_context_cache[cache_key]
+ if now - ts < _REVIEW_CONTEXT_CACHE_TTL:
+ logger.debug(
+ "Using cached review context from %s@%s:%s (age %.0fs)",
+ repo,
+ branch,
+ review_context_path,
+ now - ts,
+ )
+ return config
+ review_context_url = _github_raw_url(repo, branch, review_context_path)
+ response = await _fetch_url(review_context_url)
+ config = parse_review_context_toml(response.text)
+ _review_context_cache[cache_key] = (now, config)
+ return config
+
+
+async def load_external_content_for_diff(
+ diff: str,
+ review_context_repo: str,
+ review_context_branch: str = "main",
+ review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH,
+ bug_id: int | None = None,
+ extra_context_toml: str | None = None,
+ content_overrides: dict[str, str] | None = None,
+) -> list[ExternalContentItem]:
+ """Fetch review-context.toml from GitHub, match against diff, return content.
+
+ Retries the rules fetch on transient errors (via _fetch_url). The parsed
+ config is cached in-process with a short TTL to avoid redundant fetches
+ across back-to-back reviews.
+ """
+ try:
+ config = await _fetch_review_context(
+ review_context_repo, review_context_branch, review_context_path
+ )
+ except httpx.HTTPError:
+ logger.error(
+ "Could not fetch review context from %s@%s:%s",
+ review_context_repo,
+ review_context_branch,
+ review_context_path,
+ )
+ return []
+ except (tomllib.TOMLDecodeError, ReviewContextValidationError):
+ logger.exception(
+ "Could not parse review context from %s@%s:%s",
+ review_context_repo,
+ review_context_branch,
+ review_context_path,
+ )
+ return []
+
+ bug_component: str | None = None
+ if bug_id is not None:
+ bug_component = await get_bug_component(bug_id)
+
+ try:
+ actions = collect_actions(diff, config, bug_component, extra_context_toml)
+ except (tomllib.TOMLDecodeError, ReviewContextValidationError):
+ logger.exception("Could not parse extra review context")
+ return []
+
+ return await execute_actions(
+ actions,
+ review_context_repo,
+ review_context_branch,
+ set(config.policy.github.allowed_repos),
+ content_overrides,
+ )
+
+
+def external_content_manifest(content_items: list[ExternalContentItem]) -> list[dict]:
+ return [item.manifest() for item in content_items]
+
+
+def format_external_content(content_items: list[ExternalContentItem]) -> str:
+ manifest = json.dumps(external_content_manifest(content_items), indent=2)
+ content = "\n\n".join(
+ f'\n{item.body.strip()}\n'
+ for item in content_items
+ )
+ return (
+ "\n\n\n"
+ f"{manifest}\n"
+ ""
+ "\n\n\n"
+ f"{content}\n"
+ ""
+ )
diff --git a/bugbug/tools/code_review/review_context_schema.py b/bugbug/tools/code_review/review_context_schema.py
new file mode 100644
index 0000000000..ceba192a0c
--- /dev/null
+++ b/bugbug/tools/code_review/review_context_schema.py
@@ -0,0 +1,622 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Schema and validator for code review external context rules.
+
+This module intentionally depends only on the Python standard library so it can
+also be copied into target repositories and used from local tests.
+"""
+
+import argparse
+import sys
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal, TypeAlias
+
+try:
+ import tomllib
+except ImportError:
+ import tomli as tomllib # type: ignore
+
+
+class ReviewContextValidationError(ValueError):
+ """Raised when review-context.toml has valid TOML but invalid schema."""
+
+
+@dataclass(frozen=True)
+class GithubPolicy:
+ allowed_repos: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class Policy:
+ github: GithubPolicy = field(default_factory=GithubPolicy)
+
+
+@dataclass(frozen=True)
+class FilePredicate:
+ include: list[str] = field(default_factory=list)
+ exclude: list[str] = field(default_factory=list)
+ ext: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class BugzillaPredicate:
+ product: list[str] = field(default_factory=list)
+ component: list[str] = field(default_factory=list)
+ keywords: list[str] = field(default_factory=list)
+ severity: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class ReviewPredicate:
+ author: list[str] = field(default_factory=list)
+ reviewer: list[str] = field(default_factory=list)
+ blocking_reviewer: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class PatchPredicate:
+ repository: list[str] = field(default_factory=list)
+ is_backport: bool | None = None
+
+
+@dataclass(frozen=True)
+class Definitions:
+ files: dict[str, FilePredicate] = field(default_factory=dict)
+ bugzilla: dict[str, BugzillaPredicate] = field(default_factory=dict)
+ review: dict[str, ReviewPredicate] = field(default_factory=dict)
+ patch: dict[str, PatchPredicate] = field(default_factory=dict)
+
+
+@dataclass(frozen=True)
+class AllPredicate:
+ children: list["Predicate"]
+
+
+@dataclass(frozen=True)
+class AnyPredicate:
+ children: list["Predicate"]
+
+
+@dataclass(frozen=True)
+class NotPredicate:
+ child: "Predicate"
+
+
+@dataclass(frozen=True)
+class AnyFilePredicate:
+ predicate: FilePredicate
+
+
+@dataclass(frozen=True)
+class AllFilesPredicate:
+ predicate: FilePredicate
+
+
+Predicate: TypeAlias = (
+ AllPredicate
+ | AnyPredicate
+ | NotPredicate
+ | AnyFilePredicate
+ | AllFilesPredicate
+ | BugzillaPredicate
+ | ReviewPredicate
+ | PatchPredicate
+)
+
+
+@dataclass(frozen=True)
+class LoadFileAction:
+ type: Literal["file"]
+ path: str
+ repo: str | None = None
+ branch: str | None = None
+ kind: str | None = None
+
+
+@dataclass(frozen=True)
+class FetchRevisionAction:
+ type: Literal["fetch_revision"]
+ revision: str | None = None
+ repo: str | None = None
+ hash: str | None = None
+ kind: str | None = None
+
+
+RuleAction: TypeAlias = LoadFileAction | FetchRevisionAction
+
+
+@dataclass(frozen=True)
+class Rule:
+ name: str
+ when: Predicate
+ load: list[RuleAction]
+ description: str | None = None
+ owners: list[str] = field(default_factory=list)
+ priority: int = 0
+
+
+@dataclass(frozen=True)
+class ReviewContextConfig:
+ version: int
+ policy: Policy = field(default_factory=Policy)
+ definitions: Definitions = field(default_factory=Definitions)
+ rules: list[Rule] = field(default_factory=list)
+
+
+def _reject_unknown_keys(path: str, value: dict, allowed: set[str]) -> None:
+ unknown = sorted(set(value) - allowed)
+ if unknown:
+ raise ReviewContextValidationError(
+ f"{path}: unknown field(s): {', '.join(unknown)}"
+ )
+
+
+def _require_table(value: object, path: str) -> dict:
+ if not isinstance(value, dict):
+ raise ReviewContextValidationError(f"{path}: expected table")
+ return value
+
+
+def _require_string(value: object, path: str) -> str:
+ if not isinstance(value, str):
+ raise ReviewContextValidationError(f"{path}: expected string")
+ return value
+
+
+def _optional_string(value: object, path: str) -> str | None:
+ if value is None:
+ return None
+ return _require_string(value, path)
+
+
+def _string_list(value: object, path: str) -> list[str]:
+ if value is None:
+ return []
+ if not isinstance(value, list):
+ raise ReviewContextValidationError(f"{path}: expected list of strings")
+ for index, item in enumerate(value):
+ _require_string(item, f"{path}[{index}]")
+ return value
+
+
+def _required_non_empty_string_list(value: object, path: str) -> list[str]:
+ items = _string_list(value, path)
+ if not items:
+ raise ReviewContextValidationError(f"{path}: expected non-empty list")
+ return items
+
+
+def _optional_bool(value: object, path: str) -> bool | None:
+ if value is None:
+ return None
+ if not isinstance(value, bool):
+ raise ReviewContextValidationError(f"{path}: expected boolean")
+ return value
+
+
+def _require_int(value: object, path: str) -> int:
+ if not isinstance(value, int) or isinstance(value, bool):
+ raise ReviewContextValidationError(f"{path}: expected integer")
+ return value
+
+
+def _normalise_repo(repo: str) -> str:
+ return repo.lower()
+
+
+def _parse_github_policy(value: object, path: str) -> GithubPolicy:
+ table = _require_table(value, path)
+ _reject_unknown_keys(path, table, {"allowed_repos"})
+ return GithubPolicy(
+ allowed_repos=[
+ _normalise_repo(repo)
+ for repo in _string_list(
+ table.get("allowed_repos"), f"{path}.allowed_repos"
+ )
+ ]
+ )
+
+
+def _parse_policy(value: object | None) -> Policy:
+ if value is None:
+ return Policy()
+ table = _require_table(value, "policy")
+ _reject_unknown_keys("policy", table, {"github"})
+ return Policy(github=_parse_github_policy(table.get("github", {}), "policy.github"))
+
+
+def _parse_file_predicate(value: object, path: str) -> FilePredicate:
+ table = _require_table(value, path)
+ _reject_unknown_keys(path, table, {"include", "exclude", "ext", "ref"})
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+ predicate = FilePredicate(
+ include=_string_list(table.get("include"), f"{path}.include"),
+ exclude=_string_list(table.get("exclude"), f"{path}.exclude"),
+ ext=_string_list(table.get("ext"), f"{path}.ext"),
+ )
+ if not predicate.include and not predicate.exclude and not predicate.ext:
+ raise ReviewContextValidationError(f"{path}: expected include, exclude, or ext")
+ return predicate
+
+
+def _parse_bugzilla_predicate(value: object, path: str) -> BugzillaPredicate:
+ table = _require_table(value, path)
+ _reject_unknown_keys(
+ path, table, {"product", "component", "keywords", "severity", "ref"}
+ )
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+ predicate = BugzillaPredicate(
+ product=_string_list(table.get("product"), f"{path}.product"),
+ component=_string_list(table.get("component"), f"{path}.component"),
+ keywords=_string_list(table.get("keywords"), f"{path}.keywords"),
+ severity=_string_list(table.get("severity"), f"{path}.severity"),
+ )
+ if (
+ not predicate.product
+ and not predicate.component
+ and not predicate.keywords
+ and not predicate.severity
+ ):
+ raise ReviewContextValidationError(f"{path}: expected at least one field")
+ return predicate
+
+
+def _parse_review_predicate(value: object, path: str) -> ReviewPredicate:
+ table = _require_table(value, path)
+ _reject_unknown_keys(
+ path, table, {"author", "reviewer", "blocking_reviewer", "ref"}
+ )
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+ predicate = ReviewPredicate(
+ author=_string_list(table.get("author"), f"{path}.author"),
+ reviewer=_string_list(table.get("reviewer"), f"{path}.reviewer"),
+ blocking_reviewer=_string_list(
+ table.get("blocking_reviewer"), f"{path}.blocking_reviewer"
+ ),
+ )
+ if (
+ not predicate.author
+ and not predicate.reviewer
+ and not predicate.blocking_reviewer
+ ):
+ raise ReviewContextValidationError(f"{path}: expected at least one field")
+ return predicate
+
+
+def _parse_patch_predicate(value: object, path: str) -> PatchPredicate:
+ table = _require_table(value, path)
+ _reject_unknown_keys(path, table, {"repository", "is_backport", "ref"})
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+ predicate = PatchPredicate(
+ repository=_string_list(table.get("repository"), f"{path}.repository"),
+ is_backport=_optional_bool(table.get("is_backport"), f"{path}.is_backport"),
+ )
+ if not predicate.repository and predicate.is_backport is None:
+ raise ReviewContextValidationError(f"{path}: expected at least one field")
+ return predicate
+
+
+def _parse_definitions(value: object | None) -> Definitions:
+ if value is None:
+ return Definitions()
+ table = _require_table(value, "definitions")
+ _reject_unknown_keys("definitions", table, {"files", "bugzilla", "review", "patch"})
+ return Definitions(
+ files={
+ name: _parse_file_predicate(definition, f"definitions.files.{name}")
+ for name, definition in _require_table(
+ table.get("files", {}), "definitions.files"
+ ).items()
+ },
+ bugzilla={
+ name: _parse_bugzilla_predicate(definition, f"definitions.bugzilla.{name}")
+ for name, definition in _require_table(
+ table.get("bugzilla", {}), "definitions.bugzilla"
+ ).items()
+ },
+ review={
+ name: _parse_review_predicate(definition, f"definitions.review.{name}")
+ for name, definition in _require_table(
+ table.get("review", {}), "definitions.review"
+ ).items()
+ },
+ patch={
+ name: _parse_patch_predicate(definition, f"definitions.patch.{name}")
+ for name, definition in _require_table(
+ table.get("patch", {}), "definitions.patch"
+ ).items()
+ },
+ )
+
+
+def _resolve_ref(ref: object, namespace: str, definitions: Definitions, path: str):
+ ref_name = _require_string(ref, path)
+ prefix = f"{namespace}."
+ if not ref_name.startswith(prefix):
+ raise ReviewContextValidationError(
+ f"{path}: expected {namespace}.* ref, got {ref_name!r}"
+ )
+ name = ref_name[len(prefix) :]
+ mapping = getattr(definitions, namespace)
+ try:
+ return mapping[name]
+ except KeyError:
+ raise ReviewContextValidationError(
+ f"{path}: unknown ref {ref_name!r}"
+ ) from None
+
+
+def _parse_file_predicate_ref(
+ value: object, path: str, definitions: Definitions
+) -> FilePredicate:
+ table = _require_table(value, path)
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ return _resolve_ref(table["ref"], "files", definitions, f"{path}.ref")
+ return _parse_file_predicate(value, path)
+
+
+def _parse_bugzilla_predicate_ref(
+ value: object, path: str, definitions: Definitions
+) -> BugzillaPredicate:
+ table = _require_table(value, path)
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ return _resolve_ref(table["ref"], "bugzilla", definitions, f"{path}.ref")
+ return _parse_bugzilla_predicate(value, path)
+
+
+def _parse_review_predicate_ref(
+ value: object, path: str, definitions: Definitions
+) -> ReviewPredicate:
+ table = _require_table(value, path)
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ return _resolve_ref(table["ref"], "review", definitions, f"{path}.ref")
+ return _parse_review_predicate(value, path)
+
+
+def _parse_patch_predicate_ref(
+ value: object, path: str, definitions: Definitions
+) -> PatchPredicate:
+ table = _require_table(value, path)
+ if "ref" in table:
+ if len(table) != 1:
+ raise ReviewContextValidationError(
+ f"{path}.ref: cannot combine with fields"
+ )
+ return _resolve_ref(table["ref"], "patch", definitions, f"{path}.ref")
+ return _parse_patch_predicate(value, path)
+
+
+def _parse_all_predicate(
+ value: object, path: str, definitions: Definitions
+) -> Predicate:
+ return AllPredicate(_parse_predicate_list(value, path, definitions))
+
+
+def _parse_any_predicate(
+ value: object, path: str, definitions: Definitions
+) -> Predicate:
+ return AnyPredicate(_parse_predicate_list(value, path, definitions))
+
+
+def _parse_not_predicate(
+ value: object, path: str, definitions: Definitions
+) -> Predicate:
+ return NotPredicate(_parse_predicate(value, path, definitions))
+
+
+def _parse_any_file_predicate(
+ value: object, path: str, definitions: Definitions
+) -> Predicate:
+ return AnyFilePredicate(_parse_file_predicate_ref(value, path, definitions))
+
+
+def _parse_all_files_predicate(
+ value: object, path: str, definitions: Definitions
+) -> Predicate:
+ return AllFilesPredicate(_parse_file_predicate_ref(value, path, definitions))
+
+
+PredicateParser: TypeAlias = Callable[[object, str, Definitions], Predicate]
+
+_PREDICATE_PARSERS: dict[str, PredicateParser] = {
+ "all": _parse_all_predicate,
+ "any": _parse_any_predicate,
+ "not": _parse_not_predicate,
+ "any_file": _parse_any_file_predicate,
+ "all_files": _parse_all_files_predicate,
+ "bugzilla": _parse_bugzilla_predicate_ref,
+ "review": _parse_review_predicate_ref,
+ "patch": _parse_patch_predicate_ref,
+}
+
+
+def _parse_predicate(value: object, path: str, definitions: Definitions) -> Predicate:
+ """Parse one predicate node.
+
+ Each TOML predicate object must have exactly one key. Dispatching through a
+ table keeps the grammar visible and makes adding a predicate a one-line
+ change plus a parser function.
+ """
+ table = _require_table(value, path)
+ keys = set(table)
+ _reject_unknown_keys(path, table, set(_PREDICATE_PARSERS))
+ if len(keys) != 1:
+ raise ReviewContextValidationError(f"{path}: expected exactly one predicate")
+ key = next(iter(keys))
+ return _PREDICATE_PARSERS[key](table[key], f"{path}.{key}", definitions)
+
+
+def _parse_predicate_list(
+ value: object, path: str, definitions: Definitions
+) -> list[Predicate]:
+ if not isinstance(value, list) or not value:
+ raise ReviewContextValidationError(f"{path}: expected non-empty list")
+ return [
+ _parse_predicate(child, f"{path}[{index}]", definitions)
+ for index, child in enumerate(value)
+ ]
+
+
+def _parse_action(value: object, path: str) -> RuleAction:
+ """Parse one load action.
+
+ Load actions are intentionally small: validate the schema here, and leave
+ trust policy and network behavior to the runtime loader.
+ """
+ table = _require_table(value, path)
+ action_type = table.get("type")
+ if action_type == "file":
+ _reject_unknown_keys(path, table, {"type", "path", "repo", "branch", "kind"})
+ if "path" not in table:
+ raise ReviewContextValidationError(f"{path}.path: required for file")
+ return LoadFileAction(
+ type="file",
+ path=_require_string(table["path"], f"{path}.path"),
+ repo=_optional_string(table.get("repo"), f"{path}.repo"),
+ branch=_optional_string(table.get("branch"), f"{path}.branch"),
+ kind=_optional_string(table.get("kind"), f"{path}.kind"),
+ )
+ if action_type == "fetch_revision":
+ _reject_unknown_keys(path, table, {"type", "revision", "repo", "hash", "kind"})
+ revision = _optional_string(table.get("revision"), f"{path}.revision")
+ repo = _optional_string(table.get("repo"), f"{path}.repo")
+ commit_hash = _optional_string(table.get("hash"), f"{path}.hash")
+ if not revision and not (repo and commit_hash):
+ raise ReviewContextValidationError(
+ f"{path}: fetch_revision requires revision or repo+hash"
+ )
+ return FetchRevisionAction(
+ type="fetch_revision",
+ revision=revision,
+ repo=repo,
+ hash=commit_hash,
+ kind=_optional_string(table.get("kind"), f"{path}.kind"),
+ )
+ if action_type is None:
+ raise ReviewContextValidationError(f"{path}.type: required")
+ raise ReviewContextValidationError(
+ f"{path}.type: unknown action type {action_type!r}"
+ )
+
+
+def _parse_rule(value: object, index: int, definitions: Definitions) -> Rule:
+ path = f"rules[{index}]"
+ table = _require_table(value, path)
+ _reject_unknown_keys(
+ path,
+ table,
+ {"name", "description", "owners", "priority", "when", "load"},
+ )
+ if "name" not in table:
+ raise ReviewContextValidationError(f"{path}.name: required")
+ if "when" not in table:
+ raise ReviewContextValidationError(f"{path}.when: required")
+ if "load" not in table:
+ raise ReviewContextValidationError(f"{path}.load: required")
+ load_value = table["load"]
+ if not isinstance(load_value, list) or not load_value:
+ raise ReviewContextValidationError(f"{path}.load: expected non-empty list")
+ return Rule(
+ name=_require_string(table["name"], f"{path}.name"),
+ description=_optional_string(table.get("description"), f"{path}.description"),
+ owners=_string_list(table.get("owners"), f"{path}.owners"),
+ priority=_require_int(table.get("priority", 0), f"{path}.priority"),
+ when=_parse_predicate(table["when"], f"{path}.when", definitions),
+ load=[
+ _parse_action(action, f"{path}.load[{action_index}]")
+ for action_index, action in enumerate(load_value)
+ ],
+ )
+
+
+def parse_review_context_data(data: dict) -> ReviewContextConfig:
+ """Validate parsed review-context.toml data."""
+ _reject_unknown_keys("", data, {"version", "policy", "definitions", "rules"})
+ version = data.get("version")
+ if version != 1:
+ raise ReviewContextValidationError("version: expected 1")
+ policy = _parse_policy(data.get("policy"))
+ definitions = _parse_definitions(data.get("definitions"))
+ rules_value = data.get("rules", [])
+ if not isinstance(rules_value, list):
+ raise ReviewContextValidationError("rules: expected list of rule tables")
+ return ReviewContextConfig(
+ version=version,
+ policy=policy,
+ definitions=definitions,
+ rules=[
+ _parse_rule(rule, index, definitions)
+ for index, rule in enumerate(rules_value)
+ ],
+ )
+
+
+def parse_review_context_toml(text: str) -> ReviewContextConfig:
+ """Parse and validate review-context.toml content."""
+ return parse_review_context_data(tomllib.loads(text))
+
+
+def validate_review_context_file(path: str | Path) -> ReviewContextConfig:
+ """Parse and validate a review-context.toml file."""
+ return parse_review_context_toml(Path(path).read_text())
+
+
+def main(argv: list[str] | None = None) -> int:
+ parser = argparse.ArgumentParser(description="Validate review-context.toml")
+ parser.add_argument(
+ "path",
+ nargs="?",
+ default="review-context.toml",
+ help="Path to review-context.toml (default: %(default)s)",
+ )
+ args = parser.parse_args(argv)
+
+ try:
+ config = validate_review_context_file(args.path)
+ except (OSError, tomllib.TOMLDecodeError, ReviewContextValidationError) as exc:
+ print(f"{args.path}: invalid: {exc}", file=sys.stderr)
+ return 1
+
+ print(f"{args.path}: valid ({len(config.rules)} rule(s))")
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py
index 7ea7526b3a..2e14ed3e6f 100644
--- a/bugbug/tools/core/platforms/phabricator.py
+++ b/bugbug/tools/core/platforms/phabricator.py
@@ -145,6 +145,42 @@ def _get_users_info_batch(user_phids: set[str]) -> dict[str, dict]:
return _get_users_info_batch_with_retry(user_phids)
+def _get_project_info_batch(project_phids: set[str]) -> dict[str, dict]:
+ if not project_phids:
+ return {}
+
+ phabricator = get_phabricator_client()
+ projects_response = phabricator.request(
+ "project.search",
+ constraints={"phids": list(project_phids)},
+ )
+
+ result = {}
+ for project_data in projects_response.get("data", []):
+ phid = project_data["phid"]
+ fields = project_data.get("fields", {})
+ result[phid] = {
+ "name": fields.get("name", phid),
+ "slug": fields.get("slug"),
+ }
+ return result
+
+
+def _get_repository_info(repository_phid: str | None) -> dict | None:
+ if not repository_phid:
+ return None
+
+ phabricator = get_phabricator_client()
+ repositories_response = phabricator.request(
+ "diffusion.repository.search",
+ constraints={"phids": [repository_phid]},
+ )
+ repositories = repositories_response.get("data", [])
+ if not repositories:
+ return None
+ return repositories[0]
+
+
def _sanitize_comments(comments: list, users_info: dict[str, dict]) -> tuple[list, int]:
"""Sanitize comments by filtering untrusted content.
@@ -512,6 +548,19 @@ def author_phid(self) -> str:
def diff_author_phid(self) -> str:
return self._diff_metadata["authorPHID"]
+ @cached_property
+ def _revision_search_metadata(self) -> dict:
+ phabricator = get_phabricator_client()
+ response = phabricator.request(
+ "differential.revision.search",
+ constraints={"ids": [self.revision_id]},
+ attachments={"reviewers": True},
+ )
+ revisions = response.get("data", [])
+ if not revisions:
+ return {}
+ return revisions[0]
+
@property
def stack_graph(self) -> dict:
return self._revision_metadata["fields"].get("stackGraph", {})
@@ -560,11 +609,45 @@ def _all_comments(self) -> list:
@cached_property
def _users_info(self) -> dict[str, dict]:
- user_phids = {c.author_phid for c in self._all_comments} | {self.author_phid}
+ user_phids = (
+ {c.author_phid for c in self._all_comments}
+ | {self.author_phid}
+ | self.reviewer_user_phids
+ )
if self.author_phid != self.diff_author_phid:
user_phids.add(self.diff_author_phid)
return _get_users_info_batch(user_phids)
+ @cached_property
+ def _project_info(self) -> dict[str, dict]:
+ return _get_project_info_batch(self.reviewer_project_phids)
+
+ @cached_property
+ def reviewers(self) -> list[dict]:
+ if not self._revision_metadata["fields"].get("repositoryPHID"):
+ return []
+ return (
+ self._revision_search_metadata.get("attachments", {})
+ .get("reviewers", {})
+ .get("reviewers", [])
+ )
+
+ @cached_property
+ def reviewer_user_phids(self) -> set[str]:
+ return {
+ reviewer["reviewerPHID"]
+ for reviewer in self.reviewers
+ if reviewer.get("reviewerPHID", "").startswith("PHID-USER-")
+ }
+
+ @cached_property
+ def reviewer_project_phids(self) -> set[str]:
+ return {
+ reviewer["reviewerPHID"]
+ for reviewer in self.reviewers
+ if reviewer.get("reviewerPHID", "").startswith("PHID-PROJ-")
+ }
+
def _format_user_display(self, user_phid: str) -> str:
info = self._users_info.get(user_phid, {})
email = info.get("email", "Unknown")
@@ -573,6 +656,49 @@ def _format_user_display(self, user_phid: str) -> str:
return f"{real_name} ({email})"
return email
+ def _format_reviewer_display(self, reviewer: dict) -> str:
+ reviewer_phid = reviewer["reviewerPHID"]
+ if reviewer_phid.startswith("PHID-USER-"):
+ name = self._format_user_display(reviewer_phid)
+ elif reviewer_phid.startswith("PHID-PROJ-"):
+ project = self._project_info.get(reviewer_phid, {})
+ slug = project.get("slug")
+ name = slug or project.get("name", reviewer_phid)
+ else:
+ name = reviewer_phid
+
+ status = reviewer.get("status")
+ if reviewer.get("isBlocking"):
+ status = "blocking"
+ if status:
+ return f"{name} [{status}]"
+ return name
+
+ @property
+ def reviewer_summary(self) -> str | None:
+ if not self.reviewers:
+ return None
+ return ", ".join(self._format_reviewer_display(r) for r in self.reviewers)
+
+ @cached_property
+ def repository_info(self) -> dict | None:
+ return _get_repository_info(
+ self._revision_metadata["fields"].get("repositoryPHID")
+ )
+
+ @property
+ def target_repository(self) -> str | None:
+ if not self.repository_info:
+ return None
+ fields = self.repository_info.get("fields", {})
+ return fields.get("shortName") or fields.get("name")
+
+ @property
+ def target_repository_default_branch(self) -> str | None:
+ if not self.repository_info:
+ return None
+ return self.repository_info.get("fields", {}).get("defaultBranch")
+
@property
def author(self) -> str:
return self._format_user_display(self.author_phid)
@@ -633,6 +759,15 @@ def to_md(self) -> str:
md_lines.append(f"- **URI**: {self.revision_uri}")
md_lines.append(f"- **Revision Author**: {self.author}")
md_lines.append(f"- **Status**: {self.revision_status}")
+ if self.reviewer_summary:
+ md_lines.append(f"- **Reviewers**: {self.reviewer_summary}")
+ if self.target_repository:
+ repository_line = self.target_repository
+ if self.target_repository_default_branch:
+ repository_line += (
+ f" (default branch: {self.target_repository_default_branch})"
+ )
+ md_lines.append(f"- **Target Repository**: {repository_line}")
md_lines.append(f"- **Created**: {self.date_created.strftime(date_format)}")
md_lines.append(f"- **Modified**: {self.date_modified.strftime(date_format)}")
bug_id = self._revision_metadata["fields"].get("bugzilla.bug-id") or "N/A"
diff --git a/docs/code-review-context-example.toml b/docs/code-review-context-example.toml
new file mode 100644
index 0000000000..b6083d87d1
--- /dev/null
+++ b/docs/code-review-context-example.toml
@@ -0,0 +1,232 @@
+# Proposed review-context.toml external content rules.
+#
+# This is a design sketch for the next rule language, not the format currently
+# accepted by the loader.
+#
+# This file is intentionally verbose. It shows the major language features and
+# comments on what each one does.
+
+# Version is required so the parser can reject files written for a future
+# incompatible format.
+version = 1
+
+# GitHub loads are restricted. The current review-context repo is always
+# allowed. Entries ending in `/` allow every repo under that organization;
+# entries in `org/repo` form allow a single repo.
+[policy.github]
+allowed_repos = [
+ "mozilla/",
+ "web-platform-tests/wpt",
+ "whatwg/html",
+]
+
+# Definitions are reusable predicate fragments. They are expanded structurally,
+# as if the referenced table had been written inline. The parser should reject
+# unknown refs, cycles, and refs to the wrong predicate kind.
+[definitions.files.media_impl]
+include = ["dom/media/**"]
+exclude = [
+ "dom/media/test/**",
+ "dom/media/gtest/**",
+ "dom/media/webaudio/test/**",
+ "dom/media/**/Generated*.cpp",
+]
+ext = [".cpp", ".h", ".mm"]
+
+[definitions.files.frontend_ui]
+include = [
+ "browser/components/**",
+ "browser/base/content/**",
+ "toolkit/components/**",
+ "toolkit/content/**",
+]
+exclude = [
+ "browser/components/**/test/**",
+ "toolkit/components/**/test/**",
+ "**/*.snap",
+]
+ext = [".js", ".mjs", ".sys.mjs", ".html", ".xhtml", ".css", ".scss"]
+
+[definitions.files.docs]
+ext = [".md", ".rst"]
+
+# Metadata predicates can be reusable too. A predicate can only reference a
+# definition from its own namespace: `bugzilla` refs `definitions.bugzilla.*`,
+# `review` refs `definitions.review.*`, and `patch` refs
+# `definitions.patch.*`.
+[definitions.bugzilla.dom_api]
+component = ["Core::DOM: Core & HTML", "Core::DOM: Events"]
+
+[definitions.review.frontend_reviewers]
+# `reviewer` matches both blocking and non-blocking reviewers.
+reviewer = ["frontend-reviewers"]
+
+[definitions.review.dom_reviewers]
+reviewer = ["dom-reviewers", "webidl-reviewers"]
+
+[definitions.review.blocking_media_reviewers]
+blocking_reviewer = ["padenot", "media-reviewers"]
+
+[definitions.patch.backport_repositories]
+is_backport = true
+repository = ["firefox-beta", "firefox-release", "firefox-esr"]
+
+# `priority` is optional. It defaults to 0 and only controls ordering among
+# loaded external content blocks. Higher priority loads first. Ties preserve
+# declaration order. All external content is injected before the patch diff.
+[[rules]]
+name = "Media: implementation"
+description = "Load media architecture and realtime-safety guidance for C++ media changes."
+owners = ["media-reviewers"]
+priority = 80
+
+# `all` means every child predicate must match.
+# `any_file` means at least one changed file must match the file predicate.
+# The file predicate uses the reusable definition above.
+when = { all = [
+ { any_file = { ref = "files.media_impl" } },
+] }
+
+# `kind` is an optional free-form audit/prompt hint. Load failures are
+# non-fatal: the loader logs an error and continues with remaining loads.
+load = [
+ { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" },
+ { type = "file", repo = "mozilla/cubeb", branch = "main", path = "docs/realtime-programming.md", kind = "policy" },
+]
+
+
+[[rules]]
+name = "Firefox frontend: JS, HTML, CSS"
+description = "Load frontend conventions for browser chrome, toolkit UI, and component styles."
+owners = ["frontend-reviewers"]
+priority = 60
+
+# `any` means at least one child predicate must match.
+# This rule fires for frontend UI files when a frontend reviewer/group is
+# assigned.
+when = { all = [
+ { any_file = { ref = "files.frontend_ui" } },
+ { review = { ref = "review.frontend_reviewers" } },
+] }
+
+load = [
+ { type = "file", path = ".claude/review/firefox-frontend.md", kind = "style-guide" },
+ { type = "file", path = "browser/docs/frontend/performance.md", kind = "performance" },
+]
+
+
+[[rules]]
+name = "DOM API implementation"
+description = "Load WebIDL and DOM binding guidance for changes that expose or implement web APIs."
+owners = ["dom-reviewers"]
+priority = 90
+
+# Boolean composition can be nested. This rule fires for:
+# - any WebIDL file under dom/,
+# - C++/header changes under dom/ when Bugzilla says this is a DOM bug, or
+# - DOM binding implementation files when a DOM/WebIDL reviewer is assigned.
+when = { any = [
+ { any_file = { include = ["dom/**"], ext = [".webidl"] } },
+ { all = [
+ { any_file = { include = ["dom/**"], ext = [".cpp", ".h"] } },
+ { bugzilla = { ref = "bugzilla.dom_api" } },
+ ] },
+ { all = [
+ { any_file = { include = ["dom/bindings/**"], ext = [".cpp", ".h", ".py"] } },
+ { review = { ref = "review.dom_reviewers" } },
+ ] },
+] }
+
+load = [
+ { type = "file", path = ".claude/review/dom-api.md", kind = "api-contract" },
+ { type = "file", path = "dom/docs/webidl-bindings.md", kind = "reference" },
+]
+
+
+[[rules]]
+name = "Docs-only change"
+description = "Load writing and API documentation guidance when every changed file is documentation."
+owners = ["docs-reviewers"]
+
+# `all_files` means every changed file must match the file predicate. This is
+# useful for patch-shape rules such as docs-only, tests-only, or generated-only.
+# This rule intentionally omits priority; it defaults to 0.
+when = { all_files = { ref = "files.docs" } }
+
+load = [
+ { type = "file", path = ".claude/review/docs.md", kind = "style-guide" },
+]
+
+
+[[rules]]
+name = "Security-sensitive DOM changes"
+description = "Load security guidance for non-test security-relevant DOM changes."
+owners = ["sec-reviewers", "dom-reviewers"]
+priority = 100
+
+# `not` negates its child predicate. This rule fires when some DOM security
+# file changed, but the patch is not test-only.
+when = { all = [
+ { any_file = { include = ["dom/security/**"], ext = [".cpp", ".h", ".webidl"] } },
+ { not = { all_files = { include = ["dom/security/test/**"] } } },
+] }
+
+load = [
+ { type = "file", path = ".claude/review/security-sensitive.md", kind = "policy" },
+]
+
+
+[[rules]]
+name = "Backport risk checklist"
+description = "Load release-branch guidance for patches targeting a backport repository or branch."
+owners = ["release-managers"]
+priority = 70
+
+# `patch.repository` and `patch.is_backport` come from trusted patch metadata.
+# For GitHub entrypoints, repository is org/project. For Phabricator reviews,
+# repository is the target Phabricator repository exposed by the Mozilla MCP.
+when = { all = [
+ { patch = { ref = "patch.backport_repositories" } },
+] }
+
+load = [
+ { type = "file", path = ".claude/review/backport-risk.md", kind = "release-policy" },
+]
+
+
+[[rules]]
+name = "Blocking reviewer guidance"
+description = "Load reviewer-specific expectations when a blocking media reviewer is assigned."
+owners = ["media-reviewers"]
+priority = 85
+
+# Review identities are Phabricator emails or review groups.
+# `blocking_reviewer` narrows to reviewers whose approval is required before
+# landing. Those reviewers also match ordinary `reviewer` predicates.
+when = { all = [
+ { any_file = { ref = "files.media_impl" } },
+ { review = { ref = "review.blocking_media_reviewers" } },
+] }
+
+# Dedupe is global per review. If another matched rule already loaded
+# media-architecture.md, this action records this rule in the manifest but does
+# not fetch or inject the same source a second time.
+load = [
+ { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" },
+ { type = "file", path = ".claude/review/media-reviewer-checklist.md", kind = "checklist" },
+]
+
+
+[[rules]]
+name = "Related revision context"
+description = "Load another reviewed diff when a patch depends on behavior from a known revision."
+owners = ["media-reviewers"]
+
+# `fetch_revision` injects a trusted diff as context. Phabricator revisions are
+# fetched through the platform client/MCP path; GitHub commits use repo+hash.
+when = { any_file = { include = ["dom/media/mediacontrol/**"] } }
+
+load = [
+ { type = "fetch_revision", revision = "D303229", kind = "related-diff" },
+ { type = "fetch_revision", repo = "mozilla/cubeb", hash = "0123456789abcdef0123456789abcdef01234567", kind = "related-diff" },
+]
diff --git a/docs/code-review-skills.md b/docs/code-review-skills.md
new file mode 100644
index 0000000000..cabbf3ab9b
--- /dev/null
+++ b/docs/code-review-skills.md
@@ -0,0 +1,395 @@
+# Code Review Skills
+
+The code review agent can load external knowledge at review time, injecting it
+as context before the LLM sees the patch. This lets domain experts encode their
+knowledge once, in a form the agent uses automatically whenever it reviews
+relevant code.
+
+## How it works
+
+When the agent reviews a patch, it:
+
+1. Fetches `review-context.toml` from the root of a GitHub repository passed to
+ the review entrypoint as `review_context_repo` (and optionally `review_context_branch`).
+2. Parses the changed files from the diff.
+3. Matches each rule against the changed files (and optionally the patch's
+ associated Bugzilla component, fetched from BMO).
+4. For each matched rule, fetches the referenced skill or documentation files.
+5. Injects the content as `` blocks inside an ``
+ section in the review prompt, before the patch.
+
+Pass the GitHub repository containing `review-context.toml` to `patch_review`,
+using `org/project` syntax. `review_context_branch` defaults to `main`; pass another
+branch when reviewing a backport or another release branch.
+
+```
+/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \
+ review_context_repo="mozilla-firefox/firefox" \
+ review_context_branch="main"
+```
+
+---
+
+## For bugbug developers
+
+### Key files
+
+| File | Purpose |
+| --------------------------------------------------- | ------------------------------------------------------------------ |
+| `bugbug/tools/code_review/review_context_schema.py` | Stdlib-only rule schema and validator CLI |
+| `bugbug/tools/code_review/review_context.py` | Rule engine: parses rules, matches files, loads external content |
+| `bugbug/tools/code_review/data_types.py` | `Skill` model with async HTTP fetch and frontmatter stripping |
+| `bugbug/tools/code_review/agent.py` | Calls `load_external_content_for_diff` in `run()`, formats context |
+| `bugbug/tools/code_review/prompts/system.md` | System prompt template |
+| `bugbug/tools/code_review/prompts/first_message.md` | First user message template (contains `{external_context}` slot) |
+
+### Prompt templates
+
+Both `system.md` and `first_message.md` are plain Markdown files loaded at
+import time. Edit them directly — no Python changes needed. They use Python
+`str.format()` placeholders (`{target_software}`, `{patch}`,
+`{external_context}`, etc.).
+
+The `{external_context}` slot in `first_message.md` is filled by
+`format_external_content()` from `review_context.py` when rules match. It is an
+empty string otherwise, so no blank section appears in the prompt.
+
+Loaded content is preceded by an `` block. The
+manifest excludes the body text and records each item's name, source, source
+type, matched rules, trust reason, byte count, and SHA-256 digest. The same
+manifest is also returned in `CodeReviewToolResponse.details["external_content"]`.
+
+All currently injected external content is trusted:
+
+- `file` content is fetched from a human-reviewed GitHub repository.
+- `fetch_revision` content comes from configured review platforms.
+- GitHub `file` loads are restricted by the repository allow-list in
+ `review-context.toml`; the repository containing the rules file is always
+ allowed.
+
+### Caching
+
+The parsed rules file is cached in-process for a short TTL, keyed by
+`review_context_repo`, `review_context_branch`, and rules path. External content
+is fetched for each review so the audit manifest reflects exactly what was
+injected for that review. The shared `httpx.AsyncClient` from
+`get_http_client()` handles connection reuse.
+
+### Metadata predicates
+
+Implemented via `libmozdata.Bugzilla` with `include_fields=["product", "component"]`.
+The patch's associated bug ID is read from `patch.bug_id` (available on
+`PhabricatorPatch`; other patch types pass `None`, which fails closed). The
+component string compared against the rule is `"Product::Component"`.
+
+The schema accepts `bugzilla`, `review`, and `patch` predicates. Runtime
+matching currently implements `bugzilla.component`; `bugzilla.product`,
+`bugzilla.keywords`, `bugzilla.severity`, `review.*`, and `patch.*` are parsed
+and validated but fail closed until trusted metadata is wired into the matcher.
+
+### `fetch_revision`
+
+Fetches the raw diff of another revision for use as context. Supports:
+
+- **Phabricator**: `{ type = "fetch_revision", revision = "D12345" }` — calls
+ the Phabricator API via `get_phabricator_client()`.
+- **GitHub**: `{ type = "fetch_revision", repo = "org/repo", hash = "abc123" }`
+ — uses the GitHub API with `Accept: application/vnd.github.diff`.
+
+---
+
+## For Mozilla developers: authoring skills and rules
+
+### Overview
+
+You add two things to your repository:
+
+- One or more **skill or documentation files** — Markdown or RST documents
+ with expert knowledge, guidelines, or relevant specs. These can be existing
+ docs already in the tree, or new files written specifically as review guides.
+- Entries in **`review-context.toml`** at the repo root — rules that say which
+ files to load when certain code paths are touched.
+
+The review agent picks these up automatically on the next review of a matching
+patch.
+
+### Skill file format
+
+Skill files are Markdown (`.md`); documentation files referenced as context can
+also be RST (`.rst`). All formats accept optional YAML frontmatter. The
+frontmatter is stripped before injection; only the body reaches the LLM. See an
+[existing example on Searchfox](https://searchfox.org/mozilla-central/source/.claude/skills/firefox-desktop-frontend/SKILL.md).
+
+```markdown
+---
+name: dom-audio
+description: Web Audio API review guidance
+---
+
+## Web Audio review checklist
+
+- `AudioContext` must not be created on a background thread.
+- `AudioNode` lifetimes are graph-managed; do not hold raw pointers.
+- Prefer `MediaStreamTrack` over raw PCM where the spec allows it.
+```
+
+Conventional location for dedicated skill files:
+
+```
+.claude/skills//SKILL.md
+```
+
+### `review-context.toml` format
+
+`review-context.toml` is parsed as TOML and then validated against the rule
+schema before any rule is matched. Unknown rule fields, unknown action types,
+and malformed `fetch_revision` actions are rejected.
+
+You can validate a rules file without running a review:
+
+```
+bugbug-validate-review-context review-context.toml
+```
+
+The validator implementation lives in
+`bugbug/tools/code_review/review_context_schema.py` and intentionally uses only the
+Python standard library. A target repository can either run the bugbug CLI or
+copy that single file into its own tests to validate `review-context.toml`.
+Complete TOML examples in this document are marked as `toml review-context` and
+validated by bugbug's test suite to keep the docs and parser from drifting.
+For a complete validated rules file, see `docs/code-review-context-example.toml`.
+
+```toml review-context
+version = 1
+
+[[rules]]
+name = "DOM: Web Audio C++"
+when = { any_file = { include = ["dom/media/webaudio/**"], ext = [".cpp", ".h"] } }
+load = [
+ { type = "file", path = ".claude/skills/dom-audio/SKILL.md" },
+]
+```
+
+Multiple rules can fire for a single patch. Actions are deduplicated across
+rules, so the same file is never fetched twice.
+
+All loaded external content is injected before the patch diff. `priority` is
+optional and only affects ordering among loaded external content blocks:
+matched rules are ordered by descending priority, defaulting to `0`; ties keep
+declaration order. Load entries keep their order within a rule. If the same
+resolved source is requested more than once, it is loaded once at its first
+ordered position and the audit manifest records every matched rule that
+requested it.
+
+Load failures are non-fatal. The loader logs an error and continues with the
+remaining loads.
+
+### Rule matching
+
+Each rule has a required `when` predicate. Boolean predicates compose matching:
+`all` requires every child to match, `any` requires at least one child to match,
+and `not` negates its child.
+
+Path matching is defined over repository-relative paths from the patch, not
+over local VCS checkout paths. Paths are normalized to POSIX-style separators
+(`/`) with no leading `./`. File predicates use Python `fnmatch.fnmatchcase`, so
+matching is case-sensitive on every platform, including Windows and macOS. That
+keeps rule behavior tied to Git paths rather than host filesystem casing.
+
+The supported glob syntax is Python `fnmatchcase`: `*`, `?`, and character
+classes such as `[ch]` and `[!0-9]`. In Python `fnmatchcase`, `*` can match
+`/`, so patterns like `dom/media/**` match everything below that prefix.
+
+File quantifiers are explicit:
+
+```toml
+# At least one changed file must match all predicates in the object.
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+
+# Every changed file must match all predicates in the object.
+when = { all_files = { ext = [".md", ".rst"] } }
+```
+
+`any_file` is the normal implementation-code case. `all_files` is for
+patch-shape rules such as docs-only, tests-only, or generated-only changes.
+
+### Reusable predicates
+
+Named predicate definitions let you reuse path sets or metadata groups across
+rules. Definitions are referenced explicitly with `ref` and expanded
+structurally, as if the referenced table had been written inline.
+
+```toml review-context
+version = 1
+
+[definitions.files.media_impl]
+include = ["dom/media/**"]
+exclude = ["dom/media/test/**", "dom/media/gtest/**"]
+ext = [".cpp", ".h", ".mm"]
+
+[[rules]]
+name = "Media: implementation"
+when = { any_file = { ref = "files.media_impl" } }
+load = [
+ { type = "file", path = ".claude/skills/dom-media/SKILL.md" },
+]
+```
+
+Definitions are validated like inline predicates. References are namespace
+checked: `files.*` refs are valid only in file predicates, `bugzilla.*` only in
+Bugzilla predicates, `review.*` only in review predicates, and `patch.*` only
+in patch predicates. Unknown references and references combined with inline
+fields are schema errors.
+
+### Metadata trigger fields
+
+File predicates are separate from external metadata predicates. The parser
+validates these field names and their value types. At runtime, only
+`bugzilla.component` currently affects matching; the other fields are accepted
+by the schema for forward-compatible rule files but fail closed.
+
+| Namespace | Field | Type | Runtime behavior |
+| ---------- | ------------------- | --------------- | -------------------------------------------------------------- |
+| `bugzilla` | `component` | list of strings | Matches exact `Product::Component` from Bugzilla |
+| `bugzilla` | `product` | list of strings | Parsed and validated; currently fails closed |
+| `bugzilla` | `keywords` | list of strings | Parsed and validated; currently fails closed |
+| `bugzilla` | `severity` | list of strings | Parsed and validated; currently fails closed |
+| `review` | `author` | list of strings | Parsed and validated; currently fails closed |
+| `review` | `reviewer` | list of strings | Parsed and validated; currently fails closed |
+| `review` | `blocking_reviewer` | list of strings | Parsed and validated; currently fails closed |
+| `patch` | `repository` | list of strings | Parsed and validated; currently fails closed |
+| `patch` | `is_backport` | boolean | Parsed and validated; currently fails closed |
+
+When review metadata matching is wired up, `reviewer` should match both
+blocking and non-blocking reviewers. Use `blocking_reviewer` only when the rule
+specifically needs a reviewer whose approval is required before landing.
+
+Example — load a skill when C++ or HTML files change under `dom/media/` (which
+catches both implementation files and their tests):
+
+```toml review-context
+version = 1
+
+[[rules]]
+name = "DOM: Media C++ and tests"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h", ".html"] } }
+load = [
+ { type = "file", path = ".claude/skills/dom-media/SKILL.md" },
+]
+```
+
+### Action types
+
+#### `file` — fetch a file from a GitHub repository
+
+```toml
+# File from the same repo and branch as review-context.toml (default)
+{ type = "file", path = ".claude/skills/dom-audio/SKILL.md" }
+
+# File from another repo (e.g., cubeb's real-time programming guidelines)
+{ type = "file", path = ".claude/skills/real-time-programming/SKILL.md",
+ repo = "mozilla/cubeb" }
+
+# Explicit branch
+{ type = "file", path = ".claude/skills/dom-audio/SKILL.md",
+ repo = "mozilla-firefox/firefox", branch = "main" }
+```
+
+Constructs a raw GitHub URL:
+`https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/{path}`.
+When `repo` is omitted, the repo is the `review_context_repo` passed to the review
+entrypoint. Same-repo content uses `review_context_branch`; cross-repo content defaults
+to `main` unless the action specifies `branch`.
+
+Any Markdown or RST file in the repository can be referenced — skill files,
+architecture docs, READMEs, spec notes, etc.
+
+`kind` is optional on `file` and `fetch_revision` actions. It is a free-form
+audit hint recorded in the action object in the external content manifest; the
+loader does not interpret it.
+
+#### GitHub repository allow-list
+
+GitHub loads are restricted to keep review context auditable:
+
+- The repository containing `review-context.toml` is always allowed.
+- Entries ending in `/`, such as `mozilla/`, allow every repo under that
+ organization.
+- Entries in `org/repo` form allow a single repo.
+
+```toml
+[policy.github]
+allowed_repos = [
+ "mozilla/",
+ "web-platform-tests/wpt",
+ "whatwg/html",
+]
+```
+
+Repository names are normalized to lowercase before checking the policy. If a
+`load` action references a disallowed repository, the loader does not fetch it.
+The failure is non-fatal: it logs an error, skips that load, and continues with
+the remaining loads. Skipped loads are not included in the external content
+manifest because no content was injected.
+
+#### `fetch_revision` — diff of another revision as context
+
+Injects the raw diff of a related revision — useful for providing context about
+a stack parent or a related patch.
+
+```toml
+# Phabricator revision
+{ type = "fetch_revision", revision = "D12345" }
+
+# GitHub commit (use a git commit hash — Firefox moved from Mercurial to GitHub)
+{ type = "fetch_revision", repo = "padenot/cubeb", hash = "abc123def456" }
+```
+
+### Testing your rules and skills
+
+Pass your work-in-progress rules and skill files directly to `patch_review`
+via the MCP. Rules are merged by name into the fetched `review-context.toml`
+(replacing a rule with the same name, or appending if new). Skill content is
+injected directly, bypassing the network fetch for matching names.
+
+From Claude Code, trigger a review with your local files:
+
+```
+/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \
+ review_context_repo="mozilla-firefox/firefox" \
+ extra_context_toml="$(cat review-context.toml)" \
+ content_overrides='{".claude/skills/dom-audio/SKILL.md": "skill content here"}'
+```
+
+`content_overrides` is a JSON object (as a string). For multi-line skill files,
+build the JSON separately:
+
+```bash
+python3 -c "import json,sys; print(json.dumps({'.claude/skills/dom-audio/SKILL.md': open('.claude/skills/dom-audio/SKILL.md').read()}))"
+```
+
+Then paste the output as the `content_overrides` value.
+
+This simulates the state as if your changes had already landed — no push
+required. If a skill is missing, verify the rule conditions match the patch's
+changed files (`any_file` / `all_files` against the actual `+++ b/` paths in
+the diff).
+
+### Gotchas
+
+**Rules are evaluated against the post-patch file list.** Paths in file predicates
+should match the `+++ b/` filenames in the diff.
+
+**Glob patterns use `fnmatchcase` semantics.** `dom/media/**` matches
+`dom/media/foo/bar.cpp`.
+
+**Frontmatter is stripped automatically.** You can put metadata (name,
+description, owner) in frontmatter without it polluting the injected context.
+
+**Load failures are non-fatal.** The review proceeds without that content, and
+the failure is logged at ERROR level and captured by Sentry. Check the URL
+manually if content you expect is not appearing.
+
+**Bugzilla component rules require the patch to have an associated bug.** On
+non-Phabricator patches or patches without a bug link, Bugzilla component rules
+never match.
diff --git a/pyproject.toml b/pyproject.toml
index abf83759b1..d740c53fb4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -116,6 +116,7 @@ bugbug-data-github = "scripts.github_issue_retriever:main"
bugbug-fixed-comments = "scripts.inline_comments_data_collection:main"
bugbug-ci-failures-retriever = "scripts.retrieve_ci_failures:main"
bugbug-try-pushes-retriever = "scripts.retrieve_try_pushes:main"
+bugbug-validate-review-context = "bugbug.tools.code_review.review_context_schema:main"
[tool.hatch.version]
path = "VERSION"
diff --git a/services/mcp/src/bugbug_mcp/server.py b/services/mcp/src/bugbug_mcp/server.py
index a5179ee7ff..8a842352d1 100644
--- a/services/mcp/src/bugbug_mcp/server.py
+++ b/services/mcp/src/bugbug_mcp/server.py
@@ -1,6 +1,8 @@
"""MCP server for Firefox Development."""
import functools
+import json
+import logging
import os
from pathlib import Path
from typing import Annotated
@@ -12,7 +14,15 @@
from fastmcp.resources import FileResource
from pydantic import Field
-from bugbug.tools.code_review.prompts import SYSTEM_PROMPT_TEMPLATE
+from bugbug.tools.code_review.data_types import LocalPatch
+from bugbug.tools.code_review.prompts import (
+ LOCAL_SYSTEM_PROMPT_TEMPLATE,
+ SYSTEM_PROMPT_TEMPLATE,
+)
+from bugbug.tools.code_review.review_context import (
+ format_external_content,
+ load_external_content_for_diff,
+)
from bugbug.tools.core.platforms.bugzilla import SanitizedBug
from bugbug.tools.core.platforms.phabricator import (
PhabricatorPatch,
@@ -20,6 +30,7 @@
)
mcp = FastMCP("Firefox Development MCP Server")
+logger = logging.getLogger(__name__)
@functools.cache
@@ -29,30 +40,151 @@ def get_code_review_tool():
return CodeReviewTool.create()
-@mcp.prompt()
-async def patch_review(
- patch_url: str = Field(description="URL to the Phabricator patch to review."),
+@functools.cache
+def get_local_code_review_tool():
+ """Minimal tool for local diff prompt assembly — no LLM calls on the server."""
+ from bugbug.tools.code_review.agent import CodeReviewTool
+
+ return CodeReviewTool.create(
+ review_comments_db=None,
+ suggestion_filterer=None,
+ )
+
+
+async def _patch_review_impl(
+ patch_url: str | None,
+ diff: str | None,
+ commit_message: str | None,
+ review_context_repo: str | None,
+ review_context_branch: str,
+ extra_context_toml: str | None,
+ content_overrides: str | None,
) -> str:
- """Review a code patch from Phabricator."""
- parsed_url = urlparse(patch_url)
- if (
- parsed_url.netloc == "phabricator.services.mozilla.com"
- and parsed_url.path.startswith("/D")
- ):
- revision_id = int(parsed_url.path[2:])
+ if diff:
+ patch = LocalPatch(diff, commit_message=commit_message or "")
+ elif patch_url:
+ parsed_url = urlparse(patch_url)
+ if (
+ parsed_url.netloc == "phabricator.services.mozilla.com"
+ and parsed_url.path.startswith("/D")
+ ):
+ revision_id = int(parsed_url.path[2:])
+ else:
+ raise ValueError(f"Unsupported patch URL: {patch_url}")
+ patch = PhabricatorPatch(revision_id=revision_id)
else:
- raise ValueError(f"Unsupported patch URL: {patch_url}")
+ raise ValueError("Provide either patch_url or diff.")
+
+ tool = get_local_code_review_tool() if diff else get_code_review_tool()
+ prompt_template = LOCAL_SYSTEM_PROMPT_TEMPLATE if diff else SYSTEM_PROMPT_TEMPLATE
+ system_prompt = prompt_template.format(target_software=tool.target_software)
+ patch_summary = "" if diff else tool.patch_summarizer.run(patch)
+
+ parsed_overrides: dict[str, str] | None = None
+ if content_overrides:
+ try:
+ parsed_overrides = json.loads(content_overrides)
+ except json.JSONDecodeError:
+ pass
+
+ external_context = ""
+ if review_context_repo:
+ bug_id = getattr(patch, "bug_id", None) if patch.has_bug else None
+ content_items = await load_external_content_for_diff(
+ patch.raw_diff,
+ review_context_repo,
+ review_context_branch=review_context_branch or "main",
+ bug_id=bug_id,
+ extra_context_toml=(
+ extra_context_toml if extra_context_toml != "None" else None
+ ),
+ content_overrides=parsed_overrides,
+ )
+ if content_items:
+ external_context = format_external_content(content_items)
+
+ initial_prompt = tool.generate_initial_prompt(
+ patch, patch_summary, external_context
+ )
+ return system_prompt + "\n\n" + initial_prompt
- patch = PhabricatorPatch(revision_id=revision_id)
- tool = get_code_review_tool()
- system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
- target_software=tool.target_software,
+@mcp.prompt()
+async def patch_review(
+ patch_url: str | None = Field(
+ default=None,
+ description="URL to the Phabricator patch to review.",
+ ),
+ diff: str | None = Field(
+ default=None,
+ description="Raw unified diff to review (for local patches not yet on Phabricator).",
+ ),
+ commit_message: str | None = Field(
+ default=None,
+ description="Commit message for the local diff (optional, used to extract bug ID, title, and Differential Revision URL).",
+ ),
+ review_context_repo: str | None = Field(
+ default=None,
+ description=(
+ "GitHub repository containing review-context.toml, in org/project form. "
+ "When omitted, no rule-based context is loaded."
+ ),
+ ),
+ review_context_branch: str = Field(
+ default="main",
+ description="GitHub branch to read review-context.toml and same-repo content from.",
+ ),
+ extra_context_toml: str | None = Field(
+ default=None,
+ description=(
+ "TOML content to merge into the fetched review-context.toml. "
+ "Rules are matched by name: same name replaces, new name appends. "
+ "Use this to test new or modified rules before landing."
+ ),
+ ),
+ content_overrides: str | None = Field(
+ default=None,
+ description=(
+ "JSON object mapping content name (file path) to body text. Overrides "
+ "the fetched content for matching items, or provides content for new "
+ "items referenced by extra_context_toml. Use this to test changes before "
+ "landing. Example: "
+ '{".claude/skills/dom-audio.md": "My guidelines."}'
+ ),
+ ),
+) -> str:
+ """Review a code patch from Phabricator or a raw local diff."""
+ return await _patch_review_impl(
+ patch_url,
+ diff,
+ commit_message,
+ review_context_repo,
+ review_context_branch,
+ extra_context_toml,
+ content_overrides,
)
- patch_summary = tool.patch_summarizer.run(patch)
- initial_prompt = tool.generate_initial_prompt(patch, patch_summary)
- return system_prompt + "\n\n" + initial_prompt
+
+@mcp.tool()
+async def patch_review_tool(
+ patch_url: str | None = None,
+ diff: str | None = None,
+ commit_message: str | None = None,
+ review_context_repo: str | None = None,
+ review_context_branch: str = "main",
+ extra_context_toml: str | None = None,
+ content_overrides: str | None = None,
+) -> str:
+ """Build the patch review prompt. Use diff= for local diffs, patch_url= for Phabricator."""
+ return await _patch_review_impl(
+ patch_url,
+ diff,
+ commit_message,
+ review_context_repo,
+ review_context_branch,
+ extra_context_toml,
+ content_overrides,
+ )
@mcp.resource(
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index 584824544d..260e291177 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -1,6 +1,8 @@
import asyncio
import logging
import os
+import re
+from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
@@ -8,9 +10,34 @@
import pytest
from unidiff import PatchSet
-from bugbug.tools.code_review import data_types, langchain_tools
-from bugbug.tools.code_review.data_types import Skill, _strip_frontmatter
+from bugbug.tools.code_review import data_types, langchain_tools, review_context
+from bugbug.tools.code_review.data_types import (
+ ExternalContent,
+ Skill,
+ _strip_frontmatter,
+)
from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool
+from bugbug.tools.code_review.review_context import (
+ _merge_rules,
+ external_content_manifest,
+ format_external_content,
+ get_bug_component,
+ github_repo_allowed,
+ load_external_content_for_diff,
+ parse_diff_files,
+ parse_review_context_toml,
+ rule_matches,
+)
+from bugbug.tools.code_review.review_context_schema import (
+ AnyFilePredicate,
+ BugzillaPredicate,
+ FilePredicate,
+ ReviewContextValidationError,
+ Rule,
+)
+from bugbug.tools.code_review.review_context_schema import (
+ main as validate_review_context_main,
+)
from bugbug.tools.code_review.utils import find_comment_scope
from bugbug.tools.core.platforms.patch_apply import (
apply_patched_file,
@@ -43,6 +70,17 @@ async def fetch(path):
return fetch
+def extract_review_context_examples(markdown: str) -> list[tuple[int, str]]:
+ examples = []
+ fence_re = re.compile(r"^```(?P[^\n]*)\n(?P.*?)^```", re.M | re.S)
+ for match in fence_re.finditer(markdown):
+ info = match.group("info").split()
+ if info == ["toml", "review-context"]:
+ line = markdown.count("\n", 0, match.start()) + 1
+ examples.append((line, match.group("body")))
+ return examples
+
+
# ---------------------------------------------------------------------------
# strip_diff_prefix
# ---------------------------------------------------------------------------
@@ -480,3 +518,685 @@ def test_fetch_file_skips_revision_when_none():
result = asyncio.run(_fetch_file("f.txt", None, client, patch))
assert result == "latest content"
client.get_file_at_revision.assert_not_called()
+
+
+@pytest.fixture(autouse=True)
+def clear_review_context_cache():
+ review_context._review_context_cache.clear()
+ yield
+ review_context._review_context_cache.clear()
+
+
+@pytest.mark.asyncio
+async def test_external_content_load_caches():
+ item = ExternalContent(
+ name="a",
+ url="https://example.com/a.md",
+ description="example",
+ )
+ client = _mock_client_returning("---\nname: a\n---\nbody\n")
+ with patch.object(data_types, "get_http_client", return_value=client):
+ first = await item.load()
+ second = await item.load()
+ assert first == "body\n"
+ assert second == "body\n"
+ assert client.get.await_count == 1
+
+
+_RULES_TOML = """
+version = 1
+
+[[rules]]
+name = "Audio/Video C++"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+load = [
+ { type = "file", path = ".claude/skills/dom-media.md" },
+]
+
+[[rules]]
+name = "WebIDL"
+when = { any_file = { ext = [".webidl"] } }
+load = [
+ { type = "file", path = ".claude/skills/webidl.md", repo = "mozilla-firefox/firefox" },
+]
+
+[[rules]]
+name = "Any JS"
+when = { any_file = { ext = [".js"] } }
+load = [
+ { type = "file", path = ".claude/skills/js-style.md" },
+]
+
+[[rules]]
+name = "Bugzilla component only"
+when = { bugzilla = { component = ["Core::DOM: Web Audio"] } }
+load = [
+ { type = "file", path = ".claude/skills/dom-audio.md" },
+]
+"""
+
+_DIFF_MEDIA = """\
+diff --git a/dom/media/Foo.cpp b/dom/media/Foo.cpp
+--- a/dom/media/Foo.cpp
++++ b/dom/media/Foo.cpp
+@@ -1,1 +1,1 @@
+-old
++new
+"""
+
+_DIFF_WEBIDL = """\
+diff --git a/dom/webidl/Foo.webidl b/dom/webidl/Foo.webidl
+--- a/dom/webidl/Foo.webidl
++++ b/dom/webidl/Foo.webidl
+@@ -1,1 +1,1 @@
+-old
++new
+"""
+
+
+def test_parse_diff_files():
+ files = parse_diff_files(_DIFF_MEDIA)
+ assert files == {"dom/media/Foo.cpp"}
+
+
+def test_github_repo_allowed_policy():
+ assert github_repo_allowed("mozilla/cubeb", "example/repo", {"mozilla/"})
+ assert github_repo_allowed("whatwg/html", "example/repo", {"whatwg/html"})
+ assert github_repo_allowed("example/repo", "example/repo", set())
+ assert not github_repo_allowed("mozilla/cubeb", "example/repo", set())
+ assert not github_repo_allowed("whatwg/html-tests", "example/repo", {"whatwg/html"})
+
+
+def test_rule_matches_extension_and_path():
+ rule = Rule(
+ name="test",
+ when=AnyFilePredicate(FilePredicate(include=["dom/media/**"], ext=[".cpp"])),
+ load=[],
+ )
+ assert rule_matches(rule, {"dom/media/Foo.cpp"})
+ assert not rule_matches(rule, {"dom/canvas/Foo.cpp"})
+ assert not rule_matches(rule, {"dom/media/Foo.js"})
+
+
+def test_rule_matches_extension_only():
+ rule = Rule(
+ name="test",
+ when=AnyFilePredicate(FilePredicate(ext=[".webidl"])),
+ load=[],
+ )
+ assert rule_matches(rule, {"dom/webidl/Foo.webidl"})
+ assert not rule_matches(rule, {"dom/media/Foo.cpp"})
+
+
+def test_parse_review_context_toml_rejects_missing_when():
+ toml = """
+version = 1
+
+[[rules]]
+name = "Bad rule"
+load = [{ type = "file", path = "skills/guide.md" }]
+"""
+ with pytest.raises(ReviewContextValidationError):
+ parse_review_context_toml(toml)
+
+
+def test_parse_review_context_toml_rejects_unknown_action_type():
+ toml = """
+version = 1
+
+[[rules]]
+name = "Bad rule"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "unknown", path = "skills/guide.md" }]
+"""
+ with pytest.raises(ReviewContextValidationError):
+ parse_review_context_toml(toml)
+
+
+def test_parse_review_context_toml_rejects_unknown_rule_field():
+ toml = """
+version = 1
+
+[[rules]]
+name = "Bad rule"
+when = { any_file = { ext = [".cpp"] } }
+match_unknown = ["x"]
+load = [{ type = "file", path = "skills/guide.md" }]
+"""
+ with pytest.raises(ReviewContextValidationError):
+ parse_review_context_toml(toml)
+
+
+def test_parse_review_context_example_file():
+ repo_root = Path(__file__).resolve().parent.parent
+ example = (repo_root / "docs/code-review-context-example.toml").read_text()
+ config = parse_review_context_toml(example)
+ assert config.version == 1
+ assert len(config.rules) >= 1
+ assert "whatwg/html" in config.policy.github.allowed_repos
+
+
+def test_parse_review_context_examples_from_docs():
+ repo_root = Path(__file__).resolve().parent.parent
+ docs = (repo_root / "docs/code-review-skills.md").read_text()
+ examples = extract_review_context_examples(docs)
+ assert examples
+
+ for line, example in examples:
+ try:
+ config = parse_review_context_toml(example)
+ except ReviewContextValidationError as exc:
+ pytest.fail(f"docs/code-review-skills.md:{line}: {exc}")
+ assert config.rules, f"docs/code-review-skills.md:{line}: expected rules"
+
+
+def test_validate_review_context_main(tmp_path, capsys):
+ review_context_path = tmp_path / "review-context.toml"
+ review_context_path.write_text(_RULES_TOML)
+
+ assert validate_review_context_main([str(review_context_path)]) == 0
+
+ captured = capsys.readouterr()
+ assert "valid (4 rule(s))" in captured.out
+
+
+def test_validate_review_context_main_failure(tmp_path, capsys):
+ review_context_path = tmp_path / "review-context.toml"
+ review_context_path.write_text(
+ """
+version = 1
+
+[[rules]]
+name = "Bad rule"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "unknown" }]
+"""
+ )
+
+ assert validate_review_context_main([str(review_context_path)]) == 1
+
+ captured = capsys.readouterr()
+ assert "invalid" in captured.err
+ assert "unknown action type" in captured.err
+
+
+def test_rule_bugzilla_component_fails_closed_without_component():
+ rule = Rule(
+ name="test",
+ when=BugzillaPredicate(component=["Core::DOM: Web Audio"]),
+ load=[],
+ )
+ assert not rule_matches(rule, {"dom/media/Foo.cpp"}, bug_component=None)
+
+
+def test_rule_bugzilla_component_matches():
+ rule = Rule(
+ name="test",
+ when=BugzillaPredicate(component=["Core::DOM: Web Audio"]),
+ load=[],
+ )
+ assert rule_matches(
+ rule, {"dom/media/Foo.cpp"}, bug_component="Core::DOM: Web Audio"
+ )
+ assert not rule_matches(
+ rule, {"dom/media/Foo.cpp"}, bug_component="Core::Networking"
+ )
+
+
+@pytest.mark.asyncio
+async def test_get_bug_component():
+ def fake_bugzilla(bug_id, include_fields, bughandler, bugdata):
+ bughandler({"product": "Core", "component": "DOM: Web Audio"}, bugdata)
+ mock = MagicMock()
+ mock.get_data.return_value.wait = MagicMock()
+ return mock
+
+ with patch("libmozdata.bugzilla.Bugzilla", side_effect=fake_bugzilla):
+ component = await get_bug_component(12345)
+
+ assert component == "Core::DOM: Web Audio"
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_file_load():
+ review_context_repo = "mozilla-firefox/firefox"
+ content_body = "---\nname: dom-media\n---\nAudio guidelines.\n"
+
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ content_response = MagicMock()
+ content_response.text = content_body
+ content_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, review_context_repo, review_context_branch="release"
+ )
+
+ assert len(results) == 1
+ item = results[0]
+ assert item.name == ".claude/skills/dom-media.md"
+ assert item.body == "Audio guidelines.\n"
+ assert item.source_type == "github_file"
+ assert item.trusted
+ assert item.trust_reason == "github_repo_content"
+ assert item.matched_rules == ["Audio/Video C++"]
+ assert item.bytes == len("Audio guidelines.\n".encode())
+ assert item.sha256
+ assert client.get.await_args_list[0].args[0] == (
+ "https://raw.githubusercontent.com/"
+ "mozilla-firefox/firefox/refs/heads/release/review-context.toml"
+ )
+ assert client.get.await_args_list[1].args[0] == (
+ "https://raw.githubusercontent.com/"
+ "mozilla-firefox/firefox/refs/heads/release/"
+ ".claude/skills/dom-media.md"
+ )
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_no_match():
+ review_context_repo = "mozilla-firefox/firefox"
+
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ diff = "diff --git a/build/Makefile b/build/Makefile\n+++ b/build/Makefile\n+new\n"
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(diff, review_context_repo)
+
+ assert results == []
+ assert client.get.await_count == 1
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_rules_fetch_failure():
+ client = MagicMock()
+ client.get = AsyncMock(side_effect=httpx.ConnectError("boom"))
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, "mozilla-firefox/firefox"
+ )
+
+ assert results == []
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_deduplicates_actions():
+ toml = """
+version = 1
+
+[[rules]]
+name = "Rule A"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp"] } }
+load = [{ type = "file", path = "skills/guide.md" }]
+
+[[rules]]
+name = "Rule B"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "skills/guide.md" }]
+"""
+ review_context_repo = "mozilla-firefox/firefox"
+
+ rules_response = MagicMock()
+ rules_response.text = toml
+ rules_response.raise_for_status = MagicMock()
+
+ content_response = MagicMock()
+ content_response.text = "body"
+ content_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, review_context_repo
+ )
+
+ assert len(results) == 1
+ assert results[0].matched_rules == ["Rule A", "Rule B"]
+ assert client.get.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_orders_by_priority():
+ toml = """
+version = 1
+
+[[rules]]
+name = "Low"
+priority = 1
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "skills/low.md" }]
+
+[[rules]]
+name = "High"
+priority = 10
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "skills/high.md" }]
+"""
+ review_context_repo = "mozilla-firefox/firefox"
+ rules_response = MagicMock()
+ rules_response.text = toml
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA,
+ review_context_repo,
+ content_overrides={
+ "skills/low.md": "low\n",
+ "skills/high.md": "high\n",
+ },
+ )
+
+ assert [item.name for item in results] == ["skills/high.md", "skills/low.md"]
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_rejects_disallowed_github_repo():
+ toml = """
+version = 1
+
+[[rules]]
+name = "External"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", repo = "whatwg/html", path = "review.md" }]
+"""
+ review_context_repo = "mozilla-firefox/firefox"
+ rules_response = MagicMock()
+ rules_response.text = toml
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA,
+ review_context_repo,
+ content_overrides={"review.md": "external\n"},
+ )
+
+ assert results == []
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_allows_policy_prefix_repo():
+ toml = """
+version = 1
+
+[policy.github]
+allowed_repos = ["mozilla/"]
+
+[[rules]]
+name = "External"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", repo = "mozilla/cubeb", path = "review.md" }]
+"""
+ review_context_repo = "mozilla-firefox/firefox"
+ rules_response = MagicMock()
+ rules_response.text = toml
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA,
+ review_context_repo,
+ content_overrides={"review.md": "external\n"},
+ )
+
+ assert [item.name for item in results] == ["review.md"]
+
+
+# --- _merge_rules ---
+
+
+def test_merge_rules_appends_new():
+ base = parse_review_context_toml(
+ """
+version = 1
+
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+ ).rules
+ extra = """
+version = 1
+
+[[rules]]
+name = "B"
+when = { any_file = { ext = [".js"] } }
+load = [{ type = "file", path = "b.md" }]
+"""
+ merged = _merge_rules(base, extra)
+ assert len(merged) == 2
+ assert merged[1].name == "B"
+
+
+def test_merge_rules_replaces_by_name():
+ base = parse_review_context_toml(
+ """
+version = 1
+
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+ ).rules
+ extra = """
+version = 1
+
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp", ".h"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+ merged = _merge_rules(base, extra)
+ assert len(merged) == 1
+ assert merged[0].when.predicate.ext == [".cpp", ".h"]
+
+
+def test_merge_rules_empty_extra():
+ base = parse_review_context_toml(
+ """
+version = 1
+
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+ ).rules
+ merged = _merge_rules(base, "version = 1\n")
+ assert len(merged) == 1
+ assert merged[0].name == "A"
+ assert merged[0].when.predicate.ext == [".cpp"]
+ assert len(merged[0].load) == 1
+
+ merged = _merge_rules(base, "version = 1\n# comment\n")
+ assert len(merged) == 1
+ assert merged[0].name == "A"
+ assert merged[0].when.predicate.ext == [".cpp"]
+ assert len(merged[0].load) == 1
+
+
+# --- content_overrides ---
+
+
+@pytest.mark.asyncio
+async def test_content_override_used_instead_of_fetch():
+ review_context_repo = "mozilla-firefox/firefox"
+
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ overrides = {".claude/skills/dom-media.md": "Overridden content.\n"}
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, review_context_repo, content_overrides=overrides
+ )
+
+ assert len(results) == 1
+ assert results[0].name == ".claude/skills/dom-media.md"
+ assert results[0].body == "Overridden content.\n"
+ assert client.get.await_count == 1 # only the rules fetch, not the content
+
+
+@pytest.mark.asyncio
+async def test_external_content_manifest_and_prompt_body():
+ review_context_repo = "mozilla-firefox/firefox"
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ overrides = {".claude/skills/dom-media.md": "Audio guidelines.\n"}
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, review_context_repo, content_overrides=overrides
+ )
+
+ manifest = external_content_manifest(results)
+ assert manifest == [
+ {
+ "name": ".claude/skills/dom-media.md",
+ "source_type": "github_file",
+ "source": (
+ "https://raw.githubusercontent.com/mozilla-firefox/firefox/"
+ "refs/heads/main/.claude/skills/dom-media.md"
+ ),
+ "action": {
+ "type": "file",
+ "path": ".claude/skills/dom-media.md",
+ },
+ "matched_rules": ["Audio/Video C++"],
+ "trusted": True,
+ "trust_reason": "github_repo_content",
+ "bytes": len("Audio guidelines.\n".encode()),
+ "sha256": results[0].sha256,
+ }
+ ]
+
+ prompt_content = format_external_content(results)
+ assert "" in prompt_content
+ assert "" in prompt_content
+ assert "Audio guidelines." in prompt_content
+
+
+@pytest.mark.asyncio
+async def test_extra_context_toml_appended():
+ review_context_repo = "mozilla-firefox/firefox"
+ extra = """
+version = 1
+
+[[rules]]
+name = "Extra JS rule"
+when = { any_file = { ext = [".js"] } }
+load = [{ type = "file", path = ".claude/skills/js.md" }]
+"""
+ diff_js = "diff --git a/Foo.js b/Foo.js\n+++ b/Foo.js\n+new\n"
+
+ # Use a base TOML without any .js rules so only the extra rule fires.
+ base_toml = """
+version = 1
+
+[[rules]]
+name = "Audio/Video C++"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+load = [{ type = "file", path = ".claude/skills/dom-media.md" }]
+"""
+ rules_response = MagicMock()
+ rules_response.text = base_toml
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ overrides = {".claude/skills/js.md": "JS guidelines.\n"}
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ diff_js,
+ review_context_repo,
+ extra_context_toml=extra,
+ content_overrides=overrides,
+ )
+
+ assert len(results) == 1
+ assert results[0].name == ".claude/skills/js.md"
+ assert results[0].body == "JS guidelines.\n"
+
+
+@pytest.mark.asyncio
+async def test_extra_context_toml_replaces_by_name():
+ review_context_repo = "mozilla-firefox/firefox"
+ # Replace the "Audio/Video C++" rule with a version that loads different content
+ extra = """
+version = 1
+
+[[rules]]
+name = "Audio/Video C++"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+load = [{ type = "file", path = ".claude/skills/dom-media-v2.md" }]
+"""
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ overrides = {".claude/skills/dom-media-v2.md": "Updated guidelines.\n"}
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(review_context, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA,
+ review_context_repo,
+ extra_context_toml=extra,
+ content_overrides=overrides,
+ )
+
+ assert len(results) == 1
+ assert results[0].name == ".claude/skills/dom-media-v2.md"
+ assert results[0].body == "Updated guidelines.\n"
diff --git a/tests/test_phabricator_trusted_filtering.py b/tests/test_phabricator_trusted_filtering.py
index 4f2d6e32bc..41c425b330 100644
--- a/tests/test_phabricator_trusted_filtering.py
+++ b/tests/test_phabricator_trusted_filtering.py
@@ -708,6 +708,90 @@ def mock_load_revision(rev_phid=None, rev_id=None):
assert REDACTED_TITLE not in md_output
+def test_to_md_includes_reviewers_and_target_repository():
+ mock_revision = {
+ "id": 12345,
+ "phid": "PHID-DREV-test123",
+ "fields": {
+ "title": "This is the revision title",
+ "authorPHID": "PHID-USER-author",
+ "status": {"name": "Needs Review"},
+ "uri": "https://phabricator.services.mozilla.com/D12345",
+ "bugzilla.bug-id": "123456",
+ "summary": "",
+ "testPlan": "",
+ "stackGraph": {},
+ "repositoryPHID": "PHID-REPO-firefoxbeta",
+ },
+ }
+ mock_revision_search = {
+ "attachments": {
+ "reviewers": {
+ "reviewers": [
+ {
+ "reviewerPHID": "PHID-USER-reviewer",
+ "status": "blocking",
+ "isBlocking": True,
+ }
+ ]
+ }
+ }
+ }
+ mock_repository_info = {
+ "fields": {
+ "shortName": "firefox-beta",
+ "defaultBranch": "beta",
+ }
+ }
+ mock_diff = {
+ "id": 54321,
+ "dateCreated": 1704110400,
+ "dateModified": 1704110400,
+ "baseRevision": "abc123",
+ "authorPHID": "PHID-USER-author",
+ }
+ mock_users_info = {
+ "PHID-USER-author": {
+ "email": "author@example.com",
+ "is_trusted": True,
+ "is_trusted_bot": False,
+ "real_name": "Patch Author",
+ },
+ "PHID-USER-reviewer": {
+ "email": "reviewer@example.com",
+ "is_trusted": True,
+ "is_trusted_bot": False,
+ "real_name": "Patch Reviewer",
+ },
+ }
+
+ with (
+ patch.object(SanitizedPhabricatorPatch, "_revision_metadata", mock_revision),
+ patch.object(
+ SanitizedPhabricatorPatch,
+ "_revision_search_metadata",
+ mock_revision_search,
+ ),
+ patch.object(
+ SanitizedPhabricatorPatch, "repository_info", mock_repository_info
+ ),
+ patch.object(SanitizedPhabricatorPatch, "_diff_metadata", mock_diff),
+ patch.object(SanitizedPhabricatorPatch, "get_comments", return_value=[]),
+ patch(
+ "bugbug.tools.core.platforms.phabricator._get_users_info_batch",
+ return_value=mock_users_info,
+ ),
+ patch.object(SanitizedPhabricatorPatch, "raw_diff", "diff content"),
+ ):
+ patch_obj = SanitizedPhabricatorPatch(diff_id=54321)
+ markdown = patch_obj.to_md()
+
+ assert (
+ "- **Reviewers**: Patch Reviewer (reviewer@example.com) [blocking]" in markdown
+ )
+ assert "- **Target Repository**: firefox-beta (default branch: beta)" in markdown
+
+
# Subsequent test rely on having an API key present, and perform testing against
# the live phabricator instance. They can be helpful to validate changes
# locally, but aren't run in CI.