From 1f51279b825f059520bcb9d807c9b0e00374f1b2 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:36:59 +0200
Subject: [PATCH 01/11] Move code review prompt templates to Markdown files

---
 bugbug/tools/code_review/prompts.py           | 85 +++++--------------
 .../code_review/prompts/first_message.md      | 21 +++++
 bugbug/tools/code_review/prompts/system.md    | 51 +++++++++++
 3 files changed, 93 insertions(+), 64 deletions(-)
 create mode 100644 bugbug/tools/code_review/prompts/first_message.md
 create mode 100644 bugbug/tools/code_review/prompts/system.md

diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py
index bf4073d474..dffdca43fe 100644
--- a/bugbug/tools/code_review/prompts.py
+++ b/bugbug/tools/code_review/prompts.py
@@ -5,80 +5,37 @@
 
 """Prompt templates for code review agent."""
 
-from bugbug.tools.code_review.data_types import Skill
-
-TARGET_SOFTWARE: str | None = None
-
-
-SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code.
-
-## Instructions
+from pathlib import Path
 
-Follow this systematic approach to review the patch:
-
-**Step 1: Analyze the Changes**
-- Understand what the patch is trying to accomplish
-- Use the patch summary for context, but focus primarily on what you can see in the actual diff
-- Identify the intent and structure of the changes
-
-**Step 2: Identify Issues**
-- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
-- Focus ONLY on new or changed lines (lines that begin with `+`)
-- Never comment on unmodified code
-- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
-
-**Step 3: Verify and Assess Confidence**
-- Use available tools when you need to verify concerns or gather additional context
-- Only include comments where you are at least 80% confident the issue is valid
-- When uncertain about an issue, use available tools to verify before commenting
-- Do not suggest issues you cannot verify with available context
+from bugbug.tools.code_review.data_types import Skill
 
-**Step 4: Sort and Order Comments**
-- Sort comments by descending confidence and importance
-- Start with issues you are certain are valid and that are most critical
-- Assign each comment a numeric order starting at 1
+_PROMPTS_DIR = Path(__file__).parent / "prompts"
 
-**Step 5: Write Clear, Constructive Comments**
-- Use direct, declarative language - state the problem definitively, then suggest the fix
-- Keep comments short and specific
-- Use directive language: "Fix", "Remove", "Change", "Add"
-- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
-- Focus strictly on code-related concerns
+TARGET_SOFTWARE: str | None = None
 
-## What NOT to Include
+_SYSTEM_PROMPT_RAW = (_PROMPTS_DIR / "system.md").read_text()
 
-Do not write comments that:
-- Refer to unmodified code (lines without a `+` prefix)
-- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
-- Provide praise or restate obvious facts
-- Focus on testing concerns
-- Point out issues that are already handled in the visible code
-- Suggest problems based on assumptions without verifying the context
-- Flag style preferences without clear coding standard violations
+_CONTEXT_TOOLS_AGENT = """\
+  - `expand_context` to read surrounding code in the patched files
+  - `find_function_definition` to look up callers or definitions\
 """
 
+_CONTEXT_TOOLS_LOCAL = """\
+  - `Read` and `Grep` to inspect files in the local checkout
+  - `Bash` with `searchfox-cli` to query Searchfox for definitions, callers, and cross-references\
+"""
 
-FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:
-
-<patch_summary>
-{patch_summarization}
-</patch_summary>
-
-
-Here are examples of good code review comments to guide your style and approach:
-
-<examples>
-{comment_examples}
-{approved_examples}
-</examples>
-
+SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
+    target_software="{target_software}",
+    context_tools=_CONTEXT_TOOLS_AGENT,
+)
 
-Here is the patch you need to review:
+LOCAL_SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
+    target_software="{target_software}",
+    context_tools=_CONTEXT_TOOLS_LOCAL,
+)
 
-<patch>
-{patch}
-</patch>
-"""
+FIRST_MESSAGE_TEMPLATE = (_PROMPTS_DIR / "first_message.md").read_text()
 
 
 TEMPLATE_COMMENT_EXAMPLE = """Patch example {example_number}:
diff --git a/bugbug/tools/code_review/prompts/first_message.md b/bugbug/tools/code_review/prompts/first_message.md
new file mode 100644
index 0000000000..848897fb69
--- /dev/null
+++ b/bugbug/tools/code_review/prompts/first_message.md
@@ -0,0 +1,21 @@
+Here is a summary of the patch:
+
+<patch_summary>
+{patch_summarization}
+</patch_summary>
+{external_context}
+
+
+Here are examples of good code review comments to guide your style and approach:
+
+<examples>
+{comment_examples}
+{approved_examples}
+</examples>
+
+
+Here is the patch you need to review:
+
+<patch>
+{patch}
+</patch>
diff --git a/bugbug/tools/code_review/prompts/system.md b/bugbug/tools/code_review/prompts/system.md
new file mode 100644
index 0000000000..674b4000a2
--- /dev/null
+++ b/bugbug/tools/code_review/prompts/system.md
@@ -0,0 +1,51 @@
+## Instructions
+
+Follow this systematic approach to review the patch:
+
+**Step 1: Analyze the Changes**
+
+- Read the commit message
+- Understand what the patch is trying to accomplish
+- Identify the intent and structure of the changes; if what the patch does doesn't match the commit message, this always warrants a review comment — mismatches make the history hard to understand and bisect
+
+**Step 2: Identify Issues**
+
+- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
+- Focus ONLY on new or changed lines (lines that begin with `+`)
+- Never comment on unmodified code
+- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
+
+**Step 3: Verify and Assess Confidence**
+
+- Use available tools when you need to verify concerns or gather additional context:
+  {context_tools}
+  - `mozilla__get_phabricator_revision` to fetch related Phabricator revisions — open and closed, including their review comments; particularly useful for other revisions in the same stack
+  - `mozilla__get_bugzilla_bug` to fetch the associated Bugzilla bug and its history
+  - `mozilla__read_fx_doc_section` to read sections from the Firefox Source Tree Documentation (firefox-source-docs.mozilla.org) — useful for architecture and API documentation relevant to the changed code
+- Significant tool usage is expected to perform a high-quality review, much like a human reviewer would
+- When uncertain about an issue, use available tools to verify before commenting
+- Do not suggest issues you cannot verify with available context
+
+**Step 4: Sort and Order Comments**
+
+- Sort comments by descending confidence and importance
+- Start with issues you are certain are valid and that are most critical
+- Assign each comment a numeric order starting at 1
+
+**Step 5: Write Clear, Constructive Comments**
+
+- Use direct, declarative language - state the problem definitively, then suggest the fix
+- Keep comments short and specific
+- Use directive language: "Fix", "Remove", "Change", "Add"
+- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
+- Focus strictly on code-related concerns
+
+## What NOT to Include
+
+Do not write comments that:
+
+- Refer to unmodified code (lines without a `+` prefix)
+- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
+- Provide praise or restate obvious facts
+- Suggest problems based on assumptions without verifying the context
+- Flag style preferences without clear coding standard violations

From 64ea247b9ede000177f7b9988182b964bb3ea906 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Wed, 27 May 2026 14:58:50 +0000
Subject: [PATCH 02/11] Wire Mozilla MCP server into the agent

Adds the moz.tools MCP server to the Anthropic client so the agent can
call mozilla__ tools during review. URL is overridable via
BUGBUG_MOZ_MCP_URL.
---
 bugbug/tools/code_review/agent.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
index 794dc6194d..b21c8bf563 100644
--- a/bugbug/tools/code_review/agent.py
+++ b/bugbug/tools/code_review/agent.py
@@ -160,6 +160,15 @@ def create(cls, **kwargs):
                 max_tokens=40_000,
                 temperature=None,
                 thinking={"type": "enabled", "budget_tokens": 10_000},
+                mcp_servers=[
+                    {
+                        "type": "url",
+                        "url": os.getenv(
+                            "BUGBUG_MOZ_MCP_URL", "https://mcp-dev.moz.tools/mcp"
+                        ),
+                        "name": "mozilla",
+                    }
+                ],
             )
 
         if "patch_summarizer" not in kwargs:

From 0f4fdeaa11076772a23b6e4f89dbe6d8623a3cd9 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Wed, 27 May 2026 15:00:18 +0000
Subject: [PATCH 03/11] Add LocalPatch and local diff review via diff=
 parameter

LocalPatch wraps a raw unified diff string as a first-class Patch so it
can flow through the review pipeline without a Phabricator backing.
It accepts an optional commit_message and parses from it: the patch
title (first line), description (remaining lines), bug ID (Bug NNN),
and Differential Revision URL, and date if present in the format-patch
header.

On the MCP server side, patch_review now accepts optional diff= and
commit_message= parameters alongside patch_url=, and a new
patch_review_tool exposes the same logic as an @mcp.tool(). A shared
_patch_review_impl handles both cases, using a lighter CodeReviewTool
instance (no DB, no suggestion filtering) for local diffs.
LOCAL_SYSTEM_PROMPT_TEMPLATE uses searchfox-cli and local agent tools
instead of the agent-specific expand_context and searchfox tools.
---
 bugbug/tools/code_review/data_types.py | 87 +++++++++++++++++++++++++
 services/mcp/src/bugbug_mcp/server.py  | 88 ++++++++++++++++++++------
 2 files changed, 155 insertions(+), 20 deletions(-)

diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py
index d7ecd6938d..944ce9004e 100644
--- a/bugbug/tools/code_review/data_types.py
+++ b/bugbug/tools/code_review/data_types.py
@@ -1,10 +1,12 @@
 import re
+from datetime import datetime
 
 import httpx
 from pydantic import BaseModel, Field, PrivateAttr
 
 from bugbug.tools.core.connection import get_http_client
 from bugbug.tools.core.data_types import InlineComment
+from bugbug.tools.core.platforms.base import Patch
 
 
 class GeneratedReviewComment(BaseModel):
@@ -82,3 +84,88 @@ async def load(self) -> str:
 
         self._cached_body = _strip_frontmatter(response.text)
         return self._cached_body
+
+
+_BUG_RE = re.compile(r"[Bb]ug\s+(\d+)")
+_DIFFERENTIAL_RE = re.compile(r"Differential Revision:\s*(https?://\S+)")
+_DATE_RE = re.compile(r"^Date:\s+(.+)$", re.MULTILINE)
+
+
+class LocalPatch(Patch):
+    """A patch from a local diff string, not backed by any platform."""
+
+    def __init__(
+        self,
+        diff: str,
+        commit_message: str = "",
+    ) -> None:
+        self._diff = diff
+        self._commit_message = commit_message
+        lines = commit_message.splitlines()
+        self._title = lines[0].strip() if lines else "Local patch"
+        self._description = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
+        m = _BUG_RE.search(commit_message)
+        self._bug_id: int | None = int(m.group(1)) if m else None
+        m2 = _DIFFERENTIAL_RE.search(commit_message)
+        self._url = m2.group(1) if m2 else ""
+        m3 = _DATE_RE.search(commit_message)
+        if m3:
+            from email.utils import parsedate_to_datetime
+            try:
+                self._date = parsedate_to_datetime(m3.group(1))
+            except Exception:
+                self._date = datetime.now()
+        else:
+            self._date = datetime.now()
+
+    @property
+    def patch_id(self) -> str:
+        return "local"
+
+    @property
+    def raw_diff(self) -> str:
+        return self._diff
+
+    @property
+    def date_created(self) -> datetime:
+        return self._date
+
+    @property
+    def has_bug(self) -> bool:
+        return self._bug_id is not None
+
+    @property
+    def bug_id(self) -> int | None:
+        return self._bug_id
+
+    @property
+    def bug_title(self) -> str:
+        return ""
+
+    @property
+    def patch_title(self) -> str:
+        return self._title
+
+    @property
+    def patch_description(self) -> str:
+        return self._description
+
+    @property
+    def patch_url(self) -> str:
+        return self._url
+
+    def is_accessible(self) -> bool:
+        return True
+
+    def is_public(self) -> bool:
+        return True
+
+    async def get_new_file(self, file_path: str) -> str:
+        raise FileNotFoundError(
+            f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
+        )
+
+    async def get_old_file(self, file_path: str) -> str:
+        raise FileNotFoundError(
+            f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
+        )
diff --git a/services/mcp/src/bugbug_mcp/server.py b/services/mcp/src/bugbug_mcp/server.py
index a5179ee7ff..7cc99d09d5 100644
--- a/services/mcp/src/bugbug_mcp/server.py
+++ b/services/mcp/src/bugbug_mcp/server.py
@@ -1,6 +1,7 @@
 """MCP server for Firefox Development."""
 
 import functools
+import logging
 import os
 from pathlib import Path
 from typing import Annotated
@@ -12,7 +13,11 @@
 from fastmcp.resources import FileResource
 from pydantic import Field
 
-from bugbug.tools.code_review.prompts import SYSTEM_PROMPT_TEMPLATE
+from bugbug.tools.code_review.data_types import LocalPatch
+from bugbug.tools.code_review.prompts import (
+    LOCAL_SYSTEM_PROMPT_TEMPLATE,
+    SYSTEM_PROMPT_TEMPLATE,
+)
 from bugbug.tools.core.platforms.bugzilla import SanitizedBug
 from bugbug.tools.core.platforms.phabricator import (
     PhabricatorPatch,
@@ -20,6 +25,7 @@
 )
 
 mcp = FastMCP("Firefox Development MCP Server")
+logger = logging.getLogger(__name__)
 
 
 @functools.cache
@@ -29,32 +35,74 @@ def get_code_review_tool():
     return CodeReviewTool.create()
 
 
-@mcp.prompt()
-async def patch_review(
-    patch_url: str = Field(description="URL to the Phabricator patch to review."),
+@functools.cache
+def get_local_code_review_tool():
+    """Minimal tool for local diff prompt assembly — no LLM calls on the server."""
+    from bugbug.tools.code_review.agent import CodeReviewTool
+
+    return CodeReviewTool.create(
+        review_comments_db=None,
+        suggestion_filterer=None,
+    )
+
+
+async def _patch_review_impl(
+    patch_url: str | None,
+    diff: str | None,
+    commit_message: str | None,
 ) -> str:
-    """Review a code patch from Phabricator."""
-    parsed_url = urlparse(patch_url)
-    if (
-        parsed_url.netloc == "phabricator.services.mozilla.com"
-        and parsed_url.path.startswith("/D")
-    ):
-        revision_id = int(parsed_url.path[2:])
+    if diff:
+        patch = LocalPatch(diff, commit_message=commit_message or "")
+    elif patch_url:
+        parsed_url = urlparse(patch_url)
+        if (
+            parsed_url.netloc == "phabricator.services.mozilla.com"
+            and parsed_url.path.startswith("/D")
+        ):
+            revision_id = int(parsed_url.path[2:])
+        else:
+            raise ValueError(f"Unsupported patch URL: {patch_url}")
+        patch = PhabricatorPatch(revision_id=revision_id)
     else:
-        raise ValueError(f"Unsupported patch URL: {patch_url}")
+        raise ValueError("Provide either patch_url or diff.")
 
-    patch = PhabricatorPatch(revision_id=revision_id)
-
-    tool = get_code_review_tool()
-    system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
-        target_software=tool.target_software,
-    )
-    patch_summary = tool.patch_summarizer.run(patch)
+    tool = get_local_code_review_tool() if diff else get_code_review_tool()
+    prompt_template = LOCAL_SYSTEM_PROMPT_TEMPLATE if diff else SYSTEM_PROMPT_TEMPLATE
+    system_prompt = prompt_template.format(target_software=tool.target_software)
+    patch_summary = "" if diff else tool.patch_summarizer.run(patch)
     initial_prompt = tool.generate_initial_prompt(patch, patch_summary)
-
     return system_prompt + "\n\n" + initial_prompt
 
 
+@mcp.prompt()
+async def patch_review(
+    patch_url: str | None = Field(
+        default=None,
+        description="URL to the Phabricator patch to review.",
+    ),
+    diff: str | None = Field(
+        default=None,
+        description="Raw unified diff to review (for local patches not yet on Phabricator).",
+    ),
+    commit_message: str | None = Field(
+        default=None,
+        description="Commit message for the local diff (optional, used to extract bug ID, title, and Differential Revision URL).",
+    ),
+) -> str:
+    """Review a code patch from Phabricator or a raw local diff."""
+    return await _patch_review_impl(patch_url, diff, commit_message)
+
+
+@mcp.tool()
+async def patch_review_tool(
+    patch_url: str | None = None,
+    diff: str | None = None,
+    commit_message: str | None = None,
+) -> str:
+    """Build the patch review prompt. Use diff= for local diffs, patch_url= for Phabricator."""
+    return await _patch_review_impl(patch_url, diff, commit_message)
+
+
 @mcp.resource(
     uri="bugzilla://bug/{bug_id}",
     name="Bugzilla Bug Content",

From 5fded856e34425af59e078149cdf7f7ecc98f317 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Fri, 29 May 2026 17:46:30 +0200
Subject: [PATCH 04/11] Expose Phabricator review metadata in MCP

---
 bugbug/tools/core/platforms/phabricator.py  | 137 +++++++++++++++++++-
 tests/test_phabricator_trusted_filtering.py |  84 ++++++++++++
 2 files changed, 220 insertions(+), 1 deletion(-)

diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py
index 7ea7526b3a..2e14ed3e6f 100644
--- a/bugbug/tools/core/platforms/phabricator.py
+++ b/bugbug/tools/core/platforms/phabricator.py
@@ -145,6 +145,42 @@ def _get_users_info_batch(user_phids: set[str]) -> dict[str, dict]:
     return _get_users_info_batch_with_retry(user_phids)
 
 
+def _get_project_info_batch(project_phids: set[str]) -> dict[str, dict]:
+    if not project_phids:
+        return {}
+
+    phabricator = get_phabricator_client()
+    projects_response = phabricator.request(
+        "project.search",
+        constraints={"phids": list(project_phids)},
+    )
+
+    result = {}
+    for project_data in projects_response.get("data", []):
+        phid = project_data["phid"]
+        fields = project_data.get("fields", {})
+        result[phid] = {
+            "name": fields.get("name", phid),
+            "slug": fields.get("slug"),
+        }
+    return result
+
+
+def _get_repository_info(repository_phid: str | None) -> dict | None:
+    if not repository_phid:
+        return None
+
+    phabricator = get_phabricator_client()
+    repositories_response = phabricator.request(
+        "diffusion.repository.search",
+        constraints={"phids": [repository_phid]},
+    )
+    repositories = repositories_response.get("data", [])
+    if not repositories:
+        return None
+    return repositories[0]
+
+
 def _sanitize_comments(comments: list, users_info: dict[str, dict]) -> tuple[list, int]:
     """Sanitize comments by filtering untrusted content.
 
@@ -512,6 +548,19 @@ def author_phid(self) -> str:
     def diff_author_phid(self) -> str:
         return self._diff_metadata["authorPHID"]
 
+    @cached_property
+    def _revision_search_metadata(self) -> dict:
+        phabricator = get_phabricator_client()
+        response = phabricator.request(
+            "differential.revision.search",
+            constraints={"ids": [self.revision_id]},
+            attachments={"reviewers": True},
+        )
+        revisions = response.get("data", [])
+        if not revisions:
+            return {}
+        return revisions[0]
+
     @property
     def stack_graph(self) -> dict:
         return self._revision_metadata["fields"].get("stackGraph", {})
@@ -560,11 +609,45 @@ def _all_comments(self) -> list:
 
     @cached_property
     def _users_info(self) -> dict[str, dict]:
-        user_phids = {c.author_phid for c in self._all_comments} | {self.author_phid}
+        user_phids = (
+            {c.author_phid for c in self._all_comments}
+            | {self.author_phid}
+            | self.reviewer_user_phids
+        )
         if self.author_phid != self.diff_author_phid:
             user_phids.add(self.diff_author_phid)
         return _get_users_info_batch(user_phids)
 
+    @cached_property
+    def _project_info(self) -> dict[str, dict]:
+        return _get_project_info_batch(self.reviewer_project_phids)
+
+    @cached_property
+    def reviewers(self) -> list[dict]:
+        if not self._revision_metadata["fields"].get("repositoryPHID"):
+            return []
+        return (
+            self._revision_search_metadata.get("attachments", {})
+            .get("reviewers", {})
+            .get("reviewers", [])
+        )
+
+    @cached_property
+    def reviewer_user_phids(self) -> set[str]:
+        return {
+            reviewer["reviewerPHID"]
+            for reviewer in self.reviewers
+            if reviewer.get("reviewerPHID", "").startswith("PHID-USER-")
+        }
+
+    @cached_property
+    def reviewer_project_phids(self) -> set[str]:
+        return {
+            reviewer["reviewerPHID"]
+            for reviewer in self.reviewers
+            if reviewer.get("reviewerPHID", "").startswith("PHID-PROJ-")
+        }
+
     def _format_user_display(self, user_phid: str) -> str:
         info = self._users_info.get(user_phid, {})
         email = info.get("email", "Unknown")
@@ -573,6 +656,49 @@ def _format_user_display(self, user_phid: str) -> str:
             return f"{real_name} ({email})"
         return email
 
+    def _format_reviewer_display(self, reviewer: dict) -> str:
+        reviewer_phid = reviewer["reviewerPHID"]
+        if reviewer_phid.startswith("PHID-USER-"):
+            name = self._format_user_display(reviewer_phid)
+        elif reviewer_phid.startswith("PHID-PROJ-"):
+            project = self._project_info.get(reviewer_phid, {})
+            slug = project.get("slug")
+            name = slug or project.get("name", reviewer_phid)
+        else:
+            name = reviewer_phid
+
+        status = reviewer.get("status")
+        if reviewer.get("isBlocking"):
+            status = "blocking"
+        if status:
+            return f"{name} [{status}]"
+        return name
+
+    @property
+    def reviewer_summary(self) -> str | None:
+        if not self.reviewers:
+            return None
+        return ", ".join(self._format_reviewer_display(r) for r in self.reviewers)
+
+    @cached_property
+    def repository_info(self) -> dict | None:
+        return _get_repository_info(
+            self._revision_metadata["fields"].get("repositoryPHID")
+        )
+
+    @property
+    def target_repository(self) -> str | None:
+        if not self.repository_info:
+            return None
+        fields = self.repository_info.get("fields", {})
+        return fields.get("shortName") or fields.get("name")
+
+    @property
+    def target_repository_default_branch(self) -> str | None:
+        if not self.repository_info:
+            return None
+        return self.repository_info.get("fields", {}).get("defaultBranch")
+
     @property
     def author(self) -> str:
         return self._format_user_display(self.author_phid)
@@ -633,6 +759,15 @@ def to_md(self) -> str:
         md_lines.append(f"- **URI**: {self.revision_uri}")
         md_lines.append(f"- **Revision Author**: {self.author}")
         md_lines.append(f"- **Status**: {self.revision_status}")
+        if self.reviewer_summary:
+            md_lines.append(f"- **Reviewers**: {self.reviewer_summary}")
+        if self.target_repository:
+            repository_line = self.target_repository
+            if self.target_repository_default_branch:
+                repository_line += (
+                    f" (default branch: {self.target_repository_default_branch})"
+                )
+            md_lines.append(f"- **Target Repository**: {repository_line}")
         md_lines.append(f"- **Created**: {self.date_created.strftime(date_format)}")
         md_lines.append(f"- **Modified**: {self.date_modified.strftime(date_format)}")
         bug_id = self._revision_metadata["fields"].get("bugzilla.bug-id") or "N/A"
diff --git a/tests/test_phabricator_trusted_filtering.py b/tests/test_phabricator_trusted_filtering.py
index 4f2d6e32bc..41c425b330 100644
--- a/tests/test_phabricator_trusted_filtering.py
+++ b/tests/test_phabricator_trusted_filtering.py
@@ -708,6 +708,90 @@ def mock_load_revision(rev_phid=None, rev_id=None):
         assert REDACTED_TITLE not in md_output
 
 
+def test_to_md_includes_reviewers_and_target_repository():
+    mock_revision = {
+        "id": 12345,
+        "phid": "PHID-DREV-test123",
+        "fields": {
+            "title": "This is the revision title",
+            "authorPHID": "PHID-USER-author",
+            "status": {"name": "Needs Review"},
+            "uri": "https://phabricator.services.mozilla.com/D12345",
+            "bugzilla.bug-id": "123456",
+            "summary": "",
+            "testPlan": "",
+            "stackGraph": {},
+            "repositoryPHID": "PHID-REPO-firefoxbeta",
+        },
+    }
+    mock_revision_search = {
+        "attachments": {
+            "reviewers": {
+                "reviewers": [
+                    {
+                        "reviewerPHID": "PHID-USER-reviewer",
+                        "status": "blocking",
+                        "isBlocking": True,
+                    }
+                ]
+            }
+        }
+    }
+    mock_repository_info = {
+        "fields": {
+            "shortName": "firefox-beta",
+            "defaultBranch": "beta",
+        }
+    }
+    mock_diff = {
+        "id": 54321,
+        "dateCreated": 1704110400,
+        "dateModified": 1704110400,
+        "baseRevision": "abc123",
+        "authorPHID": "PHID-USER-author",
+    }
+    mock_users_info = {
+        "PHID-USER-author": {
+            "email": "author@example.com",
+            "is_trusted": True,
+            "is_trusted_bot": False,
+            "real_name": "Patch Author",
+        },
+        "PHID-USER-reviewer": {
+            "email": "reviewer@example.com",
+            "is_trusted": True,
+            "is_trusted_bot": False,
+            "real_name": "Patch Reviewer",
+        },
+    }
+
+    with (
+        patch.object(SanitizedPhabricatorPatch, "_revision_metadata", mock_revision),
+        patch.object(
+            SanitizedPhabricatorPatch,
+            "_revision_search_metadata",
+            mock_revision_search,
+        ),
+        patch.object(
+            SanitizedPhabricatorPatch, "repository_info", mock_repository_info
+        ),
+        patch.object(SanitizedPhabricatorPatch, "_diff_metadata", mock_diff),
+        patch.object(SanitizedPhabricatorPatch, "get_comments", return_value=[]),
+        patch(
+            "bugbug.tools.core.platforms.phabricator._get_users_info_batch",
+            return_value=mock_users_info,
+        ),
+        patch.object(SanitizedPhabricatorPatch, "raw_diff", "diff content"),
+    ):
+        patch_obj = SanitizedPhabricatorPatch(diff_id=54321)
+        markdown = patch_obj.to_md()
+
+    assert (
+        "- **Reviewers**: Patch Reviewer (reviewer@example.com) [blocking]" in markdown
+    )
+    assert "- **Target Repository**: firefox-beta (default branch: beta)" in markdown
+
+
 # Subsequent test rely on having an API key present, and perform testing against
 # the live phabricator instance. They can be helpful to validate changes
 # locally, but aren't run in CI.

From 868301733a71c23ab7f1fca3f235dbe6ee75e659 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:45:47 +0200
Subject: [PATCH 05/11] Add reusable external content loading

---
 bugbug/tools/code_review/data_types.py | 42 ++++++++++++++++++++++++++
 tests/test_code_review.py              | 18 ++++++++++-
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py
index 944ce9004e..3b2c48e470 100644
--- a/bugbug/tools/code_review/data_types.py
+++ b/bugbug/tools/code_review/data_types.py
@@ -2,6 +2,7 @@
 from datetime import datetime
 
 import httpx
+import tenacity
 from pydantic import BaseModel, Field, PrivateAttr
 
 from bugbug.tools.core.connection import get_http_client
@@ -169,3 +170,44 @@ async def get_old_file(self, file_path: str) -> str:
         raise FileNotFoundError(
             f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
         )
+
+
+class ExternalContentLoadError(Exception):
+    """Raised when an ExternalContent body cannot be loaded."""
+
+
+@tenacity.retry(
+    stop=tenacity.stop_after_attempt(3),
+    wait=tenacity.wait_exponential(multiplier=1, min=1),
+    retry=tenacity.retry_if_exception_type(httpx.TransportError),
+    reraise=True,
+)
+async def _fetch_url(url: str) -> httpx.Response:
+    response = await get_http_client().get(url, timeout=30)
+    response.raise_for_status()
+    return response
+
+
+class ExternalContent(BaseModel):
+    """An external file fetched and injected as context for the review."""
+
+    name: str = Field(description="A unique identifier for this content item.")
+    url: str = Field(description="HTTPS URL of the file to fetch.")
+    description: str = Field(description="Short description of what this content provides.")
+
+    _cached_body: str | None = PrivateAttr(default=None)
+
+    async def load(self) -> str:
+        """Return the content body, fetching and caching it on first use."""
+        if self._cached_body is not None:
+            return self._cached_body
+
+        try:
+            response = await _fetch_url(self.url)
+        except httpx.HTTPError as e:
+            raise ExternalContentLoadError(
+                f"Could not load content '{self.name}' from {self.url}"
+            ) from e
+
+        self._cached_body = _strip_frontmatter(response.text)
+        return self._cached_body
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index 584824544d..c28ce11205 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -9,7 +9,7 @@
 from unidiff import PatchSet
 
 from bugbug.tools.code_review import data_types, langchain_tools
-from bugbug.tools.code_review.data_types import Skill, _strip_frontmatter
+from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter
 from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool
 from bugbug.tools.code_review.utils import find_comment_scope
 from bugbug.tools.core.platforms.patch_apply import (
@@ -480,3 +480,19 @@ def test_fetch_file_skips_revision_when_none():
     result = asyncio.run(_fetch_file("f.txt", None, client, patch))
     assert result == "latest content"
     client.get_file_at_revision.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_external_content_load_caches():
+    item = ExternalContent(
+        name="a",
+        url="https://example.com/a.md",
+        description="example",
+    )
+    client = _mock_client_returning("---\nname: a\n---\nbody\n")
+    with patch.object(data_types, "get_http_client", return_value=client):
+        first = await item.load()
+        second = await item.load()
+    assert first == "body\n"
+    assert second == "body\n"
+    assert client.get.await_count == 1

From be04049d240e46c44b4993d38f3c49d947f7a094 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:46:41 +0200
Subject: [PATCH 06/11] Add review context schema validation

---
 .../code_review/review_context_schema.py      | 622 ++++++++++++++++++
 pyproject.toml                                |   1 +
 tests/test_code_review.py                     | 101 +++
 3 files changed, 724 insertions(+)
 create mode 100644 bugbug/tools/code_review/review_context_schema.py

diff --git a/bugbug/tools/code_review/review_context_schema.py b/bugbug/tools/code_review/review_context_schema.py
new file mode 100644
index 0000000000..ceba192a0c
--- /dev/null
+++ b/bugbug/tools/code_review/review_context_schema.py
@@ -0,0 +1,622 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Schema and validator for code review external context rules.
+
+This module intentionally depends only on the Python standard library so it can
+also be copied into target repositories and used from local tests.
+"""
+
+import argparse
+import sys
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal, TypeAlias
+
+try:
+    import tomllib
+except ImportError:
+    import tomli as tomllib  # type: ignore
+
+
+class ReviewContextValidationError(ValueError):
+    """Raised when review-context.toml has valid TOML but invalid schema."""
+
+
+@dataclass(frozen=True)
+class GithubPolicy:
+    allowed_repos: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class Policy:
+    github: GithubPolicy = field(default_factory=GithubPolicy)
+
+
+@dataclass(frozen=True)
+class FilePredicate:
+    include: list[str] = field(default_factory=list)
+    exclude: list[str] = field(default_factory=list)
+    ext: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class BugzillaPredicate:
+    product: list[str] = field(default_factory=list)
+    component: list[str] = field(default_factory=list)
+    keywords: list[str] = field(default_factory=list)
+    severity: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class ReviewPredicate:
+    author: list[str] = field(default_factory=list)
+    reviewer: list[str] = field(default_factory=list)
+    blocking_reviewer: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class PatchPredicate:
+    repository: list[str] = field(default_factory=list)
+    is_backport: bool | None = None
+
+
+@dataclass(frozen=True)
+class Definitions:
+    files: dict[str, FilePredicate] = field(default_factory=dict)
+    bugzilla: dict[str, BugzillaPredicate] = field(default_factory=dict)
+    review: dict[str, ReviewPredicate] = field(default_factory=dict)
+    patch: dict[str, PatchPredicate] = field(default_factory=dict)
+
+
+@dataclass(frozen=True)
+class AllPredicate:
+    children: list["Predicate"]
+
+
+@dataclass(frozen=True)
+class AnyPredicate:
+    children: list["Predicate"]
+
+
+@dataclass(frozen=True)
+class NotPredicate:
+    child: "Predicate"
+
+
+@dataclass(frozen=True)
+class AnyFilePredicate:
+    predicate: FilePredicate
+
+
+@dataclass(frozen=True)
+class AllFilesPredicate:
+    predicate: FilePredicate
+
+
+Predicate: TypeAlias = (
+    AllPredicate
+    | AnyPredicate
+    | NotPredicate
+    | AnyFilePredicate
+    | AllFilesPredicate
+    | BugzillaPredicate
+    | ReviewPredicate
+    | PatchPredicate
+)
+
+
+@dataclass(frozen=True)
+class LoadFileAction:
+    type: Literal["file"]
+    path: str
+    repo: str | None = None
+    branch: str | None = None
+    kind: str | None = None
+
+
+@dataclass(frozen=True)
+class FetchRevisionAction:
+    type: Literal["fetch_revision"]
+    revision: str | None = None
+    repo: str | None = None
+    hash: str | None = None
+    kind: str | None = None
+
+
+RuleAction: TypeAlias = LoadFileAction | FetchRevisionAction
+
+
+@dataclass(frozen=True)
+class Rule:
+    name: str
+    when: Predicate
+    load: list[RuleAction]
+    description: str | None = None
+    owners: list[str] = field(default_factory=list)
+    priority: int = 0
+
+
+@dataclass(frozen=True)
+class ReviewContextConfig:
+    version: int
+    policy: Policy = field(default_factory=Policy)
+    definitions: Definitions = field(default_factory=Definitions)
+    rules: list[Rule] = field(default_factory=list)
+
+
+def _reject_unknown_keys(path: str, value: dict, allowed: set[str]) -> None:
+    unknown = sorted(set(value) - allowed)
+    if unknown:
+        raise ReviewContextValidationError(
+            f"{path}: unknown field(s): {', '.join(unknown)}"
+        )
+
+
+def _require_table(value: object, path: str) -> dict:
+    if not isinstance(value, dict):
+        raise ReviewContextValidationError(f"{path}: expected table")
+    return value
+
+
+def _require_string(value: object, path: str) -> str:
+    if not isinstance(value, str):
+        raise ReviewContextValidationError(f"{path}: expected string")
+    return value
+
+
+def _optional_string(value: object, path: str) -> str | None:
+    if value is None:
+        return None
+    return _require_string(value, path)
+
+
+def _string_list(value: object, path: str) -> list[str]:
+    if value is None:
+        return []
+    if not isinstance(value, list):
+        raise ReviewContextValidationError(f"{path}: expected list of strings")
+    for index, item in enumerate(value):
+        _require_string(item, f"{path}[{index}]")
+    return value
+
+
+def _required_non_empty_string_list(value: object, path: str) -> list[str]:
+    items = _string_list(value, path)
+    if not items:
+        raise ReviewContextValidationError(f"{path}: expected non-empty list")
+    return items
+
+
+def _optional_bool(value: object, path: str) -> bool | None:
+    if value is None:
+        return None
+    if not isinstance(value, bool):
+        raise ReviewContextValidationError(f"{path}: expected boolean")
+    return value
+
+
+def _require_int(value: object, path: str) -> int:
+    if not isinstance(value, int) or isinstance(value, bool):
+        raise ReviewContextValidationError(f"{path}: expected integer")
+    return value
+
+
+def _normalise_repo(repo: str) -> str:
+    return repo.lower()
+
+
+def _parse_github_policy(value: object, path: str) -> GithubPolicy:
+    table = _require_table(value, path)
+    _reject_unknown_keys(path, table, {"allowed_repos"})
+    return GithubPolicy(
+        allowed_repos=[
+            _normalise_repo(repo)
+            for repo in _string_list(
+                table.get("allowed_repos"), f"{path}.allowed_repos"
+            )
+        ]
+    )
+
+
+def _parse_policy(value: object | None) -> Policy:
+    if value is None:
+        return Policy()
+    table = _require_table(value, "policy")
+    _reject_unknown_keys("policy", table, {"github"})
+    return Policy(github=_parse_github_policy(table.get("github", {}), "policy.github"))
+
+
+def _parse_file_predicate(value: object, path: str) -> FilePredicate:
+    table = _require_table(value, path)
+    _reject_unknown_keys(path, table, {"include", "exclude", "ext", "ref"})
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+    predicate = FilePredicate(
+        include=_string_list(table.get("include"), f"{path}.include"),
+        exclude=_string_list(table.get("exclude"), f"{path}.exclude"),
+        ext=_string_list(table.get("ext"), f"{path}.ext"),
+    )
+    if not predicate.include and not predicate.exclude and not predicate.ext:
+        raise ReviewContextValidationError(f"{path}: expected include, exclude, or ext")
+    return predicate
+
+
+def _parse_bugzilla_predicate(value: object, path: str) -> BugzillaPredicate:
+    table = _require_table(value, path)
+    _reject_unknown_keys(
+        path, table, {"product", "component", "keywords", "severity", "ref"}
+    )
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+    predicate = BugzillaPredicate(
+        product=_string_list(table.get("product"), f"{path}.product"),
+        component=_string_list(table.get("component"), f"{path}.component"),
+        keywords=_string_list(table.get("keywords"), f"{path}.keywords"),
+        severity=_string_list(table.get("severity"), f"{path}.severity"),
+    )
+    if (
+        not predicate.product
+        and not predicate.component
+        and not predicate.keywords
+        and not predicate.severity
+    ):
+        raise ReviewContextValidationError(f"{path}: expected at least one field")
+    return predicate
+
+
+def _parse_review_predicate(value: object, path: str) -> ReviewPredicate:
+    table = _require_table(value, path)
+    _reject_unknown_keys(
+        path, table, {"author", "reviewer", "blocking_reviewer", "ref"}
+    )
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+    predicate = ReviewPredicate(
+        author=_string_list(table.get("author"), f"{path}.author"),
+        reviewer=_string_list(table.get("reviewer"), f"{path}.reviewer"),
+        blocking_reviewer=_string_list(
+            table.get("blocking_reviewer"), f"{path}.blocking_reviewer"
+        ),
+    )
+    if (
+        not predicate.author
+        and not predicate.reviewer
+        and not predicate.blocking_reviewer
+    ):
+        raise ReviewContextValidationError(f"{path}: expected at least one field")
+    return predicate
+
+
+def _parse_patch_predicate(value: object, path: str) -> PatchPredicate:
+    table = _require_table(value, path)
+    _reject_unknown_keys(path, table, {"repository", "is_backport", "ref"})
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        raise ReviewContextValidationError(f"{path}.ref: unresolved ref")
+    predicate = PatchPredicate(
+        repository=_string_list(table.get("repository"), f"{path}.repository"),
+        is_backport=_optional_bool(table.get("is_backport"), f"{path}.is_backport"),
+    )
+    if not predicate.repository and predicate.is_backport is None:
+        raise ReviewContextValidationError(f"{path}: expected at least one field")
+    return predicate
+
+
+def _parse_definitions(value: object | None) -> Definitions:
+    if value is None:
+        return Definitions()
+    table = _require_table(value, "definitions")
+    _reject_unknown_keys("definitions", table, {"files", "bugzilla", "review", "patch"})
+    return Definitions(
+        files={
+            name: _parse_file_predicate(definition, f"definitions.files.{name}")
+            for name, definition in _require_table(
+                table.get("files", {}), "definitions.files"
+            ).items()
+        },
+        bugzilla={
+            name: _parse_bugzilla_predicate(definition, f"definitions.bugzilla.{name}")
+            for name, definition in _require_table(
+                table.get("bugzilla", {}), "definitions.bugzilla"
+            ).items()
+        },
+        review={
+            name: _parse_review_predicate(definition, f"definitions.review.{name}")
+            for name, definition in _require_table(
+                table.get("review", {}), "definitions.review"
+            ).items()
+        },
+        patch={
+            name: _parse_patch_predicate(definition, f"definitions.patch.{name}")
+            for name, definition in _require_table(
+                table.get("patch", {}), "definitions.patch"
+            ).items()
+        },
+    )
+
+
+def _resolve_ref(ref: object, namespace: str, definitions: Definitions, path: str):
+    ref_name = _require_string(ref, path)
+    prefix = f"{namespace}."
+    if not ref_name.startswith(prefix):
+        raise ReviewContextValidationError(
+            f"{path}: expected {namespace}.* ref, got {ref_name!r}"
+        )
+    name = ref_name[len(prefix) :]
+    mapping = getattr(definitions, namespace)
+    try:
+        return mapping[name]
+    except KeyError:
+        raise ReviewContextValidationError(
+            f"{path}: unknown ref {ref_name!r}"
+        ) from None
+
+
+def _parse_file_predicate_ref(
+    value: object, path: str, definitions: Definitions
+) -> FilePredicate:
+    table = _require_table(value, path)
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        return _resolve_ref(table["ref"], "files", definitions, f"{path}.ref")
+    return _parse_file_predicate(value, path)
+
+
+def _parse_bugzilla_predicate_ref(
+    value: object, path: str, definitions: Definitions
+) -> BugzillaPredicate:
+    table = _require_table(value, path)
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        return _resolve_ref(table["ref"], "bugzilla", definitions, f"{path}.ref")
+    return _parse_bugzilla_predicate(value, path)
+
+
+def _parse_review_predicate_ref(
+    value: object, path: str, definitions: Definitions
+) -> ReviewPredicate:
+    table = _require_table(value, path)
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        return _resolve_ref(table["ref"], "review", definitions, f"{path}.ref")
+    return _parse_review_predicate(value, path)
+
+
+def _parse_patch_predicate_ref(
+    value: object, path: str, definitions: Definitions
+) -> PatchPredicate:
+    table = _require_table(value, path)
+    if "ref" in table:
+        if len(table) != 1:
+            raise ReviewContextValidationError(
+                f"{path}.ref: cannot combine with fields"
+            )
+        return _resolve_ref(table["ref"], "patch", definitions, f"{path}.ref")
+    return _parse_patch_predicate(value, path)
+
+
+def _parse_all_predicate(
+    value: object, path: str, definitions: Definitions
+) -> Predicate:
+    return AllPredicate(_parse_predicate_list(value, path, definitions))
+
+
+def _parse_any_predicate(
+    value: object, path: str, definitions: Definitions
+) -> Predicate:
+    return AnyPredicate(_parse_predicate_list(value, path, definitions))
+
+
+def _parse_not_predicate(
+    value: object, path: str, definitions: Definitions
+) -> Predicate:
+    return NotPredicate(_parse_predicate(value, path, definitions))
+
+
+def _parse_any_file_predicate(
+    value: object, path: str, definitions: Definitions
+) -> Predicate:
+    return AnyFilePredicate(_parse_file_predicate_ref(value, path, definitions))
+
+
+def _parse_all_files_predicate(
+    value: object, path: str, definitions: Definitions
+) -> Predicate:
+    return AllFilesPredicate(_parse_file_predicate_ref(value, path, definitions))
+
+
+PredicateParser: TypeAlias = Callable[[object, str, Definitions], Predicate]
+
+_PREDICATE_PARSERS: dict[str, PredicateParser] = {
+    "all": _parse_all_predicate,
+    "any": _parse_any_predicate,
+    "not": _parse_not_predicate,
+    "any_file": _parse_any_file_predicate,
+    "all_files": _parse_all_files_predicate,
+    "bugzilla": _parse_bugzilla_predicate_ref,
+    "review": _parse_review_predicate_ref,
+    "patch": _parse_patch_predicate_ref,
+}
+
+
+def _parse_predicate(value: object, path: str, definitions: Definitions) -> Predicate:
+    """Parse one predicate node.
+
+    Each TOML predicate object must have exactly one key. Dispatching through a
+    table keeps the grammar visible and makes adding a predicate a one-line
+    change plus a parser function.
+    """
+    table = _require_table(value, path)
+    keys = set(table)
+    _reject_unknown_keys(path, table, set(_PREDICATE_PARSERS))
+    if len(keys) != 1:
+        raise ReviewContextValidationError(f"{path}: expected exactly one predicate")
+    key = next(iter(keys))
+    return _PREDICATE_PARSERS[key](table[key], f"{path}.{key}", definitions)
+
+
+def _parse_predicate_list(
+    value: object, path: str, definitions: Definitions
+) -> list[Predicate]:
+    if not isinstance(value, list) or not value:
+        raise ReviewContextValidationError(f"{path}: expected non-empty list")
+    return [
+        _parse_predicate(child, f"{path}[{index}]", definitions)
+        for index, child in enumerate(value)
+    ]
+
+
+def _parse_action(value: object, path: str) -> RuleAction:
+    """Parse one load action.
+
+    Load actions are intentionally small: validate the schema here, and leave
+    trust policy and network behavior to the runtime loader.
+    """
+    table = _require_table(value, path)
+    action_type = table.get("type")
+    if action_type == "file":
+        _reject_unknown_keys(path, table, {"type", "path", "repo", "branch", "kind"})
+        if "path" not in table:
+            raise ReviewContextValidationError(f"{path}.path: required for file")
+        return LoadFileAction(
+            type="file",
+            path=_require_string(table["path"], f"{path}.path"),
+            repo=_optional_string(table.get("repo"), f"{path}.repo"),
+            branch=_optional_string(table.get("branch"), f"{path}.branch"),
+            kind=_optional_string(table.get("kind"), f"{path}.kind"),
+        )
+    if action_type == "fetch_revision":
+        _reject_unknown_keys(path, table, {"type", "revision", "repo", "hash", "kind"})
+        revision = _optional_string(table.get("revision"), f"{path}.revision")
+        repo = _optional_string(table.get("repo"), f"{path}.repo")
+        commit_hash = _optional_string(table.get("hash"), f"{path}.hash")
+        if not revision and not (repo and commit_hash):
+            raise ReviewContextValidationError(
+                f"{path}: fetch_revision requires revision or repo+hash"
+            )
+        return FetchRevisionAction(
+            type="fetch_revision",
+            revision=revision,
+            repo=repo,
+            hash=commit_hash,
+            kind=_optional_string(table.get("kind"), f"{path}.kind"),
+        )
+    if action_type is None:
+        raise ReviewContextValidationError(f"{path}.type: required")
+    raise ReviewContextValidationError(
+        f"{path}.type: unknown action type {action_type!r}"
+    )
+
+
+def _parse_rule(value: object, index: int, definitions: Definitions) -> Rule:
+    path = f"rules[{index}]"
+    table = _require_table(value, path)
+    _reject_unknown_keys(
+        path,
+        table,
+        {"name", "description", "owners", "priority", "when", "load"},
+    )
+    if "name" not in table:
+        raise ReviewContextValidationError(f"{path}.name: required")
+    if "when" not in table:
+        raise ReviewContextValidationError(f"{path}.when: required")
+    if "load" not in table:
+        raise ReviewContextValidationError(f"{path}.load: required")
+    load_value = table["load"]
+    if not isinstance(load_value, list) or not load_value:
+        raise ReviewContextValidationError(f"{path}.load: expected non-empty list")
+    return Rule(
+        name=_require_string(table["name"], f"{path}.name"),
+        description=_optional_string(table.get("description"), f"{path}.description"),
+        owners=_string_list(table.get("owners"), f"{path}.owners"),
+        priority=_require_int(table.get("priority", 0), f"{path}.priority"),
+        when=_parse_predicate(table["when"], f"{path}.when", definitions),
+        load=[
+            _parse_action(action, f"{path}.load[{action_index}]")
+            for action_index, action in enumerate(load_value)
+        ],
+    )
+
+
+def parse_review_context_data(data: dict) -> ReviewContextConfig:
+    """Validate parsed review-context.toml data."""
+    _reject_unknown_keys("<root>", data, {"version", "policy", "definitions", "rules"})
+    version = data.get("version")
+    if version != 1:
+        raise ReviewContextValidationError("version: expected 1")
+    policy = _parse_policy(data.get("policy"))
+    definitions = _parse_definitions(data.get("definitions"))
+    rules_value = data.get("rules", [])
+    if not isinstance(rules_value, list):
+        raise ReviewContextValidationError("rules: expected list of rule tables")
+    return ReviewContextConfig(
+        version=version,
+        policy=policy,
+        definitions=definitions,
+        rules=[
+            _parse_rule(rule, index, definitions)
+            for index, rule in enumerate(rules_value)
+        ],
+    )
+
+
+def parse_review_context_toml(text: str) -> ReviewContextConfig:
+    """Parse and validate review-context.toml content."""
+    return parse_review_context_data(tomllib.loads(text))
+
+
+def validate_review_context_file(path: str | Path) -> ReviewContextConfig:
+    """Parse and validate a review-context.toml file."""
+    return parse_review_context_toml(Path(path).read_text())
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Validate review-context.toml")
+    parser.add_argument(
+        "path",
+        nargs="?",
+        default="review-context.toml",
+        help="Path to review-context.toml (default: %(default)s)",
+    )
+    args = parser.parse_args(argv)
+
+    try:
+        config = validate_review_context_file(args.path)
+    except (OSError, tomllib.TOMLDecodeError, ReviewContextValidationError) as exc:
+        print(f"{args.path}: invalid: {exc}", file=sys.stderr)
+        return 1
+
+    print(f"{args.path}: valid ({len(config.rules)} rule(s))")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/pyproject.toml b/pyproject.toml
index abf83759b1..d740c53fb4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -116,6 +116,7 @@ bugbug-data-github = "scripts.github_issue_retriever:main"
 bugbug-fixed-comments = "scripts.inline_comments_data_collection:main"
 bugbug-ci-failures-retriever = "scripts.retrieve_ci_failures:main"
 bugbug-try-pushes-retriever = "scripts.retrieve_try_pushes:main"
+bugbug-validate-review-context = "bugbug.tools.code_review.review_context_schema:main"
 
 [tool.hatch.version]
 path = "VERSION"
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index c28ce11205..1bdd2fa4e5 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -11,6 +11,13 @@
 from bugbug.tools.code_review import data_types, langchain_tools
 from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter
 from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool
+from bugbug.tools.code_review.review_context_schema import (
+    ReviewContextValidationError,
+    parse_review_context_toml,
+)
+from bugbug.tools.code_review.review_context_schema import (
+    main as validate_review_context_main,
+)
 from bugbug.tools.code_review.utils import find_comment_scope
 from bugbug.tools.core.platforms.patch_apply import (
     apply_patched_file,
@@ -496,3 +503,97 @@ async def test_external_content_load_caches():
     assert first == "body\n"
     assert second == "body\n"
     assert client.get.await_count == 1
+
+
+_RULES_TOML = """
+version = 1
+
+[[rules]]
+name = "Audio/Video C++"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+load = [
+  { type = "file", path = ".claude/skills/dom-media.md" },
+]
+
+[[rules]]
+name = "WebIDL"
+when = { any_file = { ext = [".webidl"] } }
+load = [
+  { type = "file", path = ".claude/skills/webidl.md", repo = "mozilla-firefox/firefox" },
+]
+
+[[rules]]
+name = "Any JS"
+when = { any_file = { ext = [".js"] } }
+load = [
+  { type = "file", path = ".claude/skills/js-style.md" },
+]
+
+[[rules]]
+name = "Bugzilla component only"
+when = { bugzilla = { component = ["Core::DOM: Web Audio"] } }
+load = [
+  { type = "file", path = ".claude/skills/dom-audio.md" },
+]
+"""
+
+
+def test_parse_review_context_toml_rejects_missing_when():
+    toml = """
+version = 1
+[[rules]]
+name = "Broken"
+load = [{ type = "file", path = "x.md" }]
+"""
+    with pytest.raises(ReviewContextValidationError, match="rules\\[0\\].when"):
+        parse_review_context_toml(toml)
+
+
+def test_parse_review_context_toml_rejects_unknown_action_type():
+    toml = """
+version = 1
+[[rules]]
+name = "Broken"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "url", path = "x.md" }]
+"""
+    with pytest.raises(ReviewContextValidationError, match="unknown action type"):
+        parse_review_context_toml(toml)
+
+
+def test_parse_review_context_toml_rejects_unknown_rule_field():
+    toml = """
+version = 1
+[[rules]]
+name = "Broken"
+bogus = true
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "x.md" }]
+"""
+    with pytest.raises(ReviewContextValidationError, match="unknown field"):
+        parse_review_context_toml(toml)
+
+
+def test_validate_review_context_main(tmp_path, capsys):
+    review_context_path = tmp_path / "review-context.toml"
+    review_context_path.write_text(_RULES_TOML)
+
+    assert validate_review_context_main([str(review_context_path)]) == 0
+    captured = capsys.readouterr()
+    assert "valid" in captured.out
+
+
+def test_validate_review_context_main_failure(tmp_path, capsys):
+    review_context_path = tmp_path / "review-context.toml"
+    review_context_path.write_text(
+        """
+version = 1
+[[rules]]
+name = "Broken"
+load = [{ type = "file", path = "x.md" }]
+"""
+    )
+
+    assert validate_review_context_main([str(review_context_path)]) == 1
+    captured = capsys.readouterr()
+    assert "invalid" in captured.err

From 7175003934e5108e6a7ddfe0334b640bd6605faa Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:50:03 +0200
Subject: [PATCH 07/11] Load GitHub file review context from rules

---
 bugbug/tools/code_review/review_context.py | 454 +++++++++++++++++++++
 tests/test_code_review.py                  | 385 ++++++++++++++++-
 2 files changed, 838 insertions(+), 1 deletion(-)
 create mode 100644 bugbug/tools/code_review/review_context.py

diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py
new file mode 100644
index 0000000000..907d2a73fe
--- /dev/null
+++ b/bugbug/tools/code_review/review_context.py
@@ -0,0 +1,454 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Rule-based external content loading for code review.
+
+Fetches review-context.toml from a GitHub repository, matches changed files
+against the rules, and pre-loads the referenced content.
+"""
+
+import fnmatch
+import hashlib
+import json
+import time
+from dataclasses import dataclass
+from logging import getLogger
+from typing import Literal
+
+import httpx
+from pydantic import BaseModel
+from unidiff import PatchSet
+
+from bugbug.tools.code_review.data_types import (
+    ExternalContent,
+    ExternalContentLoadError,
+    _fetch_url,
+)
+from bugbug.tools.code_review.review_context_schema import (
+    AllFilesPredicate,
+    AllPredicate,
+    AnyFilePredicate,
+    AnyPredicate,
+    FilePredicate,
+    LoadFileAction,
+    NotPredicate,
+    Predicate,
+    ReviewContextConfig,
+    ReviewContextValidationError,
+    Rule,
+    RuleAction,
+    parse_review_context_toml,
+    tomllib,
+)
+
+logger = getLogger(__name__)
+
+_review_context_cache: dict[
+    tuple[str, str, str], tuple[float, "ReviewContextConfig"]
+] = {}
+_REVIEW_CONTEXT_CACHE_TTL = 300  # seconds
+DEFAULT_REVIEW_CONTEXT_PATH = "review-context.toml"
+
+
+@dataclass
+class MatchedAction:
+    action: RuleAction
+    matched_rules: list[str]
+
+
+class ExternalContentItem(BaseModel):
+    name: str
+    body: str
+    source_type: Literal["github_file", "phabricator_revision", "github_commit"]
+    source: str
+    action: dict
+    matched_rules: list[str]
+    trusted: bool = True
+    trust_reason: str
+    bytes: int
+    sha256: str
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        name: str,
+        body: str,
+        source_type: Literal["github_file", "phabricator_revision", "github_commit"],
+        source: str,
+        action: RuleAction,
+        matched_rules: list[str],
+        trust_reason: str,
+    ) -> "ExternalContentItem":
+        encoded = body.encode()
+        return cls(
+            name=name,
+            body=body,
+            source_type=source_type,
+            source=source,
+            action=_action_to_dict(action),
+            matched_rules=matched_rules,
+            trust_reason=trust_reason,
+            bytes=len(encoded),
+            sha256=hashlib.sha256(encoded).hexdigest(),
+        )
+
+    def manifest(self) -> dict:
+        return self.model_dump(exclude={"body"})
+
+
+def _github_raw_url(repo: str, branch: str, path: str) -> str:
+    return (
+        f"https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/"
+        f"{path.lstrip('/')}"
+    )
+
+
+def _normalise_repo(repo: str) -> str:
+    return repo.lower()
+
+
+def github_repo_allowed(
+    repo: str, review_context_repo: str, allowed_repos: set[str]
+) -> bool:
+    repo = _normalise_repo(repo)
+    review_context_repo = _normalise_repo(review_context_repo)
+    if repo == review_context_repo:
+        return True
+    return any(
+        repo.startswith(allowed_repo)
+        if allowed_repo.endswith("/")
+        else repo == allowed_repo
+        for allowed_repo in allowed_repos
+    )
+
+
+def _action_to_dict(action: RuleAction) -> dict:
+    return {key: value for key, value in action.__dict__.items() if value is not None}
+
+
+def parse_diff_files(diff: str) -> set[str]:
+    """Return the set of file paths added or modified by the diff."""
+    try:
+        return {f.path for f in PatchSet.from_string(diff) if not f.is_removed_file}
+    except Exception:
+        files = set()
+        for line in diff.splitlines():
+            if line.startswith("+++ b/"):
+                files.add(line[6:].strip())
+        return files
+
+
+def rule_matches(
+    rule: Rule, changed_files: set[str], bug_component: str | None = None
+) -> bool:
+    return predicate_matches(rule.when, changed_files, bug_component)
+
+
+def _normalise_path(path: str) -> str:
+    path = path.replace("\\", "/")
+    if path.startswith("./"):
+        return path[2:]
+    return path
+
+
+def _file_matches(predicate: FilePredicate, path: str) -> bool:
+    path = _normalise_path(path)
+    if predicate.include and not any(
+        fnmatch.fnmatchcase(path, pattern) for pattern in predicate.include
+    ):
+        return False
+    if predicate.exclude and any(
+        fnmatch.fnmatchcase(path, pattern) for pattern in predicate.exclude
+    ):
+        return False
+    if predicate.ext and not any(path.endswith(ext) for ext in predicate.ext):
+        return False
+    return True
+
+
+
+def _unsupported_metadata_predicate() -> bool:
+    return False
+
+
+def predicate_matches(
+    predicate: Predicate,
+    changed_files: set[str],
+    bug_component: str | None = None,
+) -> bool:
+    match predicate:
+        case AllPredicate(children=children):
+            return all(
+                predicate_matches(child, changed_files, bug_component)
+                for child in children
+            )
+        case AnyPredicate(children=children):
+            return any(
+                predicate_matches(child, changed_files, bug_component)
+                for child in children
+            )
+        case NotPredicate(child=child):
+            return not predicate_matches(child, changed_files, bug_component)
+        case AnyFilePredicate(predicate=file_predicate):
+            return any(_file_matches(file_predicate, f) for f in changed_files)
+        case AllFilesPredicate(predicate=file_predicate):
+            return bool(changed_files) and all(
+                _file_matches(file_predicate, f) for f in changed_files
+            )
+        case _:
+            return _unsupported_metadata_predicate()
+    raise TypeError(f"Unknown predicate type: {type(predicate).__name__}")
+
+
+def _action_key(action: RuleAction) -> tuple:
+    if isinstance(action, LoadFileAction):
+        return (
+            "file",
+            action.repo or "",
+            action.branch or "",
+            action.path,
+        )
+    raise ValueError(f"Unknown action type: {action.type!r}")
+
+
+def _action_to_content(
+    action: LoadFileAction,
+    default_repo: str,
+    default_branch: str,
+    allowed_repos: set[str],
+) -> ExternalContent:
+    """Resolve a file action to an ExternalContent instance.
+
+    Actions without an explicit repo load from the same GitHub repo and branch
+    as the rules file. Cross-repo references use their explicit repo and
+    optional branch, defaulting to main.
+    """
+    path = action.path
+    repo = action.repo or default_repo
+    branch = action.branch or (default_branch if repo == default_repo else "main")
+    if not github_repo_allowed(repo, default_repo, allowed_repos):
+        raise ValueError(f"GitHub repo is not allowed for review context: {repo}")
+    url = _github_raw_url(repo, branch, path)
+    return ExternalContent(name=path, url=url, description=path)
+
+
+def _merge_rules(base: list[Rule], extra_toml: str) -> list[Rule]:
+    """Merge extra rules into the base list.
+
+    Rules in extra_toml whose name matches an existing rule replace that rule
+    in-place. Rules with new names are appended. Rules without a name are always
+    appended. Returns a new list; base is not mutated.
+    """
+    rules = list(base)
+    extra = parse_review_context_toml(extra_toml).rules
+    if not extra:
+        return rules
+    index = {r.name: i for i, r in enumerate(rules) if r.name}
+    for rule in extra:
+        name = rule.name
+        if name and name in index:
+            rules[index[name]] = rule
+        else:
+            rules.append(rule)
+    return rules
+
+
+def collect_actions(
+    diff: str,
+    config: ReviewContextConfig,
+    bug_component: str | None = None,
+    extra_context_toml: str | None = None,
+) -> list[MatchedAction]:
+    """Return deduplicated actions matched from config against the diff.
+
+    Merges extra_context_toml into config before matching. Evaluates each rule
+    against the changed files (and optional bug component). Actions are
+    deduplicated across rules so the same file is never fetched twice. Pure —
+    no I/O.
+    """
+    if extra_context_toml:
+        config = ReviewContextConfig(
+            version=config.version,
+            policy=config.policy,
+            definitions=config.definitions,
+            rules=_merge_rules(config.rules, extra_context_toml),
+        )
+
+    changed_files = parse_diff_files(diff)
+    logger.debug(
+        "Matching rules against %d changed file(s): %s",
+        len(changed_files),
+        changed_files,
+    )
+
+    actions_by_key: dict[tuple, MatchedAction] = {}
+    actions: list[MatchedAction] = []
+
+    ordered_rules = sorted(
+        enumerate(config.rules), key=lambda item: (-item[1].priority, item[0])
+    )
+    for _, rule in ordered_rules:
+        if not rule_matches(rule, changed_files, bug_component):
+            continue
+        rule_name = rule.name
+        new_actions = []
+        for action in rule.load:
+            key = _action_key(action)
+            if key in actions_by_key:
+                actions_by_key[key].matched_rules.append(rule_name)
+                continue
+            matched_action = MatchedAction(action=action, matched_rules=[rule_name])
+            actions_by_key[key] = matched_action
+            actions.append(matched_action)
+            new_actions.append(action)
+        if new_actions:
+            logger.debug(
+                "Rule %r matched: %d action(s) queued",
+                rule_name,
+                len(new_actions),
+            )
+
+    logger.debug("Total actions to execute: %d", len(actions))
+    return actions
+
+
+async def execute_actions(
+    actions: list[MatchedAction],
+    default_repo: str,
+    default_branch: str,
+    allowed_repos: set[str],
+    content_overrides: dict[str, str] | None = None,
+) -> list[ExternalContentItem]:
+    """Execute a list of actions and return loaded external content.
+
+    file actions resolve to an ExternalContent URL and fetch the file content.
+    content_overrides bypasses the network fetch for matching names, allowing
+    callers to inject test content.
+    """
+    results: list[ExternalContentItem] = []
+    for matched_action in actions:
+        action = matched_action.action
+        if not isinstance(action, LoadFileAction):
+            logger.error("Unsupported review context action %s", _action_to_dict(action))
+            continue
+        try:
+            item = _action_to_content(
+                action, default_repo, default_branch, allowed_repos
+            )
+            if content_overrides and item.name in content_overrides:
+                body = content_overrides[item.name]
+            else:
+                body = await item.load()
+            results.append(
+                ExternalContentItem.create(
+                    name=item.name,
+                    body=body,
+                    source_type="github_file",
+                    source=item.url,
+                    action=action,
+                    matched_rules=matched_action.matched_rules,
+                    trust_reason="github_repo_content",
+                )
+            )
+        except (ValueError, ExternalContentLoadError):
+            logger.error(
+                "Failed to load content for action %s", _action_to_dict(action)
+            )
+    return results
+
+
+async def _fetch_review_context(
+    repo: str, branch: str, review_context_path: str
+) -> ReviewContextConfig:
+    """Fetch and parse review-context.toml, with a short in-process TTL cache."""
+    now = time.time()
+    cache_key = (repo, branch, review_context_path)
+    if cache_key in _review_context_cache:
+        ts, config = _review_context_cache[cache_key]
+        if now - ts < _REVIEW_CONTEXT_CACHE_TTL:
+            logger.debug(
+                "Using cached review context from %s@%s:%s (age %.0fs)",
+                repo,
+                branch,
+                review_context_path,
+                now - ts,
+            )
+            return config
+    review_context_url = _github_raw_url(repo, branch, review_context_path)
+    response = await _fetch_url(review_context_url)
+    config = parse_review_context_toml(response.text)
+    _review_context_cache[cache_key] = (now, config)
+    return config
+
+
+async def load_external_content_for_diff(
+    diff: str,
+    review_context_repo: str,
+    review_context_branch: str = "main",
+    review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH,
+    extra_context_toml: str | None = None,
+    content_overrides: dict[str, str] | None = None,
+) -> list[ExternalContentItem]:
+    """Fetch review-context.toml from GitHub, match against diff, return content.
+
+    Retries the rules fetch on transient errors (via _fetch_url). The parsed
+    config is cached in-process with a short TTL to avoid redundant fetches
+    across back-to-back reviews.
+    """
+    try:
+        config = await _fetch_review_context(
+            review_context_repo, review_context_branch, review_context_path
+        )
+    except httpx.HTTPError:
+        logger.error(
+            "Could not fetch review context from %s@%s:%s",
+            review_context_repo,
+            review_context_branch,
+            review_context_path,
+        )
+        return []
+    except (tomllib.TOMLDecodeError, ReviewContextValidationError):
+        logger.exception(
+            "Could not parse review context from %s@%s:%s",
+            review_context_repo,
+            review_context_branch,
+            review_context_path,
+        )
+        return []
+
+    try:
+        actions = collect_actions(diff, config, extra_context_toml=extra_context_toml)
+    except (tomllib.TOMLDecodeError, ReviewContextValidationError):
+        logger.exception("Could not parse extra review context")
+        return []
+
+    return await execute_actions(
+        actions,
+        review_context_repo,
+        review_context_branch,
+        set(config.policy.github.allowed_repos),
+        content_overrides,
+    )
+
+
+def external_content_manifest(content_items: list[ExternalContentItem]) -> list[dict]:
+    return [item.manifest() for item in content_items]
+
+
+def format_external_content(content_items: list[ExternalContentItem]) -> str:
+    manifest = json.dumps(external_content_manifest(content_items), indent=2)
+    content = "\n\n".join(
+        f'<context name="{item.name}">\n{item.body.strip()}\n</context>'
+        for item in content_items
+    )
+    return (
+        "\n\n<external_content_manifest>\n"
+        f"{manifest}\n"
+        "</external_content_manifest>"
+        "\n\n<external_context>\n"
+        f"{content}\n"
+        "</external_context>"
+    )
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index 1bdd2fa4e5..79dc589a0f 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -8,11 +8,23 @@
 import pytest
 from unidiff import PatchSet
 
-from bugbug.tools.code_review import data_types, langchain_tools
+from bugbug.tools.code_review import data_types, langchain_tools, review_context
 from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter
 from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool
+from bugbug.tools.code_review.review_context import (
+    _merge_rules,
+    external_content_manifest,
+    format_external_content,
+    github_repo_allowed,
+    load_external_content_for_diff,
+    parse_diff_files,
+    rule_matches,
+)
 from bugbug.tools.code_review.review_context_schema import (
+    AnyFilePredicate,
+    FilePredicate,
     ReviewContextValidationError,
+    Rule,
     parse_review_context_toml,
 )
 from bugbug.tools.code_review.review_context_schema import (
@@ -537,6 +549,65 @@ async def test_external_content_load_caches():
 ]
 """
 
+_DIFF_MEDIA = """\
+diff --git a/dom/media/Foo.cpp b/dom/media/Foo.cpp
+--- a/dom/media/Foo.cpp
++++ b/dom/media/Foo.cpp
+@@ -1,1 +1,1 @@
+-old
++new
+"""
+
+_DIFF_WEBIDL = """\
+diff --git a/dom/webidl/Foo.webidl b/dom/webidl/Foo.webidl
+--- a/dom/webidl/Foo.webidl
++++ b/dom/webidl/Foo.webidl
+@@ -1,1 +1,1 @@
+-old
++new
+"""
+
+
+@pytest.fixture(autouse=True)
+def clear_review_context_cache():
+    review_context._review_context_cache.clear()
+    yield
+    review_context._review_context_cache.clear()
+
+
+def test_parse_diff_files():
+    files = parse_diff_files(_DIFF_MEDIA)
+    assert files == {"dom/media/Foo.cpp"}
+
+
+def test_github_repo_allowed_policy():
+    assert github_repo_allowed("mozilla/cubeb", "example/repo", {"mozilla/"})
+    assert github_repo_allowed("whatwg/html", "example/repo", {"whatwg/html"})
+    assert github_repo_allowed("example/repo", "example/repo", set())
+    assert not github_repo_allowed("mozilla/cubeb", "example/repo", set())
+    assert not github_repo_allowed("whatwg/html-tests", "example/repo", {"whatwg/html"})
+
+
+def test_rule_matches_extension_and_path():
+    rule = Rule(
+        name="test",
+        when=AnyFilePredicate(FilePredicate(include=["dom/media/**"], ext=[".cpp"])),
+        load=[],
+    )
+    assert rule_matches(rule, {"dom/media/Foo.cpp"})
+    assert not rule_matches(rule, {"dom/canvas/Foo.cpp"})
+    assert not rule_matches(rule, {"dom/media/Foo.js"})
+
+
+def test_rule_matches_extension_only():
+    rule = Rule(
+        name="test",
+        when=AnyFilePredicate(FilePredicate(ext=[".webidl"])),
+        load=[],
+    )
+    assert rule_matches(rule, {"dom/webidl/Foo.webidl"})
+    assert not rule_matches(rule, {"dom/webidl/Foo.js"})
+
 
 def test_parse_review_context_toml_rejects_missing_when():
     toml = """
@@ -597,3 +668,315 @@ def test_validate_review_context_main_failure(tmp_path, capsys):
     assert validate_review_context_main([str(review_context_path)]) == 1
     captured = capsys.readouterr()
     assert "invalid" in captured.err
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_file_load():
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = _RULES_TOML
+    rules_response.raise_for_status = MagicMock()
+    content_response = MagicMock()
+    content_response.text = "---\nname: dom-media\n---\nMedia review guidelines\n"
+    content_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA, review_context_repo, review_context_branch="release"
+        )
+
+    assert len(results) == 1
+    assert results[0].name == ".claude/skills/dom-media.md"
+    assert results[0].body == "Media review guidelines\n"
+    assert results[0].source_type == "github_file"
+    assert results[0].matched_rules == ["Audio/Video C++"]
+    client.get.assert_any_await(
+        "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/review-context.toml",
+        timeout=30,
+    )
+    client.get.assert_any_await(
+        "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/.claude/skills/dom-media.md",
+        timeout=30,
+    )
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_no_match():
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = _RULES_TOML
+    rules_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(return_value=rules_response)
+
+    diff = """diff --git a/README.txt b/README.txt\n--- a/README.txt\n+++ b/README.txt\n@@ -1 +1 @@\n-a\n+b\n"""
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(diff, review_context_repo)
+
+    assert results == []
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_rules_fetch_failure():
+    client = MagicMock()
+    client.get = AsyncMock(side_effect=httpx.ConnectError("boom"))
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA, "mozilla-firefox/firefox"
+        )
+
+    assert results == []
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_deduplicates_actions():
+    rules = """
+version = 1
+
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = ".claude/skills/cpp.md" }]
+
+[[rules]]
+name = "B"
+when = { any_file = { include = ["dom/media/**"] } }
+load = [{ type = "file", path = ".claude/skills/cpp.md" }]
+"""
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = rules
+    rules_response.raise_for_status = MagicMock()
+    content_response = MagicMock()
+    content_response.text = "C++ guidelines"
+    content_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA, review_context_repo
+        )
+
+    assert len(results) == 1
+    assert results[0].matched_rules == ["A", "B"]
+    assert client.get.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_orders_by_priority():
+    rules = """
+version = 1
+
+[[rules]]
+name = "Low"
+priority = 0
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = ".claude/skills/low.md" }]
+
+[[rules]]
+name = "High"
+priority = 10
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = ".claude/skills/high.md" }]
+"""
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = rules
+    rules_response.raise_for_status = MagicMock()
+    low_response = MagicMock()
+    low_response.text = "low"
+    low_response.raise_for_status = MagicMock()
+    high_response = MagicMock()
+    high_response.text = "high"
+    high_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(side_effect=[rules_response, high_response, low_response])
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA,
+            review_context_repo,
+        )
+
+    assert [r.name for r in results] == [
+        ".claude/skills/high.md",
+        ".claude/skills/low.md",
+    ]
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_rejects_disallowed_github_repo():
+    rules = """
+version = 1
+[[rules]]
+name = "Cross repo"
+when = { any_file = { ext = [".webidl"] } }
+load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }]
+"""
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = rules
+    rules_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(return_value=rules_response)
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_WEBIDL,
+            review_context_repo,
+        )
+
+    assert results == []
+    assert client.get.await_count == 1
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_allows_policy_prefix_repo():
+    rules = """
+version = 1
+[policy.github]
+allowed_repos = ["mozilla/"]
+[[rules]]
+name = "Cross repo"
+when = { any_file = { ext = [".webidl"] } }
+load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }]
+"""
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = rules
+    rules_response.raise_for_status = MagicMock()
+    content_response = MagicMock()
+    content_response.text = "cubeb guidelines"
+    content_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_WEBIDL,
+            review_context_repo,
+        )
+
+    assert len(results) == 1
+    assert results[0].source.endswith("/mozilla/cubeb/refs/heads/main/REVIEWING.md")
+
+
+def test_merge_rules_appends_new():
+    base = parse_review_context_toml(
+        """
+version = 1
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+    )
+    merged = _merge_rules(
+        base.rules,
+        """
+version = 1
+[[rules]]
+name = "B"
+when = { any_file = { ext = [".js"] } }
+load = [{ type = "file", path = "b.md" }]
+""",
+    )
+    assert [rule.name for rule in merged] == ["A", "B"]
+
+
+def test_merge_rules_replaces_by_name():
+    base = parse_review_context_toml(
+        """
+version = 1
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+    )
+    merged = _merge_rules(
+        base.rules,
+        """
+version = 1
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".js"] } }
+load = [{ type = "file", path = "replacement.md" }]
+""",
+    )
+    assert len(merged) == 1
+    assert merged[0].load[0].path == "replacement.md"
+
+
+def test_merge_rules_empty_extra():
+    base = parse_review_context_toml(
+        """
+version = 1
+[[rules]]
+name = "A"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+    )
+    merged = _merge_rules(base.rules, "version = 1\n")
+    assert merged == base.rules
+
+
+@pytest.mark.asyncio
+async def test_content_override_used_instead_of_fetch():
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = _RULES_TOML
+    rules_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(return_value=rules_response)
+    overrides = {".claude/skills/dom-media.md": "Override body"}
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA, review_context_repo, content_overrides=overrides
+        )
+
+    assert len(results) == 1
+    assert results[0].body == "Override body"
+    assert client.get.await_count == 1
+
+
+@pytest.mark.asyncio
+async def test_external_content_manifest_and_prompt_body():
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = _RULES_TOML
+    rules_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(return_value=rules_response)
+    overrides = {".claude/skills/dom-media.md": "Guideline body"}
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA, review_context_repo, content_overrides=overrides
+        )
+
+    manifest = external_content_manifest(results)
+    assert manifest == [
+        {
+            "name": ".claude/skills/dom-media.md",
+            "source_type": "github_file",
+            "source": "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/main/.claude/skills/dom-media.md",
+            "action": {"type": "file", "path": ".claude/skills/dom-media.md"},
+            "matched_rules": ["Audio/Video C++"],
+            "trusted": True,
+            "trust_reason": "github_repo_content",
+            "bytes": len("Guideline body".encode()),
+            "sha256": results[0].sha256,
+        }
+    ]
+    prompt = format_external_content(results)
+    assert "<external_content_manifest>" in prompt
+    assert "<external_context>" in prompt
+    assert '<context name=".claude/skills/dom-media.md">' in prompt
+    assert "Guideline body" in prompt

From 8828b03d7c3cc53617e73afbd8b5dc18ea74ae2d Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:51:15 +0200
Subject: [PATCH 08/11] Support metadata and revision review context rules

---
 bugbug/tools/code_review/review_context.py | 155 +++++++++++++++++++--
 tests/test_code_review.py                  | 130 +++++++++++++++++
 2 files changed, 276 insertions(+), 9 deletions(-)

diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py
index 907d2a73fe..39791c70a1 100644
--- a/bugbug/tools/code_review/review_context.py
+++ b/bugbug/tools/code_review/review_context.py
@@ -9,9 +9,11 @@
 against the rules, and pre-loads the referenced content.
 """
 
+import asyncio
 import fnmatch
 import hashlib
 import json
+import re
 import time
 from dataclasses import dataclass
 from logging import getLogger
@@ -31,20 +33,27 @@
     AllPredicate,
     AnyFilePredicate,
     AnyPredicate,
+    BugzillaPredicate,
+    FetchRevisionAction,
     FilePredicate,
     LoadFileAction,
     NotPredicate,
+    PatchPredicate,
     Predicate,
     ReviewContextConfig,
     ReviewContextValidationError,
+    ReviewPredicate,
     Rule,
     RuleAction,
     parse_review_context_toml,
     tomllib,
 )
+from bugbug.tools.core.connection import get_http_client
 
 logger = getLogger(__name__)
 
+_PHAB_RE = re.compile(r"D(\d+)$")
+
 _review_context_cache: dict[
     tuple[str, str, str], tuple[float, "ReviewContextConfig"]
 ] = {}
@@ -141,6 +150,29 @@ def parse_diff_files(diff: str) -> set[str]:
         return files
 
 
+async def get_bug_component(bug_id: int) -> str | None:
+    """Return 'Product::Component' for the given Bugzilla bug, or None on failure."""
+    from libmozdata.bugzilla import Bugzilla
+
+    def _fetch() -> str | None:
+        bugs: list[dict] = []
+        Bugzilla(
+            bug_id,
+            include_fields=["product", "component"],
+            bughandler=lambda bug, data: data.append(bug),
+            bugdata=bugs,
+        ).get_data().wait()
+        if not bugs:
+            return None
+        return f"{bugs[0]['product']}::{bugs[0]['component']}"
+
+    try:
+        return await asyncio.get_event_loop().run_in_executor(None, _fetch)
+    except Exception:
+        logger.warning("Could not fetch component for bug %s", bug_id)
+        return None
+
+
 def rule_matches(
     rule: Rule, changed_files: set[str], bug_component: str | None = None
 ) -> bool:
@@ -169,8 +201,21 @@ def _file_matches(predicate: FilePredicate, path: str) -> bool:
     return True
 
 
+def _bugzilla_matches(
+    predicate: BugzillaPredicate, bug_component: str | None = None
+) -> bool:
+    if predicate.product or predicate.keywords or predicate.severity:
+        return False
+    if predicate.component:
+        return bug_component is not None and bug_component in predicate.component
+    return False
 
-def _unsupported_metadata_predicate() -> bool:
+
+def _review_matches(predicate: ReviewPredicate) -> bool:
+    return False
+
+
+def _patch_matches(predicate: PatchPredicate) -> bool:
     return False
 
 
@@ -198,8 +243,12 @@ def predicate_matches(
             return bool(changed_files) and all(
                 _file_matches(file_predicate, f) for f in changed_files
             )
-        case _:
-            return _unsupported_metadata_predicate()
+        case BugzillaPredicate():
+            return _bugzilla_matches(predicate, bug_component)
+        case ReviewPredicate():
+            return _review_matches(predicate)
+        case PatchPredicate():
+            return _patch_matches(predicate)
     raise TypeError(f"Unknown predicate type: {type(predicate).__name__}")
 
 
@@ -211,6 +260,13 @@ def _action_key(action: RuleAction) -> tuple:
             action.branch or "",
             action.path,
         )
+    if isinstance(action, FetchRevisionAction):
+        return (
+            "fetch_revision",
+            action.revision or "",
+            action.repo or "",
+            action.hash or "",
+        )
     raise ValueError(f"Unknown action type: {action.type!r}")
 
 
@@ -235,6 +291,68 @@ def _action_to_content(
     return ExternalContent(name=path, url=url, description=path)
 
 
+async def _fetch_revision(
+    action: FetchRevisionAction,
+) -> tuple[str, str, Literal["phabricator_revision", "github_commit"], str, str] | None:
+    """Fetch the diff of another revision, either from Phabricator or GitHub."""
+    revision_str = action.revision or ""
+    repo = action.repo or ""
+    commit_hash = action.hash or ""
+
+    if revision_str:
+        m = _PHAB_RE.match(revision_str.strip())
+        if not m:
+            logger.warning("fetch_revision: cannot parse revision %r", revision_str)
+            return None
+        rev_num = int(m.group(1))
+        try:
+            from bugbug.tools.core.platforms.phabricator import get_phabricator_client
+
+            def _load():
+                phab = get_phabricator_client()
+                revision = phab.load_revision(rev_id=rev_num)
+                diff_id = revision["fields"]["diffID"]
+                return phab.load_raw_diff(diff_id)
+
+            content = await asyncio.get_event_loop().run_in_executor(None, _load)
+            name = f"Revision D{rev_num}"
+            return (
+                name,
+                content,
+                "phabricator_revision",
+                name,
+                "phabricator_revision",
+            )
+        except Exception:
+            logger.warning("fetch_revision: failed to load Phabricator D%s", rev_num)
+            return None
+
+    if repo and commit_hash:
+        try:
+            response = await get_http_client().get(
+                f"https://api.github.com/repos/{repo}/commits/{commit_hash}",
+                headers={"Accept": "application/vnd.github.diff"},
+                timeout=30,
+            )
+            response.raise_for_status()
+            source = f"{repo}@{commit_hash}"
+            return (
+                f"{repo}@{commit_hash[:12]}",
+                response.text,
+                "github_commit",
+                source,
+                "github_repo_content",
+            )
+        except httpx.HTTPError:
+            logger.warning("fetch_revision: failed to load %s@%s", repo, commit_hash)
+            return None
+
+    logger.warning(
+        "fetch_revision: action has neither revision nor repo+hash: %s", action
+    )
+    return None
+
+
 def _merge_rules(base: list[Rule], extra_toml: str) -> list[Rule]:
     """Merge extra rules into the base list.
 
@@ -324,15 +442,29 @@ async def execute_actions(
 ) -> list[ExternalContentItem]:
     """Execute a list of actions and return loaded external content.
 
-    file actions resolve to an ExternalContent URL and fetch the file content.
-    content_overrides bypasses the network fetch for matching names, allowing
-    callers to inject test content.
+    fetch_revision actions fetch the raw diff of a Phabricator revision or
+    GitHub commit. file actions resolve to an ExternalContent URL and
+    fetch the file content. content_overrides bypasses the network fetch for
+    matching names, allowing callers to inject test content.
     """
     results: list[ExternalContentItem] = []
     for matched_action in actions:
         action = matched_action.action
-        if not isinstance(action, LoadFileAction):
-            logger.error("Unsupported review context action %s", _action_to_dict(action))
+        if isinstance(action, FetchRevisionAction):
+            result = await _fetch_revision(action)
+            if result:
+                name, body, source_type, source, trust_reason = result
+                results.append(
+                    ExternalContentItem.create(
+                        name=name,
+                        body=body,
+                        source_type=source_type,
+                        source=source,
+                        action=action,
+                        matched_rules=matched_action.matched_rules,
+                        trust_reason=trust_reason,
+                    )
+                )
             continue
         try:
             item = _action_to_content(
@@ -389,6 +521,7 @@ async def load_external_content_for_diff(
     review_context_repo: str,
     review_context_branch: str = "main",
     review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH,
+    bug_id: int | None = None,
     extra_context_toml: str | None = None,
     content_overrides: dict[str, str] | None = None,
 ) -> list[ExternalContentItem]:
@@ -419,8 +552,12 @@ async def load_external_content_for_diff(
         )
         return []
 
+    bug_component: str | None = None
+    if bug_id is not None:
+        bug_component = await get_bug_component(bug_id)
+
     try:
-        actions = collect_actions(diff, config, extra_context_toml=extra_context_toml)
+        actions = collect_actions(diff, config, bug_component, extra_context_toml)
     except (tomllib.TOMLDecodeError, ReviewContextValidationError):
         logger.exception("Could not parse extra review context")
         return []
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index 79dc589a0f..d87d506d9f 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -15,6 +15,7 @@
     _merge_rules,
     external_content_manifest,
     format_external_content,
+    get_bug_component,
     github_repo_allowed,
     load_external_content_for_diff,
     parse_diff_files,
@@ -609,6 +610,35 @@ def test_rule_matches_extension_only():
     assert not rule_matches(rule, {"dom/webidl/Foo.js"})
 
 
+def test_rule_bugzilla_component_fails_closed_without_component():
+    config = parse_review_context_toml(_RULES_TOML)
+    rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only")
+    assert not rule_matches(rule, set(), bug_component=None)
+
+
+def test_rule_bugzilla_component_matches():
+    config = parse_review_context_toml(_RULES_TOML)
+    rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only")
+    assert rule_matches(rule, set(), bug_component="Core::DOM: Web Audio")
+    assert not rule_matches(rule, set(), bug_component="Core::Layout")
+
+
+@pytest.mark.asyncio
+async def test_get_bug_component():
+    with patch("libmozdata.bugzilla.Bugzilla") as bugzilla_cls:
+        instance = MagicMock()
+        bugzilla_cls.return_value = instance
+
+        def get_data():
+            handler = bugzilla_cls.call_args.kwargs["bughandler"]
+            data = bugzilla_cls.call_args.kwargs["bugdata"]
+            handler({"product": "Core", "component": "DOM: Web Audio"}, data)
+            return MagicMock(wait=MagicMock())
+
+        instance.get_data.side_effect = get_data
+        assert await get_bug_component(123) == "Core::DOM: Web Audio"
+
+
 def test_parse_review_context_toml_rejects_missing_when():
     toml = """
 version = 1
@@ -980,3 +1010,103 @@ async def test_external_content_manifest_and_prompt_body():
     assert "<external_context>" in prompt
     assert '<context name=".claude/skills/dom-media.md">' in prompt
     assert "Guideline body" in prompt
+
+
+@pytest.mark.asyncio
+async def test_extra_context_toml_appended():
+    review_context_repo = "mozilla-firefox/firefox"
+    base_rules = """
+version = 1
+[[rules]]
+name = "Base"
+when = { any_file = { ext = [".js"] } }
+load = [{ type = "file", path = "base.md" }]
+"""
+    extra_rules = """
+version = 1
+[[rules]]
+name = "Extra"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "extra.md" }]
+"""
+    rules_response = MagicMock()
+    rules_response.text = base_rules
+    rules_response.raise_for_status = MagicMock()
+    content_response = MagicMock()
+    content_response.text = "extra"
+    content_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA,
+            review_context_repo,
+            extra_context_toml=extra_rules,
+        )
+
+    assert [result.name for result in results] == ["extra.md"]
+
+
+@pytest.mark.asyncio
+async def test_extra_context_toml_replaces_by_name():
+    review_context_repo = "mozilla-firefox/firefox"
+    extra_rules = """
+version = 1
+[[rules]]
+name = "Audio/Video C++"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "replacement.md" }]
+"""
+    rules_response = MagicMock()
+    rules_response.text = _RULES_TOML
+    rules_response.raise_for_status = MagicMock()
+    content_response = MagicMock()
+    content_response.text = "replacement"
+    content_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+    with patch.object(data_types, "get_http_client", return_value=client):
+        results = await load_external_content_for_diff(
+            _DIFF_MEDIA,
+            review_context_repo,
+            extra_context_toml=extra_rules,
+        )
+
+    assert [result.name for result in results] == ["replacement.md"]
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_fetches_phabricator_revision():
+    rules = """
+version = 1
+[[rules]]
+name = "Revision"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "fetch_revision", revision = "D123" }]
+"""
+    review_context_repo = "mozilla-firefox/firefox"
+    rules_response = MagicMock()
+    rules_response.text = rules
+    rules_response.raise_for_status = MagicMock()
+    client = MagicMock()
+    client.get = AsyncMock(return_value=rules_response)
+
+    phabricator = MagicMock()
+    phabricator.load_revision.return_value = {"fields": {"diffID": 456}}
+    phabricator.load_raw_diff.return_value = "diff content"
+
+    with (
+        patch.object(data_types, "get_http_client", return_value=client),
+        patch(
+            "bugbug.tools.core.platforms.phabricator.get_phabricator_client",
+            return_value=phabricator,
+        ),
+    ):
+        results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo)
+
+    assert len(results) == 1
+    assert results[0].name == "Revision D123"
+    assert results[0].body == "diff content"
+    assert results[0].source_type == "phabricator_revision"

From c2bfec2eccc46ca93f1c8aee56abd885b7f62329 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:51:33 +0200
Subject: [PATCH 09/11] Inject review context into code review prompts

---
 bugbug/tools/code_review/agent.py     | 49 +++++++++++++--
 services/mcp/src/bugbug_mcp/server.py | 90 ++++++++++++++++++++++++++-
 2 files changed, 131 insertions(+), 8 deletions(-)

diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
index b21c8bf563..d2006c1502 100644
--- a/bugbug/tools/code_review/agent.py
+++ b/bugbug/tools/code_review/agent.py
@@ -191,25 +191,30 @@ def create(cls, **kwargs):
     def count_tokens(self, text):
         return len(self._tokenizer.encode(text))
 
-    def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str:
+    def generate_initial_prompt(
+        self, patch: Patch, patch_summary: str, external_context: str = ""
+    ) -> str:
         created_before = patch.date_created if self.is_experiment_env else None
 
         return FIRST_MESSAGE_TEMPLATE.format(
             patch=format_patch_set(patch.patch_set),
             patch_summarization=patch_summary,
+            external_context=external_context,
             comment_examples=self._get_comment_examples(patch, created_before),
             approved_examples=self._get_generated_examples(patch, created_before),
         )
 
     async def generate_review_comments(
-        self, patch: Patch, patch_summary: str
+        self, patch: Patch, patch_summary: str, external_context: str = ""
     ) -> list[GeneratedReviewComment]:
         try:
             async for chunk in self.agent.astream(
                 {
                     "messages": [
                         HumanMessage(
-                            self.generate_initial_prompt(patch, patch_summary)
+                            self.generate_initial_prompt(
+                                patch, patch_summary, external_context
+                            )
                         ),
                     ]
                 },
@@ -223,14 +228,47 @@ async def generate_review_comments(
 
         return result["structured_response"].comments
 
-    async def run(self, patch: Patch) -> CodeReviewToolResponse:
+    async def run(
+        self,
+        patch: Patch,
+        review_context_repo: Optional[str] = None,
+        review_context_branch: str = "main",
+        extra_context_toml: Optional[str] = None,
+        content_overrides: Optional[dict[str, str]] = None,
+    ) -> CodeReviewToolResponse:
         if self.count_tokens(patch.raw_diff) > 21000:
             raise LargeDiffError("The diff is too large")
 
         patch_summary = self.patch_summarizer.run(patch)
 
+        external_context = ""
+        external_content_manifest = []
+        if review_context_repo:
+            from bugbug.tools.code_review.review_context import (
+                external_content_manifest as build_external_content_manifest,
+            )
+            from bugbug.tools.code_review.review_context import (
+                format_external_content,
+                load_external_content_for_diff,
+            )
+
+            bug_id = getattr(patch, "bug_id", None)
+            content_items = await load_external_content_for_diff(
+                patch.raw_diff,
+                review_context_repo,
+                review_context_branch=review_context_branch,
+                bug_id=bug_id,
+                extra_context_toml=extra_context_toml,
+                content_overrides=content_overrides,
+            )
+            if content_items:
+                external_context = format_external_content(content_items)
+                external_content_manifest = build_external_content_manifest(
+                    content_items
+                )
+
         unfiltered_suggestions = await self.generate_review_comments(
-            patch, patch_summary
+            patch, patch_summary, external_context
         )
         if not unfiltered_suggestions:
             logger.info("No suggestions were generated")
@@ -247,6 +285,7 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
             details={
                 "model": self._agent_model_name,
                 "num_unfiltered_suggestions": len(unfiltered_suggestions),
+                "external_content": external_content_manifest,
             },
         )
 
diff --git a/services/mcp/src/bugbug_mcp/server.py b/services/mcp/src/bugbug_mcp/server.py
index 7cc99d09d5..8a842352d1 100644
--- a/services/mcp/src/bugbug_mcp/server.py
+++ b/services/mcp/src/bugbug_mcp/server.py
@@ -1,6 +1,7 @@
 """MCP server for Firefox Development."""
 
 import functools
+import json
 import logging
 import os
 from pathlib import Path
@@ -18,6 +19,10 @@
     LOCAL_SYSTEM_PROMPT_TEMPLATE,
     SYSTEM_PROMPT_TEMPLATE,
 )
+from bugbug.tools.code_review.review_context import (
+    format_external_content,
+    load_external_content_for_diff,
+)
 from bugbug.tools.core.platforms.bugzilla import SanitizedBug
 from bugbug.tools.core.platforms.phabricator import (
     PhabricatorPatch,
@@ -50,6 +55,10 @@ async def _patch_review_impl(
     patch_url: str | None,
     diff: str | None,
     commit_message: str | None,
+    review_context_repo: str | None,
+    review_context_branch: str,
+    extra_context_toml: str | None,
+    content_overrides: str | None,
 ) -> str:
     if diff:
         patch = LocalPatch(diff, commit_message=commit_message or "")
@@ -70,7 +79,33 @@ async def _patch_review_impl(
     prompt_template = LOCAL_SYSTEM_PROMPT_TEMPLATE if diff else SYSTEM_PROMPT_TEMPLATE
     system_prompt = prompt_template.format(target_software=tool.target_software)
     patch_summary = "" if diff else tool.patch_summarizer.run(patch)
-    initial_prompt = tool.generate_initial_prompt(patch, patch_summary)
+
+    parsed_overrides: dict[str, str] | None = None
+    if content_overrides:
+        try:
+            parsed_overrides = json.loads(content_overrides)
+        except json.JSONDecodeError:
+            pass
+
+    external_context = ""
+    if review_context_repo:
+        bug_id = getattr(patch, "bug_id", None) if patch.has_bug else None
+        content_items = await load_external_content_for_diff(
+            patch.raw_diff,
+            review_context_repo,
+            review_context_branch=review_context_branch or "main",
+            bug_id=bug_id,
+            extra_context_toml=(
+                extra_context_toml if extra_context_toml != "None" else None
+            ),
+            content_overrides=parsed_overrides,
+        )
+        if content_items:
+            external_context = format_external_content(content_items)
+
+    initial_prompt = tool.generate_initial_prompt(
+        patch, patch_summary, external_context
+    )
     return system_prompt + "\n\n" + initial_prompt
 
 
@@ -88,9 +123,46 @@ async def patch_review(
         default=None,
         description="Commit message for the local diff (optional, used to extract bug ID, title, and Differential Revision URL).",
     ),
+    review_context_repo: str | None = Field(
+        default=None,
+        description=(
+            "GitHub repository containing review-context.toml, in org/project form. "
+            "When omitted, no rule-based context is loaded."
+        ),
+    ),
+    review_context_branch: str = Field(
+        default="main",
+        description="GitHub branch to read review-context.toml and same-repo content from.",
+    ),
+    extra_context_toml: str | None = Field(
+        default=None,
+        description=(
+            "TOML content to merge into the fetched review-context.toml. "
+            "Rules are matched by name: same name replaces, new name appends. "
+            "Use this to test new or modified rules before landing."
+        ),
+    ),
+    content_overrides: str | None = Field(
+        default=None,
+        description=(
+            "JSON object mapping content name (file path) to body text. Overrides "
+            "the fetched content for matching items, or provides content for new "
+            "items referenced by extra_context_toml. Use this to test changes before "
+            "landing. Example: "
+            '{".claude/skills/dom-audio.md": "My guidelines."}'
+        ),
+    ),
 ) -> str:
     """Review a code patch from Phabricator or a raw local diff."""
-    return await _patch_review_impl(patch_url, diff, commit_message)
+    return await _patch_review_impl(
+        patch_url,
+        diff,
+        commit_message,
+        review_context_repo,
+        review_context_branch,
+        extra_context_toml,
+        content_overrides,
+    )
 
 
 @mcp.tool()
@@ -98,9 +170,21 @@ async def patch_review_tool(
     patch_url: str | None = None,
     diff: str | None = None,
     commit_message: str | None = None,
+    review_context_repo: str | None = None,
+    review_context_branch: str = "main",
+    extra_context_toml: str | None = None,
+    content_overrides: str | None = None,
 ) -> str:
     """Build the patch review prompt. Use diff= for local diffs, patch_url= for Phabricator."""
-    return await _patch_review_impl(patch_url, diff, commit_message)
+    return await _patch_review_impl(
+        patch_url,
+        diff,
+        commit_message,
+        review_context_repo,
+        review_context_branch,
+        extra_context_toml,
+        content_overrides,
+    )
 
 
 @mcp.resource(

From a2fee60123825bdf1c0be890b4db5789e0dafd6b Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:52:06 +0200
Subject: [PATCH 10/11] Document review context rules

---
 docs/code-review-context-example.toml | 232 +++++++++++
 docs/code-review-skills.md            | 395 +++++++++++++++++++
 tests/test_code_review.py             | 548 +++++++++++++++-----------
 3 files changed, 946 insertions(+), 229 deletions(-)
 create mode 100644 docs/code-review-context-example.toml
 create mode 100644 docs/code-review-skills.md

diff --git a/docs/code-review-context-example.toml b/docs/code-review-context-example.toml
new file mode 100644
index 0000000000..b6083d87d1
--- /dev/null
+++ b/docs/code-review-context-example.toml
@@ -0,0 +1,232 @@
+# Proposed review-context.toml external content rules.
+#
+# This is a design sketch for the next rule language, not the format currently
+# accepted by the loader.
+#
+# This file is intentionally verbose. It shows the major language features and
+# comments on what each one does.
+
+# Version is required so the parser can reject files written for a future
+# incompatible format.
+version = 1
+
+# GitHub loads are restricted. The current review-context repo is always
+# allowed. Entries ending in `/` allow every repo under that organization;
+# entries in `org/repo` form allow a single repo.
+[policy.github]
+allowed_repos = [
+  "mozilla/",
+  "web-platform-tests/wpt",
+  "whatwg/html",
+]
+
+# Definitions are reusable predicate fragments. They are expanded structurally,
+# as if the referenced table had been written inline. The parser should reject
+# unknown refs, cycles, and refs to the wrong predicate kind.
+[definitions.files.media_impl]
+include = ["dom/media/**"]
+exclude = [
+  "dom/media/test/**",
+  "dom/media/gtest/**",
+  "dom/media/webaudio/test/**",
+  "dom/media/**/Generated*.cpp",
+]
+ext = [".cpp", ".h", ".mm"]
+
+[definitions.files.frontend_ui]
+include = [
+  "browser/components/**",
+  "browser/base/content/**",
+  "toolkit/components/**",
+  "toolkit/content/**",
+]
+exclude = [
+  "browser/components/**/test/**",
+  "toolkit/components/**/test/**",
+  "**/*.snap",
+]
+ext = [".js", ".mjs", ".sys.mjs", ".html", ".xhtml", ".css", ".scss"]
+
+[definitions.files.docs]
+ext = [".md", ".rst"]
+
+# Metadata predicates can be reusable too. A predicate can only reference a
+# definition from its own namespace: `bugzilla` refs `definitions.bugzilla.*`,
+# `review` refs `definitions.review.*`, and `patch` refs
+# `definitions.patch.*`.
+[definitions.bugzilla.dom_api]
+component = ["Core::DOM: Core & HTML", "Core::DOM: Events"]
+
+[definitions.review.frontend_reviewers]
+# `reviewer` matches both blocking and non-blocking reviewers.
+reviewer = ["frontend-reviewers"]
+
+[definitions.review.dom_reviewers]
+reviewer = ["dom-reviewers", "webidl-reviewers"]
+
+[definitions.review.blocking_media_reviewers]
+blocking_reviewer = ["padenot", "media-reviewers"]
+
+[definitions.patch.backport_repositories]
+is_backport = true
+repository = ["firefox-beta", "firefox-release", "firefox-esr"]
+
+# `priority` is optional. It defaults to 0 and only controls ordering among
+# loaded external content blocks. Higher priority loads first. Ties preserve
+# declaration order. All external content is injected before the patch diff.
+[[rules]]
+name = "Media: implementation"
+description = "Load media architecture and realtime-safety guidance for C++ media changes."
+owners = ["media-reviewers"]
+priority = 80
+
+# `all` means every child predicate must match.
+# `any_file` means at least one changed file must match the file predicate.
+# The file predicate uses the reusable definition above.
+when = { all = [
+  { any_file = { ref = "files.media_impl" } },
+] }
+
+# `kind` is an optional free-form audit/prompt hint. Load failures are
+# non-fatal: the loader logs an error and continues with remaining loads.
+load = [
+  { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" },
+  { type = "file", repo = "mozilla/cubeb", branch = "main", path = "docs/realtime-programming.md", kind = "policy" },
+]
+
+
+[[rules]]
+name = "Firefox frontend: JS, HTML, CSS"
+description = "Load frontend conventions for browser chrome, toolkit UI, and component styles."
+owners = ["frontend-reviewers"]
+priority = 60
+
+# `any` means at least one child predicate must match.
+# This rule fires for frontend UI files when a frontend reviewer/group is
+# assigned.
+when = { all = [
+  { any_file = { ref = "files.frontend_ui" } },
+  { review = { ref = "review.frontend_reviewers" } },
+] }
+
+load = [
+  { type = "file", path = ".claude/review/firefox-frontend.md", kind = "style-guide" },
+  { type = "file", path = "browser/docs/frontend/performance.md", kind = "performance" },
+]
+
+
+[[rules]]
+name = "DOM API implementation"
+description = "Load WebIDL and DOM binding guidance for changes that expose or implement web APIs."
+owners = ["dom-reviewers"]
+priority = 90
+
+# Boolean composition can be nested. This rule fires for:
+# - any WebIDL file under dom/,
+# - C++/header changes under dom/ when Bugzilla says this is a DOM bug, or
+# - DOM binding implementation files when a DOM/WebIDL reviewer is assigned.
+when = { any = [
+  { any_file = { include = ["dom/**"], ext = [".webidl"] } },
+  { all = [
+    { any_file = { include = ["dom/**"], ext = [".cpp", ".h"] } },
+    { bugzilla = { ref = "bugzilla.dom_api" } },
+  ] },
+  { all = [
+    { any_file = { include = ["dom/bindings/**"], ext = [".cpp", ".h", ".py"] } },
+    { review = { ref = "review.dom_reviewers" } },
+  ] },
+] }
+
+load = [
+  { type = "file", path = ".claude/review/dom-api.md", kind = "api-contract" },
+  { type = "file", path = "dom/docs/webidl-bindings.md", kind = "reference" },
+]
+
+
+[[rules]]
+name = "Docs-only change"
+description = "Load writing and API documentation guidance when every changed file is documentation."
+owners = ["docs-reviewers"]
+
+# `all_files` means every changed file must match the file predicate. This is
+# useful for patch-shape rules such as docs-only, tests-only, or generated-only.
+# This rule intentionally omits priority; it defaults to 0.
+when = { all_files = { ref = "files.docs" } }
+
+load = [
+  { type = "file", path = ".claude/review/docs.md", kind = "style-guide" },
+]
+
+
+[[rules]]
+name = "Security-sensitive DOM changes"
+description = "Load security guidance for non-test security-relevant DOM changes."
+owners = ["sec-reviewers", "dom-reviewers"]
+priority = 100
+
+# `not` negates its child predicate. This rule fires when some DOM security
+# file changed, but the patch is not test-only.
+when = { all = [
+  { any_file = { include = ["dom/security/**"], ext = [".cpp", ".h", ".webidl"] } },
+  { not = { all_files = { include = ["dom/security/test/**"] } } },
+] }
+
+load = [
+  { type = "file", path = ".claude/review/security-sensitive.md", kind = "policy" },
+]
+
+
+[[rules]]
+name = "Backport risk checklist"
+description = "Load release-branch guidance for patches targeting a backport repository or branch."
+owners = ["release-managers"]
+priority = 70
+
+# `patch.repository` and `patch.is_backport` come from trusted patch metadata.
+# For GitHub entrypoints, repository is org/project. For Phabricator reviews,
+# repository is the target Phabricator repository exposed by the Mozilla MCP.
+when = { all = [
+  { patch = { ref = "patch.backport_repositories" } },
+] }
+
+load = [
+  { type = "file", path = ".claude/review/backport-risk.md", kind = "release-policy" },
+]
+
+
+[[rules]]
+name = "Blocking reviewer guidance"
+description = "Load reviewer-specific expectations when a blocking media reviewer is assigned."
+owners = ["media-reviewers"]
+priority = 85
+
+# Review identities are Phabricator emails or review groups.
+# `blocking_reviewer` narrows to reviewers whose approval is required before
+# landing. Those reviewers also match ordinary `reviewer` predicates.
+when = { all = [
+  { any_file = { ref = "files.media_impl" } },
+  { review = { ref = "review.blocking_media_reviewers" } },
+] }
+
+# Dedupe is global per review. If another matched rule already loaded
+# media-architecture.md, this action records this rule in the manifest but does
+# not fetch or inject the same source a second time.
+load = [
+  { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" },
+  { type = "file", path = ".claude/review/media-reviewer-checklist.md", kind = "checklist" },
+]
+
+
+[[rules]]
+name = "Related revision context"
+description = "Load another reviewed diff when a patch depends on behavior from a known revision."
+owners = ["media-reviewers"]
+
+# `fetch_revision` injects a trusted diff as context. Phabricator revisions are
+# fetched through the platform client/MCP path; GitHub commits use repo+hash.
+when = { any_file = { include = ["dom/media/mediacontrol/**"] } }
+
+load = [
+  { type = "fetch_revision", revision = "D303229", kind = "related-diff" },
+  { type = "fetch_revision", repo = "mozilla/cubeb", hash = "0123456789abcdef0123456789abcdef01234567", kind = "related-diff" },
+]
diff --git a/docs/code-review-skills.md b/docs/code-review-skills.md
new file mode 100644
index 0000000000..cabbf3ab9b
--- /dev/null
+++ b/docs/code-review-skills.md
@@ -0,0 +1,395 @@
+# Code Review Skills
+
+The code review agent can load external knowledge at review time, injecting it
+as context before the LLM sees the patch. This lets domain experts encode their
+knowledge once, in a form the agent uses automatically whenever it reviews
+relevant code.
+
+## How it works
+
+When the agent reviews a patch, it:
+
+1. Fetches `review-context.toml` from the root of a GitHub repository passed to
+   the review entrypoint as `review_context_repo` (and optionally `review_context_branch`).
+2. Parses the changed files from the diff.
+3. Matches each rule against the changed files (and optionally the patch's
+   associated Bugzilla component, fetched from BMO).
+4. For each matched rule, fetches the referenced skill or documentation files.
+5. Injects the content as `<context>` blocks inside an `<external_context>`
+   section in the review prompt, before the patch.
+
+Pass the GitHub repository containing `review-context.toml` to `patch_review`,
+using `org/project` syntax. `review_context_branch` defaults to `main`; pass another
+branch when reviewing a backport or another release branch.
+
+```
+/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \
+  review_context_repo="mozilla-firefox/firefox" \
+  review_context_branch="main"
+```
+
+---
+
+## For bugbug developers
+
+### Key files
+
+| File                                                | Purpose                                                            |
+| --------------------------------------------------- | ------------------------------------------------------------------ |
+| `bugbug/tools/code_review/review_context_schema.py` | Stdlib-only rule schema and validator CLI                          |
+| `bugbug/tools/code_review/review_context.py`        | Rule engine: parses rules, matches files, loads external content   |
+| `bugbug/tools/code_review/data_types.py`            | `Skill` model with async HTTP fetch and frontmatter stripping      |
+| `bugbug/tools/code_review/agent.py`                 | Calls `load_external_content_for_diff` in `run()`, formats context |
+| `bugbug/tools/code_review/prompts/system.md`        | System prompt template                                             |
+| `bugbug/tools/code_review/prompts/first_message.md` | First user message template (contains `{external_context}` slot)   |
+
+### Prompt templates
+
+Both `system.md` and `first_message.md` are plain Markdown files loaded at
+import time. Edit them directly — no Python changes needed. They use Python
+`str.format()` placeholders (`{target_software}`, `{patch}`,
+`{external_context}`, etc.).
+
+The `{external_context}` slot in `first_message.md` is filled by
+`format_external_content()` from `review_context.py` when rules match. It is an
+empty string otherwise, so no blank section appears in the prompt.
+
+Loaded content is preceded by an `<external_content_manifest>` block. The
+manifest excludes the body text and records each item's name, source, source
+type, matched rules, trust reason, byte count, and SHA-256 digest. The same
+manifest is also returned in `CodeReviewToolResponse.details["external_content"]`.
+
+All currently injected external content is trusted:
+
+- `file` content is fetched from a human-reviewed GitHub repository.
+- `fetch_revision` content comes from configured review platforms.
+- GitHub `file` loads are restricted by the repository allow-list in
+  `review-context.toml`; the repository containing the rules file is always
+  allowed.
+
+### Caching
+
+The parsed rules file is cached in-process for a short TTL, keyed by
+`review_context_repo`, `review_context_branch`, and rules path. External content
+is fetched for each review so the audit manifest reflects exactly what was
+injected for that review. The shared `httpx.AsyncClient` from
+`get_http_client()` handles connection reuse.
+
+### Metadata predicates
+
+Implemented via `libmozdata.Bugzilla` with `include_fields=["product", "component"]`.
+The patch's associated bug ID is read from `patch.bug_id` (available on
+`PhabricatorPatch`; other patch types pass `None`, which fails closed). The
+component string compared against the rule is `"Product::Component"`.
+
+The schema accepts `bugzilla`, `review`, and `patch` predicates. Runtime
+matching currently implements `bugzilla.component`; `bugzilla.product`,
+`bugzilla.keywords`, `bugzilla.severity`, `review.*`, and `patch.*` are parsed
+and validated but fail closed until trusted metadata is wired into the matcher.
+
+### `fetch_revision`
+
+Fetches the raw diff of another revision for use as context. Supports:
+
+- **Phabricator**: `{ type = "fetch_revision", revision = "D12345" }` — calls
+  the Phabricator API via `get_phabricator_client()`.
+- **GitHub**: `{ type = "fetch_revision", repo = "org/repo", hash = "abc123" }`
+  — uses the GitHub API with `Accept: application/vnd.github.diff`.
+
+---
+
+## For Mozilla developers: authoring skills and rules
+
+### Overview
+
+You add two things to your repository:
+
+- One or more **skill or documentation files** — Markdown or RST documents
+  with expert knowledge, guidelines, or relevant specs. These can be existing
+  docs already in the tree, or new files written specifically as review guides.
+- Entries in **`review-context.toml`** at the repo root — rules that say which
+  files to load when certain code paths are touched.
+
+The review agent picks these up automatically on the next review of a matching
+patch.
+
+### Skill file format
+
+Skill files are Markdown (`.md`); documentation files referenced as context can
+also be RST (`.rst`). All formats accept optional YAML frontmatter. The
+frontmatter is stripped before injection; only the body reaches the LLM. See an
+[existing example on Searchfox](https://searchfox.org/mozilla-central/source/.claude/skills/firefox-desktop-frontend/SKILL.md).
+
+```markdown
+---
+name: dom-audio
+description: Web Audio API review guidance
+---
+
+## Web Audio review checklist
+
+- `AudioContext` must not be created on a background thread.
+- `AudioNode` lifetimes are graph-managed; do not hold raw pointers.
+- Prefer `MediaStreamTrack` over raw PCM where the spec allows it.
+```
+
+Conventional location for dedicated skill files:
+
+```
+.claude/skills/<component-name>/SKILL.md
+```
+
+### `review-context.toml` format
+
+`review-context.toml` is parsed as TOML and then validated against the rule
+schema before any rule is matched. Unknown rule fields, unknown action types,
+and malformed `fetch_revision` actions are rejected.
+
+You can validate a rules file without running a review:
+
+```
+bugbug-validate-review-context review-context.toml
+```
+
+The validator implementation lives in
+`bugbug/tools/code_review/review_context_schema.py` and intentionally uses only the
+Python standard library. A target repository can either run the bugbug CLI or
+copy that single file into its own tests to validate `review-context.toml`.
+Complete TOML examples in this document are marked as `toml review-context` and
+validated by bugbug's test suite to keep the docs and parser from drifting.
+For a complete validated rules file, see `docs/code-review-context-example.toml`.
+
+```toml review-context
+version = 1
+
+[[rules]]
+name = "DOM: Web Audio C++"
+when = { any_file = { include = ["dom/media/webaudio/**"], ext = [".cpp", ".h"] } }
+load = [
+  { type = "file", path = ".claude/skills/dom-audio/SKILL.md" },
+]
+```
+
+Multiple rules can fire for a single patch. Actions are deduplicated across
+rules, so the same file is never fetched twice.
+
+All loaded external content is injected before the patch diff. `priority` is
+optional and only affects ordering among loaded external content blocks:
+matched rules are ordered by descending priority, defaulting to `0`; ties keep
+declaration order. Load entries keep their order within a rule. If the same
+resolved source is requested more than once, it is loaded once at its first
+ordered position and the audit manifest records every matched rule that
+requested it.
+
+Load failures are non-fatal. The loader logs an error and continues with the
+remaining loads.
+
+### Rule matching
+
+Each rule has a required `when` predicate. Boolean predicates compose matching:
+`all` requires every child to match, `any` requires at least one child to match,
+and `not` negates its child.
+
+Path matching is defined over repository-relative paths from the patch, not
+over local VCS checkout paths. Paths are normalized to POSIX-style separators
+(`/`) with no leading `./`. File predicates use Python `fnmatch.fnmatchcase`, so
+matching is case-sensitive on every platform, including Windows and macOS. That
+keeps rule behavior tied to Git paths rather than host filesystem casing.
+
+The supported glob syntax is Python `fnmatchcase`: `*`, `?`, and character
+classes such as `[ch]` and `[!0-9]`. In Python `fnmatchcase`, `*` can match
+`/`, so patterns like `dom/media/**` match everything below that prefix.
+
+File quantifiers are explicit:
+
+```toml
+# At least one changed file must match all predicates in the object.
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+
+# Every changed file must match all predicates in the object.
+when = { all_files = { ext = [".md", ".rst"] } }
+```
+
+`any_file` is the normal implementation-code case. `all_files` is for
+patch-shape rules such as docs-only, tests-only, or generated-only changes.
+
+### Reusable predicates
+
+Named predicate definitions let you reuse path sets or metadata groups across
+rules. Definitions are referenced explicitly with `ref` and expanded
+structurally, as if the referenced table had been written inline.
+
+```toml review-context
+version = 1
+
+[definitions.files.media_impl]
+include = ["dom/media/**"]
+exclude = ["dom/media/test/**", "dom/media/gtest/**"]
+ext = [".cpp", ".h", ".mm"]
+
+[[rules]]
+name = "Media: implementation"
+when = { any_file = { ref = "files.media_impl" } }
+load = [
+  { type = "file", path = ".claude/skills/dom-media/SKILL.md" },
+]
+```
+
+Definitions are validated like inline predicates. References are namespace
+checked: `files.*` refs are valid only in file predicates, `bugzilla.*` only in
+Bugzilla predicates, `review.*` only in review predicates, and `patch.*` only
+in patch predicates. Unknown references and references combined with inline
+fields are schema errors.
+
+### Metadata trigger fields
+
+File predicates are separate from external metadata predicates. The parser
+validates these field names and their value types. At runtime, only
+`bugzilla.component` currently affects matching; the other fields are accepted
+by the schema for forward-compatible rule files but fail closed.
+
+| Namespace  | Field               | Type            | Runtime behavior                                               |
+| ---------- | ------------------- | --------------- | -------------------------------------------------------------- |
+| `bugzilla` | `component`         | list of strings | Matches exact `Product::Component` from Bugzilla               |
+| `bugzilla` | `product`           | list of strings | Parsed and validated; currently fails closed                   |
+| `bugzilla` | `keywords`          | list of strings | Parsed and validated; currently fails closed                   |
+| `bugzilla` | `severity`          | list of strings | Parsed and validated; currently fails closed                   |
+| `review`   | `author`            | list of strings | Parsed and validated; currently fails closed                   |
+| `review`   | `reviewer`          | list of strings | Parsed and validated; currently fails closed                   |
+| `review`   | `blocking_reviewer` | list of strings | Parsed and validated; currently fails closed                   |
+| `patch`    | `repository`        | list of strings | Parsed and validated; currently fails closed                   |
+| `patch`    | `is_backport`       | boolean         | Parsed and validated; currently fails closed                   |
+
+When review metadata matching is wired up, `reviewer` should match both
+blocking and non-blocking reviewers. Use `blocking_reviewer` only when the rule
+specifically needs a reviewer whose approval is required before landing.
+
+Example — load a skill when C++ or HTML files change under `dom/media/` (which
+catches both implementation files and their tests):
+
+```toml review-context
+version = 1
+
+[[rules]]
+name = "DOM: Media C++ and tests"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h", ".html"] } }
+load = [
+  { type = "file", path = ".claude/skills/dom-media/SKILL.md" },
+]
+```
+
+### Action types
+
+#### `file` — fetch a file from a GitHub repository
+
+```toml
+# File from the same repo and branch as review-context.toml (default)
+{ type = "file", path = ".claude/skills/dom-audio/SKILL.md" }
+
+# File from another repo (e.g., cubeb's real-time programming guidelines)
+{ type = "file", path = ".claude/skills/real-time-programming/SKILL.md",
+  repo = "mozilla/cubeb" }
+
+# Explicit branch
+{ type = "file", path = ".claude/skills/dom-audio/SKILL.md",
+  repo = "mozilla-firefox/firefox", branch = "main" }
+```
+
+Constructs a raw GitHub URL:
+`https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/{path}`.
+When `repo` is omitted, the repo is the `review_context_repo` passed to the review
+entrypoint. Same-repo content uses `review_context_branch`; cross-repo content defaults
+to `main` unless the action specifies `branch`.
+
+Any Markdown or RST file in the repository can be referenced — skill files,
+architecture docs, READMEs, spec notes, etc.
+
+`kind` is optional on `file` and `fetch_revision` actions. It is a free-form
+audit hint recorded in the action object in the external content manifest; the
+loader does not interpret it.
+
+#### GitHub repository allow-list
+
+GitHub loads are restricted to keep review context auditable:
+
+- The repository containing `review-context.toml` is always allowed.
+- Entries ending in `/`, such as `mozilla/`, allow every repo under that
+  organization.
+- Entries in `org/repo` form allow a single repo.
+
+```toml
+[policy.github]
+allowed_repos = [
+  "mozilla/",
+  "web-platform-tests/wpt",
+  "whatwg/html",
+]
+```
+
+Repository names are normalized to lowercase before checking the policy. If a
+`load` action references a disallowed repository, the loader does not fetch it.
+The failure is non-fatal: it logs an error, skips that load, and continues with
+the remaining loads. Skipped loads are not included in the external content
+manifest because no content was injected.
+
+#### `fetch_revision` — diff of another revision as context
+
+Injects the raw diff of a related revision — useful for providing context about
+a stack parent or a related patch.
+
+```toml
+# Phabricator revision
+{ type = "fetch_revision", revision = "D12345" }
+
+# GitHub commit (use a git commit hash — Firefox moved from Mercurial to GitHub)
+{ type = "fetch_revision", repo = "padenot/cubeb", hash = "abc123def456" }
+```
+
+### Testing your rules and skills
+
+Pass your work-in-progress rules and skill files directly to `patch_review`
+via the MCP. Rules are merged by name into the fetched `review-context.toml`
+(replacing a rule with the same name, or appending if new). Skill content is
+injected directly, bypassing the network fetch for matching names.
+
+From Claude Code, trigger a review with your local files:
+
+```
+/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \
+  review_context_repo="mozilla-firefox/firefox" \
+  extra_context_toml="$(cat review-context.toml)" \
+  content_overrides='{".claude/skills/dom-audio/SKILL.md": "skill content here"}'
+```
+
+`content_overrides` is a JSON object (as a string). For multi-line skill files,
+build the JSON separately:
+
+```bash
+python3 -c "import json,sys; print(json.dumps({'.claude/skills/dom-audio/SKILL.md': open('.claude/skills/dom-audio/SKILL.md').read()}))"
+```
+
+Then paste the output as the `content_overrides` value.
+
+This simulates the state as if your changes had already landed — no push
+required. If a skill is missing, verify the rule conditions match the patch's
+changed files (`any_file` / `all_files` against the actual `+++ b/` paths in
+the diff).
+
+### Gotchas
+
+**Rules are evaluated against the post-patch file list.** Paths in file predicates
+should match the `+++ b/` filenames in the diff.
+
+**Glob patterns use `fnmatchcase` semantics.** `dom/media/**` matches
+`dom/media/foo/bar.cpp`.
+
+**Frontmatter is stripped automatically.** You can put metadata (name,
+description, owner) in frontmatter without it polluting the injected context.
+
+**Load failures are non-fatal.** The review proceeds without that content, and
+the failure is logged at ERROR level and captured by Sentry. Check the URL
+manually if content you expect is not appearing.
+
+**Bugzilla component rules require the patch to have an associated bug.** On
+non-Phabricator patches or patches without a bug link, Bugzilla component rules
+never match.
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index d87d506d9f..260e291177 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -1,6 +1,8 @@
 import asyncio
 import logging
 import os
+import re
+from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -9,7 +11,11 @@
 from unidiff import PatchSet
 
 from bugbug.tools.code_review import data_types, langchain_tools, review_context
-from bugbug.tools.code_review.data_types import ExternalContent, Skill, _strip_frontmatter
+from bugbug.tools.code_review.data_types import (
+    ExternalContent,
+    Skill,
+    _strip_frontmatter,
+)
 from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool
 from bugbug.tools.code_review.review_context import (
     _merge_rules,
@@ -19,14 +25,15 @@
     github_repo_allowed,
     load_external_content_for_diff,
     parse_diff_files,
+    parse_review_context_toml,
     rule_matches,
 )
 from bugbug.tools.code_review.review_context_schema import (
     AnyFilePredicate,
+    BugzillaPredicate,
     FilePredicate,
     ReviewContextValidationError,
     Rule,
-    parse_review_context_toml,
 )
 from bugbug.tools.code_review.review_context_schema import (
     main as validate_review_context_main,
@@ -63,6 +70,17 @@ async def fetch(path):
     return fetch
 
 
+def extract_review_context_examples(markdown: str) -> list[tuple[int, str]]:
+    examples = []
+    fence_re = re.compile(r"^```(?P<info>[^\n]*)\n(?P<body>.*?)^```", re.M | re.S)
+    for match in fence_re.finditer(markdown):
+        info = match.group("info").split()
+        if info == ["toml", "review-context"]:
+            line = markdown.count("\n", 0, match.start()) + 1
+            examples.append((line, match.group("body")))
+    return examples
+
+
 # ---------------------------------------------------------------------------
 # strip_diff_prefix
 # ---------------------------------------------------------------------------
@@ -502,6 +520,13 @@ def test_fetch_file_skips_revision_when_none():
     client.get_file_at_revision.assert_not_called()
 
 
+@pytest.fixture(autouse=True)
+def clear_review_context_cache():
+    review_context._review_context_cache.clear()
+    yield
+    review_context._review_context_cache.clear()
+
+
 @pytest.mark.asyncio
 async def test_external_content_load_caches():
     item = ExternalContent(
@@ -569,13 +594,6 @@ async def test_external_content_load_caches():
 """
 
 
-@pytest.fixture(autouse=True)
-def clear_review_context_cache():
-    review_context._review_context_cache.clear()
-    yield
-    review_context._review_context_cache.clear()
-
-
 def test_parse_diff_files():
     files = parse_diff_files(_DIFF_MEDIA)
     assert files == {"dom/media/Foo.cpp"}
@@ -607,81 +625,79 @@ def test_rule_matches_extension_only():
         load=[],
     )
     assert rule_matches(rule, {"dom/webidl/Foo.webidl"})
-    assert not rule_matches(rule, {"dom/webidl/Foo.js"})
-
-
-def test_rule_bugzilla_component_fails_closed_without_component():
-    config = parse_review_context_toml(_RULES_TOML)
-    rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only")
-    assert not rule_matches(rule, set(), bug_component=None)
-
-
-def test_rule_bugzilla_component_matches():
-    config = parse_review_context_toml(_RULES_TOML)
-    rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only")
-    assert rule_matches(rule, set(), bug_component="Core::DOM: Web Audio")
-    assert not rule_matches(rule, set(), bug_component="Core::Layout")
-
-
-@pytest.mark.asyncio
-async def test_get_bug_component():
-    with patch("libmozdata.bugzilla.Bugzilla") as bugzilla_cls:
-        instance = MagicMock()
-        bugzilla_cls.return_value = instance
-
-        def get_data():
-            handler = bugzilla_cls.call_args.kwargs["bughandler"]
-            data = bugzilla_cls.call_args.kwargs["bugdata"]
-            handler({"product": "Core", "component": "DOM: Web Audio"}, data)
-            return MagicMock(wait=MagicMock())
-
-        instance.get_data.side_effect = get_data
-        assert await get_bug_component(123) == "Core::DOM: Web Audio"
+    assert not rule_matches(rule, {"dom/media/Foo.cpp"})
 
 
 def test_parse_review_context_toml_rejects_missing_when():
     toml = """
 version = 1
+
 [[rules]]
-name = "Broken"
-load = [{ type = "file", path = "x.md" }]
+name = "Bad rule"
+load = [{ type = "file", path = "skills/guide.md" }]
 """
-    with pytest.raises(ReviewContextValidationError, match="rules\\[0\\].when"):
+    with pytest.raises(ReviewContextValidationError):
         parse_review_context_toml(toml)
 
 
 def test_parse_review_context_toml_rejects_unknown_action_type():
     toml = """
 version = 1
+
 [[rules]]
-name = "Broken"
+name = "Bad rule"
 when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "url", path = "x.md" }]
+load = [{ type = "unknown", path = "skills/guide.md" }]
 """
-    with pytest.raises(ReviewContextValidationError, match="unknown action type"):
+    with pytest.raises(ReviewContextValidationError):
         parse_review_context_toml(toml)
 
 
 def test_parse_review_context_toml_rejects_unknown_rule_field():
     toml = """
 version = 1
+
 [[rules]]
-name = "Broken"
-bogus = true
+name = "Bad rule"
 when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "file", path = "x.md" }]
+match_unknown = ["x"]
+load = [{ type = "file", path = "skills/guide.md" }]
 """
-    with pytest.raises(ReviewContextValidationError, match="unknown field"):
+    with pytest.raises(ReviewContextValidationError):
         parse_review_context_toml(toml)
 
 
+def test_parse_review_context_example_file():
+    repo_root = Path(__file__).resolve().parent.parent
+    example = (repo_root / "docs/code-review-context-example.toml").read_text()
+    config = parse_review_context_toml(example)
+    assert config.version == 1
+    assert len(config.rules) >= 1
+    assert "whatwg/html" in config.policy.github.allowed_repos
+
+
+def test_parse_review_context_examples_from_docs():
+    repo_root = Path(__file__).resolve().parent.parent
+    docs = (repo_root / "docs/code-review-skills.md").read_text()
+    examples = extract_review_context_examples(docs)
+    assert examples
+
+    for line, example in examples:
+        try:
+            config = parse_review_context_toml(example)
+        except ReviewContextValidationError as exc:
+            pytest.fail(f"docs/code-review-skills.md:{line}: {exc}")
+        assert config.rules, f"docs/code-review-skills.md:{line}: expected rules"
+
+
 def test_validate_review_context_main(tmp_path, capsys):
     review_context_path = tmp_path / "review-context.toml"
     review_context_path.write_text(_RULES_TOML)
 
     assert validate_review_context_main([str(review_context_path)]) == 0
+
     captured = capsys.readouterr()
-    assert "valid" in captured.out
+    assert "valid (4 rule(s))" in captured.out
 
 
 def test_validate_review_context_main_failure(tmp_path, capsys):
@@ -689,63 +705,120 @@ def test_validate_review_context_main_failure(tmp_path, capsys):
     review_context_path.write_text(
         """
 version = 1
+
 [[rules]]
-name = "Broken"
-load = [{ type = "file", path = "x.md" }]
+name = "Bad rule"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "unknown" }]
 """
     )
 
     assert validate_review_context_main([str(review_context_path)]) == 1
+
     captured = capsys.readouterr()
     assert "invalid" in captured.err
+    assert "unknown action type" in captured.err
+
+
+def test_rule_bugzilla_component_fails_closed_without_component():
+    rule = Rule(
+        name="test",
+        when=BugzillaPredicate(component=["Core::DOM: Web Audio"]),
+        load=[],
+    )
+    assert not rule_matches(rule, {"dom/media/Foo.cpp"}, bug_component=None)
+
+
+def test_rule_bugzilla_component_matches():
+    rule = Rule(
+        name="test",
+        when=BugzillaPredicate(component=["Core::DOM: Web Audio"]),
+        load=[],
+    )
+    assert rule_matches(
+        rule, {"dom/media/Foo.cpp"}, bug_component="Core::DOM: Web Audio"
+    )
+    assert not rule_matches(
+        rule, {"dom/media/Foo.cpp"}, bug_component="Core::Networking"
+    )
+
+
+@pytest.mark.asyncio
+async def test_get_bug_component():
+    def fake_bugzilla(bug_id, include_fields, bughandler, bugdata):
+        bughandler({"product": "Core", "component": "DOM: Web Audio"}, bugdata)
+        mock = MagicMock()
+        mock.get_data.return_value.wait = MagicMock()
+        return mock
+
+    with patch("libmozdata.bugzilla.Bugzilla", side_effect=fake_bugzilla):
+        component = await get_bug_component(12345)
+
+    assert component == "Core::DOM: Web Audio"
 
 
 @pytest.mark.asyncio
 async def test_load_external_content_for_diff_file_load():
     review_context_repo = "mozilla-firefox/firefox"
+    content_body = "---\nname: dom-media\n---\nAudio guidelines.\n"
+
     rules_response = MagicMock()
     rules_response.text = _RULES_TOML
     rules_response.raise_for_status = MagicMock()
+
     content_response = MagicMock()
-    content_response.text = "---\nname: dom-media\n---\nMedia review guidelines\n"
+    content_response.text = content_body
     content_response.raise_for_status = MagicMock()
+
     client = MagicMock()
     client.get = AsyncMock(side_effect=[rules_response, content_response])
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA, review_context_repo, review_context_branch="release"
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA, review_context_repo, review_context_branch="release"
+            )
 
     assert len(results) == 1
-    assert results[0].name == ".claude/skills/dom-media.md"
-    assert results[0].body == "Media review guidelines\n"
-    assert results[0].source_type == "github_file"
-    assert results[0].matched_rules == ["Audio/Video C++"]
-    client.get.assert_any_await(
-        "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/review-context.toml",
-        timeout=30,
+    item = results[0]
+    assert item.name == ".claude/skills/dom-media.md"
+    assert item.body == "Audio guidelines.\n"
+    assert item.source_type == "github_file"
+    assert item.trusted
+    assert item.trust_reason == "github_repo_content"
+    assert item.matched_rules == ["Audio/Video C++"]
+    assert item.bytes == len("Audio guidelines.\n".encode())
+    assert item.sha256
+    assert client.get.await_args_list[0].args[0] == (
+        "https://raw.githubusercontent.com/"
+        "mozilla-firefox/firefox/refs/heads/release/review-context.toml"
     )
-    client.get.assert_any_await(
-        "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/.claude/skills/dom-media.md",
-        timeout=30,
+    assert client.get.await_args_list[1].args[0] == (
+        "https://raw.githubusercontent.com/"
+        "mozilla-firefox/firefox/refs/heads/release/"
+        ".claude/skills/dom-media.md"
     )
 
 
 @pytest.mark.asyncio
 async def test_load_external_content_for_diff_no_match():
     review_context_repo = "mozilla-firefox/firefox"
+
     rules_response = MagicMock()
     rules_response.text = _RULES_TOML
     rules_response.raise_for_status = MagicMock()
+
     client = MagicMock()
     client.get = AsyncMock(return_value=rules_response)
 
-    diff = """diff --git a/README.txt b/README.txt\n--- a/README.txt\n+++ b/README.txt\n@@ -1 +1 @@\n-a\n+b\n"""
+    diff = "diff --git a/build/Makefile b/build/Makefile\n+++ b/build/Makefile\n+new\n"
+
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(diff, review_context_repo)
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(diff, review_context_repo)
 
     assert results == []
+    assert client.get.await_count == 1
 
 
 @pytest.mark.asyncio
@@ -754,226 +827,254 @@ async def test_load_external_content_for_diff_rules_fetch_failure():
     client.get = AsyncMock(side_effect=httpx.ConnectError("boom"))
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA, "mozilla-firefox/firefox"
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA, "mozilla-firefox/firefox"
+            )
 
     assert results == []
 
 
 @pytest.mark.asyncio
 async def test_load_external_content_deduplicates_actions():
-    rules = """
+    toml = """
 version = 1
 
 [[rules]]
-name = "A"
-when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "file", path = ".claude/skills/cpp.md" }]
+name = "Rule A"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp"] } }
+load = [{ type = "file", path = "skills/guide.md" }]
 
 [[rules]]
-name = "B"
-when = { any_file = { include = ["dom/media/**"] } }
-load = [{ type = "file", path = ".claude/skills/cpp.md" }]
+name = "Rule B"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", path = "skills/guide.md" }]
 """
     review_context_repo = "mozilla-firefox/firefox"
+
     rules_response = MagicMock()
-    rules_response.text = rules
+    rules_response.text = toml
     rules_response.raise_for_status = MagicMock()
+
     content_response = MagicMock()
-    content_response.text = "C++ guidelines"
+    content_response.text = "body"
     content_response.raise_for_status = MagicMock()
+
     client = MagicMock()
     client.get = AsyncMock(side_effect=[rules_response, content_response])
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA, review_context_repo
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA, review_context_repo
+            )
 
     assert len(results) == 1
-    assert results[0].matched_rules == ["A", "B"]
+    assert results[0].matched_rules == ["Rule A", "Rule B"]
     assert client.get.await_count == 2
 
 
 @pytest.mark.asyncio
 async def test_load_external_content_orders_by_priority():
-    rules = """
+    toml = """
 version = 1
 
 [[rules]]
 name = "Low"
-priority = 0
+priority = 1
 when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "file", path = ".claude/skills/low.md" }]
+load = [{ type = "file", path = "skills/low.md" }]
 
 [[rules]]
 name = "High"
 priority = 10
 when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "file", path = ".claude/skills/high.md" }]
+load = [{ type = "file", path = "skills/high.md" }]
 """
     review_context_repo = "mozilla-firefox/firefox"
     rules_response = MagicMock()
-    rules_response.text = rules
+    rules_response.text = toml
     rules_response.raise_for_status = MagicMock()
-    low_response = MagicMock()
-    low_response.text = "low"
-    low_response.raise_for_status = MagicMock()
-    high_response = MagicMock()
-    high_response.text = "high"
-    high_response.raise_for_status = MagicMock()
+
     client = MagicMock()
-    client.get = AsyncMock(side_effect=[rules_response, high_response, low_response])
+    client.get = AsyncMock(return_value=rules_response)
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA,
-            review_context_repo,
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA,
+                review_context_repo,
+                content_overrides={
+                    "skills/low.md": "low\n",
+                    "skills/high.md": "high\n",
+                },
+            )
 
-    assert [r.name for r in results] == [
-        ".claude/skills/high.md",
-        ".claude/skills/low.md",
-    ]
+    assert [item.name for item in results] == ["skills/high.md", "skills/low.md"]
 
 
 @pytest.mark.asyncio
 async def test_load_external_content_rejects_disallowed_github_repo():
-    rules = """
+    toml = """
 version = 1
+
 [[rules]]
-name = "Cross repo"
-when = { any_file = { ext = [".webidl"] } }
-load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }]
+name = "External"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", repo = "whatwg/html", path = "review.md" }]
 """
     review_context_repo = "mozilla-firefox/firefox"
     rules_response = MagicMock()
-    rules_response.text = rules
+    rules_response.text = toml
     rules_response.raise_for_status = MagicMock()
+
     client = MagicMock()
     client.get = AsyncMock(return_value=rules_response)
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_WEBIDL,
-            review_context_repo,
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA,
+                review_context_repo,
+                content_overrides={"review.md": "external\n"},
+            )
 
     assert results == []
-    assert client.get.await_count == 1
 
 
 @pytest.mark.asyncio
 async def test_load_external_content_allows_policy_prefix_repo():
-    rules = """
+    toml = """
 version = 1
+
 [policy.github]
 allowed_repos = ["mozilla/"]
+
 [[rules]]
-name = "Cross repo"
-when = { any_file = { ext = [".webidl"] } }
-load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }]
+name = "External"
+when = { any_file = { ext = [".cpp"] } }
+load = [{ type = "file", repo = "mozilla/cubeb", path = "review.md" }]
 """
     review_context_repo = "mozilla-firefox/firefox"
     rules_response = MagicMock()
-    rules_response.text = rules
+    rules_response.text = toml
     rules_response.raise_for_status = MagicMock()
-    content_response = MagicMock()
-    content_response.text = "cubeb guidelines"
-    content_response.raise_for_status = MagicMock()
+
     client = MagicMock()
-    client.get = AsyncMock(side_effect=[rules_response, content_response])
+    client.get = AsyncMock(return_value=rules_response)
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_WEBIDL,
-            review_context_repo,
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA,
+                review_context_repo,
+                content_overrides={"review.md": "external\n"},
+            )
 
-    assert len(results) == 1
-    assert results[0].source.endswith("/mozilla/cubeb/refs/heads/main/REVIEWING.md")
+    assert [item.name for item in results] == ["review.md"]
+
+
+# --- _merge_rules ---
 
 
 def test_merge_rules_appends_new():
     base = parse_review_context_toml(
         """
 version = 1
+
 [[rules]]
 name = "A"
 when = { any_file = { ext = [".cpp"] } }
 load = [{ type = "file", path = "a.md" }]
 """
-    )
-    merged = _merge_rules(
-        base.rules,
-        """
+    ).rules
+    extra = """
 version = 1
+
 [[rules]]
 name = "B"
 when = { any_file = { ext = [".js"] } }
 load = [{ type = "file", path = "b.md" }]
-""",
-    )
-    assert [rule.name for rule in merged] == ["A", "B"]
+"""
+    merged = _merge_rules(base, extra)
+    assert len(merged) == 2
+    assert merged[1].name == "B"
 
 
 def test_merge_rules_replaces_by_name():
     base = parse_review_context_toml(
         """
 version = 1
+
 [[rules]]
 name = "A"
 when = { any_file = { ext = [".cpp"] } }
 load = [{ type = "file", path = "a.md" }]
 """
-    )
-    merged = _merge_rules(
-        base.rules,
-        """
+    ).rules
+    extra = """
 version = 1
+
 [[rules]]
 name = "A"
-when = { any_file = { ext = [".js"] } }
-load = [{ type = "file", path = "replacement.md" }]
-""",
-    )
+when = { any_file = { ext = [".cpp", ".h"] } }
+load = [{ type = "file", path = "a.md" }]
+"""
+    merged = _merge_rules(base, extra)
     assert len(merged) == 1
-    assert merged[0].load[0].path == "replacement.md"
+    assert merged[0].when.predicate.ext == [".cpp", ".h"]
 
 
 def test_merge_rules_empty_extra():
     base = parse_review_context_toml(
         """
 version = 1
+
 [[rules]]
 name = "A"
 when = { any_file = { ext = [".cpp"] } }
 load = [{ type = "file", path = "a.md" }]
 """
-    )
-    merged = _merge_rules(base.rules, "version = 1\n")
-    assert merged == base.rules
+    ).rules
+    merged = _merge_rules(base, "version = 1\n")
+    assert len(merged) == 1
+    assert merged[0].name == "A"
+    assert merged[0].when.predicate.ext == [".cpp"]
+    assert len(merged[0].load) == 1
+
+    merged = _merge_rules(base, "version = 1\n# comment\n")
+    assert len(merged) == 1
+    assert merged[0].name == "A"
+    assert merged[0].when.predicate.ext == [".cpp"]
+    assert len(merged[0].load) == 1
+
+
+# --- content_overrides ---
 
 
 @pytest.mark.asyncio
 async def test_content_override_used_instead_of_fetch():
     review_context_repo = "mozilla-firefox/firefox"
+
     rules_response = MagicMock()
     rules_response.text = _RULES_TOML
     rules_response.raise_for_status = MagicMock()
+
     client = MagicMock()
     client.get = AsyncMock(return_value=rules_response)
-    overrides = {".claude/skills/dom-media.md": "Override body"}
+
+    overrides = {".claude/skills/dom-media.md": "Overridden content.\n"}
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA, review_context_repo, content_overrides=overrides
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA, review_context_repo, content_overrides=overrides
+            )
 
     assert len(results) == 1
-    assert results[0].body == "Override body"
-    assert client.get.await_count == 1
+    assert results[0].name == ".claude/skills/dom-media.md"
+    assert results[0].body == "Overridden content.\n"
+    assert client.get.await_count == 1  # only the rules fetch, not the content
 
 
 @pytest.mark.asyncio
@@ -982,131 +1083,120 @@ async def test_external_content_manifest_and_prompt_body():
     rules_response = MagicMock()
     rules_response.text = _RULES_TOML
     rules_response.raise_for_status = MagicMock()
+
     client = MagicMock()
     client.get = AsyncMock(return_value=rules_response)
-    overrides = {".claude/skills/dom-media.md": "Guideline body"}
+
+    overrides = {".claude/skills/dom-media.md": "Audio guidelines.\n"}
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA, review_context_repo, content_overrides=overrides
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA, review_context_repo, content_overrides=overrides
+            )
 
     manifest = external_content_manifest(results)
     assert manifest == [
         {
             "name": ".claude/skills/dom-media.md",
             "source_type": "github_file",
-            "source": "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/main/.claude/skills/dom-media.md",
-            "action": {"type": "file", "path": ".claude/skills/dom-media.md"},
+            "source": (
+                "https://raw.githubusercontent.com/mozilla-firefox/firefox/"
+                "refs/heads/main/.claude/skills/dom-media.md"
+            ),
+            "action": {
+                "type": "file",
+                "path": ".claude/skills/dom-media.md",
+            },
             "matched_rules": ["Audio/Video C++"],
             "trusted": True,
             "trust_reason": "github_repo_content",
-            "bytes": len("Guideline body".encode()),
+            "bytes": len("Audio guidelines.\n".encode()),
             "sha256": results[0].sha256,
         }
     ]
-    prompt = format_external_content(results)
-    assert "<external_content_manifest>" in prompt
-    assert "<external_context>" in prompt
-    assert '<context name=".claude/skills/dom-media.md">' in prompt
-    assert "Guideline body" in prompt
+
+    prompt_content = format_external_content(results)
+    assert "<external_content_manifest>" in prompt_content
+    assert "<external_context>" in prompt_content
+    assert "Audio guidelines." in prompt_content
 
 
 @pytest.mark.asyncio
 async def test_extra_context_toml_appended():
     review_context_repo = "mozilla-firefox/firefox"
-    base_rules = """
+    extra = """
 version = 1
+
 [[rules]]
-name = "Base"
+name = "Extra JS rule"
 when = { any_file = { ext = [".js"] } }
-load = [{ type = "file", path = "base.md" }]
+load = [{ type = "file", path = ".claude/skills/js.md" }]
 """
-    extra_rules = """
+    diff_js = "diff --git a/Foo.js b/Foo.js\n+++ b/Foo.js\n+new\n"
+
+    # Use a base TOML without any .js rules so only the extra rule fires.
+    base_toml = """
 version = 1
+
 [[rules]]
-name = "Extra"
-when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "file", path = "extra.md" }]
+name = "Audio/Video C++"
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+load = [{ type = "file", path = ".claude/skills/dom-media.md" }]
 """
     rules_response = MagicMock()
-    rules_response.text = base_rules
+    rules_response.text = base_toml
     rules_response.raise_for_status = MagicMock()
-    content_response = MagicMock()
-    content_response.text = "extra"
-    content_response.raise_for_status = MagicMock()
+
     client = MagicMock()
-    client.get = AsyncMock(side_effect=[rules_response, content_response])
+    client.get = AsyncMock(return_value=rules_response)
+
+    overrides = {".claude/skills/js.md": "JS guidelines.\n"}
 
     with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA,
-            review_context_repo,
-            extra_context_toml=extra_rules,
-        )
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                diff_js,
+                review_context_repo,
+                extra_context_toml=extra,
+                content_overrides=overrides,
+            )
 
-    assert [result.name for result in results] == ["extra.md"]
+    assert len(results) == 1
+    assert results[0].name == ".claude/skills/js.md"
+    assert results[0].body == "JS guidelines.\n"
 
 
 @pytest.mark.asyncio
 async def test_extra_context_toml_replaces_by_name():
     review_context_repo = "mozilla-firefox/firefox"
-    extra_rules = """
+    # Replace the "Audio/Video C++" rule with a version that loads different content
+    extra = """
 version = 1
+
 [[rules]]
 name = "Audio/Video C++"
-when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "file", path = "replacement.md" }]
+when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } }
+load = [{ type = "file", path = ".claude/skills/dom-media-v2.md" }]
 """
     rules_response = MagicMock()
     rules_response.text = _RULES_TOML
     rules_response.raise_for_status = MagicMock()
-    content_response = MagicMock()
-    content_response.text = "replacement"
-    content_response.raise_for_status = MagicMock()
-    client = MagicMock()
-    client.get = AsyncMock(side_effect=[rules_response, content_response])
-
-    with patch.object(data_types, "get_http_client", return_value=client):
-        results = await load_external_content_for_diff(
-            _DIFF_MEDIA,
-            review_context_repo,
-            extra_context_toml=extra_rules,
-        )
-
-    assert [result.name for result in results] == ["replacement.md"]
 
-
-@pytest.mark.asyncio
-async def test_load_external_content_fetches_phabricator_revision():
-    rules = """
-version = 1
-[[rules]]
-name = "Revision"
-when = { any_file = { ext = [".cpp"] } }
-load = [{ type = "fetch_revision", revision = "D123" }]
-"""
-    review_context_repo = "mozilla-firefox/firefox"
-    rules_response = MagicMock()
-    rules_response.text = rules
-    rules_response.raise_for_status = MagicMock()
     client = MagicMock()
     client.get = AsyncMock(return_value=rules_response)
 
-    phabricator = MagicMock()
-    phabricator.load_revision.return_value = {"fields": {"diffID": 456}}
-    phabricator.load_raw_diff.return_value = "diff content"
+    overrides = {".claude/skills/dom-media-v2.md": "Updated guidelines.\n"}
 
-    with (
-        patch.object(data_types, "get_http_client", return_value=client),
-        patch(
-            "bugbug.tools.core.platforms.phabricator.get_phabricator_client",
-            return_value=phabricator,
-        ),
-    ):
-        results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo)
+    with patch.object(data_types, "get_http_client", return_value=client):
+        with patch.object(review_context, "get_http_client", return_value=client):
+            results = await load_external_content_for_diff(
+                _DIFF_MEDIA,
+                review_context_repo,
+                extra_context_toml=extra,
+                content_overrides=overrides,
+            )
 
     assert len(results) == 1
-    assert results[0].name == "Revision D123"
-    assert results[0].body == "diff content"
-    assert results[0].source_type == "phabricator_revision"
+    assert results[0].name == ".claude/skills/dom-media-v2.md"
+    assert results[0].body == "Updated guidelines.\n"

From cab6ec7e73be35d71c59f3631fd1f0c217cd8c6a Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Mon, 1 Jun 2026 16:52:11 +0200
Subject: [PATCH 11/11] Preserve original version file state

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index c51ab6c126..aeee537b4e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.0.638
+0.0.637