mozilla · jpangas · May 11, 2026 · Jun 1, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/bugbug/tools/test_plans_generator/agent.py b/bugbug/tools/test_plans_generator/agent.py
@@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Test case and test step generation tool implementation."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from langchain.agents import create_agent
+from langchain.chat_models import BaseChatModel, init_chat_model
+from langchain.messages import HumanMessage
+
+from bugbug.tools.base import GenerativeModelTool
+from bugbug.tools.core.llms import DEFAULT_ANTHROPIC_MODEL
+from bugbug.tools.test_plans_generator.data_types import TestPlanGenerationResult
+from bugbug.tools.test_plans_generator.prompts import (
+    TEST_CASES_PROMPT_TEMPLATE,
+    TEST_STEPS_PROMPT_TEMPLATE,
+)
+
+
+def _message_content_to_text(content: Any) -> str:
+    """Return message content as plain text (text blocks joined for list content)."""
+    if isinstance(content, list):
+        # Only dict blocks tagged as text contribute; every other item is dropped.
+        text_parts = [
+            block.get("text", "")
+            for block in content
+            if isinstance(block, dict) and block.get("type") == "text"
+        ]
+        return "".join(text_parts)
+
+    return content if isinstance(content, str) else str(content)
+
+
+def _format_custom_instructions(custom_instructions: str) -> str:
+    return text if (text := custom_instructions.strip()) else "N/A"  # blank -> "N/A"
+
+
+class TestPlanGenerationTool(GenerativeModelTool):
+    """Tool for generating QA test cases and test steps."""
+
+    def __init__(
+        self,
+        llm: BaseChatModel,
+        target_software: str = "Mozilla Firefox",
+    ) -> None:
+        """Wrap ``llm`` in an agent; ``target_software`` is named in the prompts."""
+        self.target_software = target_software
+        self.agent = create_agent(llm)
+
+    @classmethod
+    def create(cls, **kwargs):
+        """Create the tool; a missing ``llm`` is built from DEFAULT_ANTHROPIC_MODEL."""
+        if "llm" not in kwargs:
+            kwargs["llm"] = init_chat_model(DEFAULT_ANTHROPIC_MODEL)
+        return cls(**kwargs)
+
+    def _invoke_llm(self, prompt: str) -> str:
+        """Invoke the agent with ``prompt`` and return the last message as text."""
+        result = self.agent.invoke({"messages": [HumanMessage(prompt)]})
+        return _message_content_to_text(result["messages"][-1].content).strip()
+
+    def generate_test_cases(
+        self,
+        feature_description: str,
+        test_scope: str,
+        qa_test_cases: str = "",
+        custom_instructions: str = "",
+    ) -> str:
+        """Generate missed test cases for a feature."""
+        prompt = TEST_CASES_PROMPT_TEMPLATE.format(
+            target_software=self.target_software,
+            feature_description=feature_description,
+            test_scope=test_scope,
+            # Stripped like the custom instructions, so whitespace-only input
+            # also falls back to the placeholder.
+            qa_test_cases=(qa_test_cases or "").strip() or "N/A",
+            custom_instructions=_format_custom_instructions(custom_instructions),
+        )
+        return self._invoke_llm(prompt)
+
+    def generate_test_steps(
+        self,
+        feature_description: str,
+        test_cases: str,
+        custom_instructions: str = "",
+    ) -> str:
+        """Generate detailed test steps for each test case."""
+        prompt = TEST_STEPS_PROMPT_TEMPLATE.format(
+            target_software=self.target_software,
+            feature_description=feature_description,
+            test_cases=test_cases,
+            custom_instructions=_format_custom_instructions(custom_instructions),
+        )
+        return self._invoke_llm(prompt)
+
+    def run(
+        self,
+        feature_description: str,
+        test_scope: str,
+        qa_test_cases: str = "",
+        generate_steps: bool = True,
+        test_cases_custom_instructions: str = "",
+        test_steps_custom_instructions: str = "",
+    ) -> TestPlanGenerationResult:
+        """Generate test cases and optionally generate steps for them."""
+        cases = self.generate_test_cases(
+            feature_description=feature_description,
+            test_scope=test_scope,
+            qa_test_cases=qa_test_cases,
+            custom_instructions=test_cases_custom_instructions,
+        )
+
+        # An empty reply leaves nothing to expand, so the second call is skipped.
+        steps = None
+        if generate_steps and cases:
+            steps = self.generate_test_steps(
+                feature_description=feature_description,
+                test_cases=cases,
+                custom_instructions=test_steps_custom_instructions,
+            )
+        return TestPlanGenerationResult(test_cases=cases, test_steps=steps)
diff --git a/bugbug/tools/test_plans_generator/data_types.py b/bugbug/tools/test_plans_generator/data_types.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class TestPlanGenerationResult:  # frozen: fields cannot be reassigned
+    test_cases: str  # text returned by the test case generation step
+    test_steps: str | None = None  # text from the test steps step; None if skipped
diff --git a/bugbug/tools/test_plans_generator/prompts.py b/bugbug/tools/test_plans_generator/prompts.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+TEST_CASES_PROMPT_TEMPLATE = """You are an expert Quality Assurance Engineer with expertise in designing high level test cases for features of the {target_software} web browser.
+You are given a feature's description, its scope of testing, and a list of already existing test cases.
+Using the knowledge and information you are given, generate no more than 15 test cases that have been missed for the feature.
+
+-- This is the feature's description --:
+{feature_description}
+
+-- These are the existing test cases so far for the feature --:
+{qa_test_cases}
+
+-- This is the feature's scope of testing --:
+{test_scope}
+
+-- These are additional instructions for generating test cases --:
+{custom_instructions}
+
+-- Here are some tips for success --:
+1. Thoroughly understand the feature from the description, scope of testing and the existing test cases.
+2. Alter the wording while generating test cases.
+3. Check to see if each generated case is relevant to the feature.
+4. Check to see if each generated case is within the scope of testing.
+5. Check to see if each generated case is dissimilar to any existing test cases.
+6. Follow the additional instructions when they are provided.
+7. Return only valid JSON with a "test_cases" key.
+8. Each entry in "test_cases" must have an "id" integer and a "test_case" string.
+
+Avoid using a title, markdown formatting, comments, or any text outside the JSON object.
+
+-- Here is an example of the expected output format --:
+{{
+  "test_cases": [
+    {{
+      "id": 1,
+      "test_case": "Verify that sponsored suggestions can be disabled from Settings."
+    }},
+    {{
+      "id": 2,
+      "test_case": "Verify that organic search suggestions continue to appear when sponsored suggestions are disabled."
+    }}
+  ]
+}}"""  # doubled braces are str.format escapes for the literal JSON braces
+
+
+TEST_STEPS_PROMPT_TEMPLATE = """You are an expert Quality Assurance Engineer with expertise in designing detailed test steps for test cases of features of the {target_software} web browser.
+You are given a feature's description and a list of test cases.
+Using the knowledge and information you are given, generate test steps for each test case.
+
+-- This is the feature's description --:
+{feature_description}
+
+-- These are the test cases for the feature --:
+{test_cases}
+
+-- These are additional instructions for generating test steps --:
+{custom_instructions}
+
+-- Here are some tips for success --:
+1. Thoroughly understand the feature from the description and the test cases.
+2. For each test case, generate clear and concise steps to execute the test case.
+3. Do not include the Launch {target_software} step, as it is assumed to be the first step for every test case.
+4. Each test case should have its own set of steps.
+5. Follow the additional instructions when they are provided.
+6. Return only valid JSON with a "test_cases" key.
+7. Keep the same "id" and "test_case" values from the input test cases.
+8. Each entry in "test_cases" must have an "id" integer, a "test_case" string, and a "test_steps" array of strings.
+Avoid using a title, markdown formatting, comments, or any text outside the JSON object.
+
+-- Here are some examples --:
+{{
+  "test_cases": [
+    {{
+      "id": 1,
+      "test_case": "Ensure that Rich suggestions entries match the design",
+      "test_steps": [
+        "Start typing a popular keyword inside the Address Bar.",
+        "Observe the Rich entities icon and description."
+      ]
+    }},
+    {{
+      "id": 2,
+      "test_case": "Search-shortcut - Ensure that Rich entities are accessible via keyboard",
+      "test_steps": [
+        "Observe the Address Bar.",
+        "Click inside the Address Bar, select the google search shortcut.",
+        "Press 'Down' arrow key.",
+        "Navigate through the Rich entities using Up/Down arrow keys."
+      ]
+    }}
+  ]
+}}"""  # doubled braces are str.format escapes for the literal JSON braces
diff --git a/scripts/run_test_plans_generator.py b/scripts/run_test_plans_generator.py
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Run the test plans generator tool locally."""
+
+import argparse
+import json
+
+from bugbug.tools.test_plans_generator.agent import TestPlanGenerationTool
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the test plans generator script."""
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument(
+        "--feature-description",
+        required=True,
+        help="Description of the feature to generate test cases for.",
+    )
+    cli.add_argument(
+        "--test-scope",
+        required=True,
+        help="Scope of testing for the feature.",
+    )
+    cli.add_argument(
+        "--qa-test-cases",
+        default="",
+        help="Existing QA test cases to avoid duplicating.",
+    )
+    cli.add_argument(
+        "--custom-instructions",
+        default="",
+        help="Additional instructions to include in selected generation prompts.",
+    )
+    cli.add_argument(
+        "--custom-instructions-target",
+        choices=("test-cases", "test-steps", "both"),
+        default="both",
+        help="Generation prompt to receive the custom instructions.",
+    )
+    # store_false already defaults the destination to True.
+    cli.add_argument(
+        "--no-test-steps",
+        dest="generate_steps",
+        action="store_false",
+        help="Only generate test cases, without detailed test steps.",
+    )
+    cli.add_argument(
+        "--json-lines",
+        action="store_true",
+        help="Print one JSON object per generation phase.",
+    )
+    return cli.parse_args()
+
+
+def _load_json(output: str, output_name: str) -> dict:
+    """Parse model output as JSON, preferring an object when text surrounds it."""
+    try:
+        return json.loads(output)
+    except json.JSONDecodeError as e:
+        decoder, resume_at, found = json.JSONDecoder(), 0, []
+        for index, character in enumerate(output):
+            if index >= resume_at and character in "{[":
+                try:  # decode in place instead of copying the tail via slicing
+                    data, resume_at = decoder.raw_decode(output, index)
+                except json.JSONDecodeError:
+                    continue
+                found.append(data)  # resume_at now skips this fragment's interior
+        if found:  # "[1]"-style prose must not shadow the requested JSON object
+            return next((d for d in found if isinstance(d, dict)), found[0])
+        raise SystemExit(
+            f"The model did not return valid {output_name} JSON: {e}\n"
+            f"Model output:\n{output}"
+        ) from e
+
+
+def _custom_instructions_for_target(args: argparse.Namespace, target: str) -> str:
+    """Return the custom instructions if they apply to ``target``, else empty."""
+    selected = args.custom_instructions_target
+    applies = selected == target or selected == "both"
+    return args.custom_instructions if applies else ""
+
+
+def _print_json(data: dict, json_lines: bool = False) -> None:
+    """Print ``data`` as one compact, flushed line or as indented JSON."""
+    if not json_lines:
+        print(json.dumps(data, indent=2))
+    else:
+        print(json.dumps(data), flush=True)
+
+
+def main() -> None:
+    """Generate test cases and, unless disabled, test steps, printing them as JSON."""
+    args = parse_args()
+
+    tool = TestPlanGenerationTool.create()
+
+    raw_test_cases = tool.generate_test_cases(
+        feature_description=args.feature_description,
+        test_scope=args.test_scope,
+        qa_test_cases=args.qa_test_cases,
+        custom_instructions=_custom_instructions_for_target(args, "test-cases"),
+    )
+    test_cases = _load_json(raw_test_cases, "test cases")
+
+    # With --json-lines every phase is printed as soon as it has been parsed;
+    # otherwise only the last document produced by the run is printed.
+    if args.json_lines:
+        _print_json({"type": "test_cases", "data": test_cases}, json_lines=True)
+    elif not args.generate_steps:
+        _print_json(test_cases)
+    if not args.generate_steps:
+        return
+
+    # The parsed cases are re-serialized before being placed in the steps prompt.
+    raw_test_steps = tool.generate_test_steps(
+        feature_description=args.feature_description,
+        test_cases=json.dumps(test_cases),
+        custom_instructions=_custom_instructions_for_target(args, "test-steps"),
+    )
+    test_plan = _load_json(raw_test_steps, "test steps")
+
+    if args.json_lines:
+        _print_json({"type": "test_steps", "data": test_plan}, json_lines=True)
+    else:
+        _print_json(test_plan)
+
+
+if __name__ == "__main__":  # only when run as a script, not on import
+    main()