From b8a522967ffe32f06a57635a6a5527596e1cf577 Mon Sep 17 00:00:00 2001
From: John Pangas <swiftyxswaggy@outlook.com>
Date: Mon, 11 May 2026 01:20:22 -0600
Subject: [PATCH 1/4] Add Test Plan Generation Tool

---
 bugbug/tools/test_generation/__init__.py   |  12 +++
 bugbug/tools/test_generation/agent.py      | 114 +++++++++++++++++++++
 bugbug/tools/test_generation/data_types.py |  12 +++
 bugbug/tools/test_generation/prompts.py    |  60 +++++++++++
 4 files changed, 198 insertions(+)
 create mode 100644 bugbug/tools/test_generation/__init__.py
 create mode 100644 bugbug/tools/test_generation/agent.py
 create mode 100644 bugbug/tools/test_generation/data_types.py
 create mode 100644 bugbug/tools/test_generation/prompts.py

diff --git a/bugbug/tools/test_generation/__init__.py b/bugbug/tools/test_generation/__init__.py
new file mode 100644
index 0000000000..db567172b0
--- /dev/null
+++ b/bugbug/tools/test_generation/__init__.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from bugbug.tools.test_generation.agent import TestGenerationTool
+from bugbug.tools.test_generation.data_types import TestGenerationResult
+
+__all__ = [
+    "TestGenerationResult",
+    "TestGenerationTool",
+]
diff --git a/bugbug/tools/test_generation/agent.py b/bugbug/tools/test_generation/agent.py
new file mode 100644
index 0000000000..dacf58f78b
--- /dev/null
+++ b/bugbug/tools/test_generation/agent.py
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Test case and test step generation tool implementation."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from langchain.agents import create_agent
+from langchain.chat_models import BaseChatModel, init_chat_model
+from langchain.messages import HumanMessage
+
+from bugbug.tools.base import GenerativeModelTool
+from bugbug.tools.core.llms import DEFAULT_OPENAI_MODEL
+from bugbug.tools.test_generation.data_types import TestGenerationResult
+from bugbug.tools.test_generation.prompts import (
+    TEST_CASES_PROMPT_TEMPLATE,
+    TEST_STEPS_PROMPT_TEMPLATE,
+)
+
+
+def _message_content_to_text(content: Any) -> str:
+    if isinstance(content, str):
+        return content
+
+    if isinstance(content, list):
+        return "".join(
+            item.get("text", "")
+            for item in content
+            if isinstance(item, dict) and item.get("type") == "text"
+        )
+
+    return str(content)
+
+
+class TestGenerationTool(GenerativeModelTool):
+    """Tool for generating QA test cases and test steps."""
+
+    def __init__(
+        self,
+        llm: BaseChatModel,
+        target_software: str = "Mozilla Firefox",
+    ) -> None:
+        self.target_software = target_software
+        self.agent = create_agent(llm)
+
+    @classmethod
+    def create(cls, **kwargs):
+        """Factory method to instantiate the tool with default dependencies."""
+        if "llm" not in kwargs:
+            kwargs["llm"] = init_chat_model(DEFAULT_OPENAI_MODEL)
+
+        return cls(**kwargs)
+
+    def _invoke_llm(self, prompt: str) -> str:
+        result = self.agent.invoke({"messages": [HumanMessage(prompt)]})
+        return _message_content_to_text(result["messages"][-1].content).strip()
+
+    def generate_test_cases(
+        self,
+        feature_description: str,
+        test_scope: str,
+        qa_test_cases: str = "",
+    ) -> str:
+        """Generate missed test cases for a feature."""
+        prompt = TEST_CASES_PROMPT_TEMPLATE.format(
+            target_software=self.target_software,
+            feature_description=feature_description,
+            test_scope=test_scope,
+            qa_test_cases=qa_test_cases or "N/A",
+        )
+        return self._invoke_llm(prompt)
+
+    def generate_test_steps(
+        self,
+        feature_description: str,
+        test_cases: str,
+    ) -> str:
+        """Generate detailed test steps for each test case."""
+        prompt = TEST_STEPS_PROMPT_TEMPLATE.format(
+            target_software=self.target_software,
+            feature_description=feature_description,
+            test_cases=test_cases,
+        )
+        return self._invoke_llm(prompt)
+
+    def run(
+        self,
+        feature_description: str,
+        test_scope: str,
+        qa_test_cases: str = "",
+        generate_steps: bool = True,
+    ) -> TestGenerationResult:
+        """Generate test cases and optionally generate steps for them."""
+        generated_test_cases = self.generate_test_cases(
+            feature_description,
+            test_scope,
+            qa_test_cases,
+        )
+
+        test_steps = None
+        if generate_steps and generated_test_cases:
+            test_steps = self.generate_test_steps(
+                feature_description,
+                generated_test_cases,
+            )
+
+        return TestGenerationResult(
+            test_cases=generated_test_cases,
+            test_steps=test_steps,
+        )
diff --git a/bugbug/tools/test_generation/data_types.py b/bugbug/tools/test_generation/data_types.py
new file mode 100644
index 0000000000..806976f838
--- /dev/null
+++ b/bugbug/tools/test_generation/data_types.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class TestGenerationResult:
+    test_cases: str
+    test_steps: str | None = None
diff --git a/bugbug/tools/test_generation/prompts.py b/bugbug/tools/test_generation/prompts.py
new file mode 100644
index 0000000000..f9b4ae32cd
--- /dev/null
+++ b/bugbug/tools/test_generation/prompts.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+TEST_CASES_PROMPT_TEMPLATE = """You are an expert Quality Assurance Engineer with expertise in designing high level test cases for features of the {target_software} web browser.
+You are given a feature's description, its scope of testing, and a list of already existing test cases.
+Using the knowledge and information you are given, generate no more than 15 test cases that have been missed for the feature.
+
+-- This is the feature's description --:
+{feature_description}
+
+-- These are the existing test cases so far for the feature --:
+{qa_test_cases}
+
+-- This is the feature's scope of testing --:
+{test_scope}
+
+-- Here are some tips for success --:
+1. Thoroughly understand the feature from the description, scope of testing and the existing test cases.
+2. Alter the wording while generating test cases.
+3. Check to see if each generated case is relevant to the feature.
+4. Check to see if each generated case is within the scope of testing.
+5. Check to see if each generated case is dissimilar to any existing test cases.
+
+The test cases should be presented in a numbered list, with each entry being a single, concise test case.
+Avoid using a title and markdown formatting."""
+
+
+TEST_STEPS_PROMPT_TEMPLATE = """You are an expert Quality Assurance Engineer with expertise in designing detailed test steps for test cases of features of the {target_software} web browser.
+You are given a feature's description and a list of test cases.
+Using the knowledge and information you are given, generate test steps for each test case.
+
+-- This is the feature's description --:
+{feature_description}
+
+-- These are the test cases for the feature --:
+{test_cases}
+
+-- Here are some tips for success --:
+1. Thoroughly understand the feature from the description and the test cases.
+2. For each test case, generate clear and concise steps to execute the test case.
+3. Each test case should have its own set of steps.
+4. Present the steps in a numbered list under each test case.
+Avoid using a title and markdown formatting.
+
+-- Here are some examples --:
+Test Case 1: Ensure that Rich suggestions entries match the design
+Test Steps:
+1. Launch Firefox.
+2. Start typing a popular keyword inside the Address Bar.
+3. Observe the Rich entities icon and description.
+
+Test Case 2: Search-shortcut - Ensure that Rich entities are accessible via keyboard
+Test Steps:
+1. Launch Firefox.
+2. Observe the Address Bar.
+3. Click inside the Address Bar, select the google search shortcut.
+4. Press 'Down' arrow key.
+5. Navigate through the Rich entities using Up/Down arrow keys."""

From 54c0a532cca0ebef35ffa8ca20ba6b031bb31b0e Mon Sep 17 00:00:00 2001
From: John Pangas <swiftyxswaggy@outlook.com>
Date: Mon, 1 Jun 2026 03:10:55 -0600
Subject: [PATCH 2/4] Rename to test_plans_generator, emit JSON, add runner script

---
 bugbug/tools/test_generation/__init__.py      | 12 ---
 .../agent.py                                  | 10 +-
 .../data_types.py                             |  2 +-
 .../prompts.py                                | 62 ++++++++----
 scripts/run_test_plans_generator.py           | 99 +++++++++++++++++++
 5 files changed, 150 insertions(+), 35 deletions(-)
 delete mode 100644 bugbug/tools/test_generation/__init__.py
 rename bugbug/tools/{test_generation => test_plans_generator}/agent.py (92%)
 rename bugbug/tools/{test_generation => test_plans_generator}/data_types.py (91%)
 rename bugbug/tools/{test_generation => test_plans_generator}/prompts.py (55%)
 create mode 100644 scripts/run_test_plans_generator.py

diff --git a/bugbug/tools/test_generation/__init__.py b/bugbug/tools/test_generation/__init__.py
deleted file mode 100644
index db567172b0..0000000000
--- a/bugbug/tools/test_generation/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this file,
-# You can obtain one at http://mozilla.org/MPL/2.0/.
-
-from bugbug.tools.test_generation.agent import TestGenerationTool
-from bugbug.tools.test_generation.data_types import TestGenerationResult
-
-__all__ = [
-    "TestGenerationResult",
-    "TestGenerationTool",
-]
diff --git a/bugbug/tools/test_generation/agent.py b/bugbug/tools/test_plans_generator/agent.py
similarity index 92%
rename from bugbug/tools/test_generation/agent.py
rename to bugbug/tools/test_plans_generator/agent.py
index dacf58f78b..24d1f5d5bb 100644
--- a/bugbug/tools/test_generation/agent.py
+++ b/bugbug/tools/test_plans_generator/agent.py
@@ -15,8 +15,8 @@
 
 from bugbug.tools.base import GenerativeModelTool
 from bugbug.tools.core.llms import DEFAULT_OPENAI_MODEL
-from bugbug.tools.test_generation.data_types import TestGenerationResult
-from bugbug.tools.test_generation.prompts import (
+from bugbug.tools.test_plans_generator.data_types import TestPlanGenerationResult
+from bugbug.tools.test_plans_generator.prompts import (
     TEST_CASES_PROMPT_TEMPLATE,
     TEST_STEPS_PROMPT_TEMPLATE,
 )
@@ -36,7 +36,7 @@ def _message_content_to_text(content: Any) -> str:
     return str(content)
 
 
-class TestGenerationTool(GenerativeModelTool):
+class TestPlanGenerationTool(GenerativeModelTool):
     """Tool for generating QA test cases and test steps."""
 
     def __init__(
@@ -93,7 +93,7 @@ def run(
         test_scope: str,
         qa_test_cases: str = "",
         generate_steps: bool = True,
-    ) -> TestGenerationResult:
+    ) -> TestPlanGenerationResult:
         """Generate test cases and optionally generate steps for them."""
         generated_test_cases = self.generate_test_cases(
             feature_description,
@@ -108,7 +108,7 @@ def run(
                 generated_test_cases,
             )
 
-        return TestGenerationResult(
+        return TestPlanGenerationResult(
             test_cases=generated_test_cases,
             test_steps=test_steps,
         )
diff --git a/bugbug/tools/test_generation/data_types.py b/bugbug/tools/test_plans_generator/data_types.py
similarity index 91%
rename from bugbug/tools/test_generation/data_types.py
rename to bugbug/tools/test_plans_generator/data_types.py
index 806976f838..212fe24776 100644
--- a/bugbug/tools/test_generation/data_types.py
+++ b/bugbug/tools/test_plans_generator/data_types.py
@@ -7,6 +7,6 @@
 
 
 @dataclass(frozen=True)
-class TestGenerationResult:
+class TestPlanGenerationResult:
     test_cases: str
     test_steps: str | None = None
diff --git a/bugbug/tools/test_generation/prompts.py b/bugbug/tools/test_plans_generator/prompts.py
similarity index 55%
rename from bugbug/tools/test_generation/prompts.py
rename to bugbug/tools/test_plans_generator/prompts.py
index f9b4ae32cd..60773aa81b 100644
--- a/bugbug/tools/test_generation/prompts.py
+++ b/bugbug/tools/test_plans_generator/prompts.py
@@ -22,9 +22,24 @@
 3. Check to see if each generated case is relevant to the feature.
 4. Check to see if each generated case is within the scope of testing.
 5. Check to see if each generated case is dissimilar to any existing test cases.
+6. Return only valid JSON with a "test_cases" key.
+7. Each entry in "test_cases" must have an "id" integer and a "test_case" string.
 
-The test cases should be presented in a numbered list, with each entry being a single, concise test case.
-Avoid using a title and markdown formatting."""
+Avoid using a title, markdown formatting, comments, or any text outside the JSON object.
+
+-- Here is an example of the expected output format --:
+{{
+  "test_cases": [
+    {{
+      "id": 1,
+      "test_case": "Verify that sponsored suggestions can be disabled from Settings."
+    }},
+    {{
+      "id": 2,
+      "test_case": "Verify that organic search suggestions continue to appear when sponsored suggestions are disabled."
+    }}
+  ]
+}}"""
 
 
 TEST_STEPS_PROMPT_TEMPLATE = """You are an expert Quality Assurance Engineer with expertise in designing detailed test steps for test cases of features of the {target_software} web browser.
@@ -41,20 +56,33 @@
 1. Thoroughly understand the feature from the description and the test cases.
 2. For each test case, generate clear and concise steps to execute the test case.
 3. Each test case should have its own set of steps.
-4. Present the steps in a numbered list under each test case.
-Avoid using a title and markdown formatting.
+4. Return only valid JSON with a "test_cases" key.
+5. Keep the same "id" and "test_case" values from the input test cases.
+6. Each entry in "test_cases" must have an "id" integer, a "test_case" string, and a "test_steps" array of strings.
+Avoid using a title, markdown formatting, comments, or any text outside the JSON object.
 
 -- Here are some examples --:
-Test Case 1: Ensure that Rich suggestions entries match the design
-Test Steps:
-1. Launch Firefox.
-2. Start typing a popular keyword inside the Address Bar.
-3. Observe the Rich entities icon and description.
-
-Test Case 2: Search-shortcut - Ensure that Rich entities are accessible via keyboard
-Test Steps:
-1. Launch Firefox.
-2. Observe the Address Bar.
-3. Click inside the Address Bar, select the google search shortcut.
-4. Press 'Down' arrow key.
-5. Navigate through the Rich entities using Up/Down arrow keys."""
+{{
+  "test_cases": [
+    {{
+      "id": 1,
+      "test_case": "Ensure that Rich suggestions entries match the design",
+      "test_steps": [
+        "Launch Firefox.",
+        "Start typing a popular keyword inside the Address Bar.",
+        "Observe the Rich entities icon and description."
+      ]
+    }},
+    {{
+      "id": 2,
+      "test_case": "Search-shortcut - Ensure that Rich entities are accessible via keyboard",
+      "test_steps": [
+        "Launch Firefox.",
+        "Observe the Address Bar.",
+        "Click inside the Address Bar, select the google search shortcut.",
+        "Press 'Down' arrow key.",
+        "Navigate through the Rich entities using Up/Down arrow keys."
+      ]
+    }}
+  ]
+}}"""
diff --git a/scripts/run_test_plans_generator.py b/scripts/run_test_plans_generator.py
new file mode 100644
index 0000000000..b023b97d9c
--- /dev/null
+++ b/scripts/run_test_plans_generator.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Run the test plans generator tool locally."""
+
+import argparse
+import json
+
+from bugbug.tools.test_plans_generator.agent import TestPlanGenerationTool
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--feature-description",
+        required=True,
+        help="Description of the feature to generate test cases for.",
+    )
+    parser.add_argument(
+        "--test-scope",
+        required=True,
+        help="Scope of testing for the feature.",
+    )
+    parser.add_argument(
+        "--qa-test-cases",
+        default="",
+        help="Existing QA test cases to avoid duplicating.",
+    )
+    parser.add_argument(
+        "--no-test-steps",
+        dest="generate_steps",
+        action="store_false",
+        help="Only generate test cases, without detailed test steps.",
+    )
+    parser.add_argument(
+        "--json-lines",
+        action="store_true",
+        help="Print one JSON object per generation phase.",
+    )
+
+    return parser.parse_args()
+
+
+def _load_json(output: str, output_name: str) -> dict:
+    try:
+        return json.loads(output)
+    except json.JSONDecodeError as e:
+        raise SystemExit(
+            f"The model did not return valid {output_name} JSON: {e}"
+        ) from e
+
+
+def _print_json(data: dict, json_lines: bool = False) -> None:
+    if json_lines:
+        print(json.dumps(data), flush=True)
+        return
+
+    print(json.dumps(data, indent=2))
+
+
+def main() -> None:
+    args = parse_args()
+
+    tool = TestPlanGenerationTool.create()
+
+    generated_test_cases = tool.generate_test_cases(
+        feature_description=args.feature_description,
+        test_scope=args.test_scope,
+        qa_test_cases=args.qa_test_cases,
+    )
+    test_cases = _load_json(generated_test_cases, "test cases")
+
+    if not args.generate_steps:
+        if args.json_lines:
+            _print_json({"type": "test_cases", **test_cases}, json_lines=True)
+        else:
+            _print_json(test_cases)
+        return
+
+    if args.json_lines:
+        _print_json({"type": "test_cases", **test_cases}, json_lines=True)
+
+    generated_test_steps = tool.generate_test_steps(
+        feature_description=args.feature_description,
+        test_cases=generated_test_cases,
+    )
+    test_plan = _load_json(generated_test_steps, "test steps")
+
+    if args.json_lines:
+        _print_json({"type": "test_steps", **test_plan}, json_lines=True)
+        return
+
+    _print_json(test_plan)
+
+
+if __name__ == "__main__":
+    main()

From ea94974429cf91284abb377d473ba048e738d9a8 Mon Sep 17 00:00:00 2001
From: John Pangas <swiftyxswaggy@outlook.com>
Date: Mon, 1 Jun 2026 19:05:37 -0600
Subject: [PATCH 3/4] Omit "Launch Firefox" step and nest JSON-lines output under "data"

---
 bugbug/tools/test_plans_generator/prompts.py | 11 +++++------
 scripts/run_test_plans_generator.py          |  7 ++++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/bugbug/tools/test_plans_generator/prompts.py b/bugbug/tools/test_plans_generator/prompts.py
index 60773aa81b..a1d15b3903 100644
--- a/bugbug/tools/test_plans_generator/prompts.py
+++ b/bugbug/tools/test_plans_generator/prompts.py
@@ -55,10 +55,11 @@
 -- Here are some tips for success --:
 1. Thoroughly understand the feature from the description and the test cases.
 2. For each test case, generate clear and concise steps to execute the test case.
-3. Each test case should have its own set of steps.
-4. Return only valid JSON with a "test_cases" key.
-5. Keep the same "id" and "test_case" values from the input test cases.
-6. Each entry in "test_cases" must have an "id" integer, a "test_case" string, and a "test_steps" array of strings.
+3. Do not include the Launch Firefox step, as it is assumed to be the first step for every test case.
+4. Each test case should have its own set of steps.
+5. Return only valid JSON with a "test_cases" key.
+6. Keep the same "id" and "test_case" values from the input test cases.
+7. Each entry in "test_cases" must have an "id" integer, a "test_case" string, and a "test_steps" array of strings.
 Avoid using a title, markdown formatting, comments, or any text outside the JSON object.
 
 -- Here are some examples --:
@@ -68,7 +69,6 @@
       "id": 1,
       "test_case": "Ensure that Rich suggestions entries match the design",
       "test_steps": [
-        "Launch Firefox.",
         "Start typing a popular keyword inside the Address Bar.",
         "Observe the Rich entities icon and description."
       ]
@@ -77,7 +77,6 @@
       "id": 2,
       "test_case": "Search-shortcut - Ensure that Rich entities are accessible via keyboard",
       "test_steps": [
-        "Launch Firefox.",
         "Observe the Address Bar.",
         "Click inside the Address Bar, select the google search shortcut.",
         "Press 'Down' arrow key.",
diff --git a/scripts/run_test_plans_generator.py b/scripts/run_test_plans_generator.py
index b023b97d9c..5357f4e2f6 100644
--- a/scripts/run_test_plans_generator.py
+++ b/scripts/run_test_plans_generator.py
@@ -32,6 +32,7 @@ def parse_args() -> argparse.Namespace:
         "--no-test-steps",
         dest="generate_steps",
         action="store_false",
+        default=True,
         help="Only generate test cases, without detailed test steps.",
     )
     parser.add_argument(
@@ -74,13 +75,13 @@ def main() -> None:
 
     if not args.generate_steps:
         if args.json_lines:
-            _print_json({"type": "test_cases", **test_cases}, json_lines=True)
+            _print_json({"type": "test_cases", "data": test_cases}, json_lines=True)
         else:
             _print_json(test_cases)
         return
 
     if args.json_lines:
-        _print_json({"type": "test_cases", **test_cases}, json_lines=True)
+        _print_json({"type": "test_cases", "data": test_cases}, json_lines=True)
 
     generated_test_steps = tool.generate_test_steps(
         feature_description=args.feature_description,
@@ -89,7 +90,7 @@ def main() -> None:
     test_plan = _load_json(generated_test_steps, "test steps")
 
     if args.json_lines:
-        _print_json({"type": "test_steps", **test_plan}, json_lines=True)
+        _print_json({"type": "test_steps", "data": test_plan}, json_lines=True)
         return
 
     _print_json(test_plan)

From c877528120ccd57b40436ae8954a53abfebfb11b Mon Sep 17 00:00:00 2001
From: John Pangas <swiftyxswaggy@outlook.com>
Date: Tue, 2 Jun 2026 01:02:14 -0600
Subject: [PATCH 4/4] Add custom instructions, tolerant JSON parsing, Anthropic default model

---
 bugbug/tools/test_plans_generator/agent.py   | 26 ++++++++++----
 bugbug/tools/test_plans_generator/prompts.py | 18 +++++++---
 scripts/run_test_plans_generator.py          | 36 ++++++++++++++++++--
 3 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/bugbug/tools/test_plans_generator/agent.py b/bugbug/tools/test_plans_generator/agent.py
index 24d1f5d5bb..026a2e16a5 100644
--- a/bugbug/tools/test_plans_generator/agent.py
+++ b/bugbug/tools/test_plans_generator/agent.py
@@ -14,7 +14,7 @@
 from langchain.messages import HumanMessage
 
 from bugbug.tools.base import GenerativeModelTool
-from bugbug.tools.core.llms import DEFAULT_OPENAI_MODEL
+from bugbug.tools.core.llms import DEFAULT_ANTHROPIC_MODEL
 from bugbug.tools.test_plans_generator.data_types import TestPlanGenerationResult
 from bugbug.tools.test_plans_generator.prompts import (
     TEST_CASES_PROMPT_TEMPLATE,
@@ -36,6 +36,10 @@ def _message_content_to_text(content: Any) -> str:
     return str(content)
 
 
+def _format_custom_instructions(custom_instructions: str) -> str:
+    return custom_instructions.strip() or "N/A"
+
+
 class TestPlanGenerationTool(GenerativeModelTool):
     """Tool for generating QA test cases and test steps."""
 
@@ -51,7 +55,7 @@ def __init__(
     def create(cls, **kwargs):
         """Factory method to instantiate the tool with default dependencies."""
         if "llm" not in kwargs:
-            kwargs["llm"] = init_chat_model(DEFAULT_OPENAI_MODEL)
+            kwargs["llm"] = init_chat_model(DEFAULT_ANTHROPIC_MODEL)
 
         return cls(**kwargs)
 
@@ -64,6 +68,7 @@ def generate_test_cases(
         feature_description: str,
         test_scope: str,
         qa_test_cases: str = "",
+        custom_instructions: str = "",
     ) -> str:
         """Generate missed test cases for a feature."""
         prompt = TEST_CASES_PROMPT_TEMPLATE.format(
@@ -71,6 +76,7 @@ def generate_test_cases(
             feature_description=feature_description,
             test_scope=test_scope,
             qa_test_cases=qa_test_cases or "N/A",
+            custom_instructions=_format_custom_instructions(custom_instructions),
         )
         return self._invoke_llm(prompt)
 
@@ -78,12 +84,14 @@ def generate_test_steps(
         self,
         feature_description: str,
         test_cases: str,
+        custom_instructions: str = "",
     ) -> str:
         """Generate detailed test steps for each test case."""
         prompt = TEST_STEPS_PROMPT_TEMPLATE.format(
             target_software=self.target_software,
             feature_description=feature_description,
             test_cases=test_cases,
+            custom_instructions=_format_custom_instructions(custom_instructions),
         )
         return self._invoke_llm(prompt)
 
@@ -93,19 +101,23 @@ def run(
         test_scope: str,
         qa_test_cases: str = "",
         generate_steps: bool = True,
+        test_cases_custom_instructions: str = "",
+        test_steps_custom_instructions: str = "",
     ) -> TestPlanGenerationResult:
         """Generate test cases and optionally generate steps for them."""
         generated_test_cases = self.generate_test_cases(
-            feature_description,
-            test_scope,
-            qa_test_cases,
+            feature_description=feature_description,
+            test_scope=test_scope,
+            qa_test_cases=qa_test_cases,
+            custom_instructions=test_cases_custom_instructions,
         )
 
         test_steps = None
         if generate_steps and generated_test_cases:
             test_steps = self.generate_test_steps(
-                feature_description,
-                generated_test_cases,
+                feature_description=feature_description,
+                test_cases=generated_test_cases,
+                custom_instructions=test_steps_custom_instructions,
             )
 
         return TestPlanGenerationResult(
diff --git a/bugbug/tools/test_plans_generator/prompts.py b/bugbug/tools/test_plans_generator/prompts.py
index a1d15b3903..b5cc37bb4b 100644
--- a/bugbug/tools/test_plans_generator/prompts.py
+++ b/bugbug/tools/test_plans_generator/prompts.py
@@ -16,14 +16,18 @@
 -- This is the feature's scope of testing --:
 {test_scope}
 
+-- These are additional instructions for generating test cases --:
+{custom_instructions}
+
 -- Here are some tips for success --:
 1. Thoroughly understand the feature from the description, scope of testing and the existing test cases.
 2. Alter the wording while generating test cases.
 3. Check to see if each generated case is relevant to the feature.
 4. Check to see if each generated case is within the scope of testing.
 5. Check to see if each generated case is dissimilar to any existing test cases.
-6. Return only valid JSON with a "test_cases" key.
-7. Each entry in "test_cases" must have an "id" integer and a "test_case" string.
+6. Follow the additional instructions when they are provided.
+7. Return only valid JSON with a "test_cases" key.
+8. Each entry in "test_cases" must have an "id" integer and a "test_case" string.
 
 Avoid using a title, markdown formatting, comments, or any text outside the JSON object.
 
@@ -52,14 +56,18 @@
 -- These are the test cases for the feature --:
 {test_cases}
 
+-- These are additional instructions for generating test steps --:
+{custom_instructions}
+
 -- Here are some tips for success --:
 1. Thoroughly understand the feature from the description and the test cases.
 2. For each test case, generate clear and concise steps to execute the test case.
 3. Do not include the Launch Firefox step, as it is assumed to be the first step for every test case.
 4. Each test case should have its own set of steps.
-5. Return only valid JSON with a "test_cases" key.
-6. Keep the same "id" and "test_case" values from the input test cases.
-7. Each entry in "test_cases" must have an "id" integer, a "test_case" string, and a "test_steps" array of strings.
+5. Follow the additional instructions when they are provided.
+6. Return only valid JSON with a "test_cases" key.
+7. Keep the same "id" and "test_case" values from the input test cases.
+8. Each entry in "test_cases" must have an "id" integer, a "test_case" string, and a "test_steps" array of strings.
 Avoid using a title, markdown formatting, comments, or any text outside the JSON object.
 
 -- Here are some examples --:
diff --git a/scripts/run_test_plans_generator.py b/scripts/run_test_plans_generator.py
index 5357f4e2f6..fd8d1ab232 100644
--- a/scripts/run_test_plans_generator.py
+++ b/scripts/run_test_plans_generator.py
@@ -28,6 +28,17 @@ def parse_args() -> argparse.Namespace:
         default="",
         help="Existing QA test cases to avoid duplicating.",
     )
+    parser.add_argument(
+        "--custom-instructions",
+        default="",
+        help="Additional instructions to include in selected generation prompts.",
+    )
+    parser.add_argument(
+        "--custom-instructions-target",
+        choices=("test-cases", "test-steps", "both"),
+        default="both",
+        help="Generation prompt to receive the custom instructions.",
+    )
     parser.add_argument(
         "--no-test-steps",
         dest="generate_steps",
@@ -48,11 +59,30 @@ def _load_json(output: str, output_name: str) -> dict:
     try:
         return json.loads(output)
     except json.JSONDecodeError as e:
+        decoder = json.JSONDecoder()
+        for index, character in enumerate(output):
+            if character not in "{[":
+                continue
+
+            try:
+                data, _ = decoder.raw_decode(output[index:])
+                return data
+            except json.JSONDecodeError:
+                continue
+
         raise SystemExit(
-            f"The model did not return valid {output_name} JSON: {e}"
+            f"The model did not return valid {output_name} JSON: {e}\n"
+            f"Model output:\n{output}"
         ) from e
 
 
+def _custom_instructions_for_target(args: argparse.Namespace, target: str) -> str:
+    if args.custom_instructions_target in (target, "both"):
+        return args.custom_instructions
+
+    return ""
+
+
 def _print_json(data: dict, json_lines: bool = False) -> None:
     if json_lines:
         print(json.dumps(data), flush=True)
@@ -70,6 +100,7 @@ def main() -> None:
         feature_description=args.feature_description,
         test_scope=args.test_scope,
         qa_test_cases=args.qa_test_cases,
+        custom_instructions=_custom_instructions_for_target(args, "test-cases"),
     )
     test_cases = _load_json(generated_test_cases, "test cases")
 
@@ -85,7 +116,8 @@ def main() -> None:
 
     generated_test_steps = tool.generate_test_steps(
         feature_description=args.feature_description,
-        test_cases=generated_test_cases,
+        test_cases=json.dumps(test_cases),
+        custom_instructions=_custom_instructions_for_target(args, "test-steps"),
     )
     test_plan = _load_json(generated_test_steps, "test steps")