-
Notifications
You must be signed in to change notification settings - Fork 334
Add Test Plan Generation Tool #6019
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
jpangas
wants to merge
4
commits into
mozilla:master
Choose a base branch
from
jpangas:add-tool-gen-test-cases
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+365
−0
Draft
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # This Source Code Form is subject to the terms of the Mozilla Public | ||
| # License, v. 2.0. If a copy of the MPL was not distributed with this file, | ||
| # You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
|
||
| """Test case and test step generation tool implementation.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from typing import Any | ||
|
|
||
| from langchain.agents import create_agent | ||
| from langchain.chat_models import BaseChatModel, init_chat_model | ||
| from langchain.messages import HumanMessage | ||
|
|
||
| from bugbug.tools.base import GenerativeModelTool | ||
| from bugbug.tools.core.llms import DEFAULT_ANTHROPIC_MODEL | ||
| from bugbug.tools.test_plans_generator.data_types import TestPlanGenerationResult | ||
| from bugbug.tools.test_plans_generator.prompts import ( | ||
| TEST_CASES_PROMPT_TEMPLATE, | ||
| TEST_STEPS_PROMPT_TEMPLATE, | ||
| ) | ||
|
|
||
|
|
||
| def _message_content_to_text(content: Any) -> str: | ||
| if isinstance(content, str): | ||
| return content | ||
|
|
||
| if isinstance(content, list): | ||
| return "".join( | ||
| item.get("text", "") | ||
| for item in content | ||
| if isinstance(item, dict) and item.get("type") == "text" | ||
| ) | ||
|
|
||
| return str(content) | ||
|
|
||
|
|
||
| def _format_custom_instructions(custom_instructions: str) -> str: | ||
| return custom_instructions.strip() or "N/A" | ||
|
|
||
|
|
||
class TestPlanGenerationTool(GenerativeModelTool):
    """Tool for generating QA test cases and test steps.

    Each generation phase fills a prompt template and sends it as a single
    human message to an agent built from the supplied chat model.
    """

    def __init__(
        self,
        llm: BaseChatModel,
        target_software: str = "Mozilla Firefox",
    ) -> None:
        """Store the target product name and build the agent.

        Args:
            llm: Chat model handed to ``create_agent``.
            target_software: Product name substituted into the prompts.
        """
        # NOTE(review): the base-class initializer is not invoked here;
        # confirm GenerativeModelTool does not require it.
        self.target_software = target_software
        self.agent = create_agent(llm)

    @classmethod
    def create(cls, **kwargs):
        """Factory method to instantiate the tool with default dependencies.

        When no ``llm`` keyword is given, a chat model is initialized from
        ``DEFAULT_ANTHROPIC_MODEL``. All keywords are forwarded to the
        constructor.
        """
        if "llm" not in kwargs:
            kwargs["llm"] = init_chat_model(DEFAULT_ANTHROPIC_MODEL)

        return cls(**kwargs)

    def _invoke_llm(self, prompt: str) -> str:
        """Send ``prompt`` to the agent and return its final message as text.

        Returns:
            The content of the last message in the agent result, flattened
            to text and stripped of surrounding whitespace.
        """
        result = self.agent.invoke({"messages": [HumanMessage(prompt)]})
        return _message_content_to_text(result["messages"][-1].content).strip()

    def generate_test_cases(
        self,
        feature_description: str,
        test_scope: str,
        qa_test_cases: str = "",
        custom_instructions: str = "",
    ) -> str:
        """Generate missed test cases for a feature.

        Args:
            feature_description: Description of the feature under test.
            test_scope: Scope of testing for the feature.
            qa_test_cases: Already existing test cases, if any.
            custom_instructions: Extra instructions for the prompt, if any.

        Returns:
            The raw model output for the test-cases prompt.
        """
        # Treat missing or whitespace-only existing cases as "none provided",
        # mirroring how custom instructions are normalized, so the prompt
        # never contains an empty section.
        has_existing_cases = bool(qa_test_cases and qa_test_cases.strip())
        prompt = TEST_CASES_PROMPT_TEMPLATE.format(
            target_software=self.target_software,
            feature_description=feature_description,
            test_scope=test_scope,
            qa_test_cases=qa_test_cases if has_existing_cases else "N/A",
            custom_instructions=_format_custom_instructions(custom_instructions),
        )
        return self._invoke_llm(prompt)

    def generate_test_steps(
        self,
        feature_description: str,
        test_cases: str,
        custom_instructions: str = "",
    ) -> str:
        """Generate detailed test steps for each test case.

        Args:
            feature_description: Description of the feature under test.
            test_cases: Test cases to expand, as text.
            custom_instructions: Extra instructions for the prompt, if any.

        Returns:
            The raw model output for the test-steps prompt.
        """
        prompt = TEST_STEPS_PROMPT_TEMPLATE.format(
            target_software=self.target_software,
            feature_description=feature_description,
            test_cases=test_cases,
            custom_instructions=_format_custom_instructions(custom_instructions),
        )
        return self._invoke_llm(prompt)

    def run(
        self,
        feature_description: str,
        test_scope: str,
        qa_test_cases: str = "",
        generate_steps: bool = True,
        test_cases_custom_instructions: str = "",
        test_steps_custom_instructions: str = "",
    ) -> TestPlanGenerationResult:
        """Generate test cases and optionally generate steps for them.

        Returns:
            A result holding the generated test cases and, when
            ``generate_steps`` is true and the first phase produced
            non-empty output, the generated test steps (otherwise ``None``).
        """
        generated_test_cases = self.generate_test_cases(
            feature_description=feature_description,
            test_scope=test_scope,
            qa_test_cases=qa_test_cases,
            custom_instructions=test_cases_custom_instructions,
        )

        test_steps = None
        # Skip the second model call when there is nothing to expand.
        if generate_steps and generated_test_cases:
            test_steps = self.generate_test_steps(
                feature_description=feature_description,
                test_cases=generated_test_cases,
                custom_instructions=test_steps_custom_instructions,
            )

        return TestPlanGenerationResult(
            test_cases=generated_test_cases,
            test_steps=test_steps,
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # This Source Code Form is subject to the terms of the Mozilla Public | ||
| # License, v. 2.0. If a copy of the MPL was not distributed with this file, | ||
| # You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
|
||
| from dataclasses import dataclass | ||
|
|
||
|
|
||
| @dataclass(frozen=True) | ||
| class TestPlanGenerationResult: | ||
| test_cases: str | ||
| test_steps: str | None = None | ||
|
jpangas marked this conversation as resolved.
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # This Source Code Form is subject to the terms of the Mozilla Public | ||
| # License, v. 2.0. If a copy of the MPL was not distributed with this file, | ||
| # You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
|
||
| # Prompt template asking the model for up to 15 missed test cases for a feature. | ||
| # It is a str.format template with the placeholders {target_software}, | ||
| # {feature_description}, {qa_test_cases}, {test_scope} and {custom_instructions}. | ||
| # The braces of the JSON example are doubled so that str.format emits them literally. | ||
| TEST_CASES_PROMPT_TEMPLATE = """You are an expert Quality Assurance Engineer with expertise in designing high level test cases for features of the {target_software} web browser. | ||
| You are given a feature's description, its scope of testing, and a list of already existing test cases. | ||
| Using the knowledge and information you are given, generate no more than 15 test cases that have been missed for the feature. | ||
|
|
||
| -- This is the feature's description --: | ||
| {feature_description} | ||
|
|
||
| -- These are the existing test cases so far for the feature --: | ||
| {qa_test_cases} | ||
|
|
||
| -- This is the feature's scope of testing --: | ||
| {test_scope} | ||
|
|
||
| -- These are additional instructions for generating test cases --: | ||
| {custom_instructions} | ||
|
|
||
| -- Here are some tips for success --: | ||
| 1. Thoroughly understand the feature from the description, scope of testing and the existing test cases. | ||
| 2. Alter the wording while generating test cases. | ||
| 3. Check to see if each generated case is relevant to the feature. | ||
| 4. Check to see if each generated case is within the scope of testing. | ||
| 5. Check to see if each generated case is dissimilar to any existing test cases. | ||
| 6. Follow the additional instructions when they are provided. | ||
| 7. Return only valid JSON with a "test_cases" key. | ||
| 8. Each entry in "test_cases" must have an "id" integer and a "test_case" string. | ||
|
|
||
| Avoid using a title, markdown formatting, comments, or any text outside the JSON object. | ||
|
|
||
| -- Here is an example of the expected output format --: | ||
| {{ | ||
| "test_cases": [ | ||
| {{ | ||
| "id": 1, | ||
| "test_case": "Verify that sponsored suggestions can be disabled from Settings." | ||
| }}, | ||
| {{ | ||
| "id": 2, | ||
| "test_case": "Verify that organic search suggestions continue to appear when sponsored suggestions are disabled." | ||
| }} | ||
| ] | ||
| }}""" | ||
|
|
||
|
|
||
# Prompt template asking the model to expand each test case into test steps.
# It is a str.format template with the placeholders {target_software},
# {feature_description}, {test_cases} and {custom_instructions}; the braces of
# the JSON example are doubled so that str.format emits them literally.
# Tip 3 names the product through {target_software} rather than a fixed
# product name, so the instruction stays correct for any configured target.
TEST_STEPS_PROMPT_TEMPLATE = """You are an expert Quality Assurance Engineer with expertise in designing detailed test steps for test cases of features of the {target_software} web browser.
You are given a feature's description and a list of test cases.
Using the knowledge and information you are given, generate test steps for each test case.

-- This is the feature's description --:
{feature_description}

-- These are the test cases for the feature --:
{test_cases}

-- These are additional instructions for generating test steps --:
{custom_instructions}

-- Here are some tips for success --:
1. Thoroughly understand the feature from the description and the test cases.
2. For each test case, generate clear and concise steps to execute the test case.
3. Do not include the Launch {target_software} step, as it is assumed to be the first step for every test case.
4. Each test case should have its own set of steps.
5. Follow the additional instructions when they are provided.
6. Return only valid JSON with a "test_cases" key.
7. Keep the same "id" and "test_case" values from the input test cases.
8. Each entry in "test_cases" must have an "id" integer, a "test_case" string, and a "test_steps" array of strings.
Avoid using a title, markdown formatting, comments, or any text outside the JSON object.

-- Here are some examples --:
{{
  "test_cases": [
    {{
      "id": 1,
      "test_case": "Ensure that Rich suggestions entries match the design",
      "test_steps": [
        "Start typing a popular keyword inside the Address Bar.",
        "Observe the Rich entities icon and description."
      ]
    }},
    {{
      "id": 2,
      "test_case": "Search-shortcut - Ensure that Rich entities are accessible via keyboard",
      "test_steps": [
        "Observe the Address Bar.",
        "Click inside the Address Bar, select the google search shortcut.",
        "Press 'Down' arrow key.",
        "Navigate through the Rich entities using Up/Down arrow keys."
      ]
    }}
  ]
}}"""
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,132 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # This Source Code Form is subject to the terms of the Mozilla Public | ||
| # License, v. 2.0. If a copy of the MPL was not distributed with this file, | ||
| # You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
|
||
| """Run the test plans generator tool locally.""" | ||
|
|
||
| import argparse | ||
| import json | ||
|
|
||
| from bugbug.tools.test_plans_generator.agent import TestPlanGenerationTool | ||
|
|
||
|
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the test plans generator script.

    Returns:
        The namespace produced by ``argparse`` for the process arguments.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    # (flag, add_argument keywords) pairs, registered in this order so the
    # generated --help output lists the options in the same sequence.
    option_specs = (
        (
            "--feature-description",
            {
                "required": True,
                "help": "Description of the feature to generate test cases for.",
            },
        ),
        (
            "--test-scope",
            {
                "required": True,
                "help": "Scope of testing for the feature.",
            },
        ),
        (
            "--qa-test-cases",
            {
                "default": "",
                "help": "Existing QA test cases to avoid duplicating.",
            },
        ),
        (
            "--custom-instructions",
            {
                "default": "",
                "help": "Additional instructions to include in selected generation prompts.",
            },
        ),
        (
            "--custom-instructions-target",
            {
                "choices": ("test-cases", "test-steps", "both"),
                "default": "both",
                "help": "Generation prompt to receive the custom instructions.",
            },
        ),
        (
            # Negative flag: stored under `generate_steps`, true unless given.
            "--no-test-steps",
            {
                "dest": "generate_steps",
                "action": "store_false",
                "default": True,
                "help": "Only generate test cases, without detailed test steps.",
            },
        ),
        (
            "--json-lines",
            {
                "action": "store_true",
                "help": "Print one JSON object per generation phase.",
            },
        ),
    )
    for flag, keywords in option_specs:
        cli.add_argument(flag, **keywords)

    return cli.parse_args()
|
|
||
|
|
||
| def _load_json(output: str, output_name: str) -> dict: | ||
| try: | ||
| return json.loads(output) | ||
| except json.JSONDecodeError as e: | ||
| decoder = json.JSONDecoder() | ||
| for index, character in enumerate(output): | ||
| if character not in "{[": | ||
| continue | ||
|
|
||
| try: | ||
| data, _ = decoder.raw_decode(output[index:]) | ||
| return data | ||
| except json.JSONDecodeError: | ||
| continue | ||
|
|
||
| raise SystemExit( | ||
| f"The model did not return valid {output_name} JSON: {e}\n" | ||
| f"Model output:\n{output}" | ||
| ) from e | ||
|
|
||
|
|
||
| def _custom_instructions_for_target(args: argparse.Namespace, target: str) -> str: | ||
| if args.custom_instructions_target in (target, "both"): | ||
| return args.custom_instructions | ||
|
|
||
| return "" | ||
|
|
||
|
|
||
| def _print_json(data: dict, json_lines: bool = False) -> None: | ||
| if json_lines: | ||
| print(json.dumps(data), flush=True) | ||
| return | ||
|
|
||
| print(json.dumps(data, indent=2)) | ||
|
|
||
|
|
||
def main() -> None:
    """Generate test cases, and optionally test steps, and print them as JSON.

    In JSON-lines mode each phase is printed as soon as it completes, as a
    ``{"type": ..., "data": ...}`` record. Otherwise only the final result is
    pretty-printed: the test cases when steps are disabled, else the test
    plan returned by the steps phase.
    """
    args = parse_args()
    tool = TestPlanGenerationTool.create()

    raw_test_cases = tool.generate_test_cases(
        feature_description=args.feature_description,
        test_scope=args.test_scope,
        qa_test_cases=args.qa_test_cases,
        custom_instructions=_custom_instructions_for_target(args, "test-cases"),
    )
    test_cases = _load_json(raw_test_cases, "test cases")

    if args.json_lines:
        # Streamed mode always reports the first phase, steps or not.
        _print_json({"type": "test_cases", "data": test_cases}, json_lines=True)
    elif not args.generate_steps:
        _print_json(test_cases)

    if not args.generate_steps:
        return

    raw_test_steps = tool.generate_test_steps(
        feature_description=args.feature_description,
        test_cases=json.dumps(test_cases),
        custom_instructions=_custom_instructions_for_target(args, "test-steps"),
    )
    test_plan = _load_json(raw_test_steps, "test steps")

    if args.json_lines:
        _print_json({"type": "test_steps", "data": test_plan}, json_lines=True)
    else:
        _print_json(test_plan)


if __name__ == "__main__":
    main()
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.