diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..99670df --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,211 @@ +# QaAgent โ€” System Architecture & Design Specification + +This document details the engineering architecture, data flows, components, and design decisions of **QaAgent**, a professional-grade TypeScript + Playwright QA automation platform. + +--- + +## ๐Ÿ—บ๏ธ System Topology & Execution Modes + +QaAgent is built around a single, highly-instrumented local browser automation engine, supporting two reasoning interfaces: + +```mermaid +flowchart TB + subgraph Input ["Input Layer"] + A[CLI Arguments / JSON Task File] --> B[loadConfig / loadTask] + end + + subgraph Interface ["Execution Modes"] + B --> C[Codex Mode / local-first] + B --> D[Groq Mode / API-driven] + end + + subgraph Core ["Local Browser Engine"] + C --> E[BrowserAgent] + D --> E + E --> F[Playwright Browser Context] + F --> G[ConsoleListener] + F --> H[NetworkListener + API Interceptor] + end + + subgraph Brain ["QA Intelligence"] + E --> I[Page Analyzer] + I --> J[BrowserState JSON] + J --> K[QA Engine] + K --> L[Detectors] + K --> M[Declarative Playbooks] + end + + subgraph Output ["Report Generation"] + L --> N[Report Writer] + M --> N + N --> O[Zero-Dep OOXML Excel Report] + N --> P[Markdown & JSON Debug Logs] + end + + classDef core fill:#f9f,stroke:#333,stroke-width:2px; + classDef brain fill:#bbf,stroke:#333,stroke-width:2px; + classDef output fill:#bfb,stroke:#333,stroke-width:2px; + class E,F,I core; + class K,L,M brain; + class O,P output; +``` + +### 1. Codex / no-API Mode +- **Rationale**: For local-first development where credentials/auth should never leave the machine. +- **Workflow**: Codex (running within the chat client) acts as the reasoning engine. The local `codex-driver` handles page initialization, explicit steps, autonomous exploration, and local detectors, writing rich evidence and reports locally. + +### 2. 
Groq / API Mode +- **Rationale**: Fully autonomous standalone CLI tool loop. +- **Workflow**: A tool-use loop executes against a Groq model (e.g. `gpt-oss-120b`). The model chooses tool calls (e.g. click, fill, scroll, hover), and the browser agent executes them, handling recoveries and checking safety guards on each action. + +--- + +## ๐Ÿ“‚ Source Code Mapping + +The codebase is organized into modular directories under `agent/src/` to isolate automation, reasoning, intelligence, and reporting: + +```text +agent/src/ +โ”œโ”€โ”€ api-agent/ # Groq tool loop definitions and client orchestration +โ”‚ โ”œโ”€โ”€ groq-client.ts +โ”‚ โ”œโ”€โ”€ groq-tool-definitions.ts +โ”‚ โ””โ”€โ”€ groq-tool-loop.ts +โ”œโ”€โ”€ browser/ # Playwright orchestration and DOM analyzer +โ”‚ โ”œโ”€โ”€ actions.ts # Maps command strings to browser method execution +โ”‚ โ”œโ”€โ”€ browser-agent.ts # Unified browser context, state cache, actions +โ”‚ โ”œโ”€โ”€ console-listener.ts +โ”‚ โ”œโ”€โ”€ login-runner.ts # Secure credential autofill and validation +โ”‚ โ”œโ”€โ”€ network-listener.ts# Collects network errors & intercepting API payloads +โ”‚ โ”œโ”€โ”€ page-analyzer.ts # Computes accessible DOM representation +โ”‚ โ”œโ”€โ”€ recorder.ts # Auditing browser actions for coverage verification +โ”‚ โ””โ”€โ”€ selector-healer.ts # Multi-strategy selector repair +โ”œโ”€โ”€ codex-agent/ # Codex mode driver and autonomous exploration +โ”‚ โ”œโ”€โ”€ autonomous-explorer.ts # Navigates origins, tests pages/forms autonomously +โ”‚ โ”œโ”€โ”€ codex-driver.ts +โ”‚ โ”œโ”€โ”€ codex-report-helper.ts +โ”‚ โ””โ”€โ”€ codex-task-runner.ts +โ”œโ”€โ”€ data/ # Indian-style mock CRM lead data generators +โ”‚ โ””โ”€โ”€ lead-data.ts +โ”œโ”€โ”€ memory/ # JSON file-backed local persistence layer +โ”‚ โ”œโ”€โ”€ selectors-memory.ts +โ”‚ โ”œโ”€โ”€ sites-memory.ts +โ”‚ โ””โ”€โ”€ test-history.ts +โ”œโ”€โ”€ qa/ # QA profiles, detectors, and playbooks +โ”‚ โ”œโ”€โ”€ detectors/ # Specialized DOM auditors +โ”‚ โ”‚ โ”œโ”€โ”€ 
accessibility-detector.ts +โ”‚ โ”‚ โ”œโ”€โ”€ form-detector.ts +โ”‚ โ”‚ โ”œโ”€โ”€ performance-detector.ts +โ”‚ โ”‚ โ””โ”€โ”€ table-detector.ts +โ”‚ โ”œโ”€โ”€ playbooks/ # Scope checklists +โ”‚ โ”œโ”€โ”€ checks.ts +โ”‚ โ”œโ”€โ”€ coverage.ts # Formulates Pass/Partial/Fail based on action success +โ”‚ โ”œโ”€โ”€ flaky-rules.ts +โ”‚ โ”œโ”€โ”€ issue-detector.ts # Aggregator for all detectors +โ”‚ โ”œโ”€โ”€ playbook-runner.ts +โ”‚ โ”œโ”€โ”€ priority-rules.ts +โ”‚ โ”œโ”€โ”€ qa-engine.ts +โ”‚ โ”œโ”€โ”€ risk-rules.ts +โ”‚ โ””โ”€โ”€ severity.ts +โ”œโ”€โ”€ reports/ # Report generation templates +โ”‚ โ”œโ”€โ”€ excel.ts # Hand-coded OOXML ZIP compiler +โ”‚ โ”œโ”€โ”€ json.ts +โ”‚ โ”œโ”€โ”€ markdown.ts +โ”‚ โ””โ”€โ”€ report-writer.ts +โ”œโ”€โ”€ shared/ # Interfaces, utils, and safety guards +โ”‚ โ”œโ”€โ”€ safety-guard.ts # Action-filtering firewall +โ”‚ โ”œโ”€โ”€ types.ts +โ”‚ โ””โ”€โ”€ utils.ts +โ””โ”€โ”€ config.ts # Local settings parser +``` + +--- + +## ๐Ÿ› ๏ธ Key Engineering Components + +### 1. BrowserState Extractor (`page-analyzer.ts`) +Instead of feeding raw HTML or a full screenshot to the agent, the Page Analyzer compiles a highly structured **accessible DOM map** including: +- **Clickable Elements**: Evaluates focusable items, computing unique CSS selectors, roles, text names, tags, and coordinates. Every clickable element gets a simple integer index (`0-99`) for Groq tool execution. +- **Form Fields**: Structures inputs, labels, placeholders, validation hints, and submit associations. +- **Tables**: Parses rows, headers, cell matrices, and paginator elements. +- **Toasts and Modals**: Captures transient UI elements (success/error popups) separately. + +### 2. Multi-Strategy Selector Healer (`selector-healer.ts`) +Selectors in web apps frequently change. When a selector fails during execution, the healer runs 4 sequential recovery layers: + +``` +[Target Selector fails] + โ”‚ + โ–ผ +1. 
Explicit Match โ”€โ”€โ–บ Locates element directly on active page (if successful -> Save in Memory) + โ”‚ (failed) + โ–ผ +2. Memory Look-up โ”€โ”€โ–บ Checks previously healed selector history for this URL + โ”‚ (failed) + โ–ผ +3. Role & Text โ”€โ”€โ–บ Finds element using ARIA role & text hints (e.g. button:has-text("Submit")) + โ”‚ (failed) + โ–ผ +4. Indexed Match โ”€โ”€โ–บ Maps fallback matching using structural coordinates in browser state + โ”‚ (failed) + โ–ผ +[Throw Selector Error / Mark Recoverable in Tool Loop] +``` + +### 3. Two-Tier Safety Guard (`safety-guard.ts`) +To prevent the agent from performing destructive actions in production/staging environments (such as bulk deletes, user invites, settings modifications, or real payments), the safety engine evaluates actions: +- **Safe Tool Whitelist**: Tools that only observe or perform standard form interaction (e.g., `open_url`, `click_by_index`, `scroll`, `hover`) bypass filters immediately, preventing false positives. +- **Intent Pattern Matching**: Unknown or custom tools are analyzed against safety rules (regex check) for action flags before execution. This prevents data fields (like entering `email: "delete-me@gmail.com"`) from triggering message-send blockages. + +### 4. Zero-Dependency OOXML Excel Builder (`excel.ts`) +To remain lightweight and portable, the Excel report generator uses **no external libraries** like `exceljs` or `xlsx`. It compiles raw OpenXML files directly: +- Writes structure files: `[Content_Types].xml`, `xl/styles.xml`, `xl/workbook.xml`, `xl/worksheets/sheet1.xml`, etc. +- Serializes screenshots into PNG files under `xl/media/` and writes `drawing.xml` elements to position screenshots inside cells. +- Standardizes styling: formats headers (purple background, bold white text), severity tiers (Red/Critical, Amber/High, Yellow/Medium, Blue/Low), and column widths. +- Bundles them using a lightweight, pure Node.js CRC32-based ZIP compiler. + +### 5. 
Autonomous Explorer (`autonomous-explorer.ts`) +In Codex/no-API mode, the agent isn't passive. It crawls and checks sites dynamically: +- Locates navbar, sidebar, and tab navigation links. +- Explores linked pages (restricted to the same origin URL). +- Auto-detects forms, tests empty submit states, and runs validation auditors. +- Takes screenshots of each path and merges issues (deduplicated by title) into the report. + +--- + +## โšก Execution Lifecycle + +For every QA test execution: + +``` +1. Parse Arguments โ”€โ”€โ–บ 2. Initialize Playwright Context & Listeners + โ”‚ + โ–ผ + 3. Execute Smart Login (if configured) + โ”‚ + โ–ผ + 4. Execute Explicit Task Steps + โ”‚ + โ–ผ + 5. Run Autonomous Explorer (crawls & fills) + โ”‚ + โ–ผ + 6. Run QA Detectors & Playbooks + โ”‚ + โ–ผ + 7. Compile Coverage Summary + โ”‚ + โ–ผ + 8. Compile Excel Workbook + Media Zip + โ”‚ + โ–ผ + 9. Update Site History & Memory +``` + +--- + +## ๐Ÿš€ Future Roadmap & Optimizations + +1. **Local LLM-Driven Selector Healing**: Integrate a fallback to query a local model to match a modified DOM node when structural heuristics fail. +2. **Visual Regression / Pixelmatch**: Capture visual baselines and diff screenshot outputs to highlight layout anomalies. +3. **Structured Logger**: Introduce a unified, JSON-formatted structured logging engine (like `pino`) for improved monitoring. +4. **Interactive Dashboard**: Build an HTML reporter utilizing charts to summarize historical run trends across test executions. diff --git a/README.md b/README.md index 0a402c8..2afc6c8 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ -

QaAgent

+

⚡ QaAgent

- Local-first TypeScript + Playwright QA automation for real website testing. + Autonomous, Local-first TypeScript + Playwright QA Automation Engine for Enterprise Web Applications.

- Test auth journeys, CRM flows, forms, tables, UI/UX, console/network health, safety risks, and coverage truth with Excel-first reports. + Test authentication paths, multi-page flows, dynamic forms, complex data tables, accessibility issues, performance metrics, and safety rules with robust, evidence-rich, auto-generated Excel reports.

@@ -18,37 +18,24 @@ Reports

-

- Demo | - Install | - Sample Report | - Agent Plugin | - Plugin Guide | - Launch Kit -

- -

- QaAgent social preview showing browser checks, issue detection, screenshots, and QA reports -

- -## What It Does +--- -QaAgent runs a local Playwright browser, captures evidence, detects website quality issues, and produces QA reports that developers and product teams can act on. +## ๐ŸŒŸ What It Does -- **Start from a URL or task file** and run smoke, functional, UI/UX, accessibility, performance, security, or full-professional checks. -- **Use Codex/no-API mode** for chat-driven QA, or **Groq/API mode** for standalone tool-loop runs. -- **Capture proof** with screenshots, traces, browser state, console errors, network errors, and action steps. -- **Report honestly** with passed, partial, blocked, untested, and needs-verification coverage. -- **Stay safe by default** by blocking deletes, payments, real message sends, bulk updates, sensitive exports, and settings changes. +QaAgent runs a local, highly-instrumented Playwright browser, captures trace evidence, detects deep quality/usability defects, and produces professional, self-contained Excel workbooks. -## Demo +* **Autonomous Crawling & Testing**: In Codex/no-API mode, the agent automatically discovers links, sidebar items, tabs, and modals within the same origin, tests form validation, and takes full-page screenshots at every step. +* **Dual Execution Modes**: Choose **Codex/no-API mode** (ideal for local-first execution with local credentials) or **Groq API mode** (autonomous agent CLI loop utilizing model-driven tool calls). +* **Multi-Strategy Selector Healing**: Automatically attempts to recover from failing CSS selectors using selectors history memory, text hints, ARIA roles, or indexed state coordinates before raising a failure. +* **Two-Tier Safety Guard**: A proactive firewall blocking destructive actions (deletes, settings alterations, payments, bulk updates, and message broadcast sends) by default. Safe tools bypass checks to eliminate false positives. 
+* **Fleshed-out QA Detectors**: Automated DOM audits checking for accessibility faults, invalid forms, pagination/horizontal scrolling failures in tables, and console/network bottlenecks. +* **Misleading UI Detection**: An API response interceptor capturing HTTP payloads to confirm if a user-facing success toast matches the actual server API response. -

- Animated QaAgent workflow from user task to browser testing, issue detection, and QA report -

+--- -## 60-Second Install +## 📦 60-Second Install +Clone the repository and build: ```bash git clone https://github.com/BAKUGOS1/QaAgent.git cd QaAgent @@ -57,172 +44,52 @@ npx playwright install npm run quality:gate ``` -Run a public smoke test: - +Run a public homepage exploration: ```bash -npm run agent:codex -- --url "https://example.com" --task "Smoke test homepage and generate report" --headed +npm run agent:codex -- --url "https://example.com" --task "Explore homepage and generate QA report" --headed ``` Run with a task file: - ```bash npm run agent:codex -- --task-file agent/tasks/example-task.json --headed ``` -Generated reports stay local under `agent/reports/`; screenshots, traces, and browser state stay under `agent/artifacts/`. - -## Report Preview - -

- QaAgent sample Excel-style QA report preview -

- -See a readable sample report: [docs/SAMPLE_REPORT.md](docs/SAMPLE_REPORT.md) - -## Why Star It - -- **Local-first QA agent**: run browser QA without sending site credentials to a hosted automation service. -- **Evidence-rich output**: screenshots, Playwright traces, browser state, console errors, network errors, and action steps. -- **Excel-first reports**: product/dev-friendly bug report sheets with embedded screenshots. -- **Professional playbooks**: auth, forms, CRUD, tables, navigation, UI/UX, accessibility, performance, security, and regression basics. -- **Installable agent surface**: Codex and Claude Code plugin manifests are included. - -Sharing the repo? Use the ready-to-post copy and social preview notes in [docs/LAUNCH_KIT.md](docs/LAUNCH_KIT.md). - -QaAgent is built around one shared browser engine and two operating modes: - -- **Codex / no-API mode**: Codex does the reasoning in chat while this repo provides browser automation, state capture, screenshots, memory, generated data, and reports. -- **Groq API mode**: Groq acts as the standalone model brain and chooses safe Playwright tool calls from the CLI. - -## Highlights - -- Playwright browser automation with headed and headless runs. -- Browser state extraction with clickable element indexes, forms, links, buttons, tables, modals, toasts, console errors, and network errors. -- Professional QA playbooks for smoke, functional, UI/UX, regression, accessibility, performance, security, CRUD, search/filter/sort, pagination, navigation, upload/download, and auth checks. -- Safe local memory for selectors, sites, known issues, playbooks, and previous run summaries. -- Indian-style CRM test lead generation with `@faker-js/faker`. -- Excel-first reports, including embedded screenshots. Markdown/JSON are optional debug outputs. 
-- Safety guardrails that block destructive actions such as deletes, payments, real message sends, bulk updates, billing changes, sensitive exports, and account setting changes by default. -- Installable Codex and Claude Code plugin surfaces so this repo can expose the QaAgent skill on any machine. - -## Architecture - -

- QaAgent architecture workflow -

- -QaAgent takes a URL or task file, chooses a reasoning mode, runs a local Playwright browser, captures evidence, and generates a report that says what was tested and what still needs verification. - -- **Input**: CLI args or task JSON define the site, scope, modules, login settings, report format, and safety permissions. -- **Reasoning mode**: Codex/no-API mode uses this repo as the local browser harness; Groq/API mode lets Groq choose safe tool calls. -- **Smart login**: when `login.enabled` is true, the agent uses env-backed credentials, submits the login form, and verifies success without printing secrets. -- **Local execution**: Playwright opens pages, performs safe actions, captures screenshots, trace files, browser state, console errors, and network errors. -- **QA intelligence**: detectors and playbooks inspect forms, tables, navigation, validations, UX signals, and coverage truth. -- **Output**: Excel-first report plus optional Markdown/JSON, screenshots, browser state, and traces under ignored local artifact folders. - -## Developer Quick Start - -```bash -npm install -npx playwright install -npm run typecheck -npm run test:smoke -npm run quality:gate -``` - -The smoke test verifies Codex/no-API mode, browser state capture, screenshot/trace capture, coverage reporting, report writing, Excel media embedding, missing Groq key handling, safety guards, and generated test data. - -## Install As An Agent Plugin - -This repo includes a local marketplace, a Codex plugin manifest, and a Claude Code plugin manifest: - -```text -.agents/plugins/marketplace.json -plugins/qa-agent/.codex-plugin/plugin.json -plugins/qa-agent/.claude-plugin/plugin.json -plugins/qa-agent/commands/qa-agent.md -plugins/qa-agent/skills/qa-agent/SKILL.md -``` - -From a fresh clone: - -```bash -git clone https://github.com/BAKUGOS1/QaAgent.git -cd QaAgent -npm install -npx playwright install -codex plugin marketplace add . 
-codex plugin add qa-agent@qa-agent-marketplace -``` - -Open a new Codex thread after installing so the `qa-agent` skill is available. - -If you are not inside the repo, use the absolute path: - -```bash -codex plugin marketplace add "C:\path\to\QaAgent" -codex plugin add qa-agent@qa-agent-marketplace -``` - -For Claude Code, add the same repo marketplace and install the plugin from Claude's plugin command UI: - -```text -/plugin marketplace add https://github.com/BAKUGOS1/QaAgent -/plugin install qa-agent@qa-agent-marketplace -``` - -More details: [`docs/PLUGIN_INSTALL.md`](docs/PLUGIN_INSTALL.md) - -## Requirements +--- -- Node.js 20 or newer -- npm -- Playwright browser binaries installed with `npx playwright install` -- Optional: a Groq API key for standalone API mode +## 🏛️ Architecture & System Design -## Commands - -```bash -npm run agent:codex -- --url "https://example.com" --task "Smoke test homepage" --headed -npm run agent:codex -- --task-file agent/tasks/example-task.json --headed -npm run agent:state -- --url "https://example.com" --headed -npm run agent:api -- --url "https://example.com" --task "Full professional QA" --headed -npm run test:smoke -npm run typecheck -npm run quality:gate +```mermaid +flowchart LR + URL[Website URL] --> Agent[BrowserAgent] + Agent --> Explorer[Autonomous Crawler] + Explorer --> Interceptor[Network/API Interceptor] + Explorer --> Healer[Selector Healer] + Agent --> Detectors[QA Audit Detectors] + Detectors --> Excel[Zero-Dep Excel Writer] ``` -Useful scripts: +Read the full system architecture specifications in [ARCHITECTURE.md](ARCHITECTURE.md). -| Script | Purpose | -| --- | --- | -| `npm run agent` | Run the default CLI entrypoint. | -| `npm run agent:codex` | Run Codex/no-API mode. | -| `npm run agent:api` | Run Groq/API mode. | -| `npm run agent:state` | Capture latest browser state without a full report. | -| `npm run test:smoke` | Run the smoke verification script.
| -| `npm run typecheck` | Run TypeScript checks. | -| `npm run quality:gate` | Run typecheck, smoke, audit, secret scan, and report sanity. | +--- -## Codex / No-API Mode +## 🎮 CLI Command Directory -Codex mode is the local-first workflow. Codex reasons in the chat session, and this repo supplies the browser engine, task schema, screenshots, state snapshots, memory files, report writer, QA detectors, and playbooks. - -```bash -npm run agent:codex -- --url "https://example.com" --task "test login flow" --headed -npm run agent:codex -- --task-file agent/tasks/zoyo-lead-test.json --headed -``` +| Command | Action | +|---|---| +| `npm run agent:codex` | Execute Codex/no-API mode with autonomous crawling. | +| `npm run agent:api` | Execute Groq API tool loop mode. | +| `npm run agent:state` | Output the current page's accessible JSON state to disk. | +| `npm run test:smoke` | Run local framework integration tests. | +| `npm run typecheck` | Run type checking to verify codebase compile status. | +| `npm run quality:gate` | Execute full gate audit (Typecheck + Smoke + Security Scan + Report Sanity). | -Codex mode does not require OpenAI, Groq, Anthropic, Letta, Mastra, LangGraph, Mem0, Zep, Graphiti, or any other external agent framework inside the repository. +--- -## Groq API Mode +## 🔧 Configuring Groq API Mode -Groq mode is the standalone CLI mode. The Groq model chooses safe tool calls, and Playwright executes browser work locally.
- -Create `.env.local` or `.env`: - -```bash -GROQ_API_KEY=your_key_here +Create a `.env.local` or `.env` file in the project root: +```env +GROQ_API_KEY=your_groq_api_key_here GROQ_MODEL=openai/gpt-oss-120b GROQ_FALLBACK_MODEL=openai/gpt-oss-20b TEST_EMAIL= @@ -231,175 +98,58 @@ HEADLESS=false USE_PERSISTENT_PROFILE=false ``` -Run: - +Execute a fully autonomous run: ```bash -npm run agent:api -- --url "https://example.com" --task "test full CRM lead creation flow" --count 10 --headed -npm run agent:api -- --task-file agent/tasks/zoyo-lead-test.json --max-steps 75 --headed +npm run agent:api -- --url "https://example.com" --task "Submit the contact form and verify success" --headed ``` -## Task Files +--- -Task files live in `agent/tasks/`. They support: +## ๐Ÿ›ก๏ธ Safety Enforcement Model -- `websiteUrl` -- `task` -- `qaProfile` -- `credentials` -- `testDataCount` -- `scope` -- `login` -- `modules` -- `report` -- `safety` -- optional explicit `steps` - -Example: +The agent blocks destructive actions by default using a safe-tool whitelist and intent filtering: +* **Allowed**: Navigation, screenshots, local state capture, lead generation, form submission, pagination, and searching. +* **Blocked**: Deletes, archives, payment checkout, user invites, settings modifications, bulk updates, and real message sends. 
+To override, set safety permissions in your JSON task file: ```json { - "websiteUrl": "https://example.com", - "task": "Full professional QA", - "qaProfile": "full-professional", - "testDataCount": 10, - "scope": ["forms", "crud", "search", "pagination"], "safety": { - "allowDelete": false, - "allowArchive": false, - "allowPayment": false, - "allowRealMessageSend": false, - "allowBulkUpdate": false, - "allowSettingsChange": false, - "allowSensitiveExport": false - } -} -``` - -Credentials can be referenced through environment variable names: - -```json -{ - "credentials": { - "emailEnv": "TEST_EMAIL", - "passwordEnv": "TEST_PASSWORD" + "allowDelete": true, + "allowRealMessageSend": false } } ``` -Never commit `.env`, `.env.local`, passwords, tokens, cookies, real customer data, payment data, or sensitive exports. +--- -## Reports And Artifacts +## ๐Ÿ—ƒ๏ธ Output Reports & Artifacts -Reports are generated locally: +All outputs remain local and are excluded from version control: +* **Reports**: Excel workbooks (`agent/reports/*.xlsx`) include a clean user-facing `Bug Report` sheet with embedded screenshots, a `Summary` dashboard, and detailed technical evidence sheets. +* **Logs & Traces**: Playwright traces (`agent/artifacts/traces/`) and raw browser console/network logs. +* **State Snapshot**: Current accessible state tree is cached under `agent/artifacts/state/latest-browser-state.json`. -```text -agent/reports/YYYY-MM-DD-HH-mm-agent-report.xlsx -``` +--- -Markdown and JSON are generated only when the task report config explicitly enables them. +## ๐Ÿ”Œ Installing as an Agent Plugin -Artifacts are stored locally: +Expose the `qa-agent` skill to your Codex or Claude Code terminal agent: -```text -agent/artifacts/screenshots/ -agent/artifacts/logs/ -agent/artifacts/traces/ -agent/artifacts/state/latest-browser-state.json +For **Codex**: +```bash +codex plugin marketplace add . 
+codex plugin add qa-agent@qa-agent-marketplace ``` -The Excel report starts with a clean `Bug Report` sheet using `Module`, `Issue`, `Description`, `Priority`, and `Status`. `Summary` comes next, followed by technical evidence sheets for steps, generated lead data, bugs, UX issues, missing validations, console errors, network errors, screenshots, browser state, QA checklist, and memory notes when available. - -Generated reports, screenshots, logs, traces, state files, browser profiles, and env files are ignored by Git. - -## QA Profiles - -Supported profiles: - -- `smoke` -- `functional` -- `ui-ux` -- `regression-basic` -- `accessibility-basic` -- `performance-basic` -- `security-basic` -- `full-professional` - -Professional playbooks include auth, forms, CRUD, search/filter/sort, tables/pagination, upload/download, navigation, responsive, accessibility basic, performance basic, security basic, and error states. - -## Safety Model - -Blocked by default: - -- Delete -- Archive -- Payment -- Real email, SMS, or WhatsApp sends -- Bulk update -- Password changes -- Billing or subscription changes -- User invites -- Sensitive exports -- Account settings changes -- Real customer destructive edits - -Allowed by default: - -- Safe navigation -- Screenshots -- Logs -- Browser state capture -- Test data creation -- Editing test-created data -- Validation checks -- Search, filter, sort, and pagination checks - -If an action is blocked, the run reports: `Blocked by safety guard.` - -## Project Structure - +For **Claude Code**: ```text -agent/src/browser/ Playwright browser engine, selectors, state, actions -agent/src/qa/ QA engine, detectors, validators, playbooks -agent/src/reports/ Markdown, JSON, and Excel report writers -agent/src/api-agent/ Groq API tool loop -agent/src/codex-agent/ Codex/no-API driver -agent/src/memory/ Safe local memory managers -agent/src/data/ Faker-based test data -agent/tasks/ Example task JSON files -agent/memory/ Local JSON memory stores 
-agent/artifacts/ Local screenshots, logs, traces, and state -agent/reports/ Local generated reports +/plugin marketplace add https://github.com/BAKUGOS1/QaAgent +/plugin install qa-agent@qa-agent-marketplace ``` -## Browser-Use Inspiration - -This project uses `browser-use` as architecture inspiration only. Browser-use is Python-based; QaAgent remains TypeScript + Playwright. Inspired concepts include browser state extraction, clickable element indexes, custom tools, persistent sessions, screenshots, and task-based actions. - -Related notes live in `agent/integrations/browser-use/`. - -## ECC Inspiration - -This project also borrows process ideas from `affaan-m/ECC`: quality gates, security-first workflows, risk-based E2E testing, flaky-test handling, artifact discipline, and clear agent guide files. ECC is not installed or required. - -Related notes live in `agent/integrations/ecc/`. - -## Troubleshooting - -- If Groq mode says `GROQ_API_KEY is missing`, add it to `.env.local` or use Codex/no-API mode. -- If a Playwright browser is missing, run `npx playwright install`. -- If selectors fail, run `npm run agent:state -- --url "" --headed` and inspect `agent/artifacts/state/latest-browser-state.json`. -- If login is needed, set `TEST_EMAIL` and `TEST_PASSWORD` in `.env.local` and reference the env names in task JSON. -- If a complex CRM flow needs precision, add explicit task steps or update selector memory. - -## Roadmap - -- Deeper selector healing across every action path. -- Deeper module crawler for full-professional runs. -- Visual regression checks. -- Stronger auth/session profile support without storing secrets. -- CI smoke tests. -- Deeper accessibility checks. +--- -## License +## ๐Ÿ“„ License -MIT. See [LICENSE](LICENSE). +This project is licensed under the MIT License. See [LICENSE](LICENSE) for details. 
diff --git a/agent/memory/selectors.json b/agent/memory/selectors.json index 0c80803..88764ae 100644 --- a/agent/memory/selectors.json +++ b/agent/memory/selectors.json @@ -31,5 +31,8 @@ "loginEmail": "input[name='email']", "loginPassword": "input[name='password']", "loginSubmit": "button:has-text('Login')" + }, + "https://example.com/": { + "a[role=\"link\"]:has-text(\"Learn more\")": "a[role=\"link\"]:has-text(\"Learn more\")" } } diff --git a/agent/memory/sites.json b/agent/memory/sites.json index 072b0ee..916ef46 100644 --- a/agent/memory/sites.json +++ b/agent/memory/sites.json @@ -8,7 +8,9 @@ "Clickable elements indexed: 1", "Console errors: 0", "Network errors: 0" - ] + ], + "lastRunSummary": "Partial Pass: 0 bugs, 1 UX issues", + "previousBugs": [] }, "https://www.wikipedia.org/": { "lastAuditedAt": "2026-06-05T15:39:58.167Z", diff --git a/agent/memory/test-history.json b/agent/memory/test-history.json index fe51488..7e41102 100644 --- a/agent/memory/test-history.json +++ b/agent/memory/test-history.json @@ -1 +1,50 @@ -[] +[ + { + "websiteUrl": "https://example.com", + "mode": "codex", + "date": "2026-06-08T02:47:20.747Z", + "status": "Partial Pass", + "reportMarkdown": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-17-agent-report.md", + "reportJson": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-17-agent-report.json", + "reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-17-agent-report.xlsx" + }, + { + "websiteUrl": "https://example.com", + "mode": "codex", + "date": "2026-06-08T02:48:00.357Z", + "status": "Partial Pass", + "reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-18-agent-report.xlsx" + }, + { + "websiteUrl": "https://example.com", + "mode": "codex", + "date": "2026-06-08T02:49:05.953Z", + "status": "Partial Pass", + "reportMarkdown": "C:\\Users\\MOHIT 
KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-19-agent-report.md", + "reportJson": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-19-agent-report.json", + "reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-19-agent-report.xlsx" + }, + { + "websiteUrl": "https://example.com", + "mode": "codex", + "date": "2026-06-08T02:49:39.827Z", + "status": "Partial Pass", + "reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-20-agent-report.xlsx" + }, + { + "websiteUrl": "https://example.com", + "mode": "codex", + "date": "2026-06-08T02:52:41.551Z", + "status": "Partial Pass", + "reportMarkdown": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-23-agent-report.md", + "reportJson": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-23-agent-report.json", + "reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-23-agent-report.xlsx" + }, + { + "websiteUrl": "https://example.com", + "mode": "codex", + "date": "2026-06-08T02:53:16.083Z", + "status": "Partial Pass", + "reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-23-agent-report.xlsx" + } +] diff --git a/agent/src/api-agent/groq-tool-definitions.ts b/agent/src/api-agent/groq-tool-definitions.ts index f0adfec..19fbafe 100644 --- a/agent/src/api-agent/groq-tool-definitions.ts +++ b/agent/src/api-agent/groq-tool-definitions.ts @@ -95,6 +95,51 @@ export const groqTools = [ parameters: { type: "object", properties: { ms: { type: "number" } }, required: ["ms"] } } }, + { + type: "function", + function: { + name: "wait_for_navigation", + description: "Wait for the page URL to change after a navigation-triggering action.", + parameters: { type: "object", properties: {} } + } + }, + { + type: "function", + function: { + 
name: "scroll", + description: "Scroll the page up or down to reveal lazy-loaded or below-fold content.", + parameters: { + type: "object", + properties: { + direction: { type: "string", enum: ["down", "up"], description: "Scroll direction. Default: down." }, + amount: { type: "number", description: "Pixels to scroll. Default: 600." } + } + } + } + }, + { + type: "function", + function: { + name: "select_option", + description: "Select an option from a element." }, + value: { type: "string", description: "Option value or visible label to select." } + }, + required: ["selector", "value"] + } + } + }, + { + type: "function", + function: { + name: "hover", + description: "Hover over an element to reveal tooltips, dropdowns, or hover states.", + parameters: { type: "object", properties: { selector: { type: "string" } }, required: ["selector"] } + } + }, { type: "function", function: { @@ -135,6 +180,14 @@ export const groqTools = [ parameters: { type: "object", properties: {} } } }, + { + type: "function", + function: { + name: "get_api_responses", + description: "Return captured API response bodies to compare toasts with actual server responses.", + parameters: { type: "object", properties: {} } + } + }, { type: "function", function: { diff --git a/agent/src/api-agent/groq-tool-loop.ts b/agent/src/api-agent/groq-tool-loop.ts index f96c952..347b8b5 100644 --- a/agent/src/api-agent/groq-tool-loop.ts +++ b/agent/src/api-agent/groq-tool-loop.ts @@ -92,9 +92,15 @@ export async function runGroqToolLoop(task: QaTask, headed: boolean, maxSteps: n continue; } for (const call of response.toolCalls) { - const result = await executeToolCall(browser, task, generatedLeads, call.function.name, call.function.arguments, screenshots); - if (call.function.name === "generate_report") stopRequested = true; - messages.push({ role: "tool", tool_call_id: call.id, content: JSON.stringify(result) }); + try { + const result = await executeToolCall(browser, task, generatedLeads, 
call.function.name, call.function.arguments, screenshots); + if (call.function.name === "generate_report") stopRequested = true; + messages.push({ role: "tool", tool_call_id: call.id, content: JSON.stringify(result) }); + } catch (toolError) { + const errorMessage = toolError instanceof Error ? toolError.message : String(toolError); + messages.push({ role: "tool", tool_call_id: call.id, content: JSON.stringify({ error: errorMessage, recoverable: true }) }); + browser.recorder.record(`Tool ${call.function.name} failed: ${errorMessage}`); + } } } @@ -245,6 +251,24 @@ async function executeToolCall( return { errors: browser.getConsoleErrors() }; case "get_network_errors": return { errors: browser.getNetworkErrors() }; + case "get_api_responses": + return { responses: browser.getApiResponses() }; + case "scroll": + case "scroll_page": + await browser.scroll( + (String(args.direction || "down")) as "down" | "up", + Number(args.amount || 600) + ); + return { ok: true }; + case "select_option": + await browser.selectOption(String(args.selector), String(args.value || "")); + return { ok: true }; + case "hover": + await browser.hover(String(args.selector)); + return { ok: true }; + case "wait_for_navigation": + await browser.waitForNavigation(); + return { ok: true }; case "create_random_lead_data": case "generate_test_data": return { leads: generatedLeads.slice(0, Number(args.count || generatedLeads.length)) }; diff --git a/agent/src/browser/browser-agent.ts b/agent/src/browser/browser-agent.ts index d4e1fd7..3603dd7 100644 --- a/agent/src/browser/browser-agent.ts +++ b/agent/src/browser/browser-agent.ts @@ -5,6 +5,7 @@ import { ConsoleListener } from "./console-listener.js"; import { NetworkListener } from "./network-listener.js"; import { Recorder } from "./recorder.js"; import { detectBrokenImages, detectFormFields, getPageState, getVisibleButtons, getVisibleInputs } from "./page-analyzer.js"; +import { resolveSelector } from "./selector-healer.js"; import { ensureDir, 
timestampForFile } from "../shared/utils.js"; import type { BrowserState } from "../shared/types.js"; @@ -13,6 +14,8 @@ export class BrowserAgent { private context?: BrowserContext; private page?: Page; private traceStarted = false; + private cachedState?: BrowserState; + private stateStale = true; private readonly consoleListener = new ConsoleListener(); private readonly networkListener = new NetworkListener(); readonly recorder = new Recorder(); @@ -40,6 +43,13 @@ export class BrowserAgent { }).catch(() => undefined); this.consoleListener.attach(this.page); this.networkListener.attach(this.page); + + // Listen for new pages/popups + this.context.on("page", (newPage) => { + this.consoleListener.attach(newPage); + this.networkListener.attach(newPage); + this.recorder.record(`New page/popup opened: ${newPage.url()}`); + }); } async close(): Promise { @@ -52,17 +62,47 @@ export class BrowserAgent { return this.page; } + /** Switch focus to a popup/new tab if one was opened */ + async switchToLatestPage(): Promise { + if (!this.context) return; + const pages = this.context.pages(); + if (pages.length > 1) { + this.page = pages[pages.length - 1]; + this.consoleListener.attach(this.page); + this.networkListener.attach(this.page); + this.markStateStale(); + this.recorder.record(`Switched to page: ${this.page.url()}`); + } + } + + private markStateStale(): void { + this.stateStale = true; + this.cachedState = undefined; + } + async openUrl(url: string): Promise { await this.activePage.goto(url, { waitUntil: "domcontentloaded", timeout: 60_000 }); this.recorder.record(`Opened ${url}`); + this.markStateStale(); await this.saveBrowserState(); return this.activePage.url(); } async click(selector: string): Promise { - await this.activePage.locator(selector).first().click({ timeout: 15_000 }); + try { + await this.activePage.locator(selector).first().click({ timeout: 15_000 }); + } catch { + // Fallback: try selector healer + const healed = await resolveSelector(this, 
this.getUrl(), selector, selector); + if (healed.selector && healed.strategy !== "failed") { + await this.activePage.locator(healed.selector).first().click({ timeout: 15_000 }); + this.recorder.record(`Healed selector (${healed.strategy}): ${selector} → ${healed.selector}`); + } else { + throw new Error(`Click failed: selector "${selector}" not found even after healing.`); + } + } this.recorder.record(`Clicked ${selector}`); - await this.saveBrowserState(); + this.markStateStale(); } async clickByIndex(index: number): Promise { @@ -76,31 +116,41 @@ export class BrowserAgent { async clickByText(text: string): Promise { await this.activePage.getByText(text, { exact: false }).first().click({ timeout: 15_000 }); this.recorder.record(`Clicked text ${text}`); - await this.saveBrowserState(); + this.markStateStale(); } async clickByRole(role: string, name?: string): Promise { await this.activePage.getByRole(role as never, name ? { name } : undefined).first().click({ timeout: 15_000 }); this.recorder.record(`Clicked role ${role}${name ? 
` named ${name}` : ""}`); - await this.saveBrowserState(); + this.markStateStale(); } async fill(selector: string, value: string): Promise { - await this.activePage.locator(selector).first().fill(value, { timeout: 15_000 }); + try { + await this.activePage.locator(selector).first().fill(value, { timeout: 15_000 }); + } catch { + const healed = await resolveSelector(this, this.getUrl(), selector, selector); + if (healed.selector && healed.strategy !== "failed") { + await this.activePage.locator(healed.selector).first().fill(value, { timeout: 15_000 }); + this.recorder.record(`Healed selector (${healed.strategy}): ${selector} → ${healed.selector}`); + } else { + throw new Error(`Fill failed: selector "${selector}" not found even after healing.`); + } + } this.recorder.record(`Filled ${selector}`); - await this.saveBrowserState(); + this.markStateStale(); } async fillByLabel(label: string, value: string): Promise { await this.activePage.getByLabel(label, { exact: false }).first().fill(value, { timeout: 15_000 }); this.recorder.record(`Filled label ${label}`); - await this.saveBrowserState(); + this.markStateStale(); } async fillByPlaceholder(placeholder: string, value: string): Promise { await this.activePage.getByPlaceholder(placeholder, { exact: false }).first().fill(value, { timeout: 15_000 }); this.recorder.record(`Filled placeholder ${placeholder}`); - await this.saveBrowserState(); + this.markStateStale(); } async fillByName(name: string, value: string): Promise { @@ -108,9 +158,19 @@ export class BrowserAgent { } async press(selector: string, key: string): Promise { - await this.activePage.locator(selector).first().press(key, { timeout: 15_000 }); + try { + await this.activePage.locator(selector).first().press(key, { timeout: 15_000 }); + } catch { + const healed = await resolveSelector(this, this.getUrl(), selector, selector); + if (healed.selector && healed.strategy !== "failed") { + await this.activePage.locator(healed.selector).first().press(key, { 
timeout: 15_000 }); + this.recorder.record(`Healed selector (${healed.strategy}): ${selector} → ${healed.selector}`); + } else { + throw new Error(`Press failed: selector "${selector}" not found even after healing.`); + } + } this.recorder.record(`Pressed ${key} on ${selector}`); - await this.saveBrowserState(); + this.markStateStale(); } async waitForSelector(selector: string): Promise { @@ -121,6 +181,13 @@ export class BrowserAgent { async waitForLoad(): Promise { await this.activePage.waitForLoadState("networkidle", { timeout: 15_000 }).catch(() => undefined); this.recorder.record("Waited for page load"); + this.markStateStale(); + } + + async waitForNavigation(): Promise { + await this.activePage.waitForURL(/.+/, { timeout: 15_000 }).catch(() => undefined); + this.recorder.record("Waited for navigation"); + this.markStateStale(); } async wait(ms: number): Promise { @@ -128,12 +195,50 @@ export class BrowserAgent { this.recorder.record(`Waited ${ms}ms`); } + /** Scroll the page in the given direction. Defaults to scrolling down by 600 pixels. */ + async scroll(direction: "down" | "up" = "down", amount = 600): Promise { + const delta = direction === "down" ? amount : -amount; + await this.activePage.mouse.wheel(0, delta); + await this.activePage.waitForTimeout(300); + this.recorder.record(`Scrolled ${direction} by ${amount}px`); + this.markStateStale(); + } + + /** Select an option from a