From 0767766bdf912f6dc65abaa91ed67b79da2e9113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tu=C4=9Fkan=20Boz?= Date: Tue, 2 Jun 2026 16:21:29 +0300 Subject: [PATCH] feat: AI bug catching (aiReview and aiFuzz) aiReview(response) returns a list of likely problems (bad or inconsistent types, missing fields, leaked secrets or PII, status/body mismatches). aiFuzz generates adversarial request payloads to probe an endpoint. Both reuse the two-go/ai provider, parse JSON out of the reply tolerantly, and are advisory. Types, README, CHANGELOG, and stubbed unit tests. --- CHANGELOG.md | 3 + README.md | 25 +++++++++ src/ai/index.d.ts | 1 + src/ai/index.js | 1 + src/ai/review.d.ts | 31 +++++++++++ src/ai/review.js | 103 +++++++++++++++++++++++++++++++++++ src/index.js | 3 + test/unit/ai-review.test.mjs | 67 +++++++++++++++++++++++ 8 files changed, 234 insertions(+) create mode 100644 src/ai/review.d.ts create mode 100644 src/ai/review.js create mode 100644 test/unit/ai-review.test.mjs diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f0e6a1..d5a61f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,9 @@ All notable changes to this project are documented here. This project follows - **AI failure explanation** (`explainFailure`): send a failed assertion plus the request and response context to an LLM and get a likely cause and a suggested fix. Advisory only, it never changes pass or fail. +- **AI bug catching**: `aiReview(response)` returns a list of likely problems + (bad types, missing fields, leaked secrets, status/body mismatches), and + `aiFuzz(options)` generates adversarial request payloads to probe an endpoint. ## [0.4.0] diff --git a/README.md b/README.md index ae09da2..b8b4401 100644 --- a/README.md +++ b/README.md @@ -644,6 +644,31 @@ try { } ``` +You can also use the model to hunt for bugs. `aiReview` looks at a response and +returns a list of likely problems, and `aiFuzz` generates adversarial payloads +you send with the normal client. 
+ +```js +import { aiReview, aiFuzz } from "two-go/ai"; + +const res = await api.get("/me"); +const findings = await aiReview(res, { provider: "openai" }); +// findings: [{ severity, field, message }], e.g. a leaked token or a wrong type + +const payloads = await aiFuzz({ + endpoint: "/users", + method: "POST", + schema: { type: "object", properties: { name: { type: "string" } } }, +}); +for (const body of payloads) { + const r = await api.post("/users").json(body); + if (r.status >= 500) console.log("possible bug on payload", body, "->", r.status); +} +``` + +Both are advisory. `aiReview` hands you findings, `aiFuzz` hands you inputs, and +you decide what to do with them. + ## TypeScript Types are written by hand and shipped with the package, so you get diff --git a/src/ai/index.d.ts b/src/ai/index.d.ts index a4739bd..c3209e8 100644 --- a/src/ai/index.d.ts +++ b/src/ai/index.d.ts @@ -8,3 +8,4 @@ export { } from "./provider.js"; export { aiGenerateTests, type GenerateOptions } from "./generate.js"; export { explainFailure, type ExplainOptions } from "./explain.js"; +export { aiReview, aiFuzz, type Finding, type ReviewOptions, type FuzzOptions } from "./review.js"; diff --git a/src/ai/index.js b/src/ai/index.js index 6577b2c..1b462b4 100644 --- a/src/ai/index.js +++ b/src/ai/index.js @@ -3,3 +3,4 @@ export { createProvider } from "./provider.js"; export { aiGenerateTests } from "./generate.js"; export { explainFailure } from "./explain.js"; +export { aiReview, aiFuzz } from "./review.js"; diff --git a/src/ai/review.d.ts b/src/ai/review.d.ts new file mode 100644 index 0000000..48bc3da --- /dev/null +++ b/src/ai/review.d.ts @@ -0,0 +1,31 @@ +// Type declarations for AI bug catching: response review and fuzz payloads. 
+import type { Provider, ProviderOptions } from "./provider.js"; + +export interface Finding { + severity: "low" | "medium" | "high"; + field: string | null; + message: string; +} + +export interface ReviewOptions extends Omit<ProviderOptions, "provider"> { + context?: string; + maxTokens?: number; + provider?: Provider | ProviderOptions["provider"]; +} + +export interface FuzzOptions extends Omit<ProviderOptions, "provider"> { + endpoint?: string; + method?: string; + schema?: unknown; + sample?: unknown; + count?: number; + instructions?: string; + maxTokens?: number; + provider?: Provider | ProviderOptions["provider"]; +} + +/** Review a response with an LLM and return a list of likely problems. */ +export declare function aiReview(response: unknown, options?: ReviewOptions): Promise<Finding[]>; + +/** Generate adversarial request payloads to probe an endpoint. */ +export declare function aiFuzz(options?: FuzzOptions): Promise<unknown[]>; diff --git a/src/ai/review.js b/src/ai/review.js new file mode 100644 index 0000000..cfe1802 --- /dev/null +++ b/src/ai/review.js @@ -0,0 +1,103 @@ +// Use an LLM to hunt for bugs. aiReview looks at a response and reports likely +// problems. aiFuzz generates adversarial request payloads you can send with the +// normal client. Both are advisory: they hand you findings or inputs, they do +// not change pass or fail on their own. + +import { createProvider } from "./provider.js"; + +const REVIEW_SYSTEM = [ + "You are a senior API reviewer hunting for bugs in an HTTP response.", + "Look for wrong or inconsistent types, missing or null fields that look required,", + "suspicious values (negative amounts, future timestamps on created dates, ids of 0),", + "leaked secrets, tokens, passwords or PII, mismatches between the status and the body,", + "and pagination or count inconsistencies.", + "Return ONLY a JSON array of findings. 
Each finding is", + '{ "severity": "low" | "medium" | "high", "field": string | null, "message": string }.', + "If nothing looks wrong, return [].", +].join("\n"); + +const FUZZ_SYSTEM = [ + "You generate adversarial request payloads to probe an API for bugs.", + "Cover boundary values, wrong types, missing required fields, oversized input,", + "unicode and injection-like strings, and malformed structures.", + "Return ONLY a JSON array of payloads. Each item is the request body to send.", +].join("\n"); + +function truncate(value, max = 4000) { + const s = value == null ? "" : String(value); + return s.length > max ? s.slice(0, max) + "... (truncated)" : s; +} + +// Pull a JSON array out of a model reply, tolerating markdown fences and prose. +function extractJsonArray(text) { + const raw = String(text).trim(); + const fence = raw.match(/```[a-zA-Z]*\n([\s\S]*?)\n```/); + const body = fence ? fence[1] : raw; + const start = body.indexOf("["); + const end = body.lastIndexOf("]"); + if (start === -1 || end === -1 || end < start) return []; + try { + const parsed = JSON.parse(body.slice(start, end + 1)); + return Array.isArray(parsed) ? parsed : []; + } catch { + return []; + } +} + +function resolveProvider(options) { + return options.provider && typeof options.provider.complete === "function" + ? options.provider + : createProvider(options); +} + +function buildReviewPrompt(response, options) { + const lines = ["Review this HTTP response for bugs."]; + if (response) { + const line = `${response.method || ""} ${response.url || ""}`.trim(); + if (line) lines.push(line); + if (response.status != null) lines.push("Status: " + response.status); + if (response.headers) lines.push("Headers: " + truncate(JSON.stringify(response.headers), 800)); + const body = response.text != null ? response.text : response.body !== undefined ? 
JSON.stringify(response.body, null, 2) : ""; + if (body) lines.push("Body:\n" + truncate(body)); + } + if (options.context) lines.push("Context: " + options.context); + return lines.join("\n"); +} + +// Review a response and return a list of findings (possibly empty). +export async function aiReview(response, options = {}) { + const provider = resolveProvider(options); + const reply = await provider.complete(buildReviewPrompt(response, options), { + system: REVIEW_SYSTEM, + maxTokens: options.maxTokens || 1024, + }); + return extractJsonArray(reply); +} + +function buildFuzzPrompt(options) { + const lines = []; + lines.push(`Generate ${options.count || 8} adversarial request payloads.`); + if (options.method || options.endpoint) { + lines.push(`Target: ${(options.method || "POST").toUpperCase()} ${options.endpoint || "/"}`); + } + if (options.schema !== undefined) { + lines.push("Schema of the expected body:"); + lines.push(typeof options.schema === "string" ? options.schema : JSON.stringify(options.schema, null, 2)); + } + if (options.sample !== undefined) { + lines.push("A valid sample body to mutate:"); + lines.push(typeof options.sample === "string" ? options.sample : JSON.stringify(options.sample, null, 2)); + } + if (options.instructions) lines.push("Extra instructions: " + options.instructions); + return lines.join("\n"); +} + +// Generate an array of adversarial payloads to send with the normal client. 
+export async function aiFuzz(options = {}) { + const provider = resolveProvider(options); + const reply = await provider.complete(buildFuzzPrompt(options), { + system: FUZZ_SYSTEM, + maxTokens: options.maxTokens || 1024, + }); + return extractJsonArray(reply); +} diff --git a/src/index.js b/src/index.js index 84dc1f4..0e98d2a 100644 --- a/src/index.js +++ b/src/index.js @@ -37,6 +37,7 @@ import { fromOpenapi } from "./importers/openapi.js"; import { createProvider } from "./ai/provider.js"; import { aiGenerateTests } from "./ai/generate.js"; import { explainFailure } from "./ai/explain.js"; +import { aiReview, aiFuzz } from "./ai/review.js"; // Namespace of all lodash-inspired utilities, available as both `_` and `utils`. import * as _ from "./utils/index.js"; @@ -93,6 +94,8 @@ export { createProvider, aiGenerateTests, explainFailure, + aiReview, + aiFuzz, }; // Also expose the utility belt under the `utils` name. diff --git a/test/unit/ai-review.test.mjs b/test/unit/ai-review.test.mjs new file mode 100644 index 0000000..ee5e386 --- /dev/null +++ b/test/unit/ai-review.test.mjs @@ -0,0 +1,67 @@ +// Unit tests for AI bug catching. The provider is stubbed, no network. 
+import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { aiReview, aiFuzz } from "../../src/ai/review.js"; + +test("aiReview parses a JSON array of findings and sends the body in the prompt", async () => { + let seenPrompt = ""; + const stub = { + complete: async (prompt) => { + seenPrompt = prompt; + return '[{"severity":"high","field":"token","message":"auth token leaked in body"}]'; + }, + }; + + const findings = await aiReview( + { method: "GET", url: "/me", status: 200, body: { id: 1, token: "secret" } }, + { provider: stub } + ); + + assert.equal(findings.length, 1); + assert.equal(findings[0].severity, "high"); + assert.equal(findings[0].field, "token"); + assert.match(seenPrompt, /"token": "secret"/); + assert.match(seenPrompt, /Status: 200/); +}); + +test("aiReview tolerates markdown fences around the JSON", async () => { + const stub = { complete: async () => "```json\n[{\"severity\":\"low\",\"field\":null,\"message\":\"x\"}]\n```" }; + const findings = await aiReview({ status: 200, body: {} }, { provider: stub }); + assert.equal(findings.length, 1); + assert.equal(findings[0].field, null); +}); + +test("aiReview returns an empty array when the model says nothing is wrong", async () => { + const stub = { complete: async () => "[]" }; + assert.deepEqual(await aiReview({ status: 200, body: {} }, { provider: stub }), []); +}); + +test("aiReview returns an empty array on unparseable output instead of throwing", async () => { + const stub = { complete: async () => "I could not find any issues, sorry." }; + assert.deepEqual(await aiReview({ status: 200 }, { provider: stub }), []); +}); + +test("aiFuzz returns an array of payloads and passes the schema in the prompt", async () => { + let seenPrompt = ""; + const stub = { + complete: async (prompt) => { + seenPrompt = prompt; + return '[{"name":""},{"name":null},{"name":"