Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ All notable changes to this project are documented here. This project follows
- **AI failure explanation** (`explainFailure`): send a failed assertion plus
the request and response context to an LLM and get a likely cause and a
suggested fix. Advisory only, it never changes pass or fail.
- **AI bug catching**: `aiReview(response)` returns a list of likely problems
(bad types, missing fields, leaked secrets, status/body mismatches), and
`aiFuzz(options)` generates adversarial request payloads to probe an endpoint.

## [0.4.0]

Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,31 @@ try {
}
```

You can also use the model to hunt for bugs. `aiReview` looks at a response and
returns a list of likely problems, and `aiFuzz` generates adversarial payloads
you send with the normal client.

```js
import { aiReview, aiFuzz } from "two-go/ai";

const res = await api.get("/me");
const findings = await aiReview(res, { provider: "openai" });
// findings: [{ severity, field, message }], e.g. a leaked token or a wrong type

const payloads = await aiFuzz({
endpoint: "/users",
method: "POST",
schema: { type: "object", properties: { name: { type: "string" } } },
});
for (const body of payloads) {
const r = await api.post("/users").json(body);
if (r.status >= 500) console.log("possible bug on payload", body, "->", r.status);
}
```

Both are advisory. `aiReview` hands you findings, `aiFuzz` hands you inputs, and
you decide what to do with them.

## TypeScript

Types are written by hand and shipped with the package, so you get
Expand Down
1 change: 1 addition & 0 deletions src/ai/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ export {
} from "./provider.js";
export { aiGenerateTests, type GenerateOptions } from "./generate.js";
export { explainFailure, type ExplainOptions } from "./explain.js";
export { aiReview, aiFuzz, type Finding, type ReviewOptions, type FuzzOptions } from "./review.js";
1 change: 1 addition & 0 deletions src/ai/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
export { createProvider } from "./provider.js";
export { aiGenerateTests } from "./generate.js";
export { explainFailure } from "./explain.js";
export { aiReview, aiFuzz } from "./review.js";
31 changes: 31 additions & 0 deletions src/ai/review.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Type declarations for AI bug catching: response review and fuzz payloads.
import type { Provider, ProviderOptions } from "./provider.js";

/**
 * One likely problem reported by `aiReview`. The shape mirrors the JSON object
 * the model is instructed to emit for each finding.
 */
export interface Finding {
/** How serious the model judges the problem to be. */
severity: "low" | "medium" | "high";
/** Response field the finding is about, or `null` (presumably when no single field applies). */
field: string | null;
/** Human-readable description of the problem. */
message: string;
}

/**
 * Options for `aiReview`. Inherits every `ProviderOptions` field except
 * `provider`, which is widened here to also accept a ready-made provider.
 */
export interface ReviewOptions extends Omit<ProviderOptions, "provider"> {
/** Free-form hint appended to the prompt on a `Context:` line. */
context?: string;
/** Token budget passed to the provider's `complete` call; 1024 when omitted. */
maxTokens?: number;
/**
 * Either an object exposing a `complete` function, which is used as is, or a
 * provider identifier, in which case the options are handed to `createProvider`.
 */
provider?: Provider | ProviderOptions["provider"];
}

/**
 * Options for `aiFuzz`. Inherits every `ProviderOptions` field except
 * `provider`, which is widened here to also accept a ready-made provider.
 */
export interface FuzzOptions extends Omit<ProviderOptions, "provider"> {
/** Path shown on the prompt's `Target:` line; "/" is shown when only `method` is given. */
endpoint?: string;
/** HTTP method shown on the `Target:` line, upper-cased; "POST" when only `endpoint` is given. */
method?: string;
/** Description of the expected body. Strings are sent verbatim, other values as indented JSON. */
schema?: unknown;
/** A valid body for the model to mutate. Strings are sent verbatim, other values as indented JSON. */
sample?: unknown;
/** Number of payloads requested from the model; 8 when omitted. */
count?: number;
/** Extra guidance appended to the prompt on an `Extra instructions:` line. */
instructions?: string;
/** Token budget passed to the provider's `complete` call; 1024 when omitted. */
maxTokens?: number;
/**
 * Either an object exposing a `complete` function, which is used as is, or a
 * provider identifier, in which case the options are handed to `createProvider`.
 */
provider?: Provider | ProviderOptions["provider"];
}

/**
 * Review a response with an LLM and return a list of likely problems.
 *
 * The prompt is assembled from the response's `method`, `url`, `status`,
 * `headers` and `text`/`body` properties when present. The result is advisory
 * only: an empty array is returned when the model reports nothing or its reply
 * contains no parseable JSON array.
 *
 * @param response - The response object to review.
 * @param options - Provider selection, extra context and token budget.
 * @returns The findings parsed from the model's reply (possibly empty).
 */
export declare function aiReview(response: unknown, options?: ReviewOptions): Promise<Finding[]>;

/**
 * Generate adversarial request payloads to probe an endpoint.
 *
 * Nothing is sent to the endpoint by this function; it only asks the model for
 * request bodies, which the caller then sends with the normal client. An empty
 * array is returned when the reply contains no parseable JSON array.
 *
 * @param options - Target description (endpoint, method, schema, sample), the
 *   number of payloads to request, and provider settings.
 * @returns The payloads parsed from the model's reply (possibly empty).
 */
export declare function aiFuzz(options?: FuzzOptions): Promise<unknown[]>;
103 changes: 103 additions & 0 deletions src/ai/review.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Use an LLM to hunt for bugs. aiReview looks at a response and reports likely
// problems. aiFuzz generates adversarial request payloads you can send with the
// normal client. Both are advisory: they hand you findings or inputs, they do
// not change pass or fail on their own.

import { createProvider } from "./provider.js";

// System prompt for aiReview: lists the classes of problem to look for and
// pins the reply format to a bare JSON array of findings.
const REVIEW_SYSTEM =
  "You are a senior API reviewer hunting for bugs in an HTTP response.\n" +
  "Look for wrong or inconsistent types, missing or null fields that look required,\n" +
  "suspicious values (negative amounts, future timestamps on created dates, ids of 0),\n" +
  "leaked secrets, tokens, passwords or PII, mismatches between the status and the body,\n" +
  "and pagination or count inconsistencies.\n" +
  "Return ONLY a JSON array of findings. Each finding is\n" +
  '{ "severity": "low" | "medium" | "high", "field": string | null, "message": string }.\n' +
  "If nothing looks wrong, return [].";

// System prompt for aiFuzz: names the kinds of hostile input to cover and pins
// the reply format to a bare JSON array of request bodies.
const FUZZ_SYSTEM =
  "You generate adversarial request payloads to probe an API for bugs.\n" +
  "Cover boundary values, wrong types, missing required fields, oversized input,\n" +
  "unicode and injection-like strings, and malformed structures.\n" +
  "Return ONLY a JSON array of payloads. Each item is the request body to send.";

// Turn a value into prompt text capped at `max` characters. null and undefined
// become the empty string; anything longer than `max` is cut and tagged with
// "... (truncated)" so the model can tell the text is incomplete.
function truncate(value, max = 4000) {
  let text = "";
  if (value != null) text = String(value);
  if (text.length > max) {
    return `${text.slice(0, max)}... (truncated)`;
  }
  return text;
}

// Pull a JSON array out of a model reply, tolerating markdown fences and prose.
//
// The first fenced code block, if any, is searched first; when it holds no
// array the whole reply is searched instead. Within the searched text every
// "[" is tried in order as the start of an array and the first span that
// parses as a JSON array wins, so bracketed prose before or after the payload
// ("Found issues [2 total]: [...]") no longer hides it.
//
// Returns the parsed array, or [] when no JSON array can be found. Never throws.
function extractJsonArray(text) {
  const raw = String(text).trim();
  const fence = raw.match(/```[a-zA-Z]*\n([\s\S]*?)\n```/);
  const candidates = fence ? [fence[1], raw] : [raw];
  for (const candidate of candidates) {
    const found = firstJsonArray(candidate);
    if (found !== null) return found;
  }
  return [];
}

// Return the first span of `body` that parses as a JSON array, or null.
function firstJsonArray(body) {
  for (let start = body.indexOf("["); start !== -1; start = body.indexOf("[", start + 1)) {
    const end = matchingBracket(body, start);
    if (end === -1) continue;
    try {
      const parsed = JSON.parse(body.slice(start, end + 1));
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // This "[" opened prose, not JSON; move on to the next one.
    }
  }
  return null;
}

// Index of the "]" closing the "[" at `start`, or -1 if it is never closed.
// Brackets inside double-quoted strings (with backslash escapes) are ignored.
function matchingBracket(body, start) {
  let depth = 0;
  let inString = false;
  for (let i = start; i < body.length; i++) {
    const ch = body[i];
    if (inString) {
      if (ch === "\\") i++; // skip the escaped character
      else if (ch === '"') inString = false;
    } else if (ch === '"') {
      inString = true;
    } else if (ch === "[") {
      depth++;
    } else if (ch === "]") {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

function resolveProvider(options) {
return options.provider && typeof options.provider.complete === "function"
? options.provider
: createProvider(options);
}

function buildReviewPrompt(response, options) {
const lines = ["Review this HTTP response for bugs."];
if (response) {
const line = `${response.method || ""} ${response.url || ""}`.trim();
if (line) lines.push(line);
if (response.status != null) lines.push("Status: " + response.status);
if (response.headers) lines.push("Headers: " + truncate(JSON.stringify(response.headers), 800));
const body = response.text != null ? response.text : response.body !== undefined ? JSON.stringify(response.body, null, 2) : "";
if (body) lines.push("Body:\n" + truncate(body));
}
if (options.context) lines.push("Context: " + options.context);
return lines.join("\n");
}

// Review a response and return a list of findings (possibly empty).
export async function aiReview(response, options = {}) {
const provider = resolveProvider(options);
const reply = await provider.complete(buildReviewPrompt(response, options), {
system: REVIEW_SYSTEM,
maxTokens: options.maxTokens || 1024,
});
return extractJsonArray(reply);
}

// Build the user prompt for aiFuzz. It always states how many payloads are
// wanted (8 unless options.count is set) and then adds, when supplied: the
// target line (method upper-cased, POST and "/" as fallbacks), the schema, a
// valid sample to mutate, and any extra instructions. Schema and sample are
// sent verbatim when they are strings and as indented JSON otherwise.
function buildFuzzPrompt(options) {
  const asText = (value) => (typeof value === "string" ? value : JSON.stringify(value, null, 2));

  const parts = [`Generate ${options.count || 8} adversarial request payloads.`];

  if (options.method || options.endpoint) {
    const verb = (options.method || "POST").toUpperCase();
    const path = options.endpoint || "/";
    parts.push(`Target: ${verb} ${path}`);
  }
  if (options.schema !== undefined) {
    parts.push("Schema of the expected body:", asText(options.schema));
  }
  if (options.sample !== undefined) {
    parts.push("A valid sample body to mutate:", asText(options.sample));
  }
  if (options.instructions) {
    parts.push("Extra instructions: " + options.instructions);
  }

  return parts.join("\n");
}

// Generate an array of adversarial payloads to send with the normal client.
export async function aiFuzz(options = {}) {
const provider = resolveProvider(options);
const reply = await provider.complete(buildFuzzPrompt(options), {
system: FUZZ_SYSTEM,
maxTokens: options.maxTokens || 1024,
});
return extractJsonArray(reply);
}
3 changes: 3 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import { fromOpenapi } from "./importers/openapi.js";
import { createProvider } from "./ai/provider.js";
import { aiGenerateTests } from "./ai/generate.js";
import { explainFailure } from "./ai/explain.js";
import { aiReview, aiFuzz } from "./ai/review.js";

// Namespace of all lodash-inspired utilities, available as both `_` and `utils`.
import * as _ from "./utils/index.js";
Expand Down Expand Up @@ -93,6 +94,8 @@ export {
createProvider,
aiGenerateTests,
explainFailure,
aiReview,
aiFuzz,
};

// Also expose the utility belt under the `utils` name.
Expand Down
67 changes: 67 additions & 0 deletions test/unit/ai-review.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Unit tests for AI bug catching. The provider is stubbed, no network.
import { test } from "node:test";
import assert from "node:assert/strict";

import { aiReview, aiFuzz } from "../../src/ai/review.js";

test("aiReview parses a JSON array of findings and sends the body in the prompt", async () => {
  // The stub records the prompt it was given and answers with one finding.
  let capturedPrompt = "";
  const provider = {
    async complete(prompt) {
      capturedPrompt = prompt;
      return '[{"severity":"high","field":"token","message":"auth token leaked in body"}]';
    },
  };
  const response = { method: "GET", url: "/me", status: 200, body: { id: 1, token: "secret" } };

  const findings = await aiReview(response, { provider });

  // The reply is parsed into structured findings...
  assert.equal(findings.length, 1);
  const [finding] = findings;
  assert.equal(finding.severity, "high");
  assert.equal(finding.field, "token");
  // ...and the prompt carried the pretty-printed body and the status line.
  assert.match(capturedPrompt, /"token": "secret"/);
  assert.match(capturedPrompt, /Status: 200/);
});

test("aiReview tolerates markdown fences around the JSON", async () => {
  // Reply wrapped in a ```json fenced block, as chat models often do.
  const fencedReply = ["```json", '[{"severity":"low","field":null,"message":"x"}]', "```"].join("\n");
  const provider = {
    async complete() {
      return fencedReply;
    },
  };

  const findings = await aiReview({ status: 200, body: {} }, { provider });

  assert.equal(findings.length, 1);
  assert.equal(findings[0].field, null);
});

test("aiReview returns an empty array when the model says nothing is wrong", async () => {
  // "[]" is the reply the system prompt asks for when nothing looks wrong.
  const provider = {
    async complete() {
      return "[]";
    },
  };
  const findings = await aiReview({ status: 200, body: {} }, { provider });
  assert.deepEqual(findings, []);
});

test("aiReview returns an empty array on unparseable output instead of throwing", async () => {
  // Plain prose with no JSON array in it must degrade to "no findings".
  const provider = {
    async complete() {
      return "I could not find any issues, sorry.";
    },
  };
  const findings = await aiReview({ status: 200 }, { provider });
  assert.deepEqual(findings, []);
});

test("aiFuzz returns an array of payloads and passes the schema in the prompt", async () => {
  // The stub records the prompt it was given and answers with four payloads.
  let capturedPrompt = "";
  const provider = {
    async complete(prompt) {
      capturedPrompt = prompt;
      return '[{"name":""},{"name":null},{"name":"<script>"},{"age":-1}]';
    },
  };
  const schema = { type: "object", properties: { name: { type: "string" } } };

  const payloads = await aiFuzz({ provider, endpoint: "/users", method: "POST", count: 4, schema });

  // Every payload from the reply comes back, in order.
  assert.equal(payloads.length, 4);
  assert.deepEqual(payloads[3], { age: -1 });
  // The prompt names the task, the target and the pretty-printed schema.
  assert.match(capturedPrompt, /adversarial request payloads/);
  assert.match(capturedPrompt, /POST \/users/);
  assert.match(capturedPrompt, /"type": "string"/);
});
Loading