From f7ef687b6867c70f5946199708cdf90d37c55a2a Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 12:52:25 +0000 Subject: [PATCH 1/5] feat: add OpenAI provider support Adds OpenAI as a fourth model provider alongside Anthropic, Gemini, and Ollama. Includes the OpenAI provider implementation, 7 models in the catalog with verified June 2026 pricing, and automatic provider inference when OPENAI_API_KEY is the only key present. - src/providers/openaiFormat.ts: extracted shared OpenAI wire-format helpers (message translation, SSE parsing) used by both Ollama and the new OpenAI provider - src/providers/openai.ts: OpenAIProvider streaming over /v1/chat/completions; uses max_completion_tokens for o-series compat - Provider + config wired end-to-end: OPENAI_API_KEY, TINY_CODE_OPENAI_URL, openaiBaseUrl for Azure/proxy overrides, escalateTo support - Catalog: o3, gpt-4.1, o4-mini, gpt-4o, gpt-4.1-mini, gpt-4o-mini, gpt-4.1-nano with pricing and coding scores https://claude.ai/code/session_01TwQBmEkVM2JcZQYqV4jsZJ --- .env.example | 8 +- src/config/load.ts | 17 ++- src/models/catalog.ts | 11 +- src/providers/index.ts | 14 +++ src/providers/ollama.ts | 131 ++--------------------- src/providers/openai.ts | 104 ++++++++++++++++++ src/providers/openaiFormat.ts | 123 ++++++++++++++++++++++ src/providers/types.ts | 2 +- tests/config/load.test.ts | 25 +++++ tests/providers/openaiSend.test.ts | 163 +++++++++++++++++++++++++++++ tests/providers/translate.test.ts | 2 +- 11 files changed, 468 insertions(+), 132 deletions(-) create mode 100644 src/providers/openai.ts create mode 100644 src/providers/openaiFormat.ts create mode 100644 tests/providers/openaiSend.test.ts diff --git a/.env.example b/.env.example index ab7215c..1da73e1 100644 --- a/.env.example +++ b/.env.example @@ -1,12 +1,14 @@ -# Provide at least one for cloud providers. If both are present, Anthropic is -# the default. Ollama runs locally and needs no key. +# Provide at least one for cloud providers. 
If multiple are present, Anthropic +# takes precedence, then Gemini, then OpenAI. Ollama runs locally with no key. ANTHROPIC_API_KEY= GEMINI_API_KEY= +OPENAI_API_KEY= # Optional overrides (also settable via config file / CLI flags) -# TINY_CODE_PROVIDER=anthropic # anthropic | gemini | ollama +# TINY_CODE_PROVIDER=anthropic # anthropic | gemini | ollama | openai # TINY_CODE_MODEL=claude-opus-4-8 # TINY_CODE_OLLAMA_URL=http://localhost:11434/v1 # Ollama OpenAI-compatible endpoint +# TINY_CODE_OPENAI_URL=https://api.openai.com/v1 # Override OpenAI base URL (e.g. for Azure) # TINY_CODE_PRIORITY=performance # performance | cost | balanced — auto-picks a model when none is pinned # TINY_CODE_EFFORT=high # low | medium | high | xhigh | max — Anthropic thinking budget diff --git a/src/config/load.ts b/src/config/load.ts index 4d25096..a6760e3 100644 --- a/src/config/load.ts +++ b/src/config/load.ts @@ -5,7 +5,7 @@ import { z } from 'zod'; import type { Priority } from '../models/catalog.js'; import { recommendModel } from '../models/catalog.js'; -export type Provider = 'anthropic' | 'gemini' | 'ollama'; +export type Provider = 'anthropic' | 'gemini' | 'ollama' | 'openai'; export type Effort = 'low' | 'medium' | 'high' | 'xhigh' | 'max'; export type Routing = 'local-first' | 'off'; export type { Priority } from '../models/catalog.js'; @@ -34,8 +34,11 @@ export interface ResolvedConfig { priority: Priority; anthropicApiKey: string | undefined; geminiApiKey: string | undefined; + openaiApiKey: string | undefined; /** OpenAI-compatible base URL for the Ollama provider. */ ollamaBaseUrl: string; + /** Base URL for the OpenAI provider. Defaults to https://api.openai.com/v1. 
*/ + openaiBaseUrl: string | undefined; maxTokens: number; thinking: boolean; effort: Effort; @@ -69,21 +72,23 @@ const DEFAULT_MODELS: Record = { anthropic: 'claude-opus-4-8', gemini: 'gemini-2.5-pro', ollama: 'qwen2.5-coder:7b', + openai: 'gpt-4.1', }; const DEFAULT_OLLAMA_URL = 'http://localhost:11434/v1'; const EscalateTargetSchema = z.object({ - provider: z.enum(['anthropic', 'gemini', 'ollama']), + provider: z.enum(['anthropic', 'gemini', 'ollama', 'openai']), model: z.string(), ollamaBaseUrl: z.string().url().optional(), }); const FileConfigSchema = z .object({ - provider: z.enum(['anthropic', 'gemini', 'ollama']).optional(), + provider: z.enum(['anthropic', 'gemini', 'ollama', 'openai']).optional(), model: z.string().optional(), ollamaBaseUrl: z.string().url().optional(), + openaiBaseUrl: z.string().url().optional(), priority: z.enum(['performance', 'cost', 'balanced']).optional(), maxTokens: z.number().int().positive().optional(), thinking: z.boolean().optional(), @@ -132,12 +137,13 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c const env = process.env; const anthropicApiKey = env.ANTHROPIC_API_KEY || undefined; const geminiApiKey = env.GEMINI_API_KEY || undefined; + const openaiApiKey = env.OPENAI_API_KEY || undefined; const provider: Provider = overrides.provider ?? (env.TINY_CODE_PROVIDER as Provider | undefined) ?? file.provider ?? - (anthropicApiKey ? 'anthropic' : geminiApiKey ? 'gemini' : 'anthropic'); + (anthropicApiKey ? 'anthropic' : geminiApiKey ? 'gemini' : openaiApiKey ? 'openai' : 'anthropic'); const priority: Priority = (env.TINY_CODE_PRIORITY as Priority | undefined) ?? file.priority ?? 'performance'; @@ -158,6 +164,7 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c const effort = (env.TINY_CODE_EFFORT as Effort | undefined) ?? file.effort ?? 'high'; const ollamaBaseUrl = env.TINY_CODE_OLLAMA_URL ?? file.ollamaBaseUrl ?? 
DEFAULT_OLLAMA_URL; + const openaiBaseUrl = env.TINY_CODE_OPENAI_URL ?? file.openaiBaseUrl ?? undefined; const escalateTo = file.escalateTo; // Default to local-first whenever an escalation target is configured. @@ -174,7 +181,9 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c priority, anthropicApiKey, geminiApiKey, + openaiApiKey, ollamaBaseUrl, + openaiBaseUrl, maxTokens, thinking: file.thinking ?? true, effort, diff --git a/src/models/catalog.ts b/src/models/catalog.ts index b7f1a94..f96a397 100644 --- a/src/models/catalog.ts +++ b/src/models/catalog.ts @@ -32,7 +32,7 @@ export interface ModelInfo { * from the bundled claude-api reference; Gemini figures from Google's published * API pricing. */ -export const CATALOG_AS_OF = '2026-06-08'; +export const CATALOG_AS_OF = '2026-06-10'; /** * The known coding models, newest/most-capable first within each provider. @@ -51,6 +51,15 @@ export const MODEL_CATALOG: ModelInfo[] = [ { id: 'gemini-2.5-pro', provider: 'gemini', label: 'Gemini 2.5 Pro', inputPricePerMTok: 1.25, outputPricePerMTok: 10, contextWindow: 1_048_576, codingScore: 90 }, { id: 'gemini-2.5-flash', provider: 'gemini', label: 'Gemini 2.5 Flash', inputPricePerMTok: 0.3, outputPricePerMTok: 2.5, contextWindow: 1_048_576, codingScore: 72 }, { id: 'gemini-2.5-flash-lite', provider: 'gemini', label: 'Gemini 2.5 Flash-Lite', inputPricePerMTok: 0.1, outputPricePerMTok: 0.4, contextWindow: 1_048_576, codingScore: 55 }, + + // OpenAI — pricing from OpenAI's published API rates (June 2026). 
+ { id: 'o3', provider: 'openai', label: 'OpenAI o3', inputPricePerMTok: 2, outputPricePerMTok: 8, contextWindow: 200_000, codingScore: 94 }, + { id: 'gpt-4.1', provider: 'openai', label: 'GPT-4.1', inputPricePerMTok: 2, outputPricePerMTok: 8, contextWindow: 1_000_000, codingScore: 88 }, + { id: 'o4-mini', provider: 'openai', label: 'OpenAI o4-mini', inputPricePerMTok: 1.1, outputPricePerMTok: 4.4, contextWindow: 200_000, codingScore: 85 }, + { id: 'gpt-4o', provider: 'openai', label: 'GPT-4o', inputPricePerMTok: 2.5, outputPricePerMTok: 10, contextWindow: 128_000, codingScore: 82 }, + { id: 'gpt-4.1-mini', provider: 'openai', label: 'GPT-4.1 Mini', inputPricePerMTok: 0.4, outputPricePerMTok: 1.6, contextWindow: 1_000_000, codingScore: 72 }, + { id: 'gpt-4o-mini', provider: 'openai', label: 'GPT-4o Mini', inputPricePerMTok: 0.15, outputPricePerMTok: 0.6, contextWindow: 128_000, codingScore: 65 }, + { id: 'gpt-4.1-nano', provider: 'openai', label: 'GPT-4.1 Nano', inputPricePerMTok: 0.1, outputPricePerMTok: 0.4, contextWindow: 1_000_000, codingScore: 50 }, ]; /** Look up catalog facts for a model id, or `undefined` if it's not tracked. */ diff --git a/src/providers/index.ts b/src/providers/index.ts index 89c6b3f..e13bc11 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -3,11 +3,13 @@ import type { ResolvedConfig } from '../config/load.js'; import { AnthropicProvider } from './anthropic.js'; import { GeminiProvider } from './gemini.js'; import { OllamaProvider } from './ollama.js'; +import { OpenAIProvider } from './openai.js'; export type { ModelProvider, ProviderEvent, SendRequest, ToolSchema, Usage } from './types.js'; export { AnthropicProvider } from './anthropic.js'; export { GeminiProvider } from './gemini.js'; export { OllamaProvider } from './ollama.js'; +export { OpenAIProvider } from './openai.js'; /** Construct the configured provider, validating that its API key is present. 
*/ export function createProvider(config: ResolvedConfig): ModelProvider { @@ -33,6 +35,18 @@ export function createProvider(config: ResolvedConfig): ModelProvider { }); } + if (config.provider === 'openai') { + if (!config.openaiApiKey) { + throw new Error('OPENAI_API_KEY is not set. Export it or switch providers with --provider anthropic.'); + } + return new OpenAIProvider({ + apiKey: config.openaiApiKey, + model: config.model, + maxTokens: config.maxTokens, + baseUrl: config.openaiBaseUrl, + }); + } + if (!config.geminiApiKey) { throw new Error('GEMINI_API_KEY is not set. Export it or switch providers with --provider anthropic.'); } diff --git a/src/providers/ollama.ts b/src/providers/ollama.ts index 79f0ca8..63f6f11 100644 --- a/src/providers/ollama.ts +++ b/src/providers/ollama.ts @@ -1,5 +1,12 @@ -import type { Message } from '../agent/types.js'; -import type { ModelProvider, ProviderEvent, SendRequest, ToolSchema } from './types.js'; +import type { ModelProvider, ProviderEvent, SendRequest } from './types.js'; +import { + type OpenAiMessage, + toOpenAiMessages, + toOpenAiTools, + parseSse, +} from './openaiFormat.js'; + +export { toOpenAiMessages, toOpenAiTools } from './openaiFormat.js'; export interface OllamaProviderOptions { /** OpenAI-compatible base URL, e.g. "http://localhost:11434/v1". */ @@ -18,87 +25,6 @@ export interface OllamaProviderOptions { timeoutMs?: number; } -interface OpenAiMessage { - role: 'system' | 'user' | 'assistant' | 'tool'; - content: string; - tool_calls?: { id: string; type: 'function'; function: { name: string; arguments: string } }[]; - tool_call_id?: string; -} - -/** - * Translate internal messages into OpenAI chat messages (the shape Ollama's - * `/v1/chat/completions` endpoint accepts). Unlike Gemini, OpenAI correlates - * tool results to calls by `tool_call_id`, and our Anthropic-style ids survive - * the round trip — so no id synthesis is needed. 
- * - * Assumes the loop never mixes plain text and tool results in one user turn in a - * way that would interleave them: we emit all `tool` messages first, then any - * text as a trailing user message. OpenAI requires each `tool` message to follow - * the assistant `tool_calls` that produced it; today's loop builds messages so - * that holds. If a future change interleaves them, revisit this ordering. - */ -export function toOpenAiMessages(messages: Message[]): OpenAiMessage[] { - const out: OpenAiMessage[] = []; - for (const m of messages) { - if (m.role === 'user') { - // A user turn may carry plain text and/or tool results; emit each result - // as its own `tool` message and gather any text into one user message. - let text = ''; - for (const b of m.content) { - if (b.type === 'text') text += b.text; - else if (b.type === 'tool_result') { - out.push({ role: 'tool', tool_call_id: b.toolUseId, content: b.content }); - } - } - if (text.length > 0) out.push({ role: 'user', content: text }); - continue; - } - - // assistant: merge text + tool_use into a single message - let text = ''; - const toolCalls: NonNullable = []; - for (const b of m.content) { - if (b.type === 'text') text += b.text; - else if (b.type === 'tool_use') { - toolCalls.push({ - id: b.id, - type: 'function', - function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) }, - }); - } - } - const msg: OpenAiMessage = { role: 'assistant', content: text }; - if (toolCalls.length > 0) msg.tool_calls = toolCalls; - out.push(msg); - } - return out; -} - -/** Translate normalized tool schemas into OpenAI's `tools` array. 
*/ -export function toOpenAiTools(tools: ToolSchema[]): unknown[] { - return tools.map((t) => ({ - type: 'function', - function: { name: t.name, description: t.description, parameters: t.jsonSchema }, - })); -} - -interface StreamChoice { - delta?: { - content?: string | null; - tool_calls?: { - index: number; - id?: string; - function?: { name?: string; arguments?: string }; - }[]; - }; - finish_reason?: string | null; -} - -interface StreamChunk { - choices?: StreamChoice[]; - usage?: { prompt_tokens?: number; completion_tokens?: number } | null; -} - export class OllamaProvider implements ModelProvider { readonly name = 'ollama' as const; readonly model: string; @@ -231,42 +157,3 @@ export class OllamaProvider implements ModelProvider { } } -/** Decode a single SSE line into a chunk, or `undefined` for non-data/keep-alive lines. */ -function parseSseLine(raw: string): StreamChunk | undefined { - const line = raw.trim(); - if (!line.startsWith('data:')) return undefined; - const payload = line.slice(5).trim(); - if (payload === '[DONE]' || payload.length === 0) return undefined; - try { - return JSON.parse(payload) as StreamChunk; - } catch { - // Ignore partial/non-JSON keep-alive lines. - return undefined; - } -} - -/** Parse an SSE byte stream into decoded JSON chunks, skipping the `[DONE]` sentinel. */ -async function* parseSse(body: ReadableStream): AsyncIterable { - const decoder = new TextDecoder(); - let buffer = ''; - const reader = body.getReader(); - try { - for (;;) { - const { done, value } = await reader.read(); - if (done) break; - buffer += decoder.decode(value, { stream: true }); - let nl: number; - while ((nl = buffer.indexOf('\n')) !== -1) { - const chunk = parseSseLine(buffer.slice(0, nl)); - buffer = buffer.slice(nl + 1); - if (chunk) yield chunk; - } - } - // Emit a final line that arrived without a trailing newline (e.g. a closing - // usage frame); otherwise the last chunk's token counts would be dropped. 
- const tail = parseSseLine(buffer); - if (tail) yield tail; - } finally { - reader.releaseLock(); - } -} diff --git a/src/providers/openai.ts b/src/providers/openai.ts new file mode 100644 index 0000000..152d804 --- /dev/null +++ b/src/providers/openai.ts @@ -0,0 +1,104 @@ +import type { ModelProvider, ProviderEvent, SendRequest } from './types.js'; +import { toOpenAiMessages, toOpenAiTools, parseSse } from './openaiFormat.js'; + +export interface OpenAIProviderOptions { + apiKey: string; + model: string; + /** Cap on tokens to generate per response. Omitted from the request if unset. */ + maxTokens?: number; + /** Override the base URL, e.g. for Azure OpenAI or a compatible proxy. Defaults to https://api.openai.com/v1. */ + baseUrl?: string; +} + +export class OpenAIProvider implements ModelProvider { + readonly name = 'openai' as const; + readonly model: string; + private readonly baseUrl: string; + private readonly apiKey: string; + private readonly maxTokens: number | undefined; + + constructor(opts: OpenAIProviderOptions) { + this.baseUrl = (opts.baseUrl ?? 'https://api.openai.com/v1').replace(/\/$/, ''); + this.model = opts.model; + this.apiKey = opts.apiKey; + this.maxTokens = opts.maxTokens; + } + + async *send(req: SendRequest): AsyncIterable { + const messages = [ + { role: 'system' as const, content: req.system }, + ...toOpenAiMessages(req.messages), + ]; + + const body: Record = { + model: this.model, + messages, + tools: req.tools.length > 0 ? 
toOpenAiTools(req.tools) : undefined, + stream: true, + stream_options: { include_usage: true }, + }; + if (this.maxTokens !== undefined) body.max_completion_tokens = this.maxTokens; + + let res: Response; + try { + res = await fetch(`${this.baseUrl}/chat/completions`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${this.apiKey}`, + }, + body: JSON.stringify(body), + }); + } catch (err) { + throw new Error(`Cannot reach OpenAI at ${this.baseUrl}: ${(err as Error).message}`); + } + + if (!res.ok || !res.body) { + const detail = await res.text().catch(() => ''); + throw new Error(`OpenAI request failed (${res.status}): ${detail.slice(0, 200)}`); + } + + // Accumulate tool calls by their streamed index; arguments arrive in fragments. + const calls = new Map(); + let usage = { inputTokens: 0, outputTokens: 0 }; + let finish = 'stop'; + + for await (const chunk of parseSse(res.body)) { + const choice = chunk.choices?.[0]; + if (choice?.delta?.content) yield { type: 'text', delta: choice.delta.content }; + + for (const tc of choice?.delta?.tool_calls ?? []) { + const acc = calls.get(tc.index) ?? { id: '', name: '', args: '' }; + if (tc.id) acc.id = tc.id; + if (tc.function?.name) acc.name = tc.function.name; + if (tc.function?.arguments) acc.args += tc.function.arguments; + calls.set(tc.index, acc); + } + + if (choice?.finish_reason) finish = choice.finish_reason; + if (chunk.usage) { + usage = { + inputTokens: chunk.usage.prompt_tokens ?? 0, + outputTokens: chunk.usage.completion_tokens ?? 0, + }; + } + } + + for (const [index, c] of [...calls.entries()].sort((a, b) => a[0] - b[0])) { + let input: unknown = {}; + try { + input = c.args.trim() ? JSON.parse(c.args) : {}; + } catch { + // Malformed JSON from the model; degrade gracefully. + input = {}; + } + yield { type: 'tool_call', id: c.id || `openai-call-${index}`, name: c.name, input }; + } + + yield { + type: 'done', + usage, + stopReason: calls.size > 0 ? 
'tool_use' : finish, + }; + } +} diff --git a/src/providers/openaiFormat.ts b/src/providers/openaiFormat.ts new file mode 100644 index 0000000..7aa0bca --- /dev/null +++ b/src/providers/openaiFormat.ts @@ -0,0 +1,123 @@ +import type { Message } from '../agent/types.js'; +import type { ToolSchema } from './types.js'; + +export interface OpenAiMessage { + role: 'system' | 'user' | 'assistant' | 'tool'; + content: string; + tool_calls?: { id: string; type: 'function'; function: { name: string; arguments: string } }[]; + tool_call_id?: string; +} + +/** + * Translate internal messages into OpenAI chat messages (the shape the + * `/v1/chat/completions` endpoint accepts). Unlike Gemini, OpenAI correlates + * tool results to calls by `tool_call_id`, and our Anthropic-style ids survive + * the round trip — so no id synthesis is needed. + * + * Assumes the loop never mixes plain text and tool results in one user turn in a + * way that would interleave them: we emit all `tool` messages first, then any + * text as a trailing user message. OpenAI requires each `tool` message to follow + * the assistant `tool_calls` that produced it; today's loop builds messages so + * that holds. If a future change interleaves them, revisit this ordering. + */ +export function toOpenAiMessages(messages: Message[]): OpenAiMessage[] { + const out: OpenAiMessage[] = []; + for (const m of messages) { + if (m.role === 'user') { + // A user turn may carry plain text and/or tool results; emit each result + // as its own `tool` message and gather any text into one user message. 
+ let text = ''; + for (const b of m.content) { + if (b.type === 'text') text += b.text; + else if (b.type === 'tool_result') { + out.push({ role: 'tool', tool_call_id: b.toolUseId, content: b.content }); + } + } + if (text.length > 0) out.push({ role: 'user', content: text }); + continue; + } + + // assistant: merge text + tool_use into a single message + let text = ''; + const toolCalls: NonNullable = []; + for (const b of m.content) { + if (b.type === 'text') text += b.text; + else if (b.type === 'tool_use') { + toolCalls.push({ + id: b.id, + type: 'function', + function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) }, + }); + } + } + const msg: OpenAiMessage = { role: 'assistant', content: text }; + if (toolCalls.length > 0) msg.tool_calls = toolCalls; + out.push(msg); + } + return out; +} + +/** Translate normalized tool schemas into OpenAI's `tools` array. */ +export function toOpenAiTools(tools: ToolSchema[]): unknown[] { + return tools.map((t) => ({ + type: 'function', + function: { name: t.name, description: t.description, parameters: t.jsonSchema }, + })); +} + +export interface StreamChoice { + delta?: { + content?: string | null; + tool_calls?: { + index: number; + id?: string; + function?: { name?: string; arguments?: string }; + }[]; + }; + finish_reason?: string | null; +} + +export interface StreamChunk { + choices?: StreamChoice[]; + usage?: { prompt_tokens?: number; completion_tokens?: number } | null; +} + +/** Decode a single SSE line into a chunk, or `undefined` for non-data/keep-alive lines. */ +export function parseSseLine(raw: string): StreamChunk | undefined { + const line = raw.trim(); + if (!line.startsWith('data:')) return undefined; + const payload = line.slice(5).trim(); + if (payload === '[DONE]' || payload.length === 0) return undefined; + try { + return JSON.parse(payload) as StreamChunk; + } catch { + // Ignore partial/non-JSON keep-alive lines. 
+ return undefined; + } +} + +/** Parse an SSE byte stream into decoded JSON chunks, skipping the `[DONE]` sentinel. */ +export async function* parseSse(body: ReadableStream): AsyncIterable { + const decoder = new TextDecoder(); + let buffer = ''; + const reader = body.getReader(); + try { + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + let nl: number; + while ((nl = buffer.indexOf('\n')) !== -1) { + const chunk = parseSseLine(buffer.slice(0, nl)); + buffer = buffer.slice(nl + 1); + if (chunk) yield chunk; + } + } + // Emit a final line that arrived without a trailing newline (e.g. a closing + // usage frame); otherwise the last chunk's token counts would be dropped. + const tail = parseSseLine(buffer); + if (tail) yield tail; + } finally { + reader.releaseLock(); + } +} diff --git a/src/providers/types.ts b/src/providers/types.ts index c18443e..b950e4e 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -34,7 +34,7 @@ export interface SendRequest { * {@link ProviderEvent}. 
*/ export interface ModelProvider { - readonly name: 'anthropic' | 'gemini' | 'ollama'; + readonly name: 'anthropic' | 'gemini' | 'ollama' | 'openai'; readonly model: string; send(req: SendRequest): AsyncIterable; } diff --git a/tests/config/load.test.ts b/tests/config/load.test.ts index 2da1595..170282b 100644 --- a/tests/config/load.test.ts +++ b/tests/config/load.test.ts @@ -7,12 +7,14 @@ import { loadConfig } from '../../src/config/load.js'; const ENV_KEYS = [ 'ANTHROPIC_API_KEY', 'GEMINI_API_KEY', + 'OPENAI_API_KEY', 'TINY_CODE_PROVIDER', 'TINY_CODE_MODEL', 'TINY_CODE_PRIORITY', 'TINY_CODE_MAX_TOKENS', 'TINY_CODE_EFFORT', 'TINY_CODE_OLLAMA_URL', + 'TINY_CODE_OPENAI_URL', 'TINY_CODE_IMPROVE', 'HOME', ]; @@ -148,6 +150,29 @@ describe('loadConfig', () => { expect(cfg.model).toBe('from-env'); }); + it('infers openai when only OPENAI_API_KEY is set', () => { + process.env.OPENAI_API_KEY = 'sk-openai-test'; + const cfg = loadConfig({}, cwd); + expect(cfg.provider).toBe('openai'); + // performance priority picks the highest-scoring OpenAI model (o3) + expect(cfg.model).toBe('o3'); + expect(cfg.openaiApiKey).toBe('sk-openai-test'); + }); + + it('prefers anthropic over openai when both keys are present', () => { + process.env.ANTHROPIC_API_KEY = 'sk-anthropic'; + process.env.OPENAI_API_KEY = 'sk-openai'; + const cfg = loadConfig({}, cwd); + expect(cfg.provider).toBe('anthropic'); + }); + + it('honors TINY_CODE_OPENAI_URL over the default', () => { + process.env.OPENAI_API_KEY = 'sk-openai-test'; + process.env.TINY_CODE_OPENAI_URL = 'https://my-azure-endpoint.openai.azure.com/openai'; + const cfg = loadConfig({ provider: 'openai' }, cwd); + expect(cfg.openaiBaseUrl).toBe('https://my-azure-endpoint.openai.azure.com/openai'); + }); + it('supports the ollama provider with its default model and base URL', () => { const cfg = loadConfig({ provider: 'ollama' }, cwd); expect(cfg.provider).toBe('ollama'); diff --git a/tests/providers/openaiSend.test.ts 
b/tests/providers/openaiSend.test.ts new file mode 100644 index 0000000..5a59c74 --- /dev/null +++ b/tests/providers/openaiSend.test.ts @@ -0,0 +1,163 @@ +import { describe, it, expect, vi, afterEach } from 'vitest'; +import { OpenAIProvider } from '../../src/providers/openai.js'; +import type { ProviderEvent } from '../../src/providers/types.js'; + +/** Build a fake SSE Response body from a list of OpenAI-style chunks. */ +function sseResponse(chunks: unknown[]): Response { + const lines = chunks.map((c) => `data: ${JSON.stringify(c)}\n\n`).concat('data: [DONE]\n\n'); + const stream = new ReadableStream({ + start(controller) { + const enc = new TextEncoder(); + for (const line of lines) controller.enqueue(enc.encode(line)); + controller.close(); + }, + }); + return new Response(stream, { status: 200, headers: { 'Content-Type': 'text/event-stream' } }); +} + +afterEach(() => vi.restoreAllMocks()); + +async function collect(provider: OpenAIProvider): Promise { + const events: ProviderEvent[] = []; + for await (const e of provider.send({ + system: 's', + messages: [{ role: 'user', content: [{ type: 'text', text: 'go' }] }], + tools: [{ name: 'ls', description: 'list', jsonSchema: { type: 'object' } }], + })) { + events.push(e); + } + return events; +} + +describe('OpenAIProvider.send', () => { + it('maps streamed deltas into text, tool_call, and done events', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue( + sseResponse([ + { choices: [{ delta: { content: 'Hel' } }] }, + { choices: [{ delta: { content: 'lo' } }] }, + { + choices: [ + { + delta: { tool_calls: [{ index: 0, id: 'c1', function: { name: 'ls', arguments: '{"path":' } }] }, + }, + ], + }, + { + choices: [{ delta: { tool_calls: [{ index: 0, function: { arguments: '"."}' } }] }, finish_reason: 'tool_calls' }], + }, + { choices: [], usage: { prompt_tokens: 11, completion_tokens: 7 } }, + ]), + ); + + const provider = new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1' }); + const events = 
await collect(provider); + + const text = events.filter((e) => e.type === 'text').map((e) => (e as { delta: string }).delta); + expect(text.join('')).toBe('Hello'); + + const call = events.find((e) => e.type === 'tool_call'); + expect(call).toMatchObject({ type: 'tool_call', id: 'c1', name: 'ls', input: { path: '.' } }); + + const done = events.find((e) => e.type === 'done'); + expect(done).toMatchObject({ + type: 'done', + stopReason: 'tool_use', + usage: { inputTokens: 11, outputTokens: 7 }, + }); + }); + + it('degrades to empty input on malformed tool-call JSON', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue( + sseResponse([ + { + choices: [ + { delta: { tool_calls: [{ index: 0, id: 'c1', function: { name: 'ls', arguments: '{bad' } }] } }, + ], + }, + ]), + ); + const provider = new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1' }); + const events = await collect(provider); + const call = events.find((e) => e.type === 'tool_call'); + expect(call).toMatchObject({ name: 'ls', input: {} }); + }); + + it('sends stream_options.include_usage in the request body', async () => { + const fetchMock = vi + .spyOn(globalThis, 'fetch') + .mockResolvedValue(sseResponse([{ choices: [{ delta: { content: 'ok' } }] }])); + + const provider = new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1' }); + await collect(provider); + + const body = JSON.parse((fetchMock.mock.calls[0]![1] as RequestInit).body as string); + expect(body.stream_options).toEqual({ include_usage: true }); + }); + + it('forwards maxTokens as max_completion_tokens, omits it when unset', async () => { + const fetchMock = vi + .spyOn(globalThis, 'fetch') + .mockResolvedValue(sseResponse([{ choices: [{ delta: { content: 'ok' } }] }])); + + await collect(new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1', maxTokens: 512 })); + const capped = JSON.parse((fetchMock.mock.calls[0]![1] as RequestInit).body as string); + expect(capped.max_completion_tokens).toBe(512); + + 
fetchMock.mockClear(); + await collect(new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1' })); + const uncapped = JSON.parse((fetchMock.mock.calls[0]![1] as RequestInit).body as string); + expect(uncapped).not.toHaveProperty('max_completion_tokens'); + }); + + it('sends the Authorization header with the API key', async () => { + const fetchMock = vi + .spyOn(globalThis, 'fetch') + .mockResolvedValue(sseResponse([{ choices: [{ delta: { content: 'ok' } }] }])); + + await collect(new OpenAIProvider({ apiKey: 'sk-my-key', model: 'gpt-4.1' })); + const headers = (fetchMock.mock.calls[0]![1] as RequestInit).headers as Record; + expect(headers['Authorization']).toBe('Bearer sk-my-key'); + }); + + it('uses a custom baseUrl when provided', async () => { + const fetchMock = vi + .spyOn(globalThis, 'fetch') + .mockResolvedValue(sseResponse([{ choices: [{ delta: { content: 'ok' } }] }])); + + await collect( + new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1', baseUrl: 'https://my-proxy.example.com/v1' }), + ); + expect(fetchMock.mock.calls[0]![0]).toBe('https://my-proxy.example.com/v1/chat/completions'); + }); + + it('still parses a final usage frame that lacks a trailing newline', async () => { + const raw = + 'data: {"choices":[{"delta":{"content":"hi"}}]}\n\n' + + 'data: {"choices":[],"usage":{"prompt_tokens":3,"completion_tokens":4}}'; // no trailing \n + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(raw)); + controller.close(); + }, + }); + vi.spyOn(globalThis, 'fetch').mockResolvedValue( + new Response(stream, { status: 200, headers: { 'Content-Type': 'text/event-stream' } }), + ); + + const provider = new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1' }); + const done = (await collect(provider)).find((e) => e.type === 'done'); + expect(done).toMatchObject({ usage: { inputTokens: 3, outputTokens: 4 } }); + }); + + it('throws a helpful error when the server is unreachable', async () => { + 
vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('ECONNREFUSED')); + const provider = new OpenAIProvider({ apiKey: 'sk-test', model: 'gpt-4.1' }); + await expect(collect(provider)).rejects.toThrow(/Cannot reach OpenAI/); + }); + + it('throws on non-2xx responses with the status and body detail', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue(new Response('invalid_api_key', { status: 401 })); + const provider = new OpenAIProvider({ apiKey: 'sk-bad', model: 'gpt-4.1' }); + await expect(collect(provider)).rejects.toThrow(/OpenAI request failed \(401\)/); + }); +}); diff --git a/tests/providers/translate.test.ts b/tests/providers/translate.test.ts index 0a55560..6c90461 100644 --- a/tests/providers/translate.test.ts +++ b/tests/providers/translate.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect } from 'vitest'; import { toAnthropicMessages } from '../../src/providers/anthropic.js'; import { toGeminiContents } from '../../src/providers/gemini.js'; -import { toOpenAiMessages, toOpenAiTools } from '../../src/providers/ollama.js'; +import { toOpenAiMessages, toOpenAiTools } from '../../src/providers/openaiFormat.js'; import type { Message } from '../../src/agent/types.js'; const conversation: Message[] = [ From b6c029b25cdf65e1fd88862650d609c185d7e641 Mon Sep 17 00:00:00 2001 From: Zack Anselm Date: Wed, 10 Jun 2026 09:20:37 -0500 Subject: [PATCH 2/5] revert version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 07004f4..1a2d61c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@therr/tiny-code", - "version": "0.3.0", + "version": "0.2.3", "description": "A small, extensible CLI coding agent with interchangeable Anthropic and Gemini models.", "type": "module", "bin": { From 1b0e30edb928de98bd32662e6399e2d40c67aaa3 Mon Sep 17 00:00:00 2001 From: Zack Anselm Date: Wed, 10 Jun 2026 09:25:26 -0500 Subject: [PATCH 3/5] 
=?UTF-8?q?feat:=20OpenAIProvider=20refactor=20?= =?UTF-8?q?=E2=80=94=20src/providers/openai.ts=20now=20extends=20OpenAiCom?= =?UTF-8?q?patibleProvider?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- package.json | 7 +- src/providers/openai-compatible.ts | 26 +++++-- src/providers/openai.ts | 119 +++++++---------------------- 4 files changed, 54 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index 537d336..6790f36 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Run cheap, open-weight models locally and **escalate heavy work to a frontier model only when needed** — see [Local models & cost-aware routing](#local-models--cost-aware-routing). > Status: early (v0.x). Published as `@therr/tiny-code`; the binary is -> `tiny-code`. Names may change before the first npm publish. +> `tiny-code`. APIs and config may still change between minor versions. ## Install diff --git a/package.json b/package.json index 1a2d61c..89ccb9e 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@therr/tiny-code", "version": "0.2.3", - "description": "A small, extensible CLI coding agent with interchangeable Anthropic and Gemini models.", + "description": "A small, extensible CLI coding agent with interchangeable Anthropic, Gemini, OpenAI, DeepSeek, Qwen, and local (Ollama) models.", "type": "module", "bin": { "tiny-code": "dist/cli.js" @@ -54,6 +54,11 @@ "anthropic", "claude", "gemini", + "openai", + "deepseek", + "qwen", + "ollama", + "local-llm", "llm" ], "license": "SEE LICENSE IN LICENSE", diff --git a/src/providers/openai-compatible.ts b/src/providers/openai-compatible.ts index 8492205..b424b95 100644 --- a/src/providers/openai-compatible.ts +++ b/src/providers/openai-compatible.ts @@ -17,7 +17,7 @@ export interface OpenAiCompatibleOptions { timeoutMs?: number; } -interface OpenAiMessage { +export interface OpenAiMessage { role: 'system' | 'user' | 'assistant' | 'tool'; 
content: string; tool_calls?: { id: string; type: 'function'; function: { name: string; arguments: string } }[]; @@ -126,13 +126,7 @@ export abstract class OpenAiCompatibleProvider implements ModelProvider { ...toOpenAiMessages(req.messages), ]; - const body = { - model: this.model, - messages, - tools: req.tools.length > 0 ? toOpenAiTools(req.tools) : undefined, - stream: true, - max_tokens: this.maxTokens, - }; + const body = this.buildBody(messages, req); // Idle-timeout guard: abort if the server goes silent for `timeoutMs`. The // raw fetch (unlike the cloud SDKs) has no built-in timeout, so without this @@ -216,6 +210,22 @@ export abstract class OpenAiCompatibleProvider implements ModelProvider { } } + /** + * Build the `/chat/completions` request body. Subclasses override to adjust + * provider-specific fields — e.g. OpenAI's hosted API requires + * `max_completion_tokens` rather than `max_tokens`. `stream_options` is added + * by {@link send} (with a no-`stream_options` retry), so it isn't set here. + */ + protected buildBody(messages: OpenAiMessage[], req: SendRequest): Record<string, unknown> { + return { + model: this.model, + messages, + tools: req.tools.length > 0 ? toOpenAiTools(req.tools) : undefined, + stream: true, + max_tokens: this.maxTokens, + }; + } + /** Human-readable provider name used in error messages. */ protected label(): string { return this.name; diff --git a/src/providers/openai.ts b/src/providers/openai.ts index 008a6ea..1760795 100644 --- a/src/providers/openai.ts +++ b/src/providers/openai.ts @@ -1,104 +1,43 @@ -import type { ModelProvider, ProviderEvent, SendRequest } from './types.js'; -import { toOpenAiMessages, toOpenAiTools, parseSse } from './openai-compatible.js'; +import type { SendRequest } from './types.js'; +import { + OpenAiCompatibleProvider, + type OpenAiCompatibleOptions, + type OpenAiMessage, +} from './openai-compatible.js'; -export interface OpenAIProviderOptions { +/** OpenAI's hosted Chat Completions endpoint. 
*/ +export const DEFAULT_OPENAI_URL = 'https://api.openai.com/v1'; + +export interface OpenAIProviderOptions extends Omit<OpenAiCompatibleOptions, 'baseUrl'> { apiKey: string; - model: string; - /** Cap on tokens to generate per response. Omitted from the request if unset. */ - maxTokens?: number; - /** Override the base URL, e.g. for Azure OpenAI or a compatible proxy. Defaults to https://api.openai.com/v1. */ + /** Override the base URL, e.g. for Azure OpenAI or a compatible proxy. Defaults to {@link DEFAULT_OPENAI_URL}. */ baseUrl?: string | undefined; } -export class OpenAIProvider implements ModelProvider { +/** + * OpenAI's hosted models (GPT-4.1, o3, o4-mini, …) over the OpenAI-compatible + * Chat Completions API. Extends the shared base — same streaming, tool-call + * accumulation, and idle-timeout guard — and differs only in two ways: it sends + * `max_completion_tokens` (the hosted API rejects `max_tokens` on newer/reasoning + * models) and labels its errors "OpenAI". + */ +export class OpenAIProvider extends OpenAiCompatibleProvider { readonly name = 'openai' as const; - readonly model: string; - private readonly baseUrl: string; - private readonly apiKey: string; - private readonly maxTokens: number | undefined; constructor(opts: OpenAIProviderOptions) { - this.baseUrl = (opts.baseUrl ?? 'https://api.openai.com/v1').replace(/\/$/, ''); - this.model = opts.model; - this.apiKey = opts.apiKey; - this.maxTokens = opts.maxTokens; + super({ ...opts, baseUrl: opts.baseUrl ?? DEFAULT_OPENAI_URL }); } - async *send(req: SendRequest): AsyncIterable<ProviderEvent> { - const messages = [ - { role: 'system' as const, content: req.system }, - ...toOpenAiMessages(req.messages), - ]; - - const body: Record<string, unknown> = { - model: this.model, - messages, - tools: req.tools.length > 0 ? 
toOpenAiTools(req.tools) : undefined, - stream: true, - stream_options: { include_usage: true }, - }; + protected override buildBody(messages: OpenAiMessage[], req: SendRequest): Record<string, unknown> { + const body = super.buildBody(messages, req); + // The hosted API uses `max_completion_tokens`; `max_tokens` is rejected on + // newer/reasoning models. Swap the field the base set. + delete body.max_tokens; if (this.maxTokens !== undefined) body.max_completion_tokens = this.maxTokens; + return body; + } - let res: Response; - try { - res = await fetch(`${this.baseUrl}/chat/completions`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${this.apiKey}`, - }, - body: JSON.stringify(body), - }); - } catch (err) { - throw new Error(`Cannot reach OpenAI at ${this.baseUrl}: ${(err as Error).message}`); - } - - if (!res.ok || !res.body) { - const detail = await res.text().catch(() => ''); - throw new Error(`OpenAI request failed (${res.status}): ${detail.slice(0, 200)}`); - } - - // Accumulate tool calls by their streamed index; arguments arrive in fragments. - const calls = new Map(); - let usage = { inputTokens: 0, outputTokens: 0 }; - let finish = 'stop'; - - for await (const chunk of parseSse(res.body)) { - const choice = chunk.choices?.[0]; - if (choice?.delta?.content) yield { type: 'text', delta: choice.delta.content }; - - for (const tc of choice?.delta?.tool_calls ?? []) { - const acc = calls.get(tc.index) ?? { id: '', name: '', args: '' }; - if (tc.id) acc.id = tc.id; - if (tc.function?.name) acc.name = tc.function.name; - if (tc.function?.arguments) acc.args += tc.function.arguments; - calls.set(tc.index, acc); - } - - if (choice?.finish_reason) finish = choice.finish_reason; - if (chunk.usage) { - usage = { - inputTokens: chunk.usage.prompt_tokens ?? 0, - outputTokens: chunk.usage.completion_tokens ?? 
0, - }; - } - } - - for (const [index, c] of [...calls.entries()].sort((a, b) => a[0] - b[0])) { - let input: unknown = {}; - try { - input = c.args.trim() ? JSON.parse(c.args) : {}; - } catch { - // Malformed JSON from the model; degrade gracefully. - input = {}; - } - yield { type: 'tool_call', id: c.id || `openai-call-${index}`, name: c.name, input }; - } - - yield { - type: 'done', - usage, - stopReason: calls.size > 0 ? 'tool_use' : finish, - }; + protected override label(): string { + return 'OpenAI'; } } From e5e132b7dc291a4bba4bb12fdd3999df82aea7ab Mon Sep 17 00:00:00 2001 From: Zack Anselm Date: Wed, 10 Jun 2026 09:25:51 -0500 Subject: [PATCH 4/5] release: v0.3.0 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3935389..c119ce2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@therr/tiny-code", - "version": "0.2.3", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@therr/tiny-code", - "version": "0.2.3", + "version": "0.3.0", "license": "SEE LICENSE IN LICENSE", "dependencies": { "@anthropic-ai/sdk": "^0.69.0", diff --git a/package.json b/package.json index 89ccb9e..5ea7f26 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@therr/tiny-code", - "version": "0.2.3", + "version": "0.3.0", "description": "A small, extensible CLI coding agent with interchangeable Anthropic, Gemini, OpenAI, DeepSeek, Qwen, and local (Ollama) models.", "type": "module", "bin": { From fb424176f31cd538744617a676c7fe6742acc628 Mon Sep 17 00:00:00 2001 From: Zack Anselm Date: Wed, 10 Jun 2026 09:26:29 -0500 Subject: [PATCH 5/5] release: v0.4.0 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index c119ce2..059c38d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 
+1,12 @@ { "name": "@therr/tiny-code", - "version": "0.3.0", + "version": "0.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@therr/tiny-code", - "version": "0.3.0", + "version": "0.4.0", "license": "SEE LICENSE IN LICENSE", "dependencies": { "@anthropic-ai/sdk": "^0.69.0", diff --git a/package.json b/package.json index 5ea7f26..a7da5db 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@therr/tiny-code", - "version": "0.3.0", + "version": "0.4.0", "description": "A small, extensible CLI coding agent with interchangeable Anthropic, Gemini, OpenAI, DeepSeek, Qwen, and local (Ollama) models.", "type": "module", "bin": {