rili-live · rililive · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.env.example b/.env.example
@@ -1,15 +1,17 @@
 # Provide at least one for cloud providers. If several are present, the default
-# is the first available in this order: Anthropic, Gemini, DeepSeek, Qwen.
+# is the first available in this order: Anthropic, Gemini, OpenAI, DeepSeek, Qwen.
 # Ollama runs locally and needs no key.
 ANTHROPIC_API_KEY=
 GEMINI_API_KEY=
+OPENAI_API_KEY=
 DEEPSEEK_API_KEY=
 QWEN_API_KEY=          # Alibaba DashScope key (DASHSCOPE_API_KEY also accepted)
 
 # Optional overrides (also settable via config file / CLI flags)
-# TINY_CODE_PROVIDER=anthropic   # anthropic | gemini | ollama | deepseek | qwen
+# TINY_CODE_PROVIDER=anthropic   # anthropic | gemini | ollama | openai | deepseek | qwen
 # TINY_CODE_MODEL=claude-opus-4-8
 # TINY_CODE_OLLAMA_URL=http://localhost:11434/v1   # Ollama OpenAI-compatible endpoint
+# TINY_CODE_OPENAI_URL=https://api.openai.com/v1   # Override OpenAI base URL (e.g. for Azure)
 # TINY_CODE_DEEPSEEK_URL=https://api.deepseek.com/v1
 # TINY_CODE_QWEN_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 # TINY_CODE_PRIORITY=balanced   # performance | cost | balanced (default) — auto-picks a model when none is pinned

diff --git a/README.md b/README.md
@@ -3,15 +3,15 @@
 A small, extensible CLI coding agent built around one constraint: **keep token
 usage low**. As coding-agent costs climb, tiny-code automates the savings so
 you don't have to. Interactive terminal REPL, interchangeable **Anthropic**,
-**Gemini**, **DeepSeek**, **Qwen Coder**, and **local (Ollama)** models, and just
-the core features you actually use: read/write/edit files, run shell commands,
+**Gemini**, **OpenAI**, **DeepSeek**, **Qwen Coder**, and **local (Ollama)** models,
+and just the core features you actually use: read/write/edit files, run shell commands,
 search code, and a custom commands/skills system. No business logic baked in.
 
 Run cheap, open-weight models locally and **escalate heavy work to a frontier
 model only when needed** — see [Local models & cost-aware routing](#local-models--cost-aware-routing).
 
 > Status: early (v0.x). Published as `@therr/tiny-code`; the binary is
-> `tiny-code`. Names may change before the first npm publish.
+> `tiny-code`. APIs and config may still change between minor versions.
 
 ## Install
 
@@ -30,25 +30,28 @@ node dist/cli.js
 ## Setup
 
 Provide at least one API key. If several are set, the default is the first
-available in this order: Anthropic, Gemini, DeepSeek, Qwen.
+available in this order: Anthropic, Gemini, OpenAI, DeepSeek, Qwen.
 
 ```bash
 export ANTHROPIC_API_KEY=sk-ant-...
 export GEMINI_API_KEY=...
+export OPENAI_API_KEY=sk-...
 export DEEPSEEK_API_KEY=sk-...
 export QWEN_API_KEY=sk-...        # Alibaba DashScope key (DASHSCOPE_API_KEY also works)
 ```
 
-DeepSeek and Qwen are hosted, OpenAI-compatible coding models. Override their
-endpoints with `TINY_CODE_DEEPSEEK_URL` / `TINY_CODE_QWEN_URL` (or `deepseekBaseUrl`
-/ `qwenBaseUrl` in config) — e.g. to point Qwen at the international DashScope host.
+OpenAI, DeepSeek, and Qwen are hosted models served over OpenAI-compatible APIs.
+Override their endpoints with `TINY_CODE_OPENAI_URL`, `TINY_CODE_DEEPSEEK_URL`, or
+`TINY_CODE_QWEN_URL` (or `openaiBaseUrl` / `deepseekBaseUrl` / `qwenBaseUrl` in config)
+— e.g. to use Azure OpenAI, or to point Qwen at the international DashScope host.
 
 ## Usage
 
 ```bash
 tiny-code                       # start the REPL (uses an available key)
 tiny-code --provider gemini     # force a provider
 tiny-code --model claude-opus-4-8
+tiny-code --provider openai --model gpt-4.1                # OpenAI (also o3, o4-mini, …)
 tiny-code --provider deepseek --model deepseek-v4-pro     # DeepSeek's coding model
 tiny-code --provider qwen --model qwen3-coder-plus        # Qwen Coder
 tiny-code --provider ollama --model gemma3:12b   # run a local model (no API cost)

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@therr/tiny-code",
-  "version": "0.3.0",
-  "description": "A small, extensible CLI coding agent with interchangeable Anthropic and Gemini models.",
+  "version": "0.4.0",
+  "description": "A small, extensible CLI coding agent with interchangeable Anthropic, Gemini, OpenAI, DeepSeek, Qwen, and local (Ollama) models.",
   "type": "module",
   "bin": {
     "tiny-code": "dist/cli.js"
@@ -54,6 +54,11 @@
     "anthropic",
     "claude",
     "gemini",
+    "openai",
+    "deepseek",
+    "qwen",
+    "ollama",
+    "local-llm",
     "llm"
   ],
   "license": "SEE LICENSE IN LICENSE",

diff --git a/src/cli.ts b/src/cli.ts
@@ -12,7 +12,7 @@ Usage:
   tiny-code [options]
 
 Options:
-  --provider <name>   anthropic | gemini | ollama | deepseek | qwen
+  --provider <name>   anthropic | gemini | ollama | openai | deepseek | qwen
                       (default: inferred from API keys)
   --model <id>        Model id override (e.g. claude-opus-4-8, qwen3-coder-plus)
   --config <path>     Path to a config JSON file
@@ -22,6 +22,7 @@ Options:
 Environment:
   ANTHROPIC_API_KEY    Required for the Anthropic provider
   GEMINI_API_KEY       Required for the Gemini provider
+  OPENAI_API_KEY       Required for the OpenAI provider
   DEEPSEEK_API_KEY     Required for the DeepSeek provider
   QWEN_API_KEY         Required for the Qwen provider (or DASHSCOPE_API_KEY)
   TINY_CODE_OLLAMA_URL Ollama OpenAI-compatible base URL (default http://localhost:11434/v1)

diff --git a/src/config/load.ts b/src/config/load.ts
@@ -5,7 +5,7 @@ import { z } from 'zod';
 import type { Priority } from '../models/catalog.js';
 import { recommendModel } from '../models/catalog.js';
 
-export type Provider = 'anthropic' | 'gemini' | 'ollama' | 'deepseek' | 'qwen';
+export type Provider = 'anthropic' | 'gemini' | 'ollama' | 'openai' | 'deepseek' | 'qwen';
 export type Effort = 'low' | 'medium' | 'high' | 'xhigh' | 'max';
 export type Routing = 'local-first' | 'off';
 export type { Priority } from '../models/catalog.js';
@@ -37,10 +37,13 @@ export interface ResolvedConfig {
   priority: Priority;
   anthropicApiKey: string | undefined;
   geminiApiKey: string | undefined;
+  openaiApiKey: string | undefined;
   deepseekApiKey: string | undefined;
   qwenApiKey: string | undefined;
   /** OpenAI-compatible base URL for the Ollama provider. */
   ollamaBaseUrl: string;
+  /** Override for the OpenAI API endpoint; when undefined the provider uses https://api.openai.com/v1. */
+  openaiBaseUrl: string | undefined;
   /** Override for the DeepSeek API endpoint (defaults to DeepSeek's hosted URL). */
   deepseekBaseUrl: string | undefined;
   /** Override for the Qwen/DashScope API endpoint (defaults to DashScope's URL). */
@@ -78,13 +81,14 @@ const DEFAULT_MODELS: Record<Provider, string> = {
   anthropic: 'claude-opus-4-8',
   gemini: 'gemini-2.5-pro',
   ollama: 'qwen2.5-coder:7b',
+  openai: 'gpt-4.1',
   deepseek: 'deepseek-v4-pro',
   qwen: 'qwen3-coder-plus',
 };
 
 const DEFAULT_OLLAMA_URL = 'http://localhost:11434/v1';
 
-const PROVIDERS = ['anthropic', 'gemini', 'ollama', 'deepseek', 'qwen'] as const;
+const PROVIDERS = ['anthropic', 'gemini', 'ollama', 'openai', 'deepseek', 'qwen'] as const;
 const PRIORITIES = ['performance', 'cost', 'balanced'] as const;
 
 /**
@@ -118,6 +122,7 @@ const FileConfigSchema = z
     provider: z.enum(PROVIDERS).optional(),
     model: z.string().optional(),
     ollamaBaseUrl: z.string().url().optional(),
+    openaiBaseUrl: z.string().url().optional(),
     deepseekBaseUrl: z.string().url().optional(),
     qwenBaseUrl: z.string().url().optional(),
     priority: z.enum(['performance', 'cost', 'balanced']).optional(),
@@ -168,6 +173,7 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c
   const env = process.env;
   const anthropicApiKey = env.ANTHROPIC_API_KEY || undefined;
   const geminiApiKey = env.GEMINI_API_KEY || undefined;
+  const openaiApiKey = env.OPENAI_API_KEY || undefined;
   const deepseekApiKey = env.DEEPSEEK_API_KEY || undefined;
   const qwenApiKey = env.QWEN_API_KEY || env.DASHSCOPE_API_KEY || undefined;
 
@@ -179,11 +185,13 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c
       ? 'anthropic'
       : geminiApiKey
         ? 'gemini'
-        : deepseekApiKey
-          ? 'deepseek'
-          : qwenApiKey
-            ? 'qwen'
-            : 'anthropic');
+        : openaiApiKey
+          ? 'openai'
+          : deepseekApiKey
+            ? 'deepseek'
+            : qwenApiKey
+              ? 'qwen'
+              : 'anthropic');
 
   const priority: Priority =
     readEnvEnum('TINY_CODE_PRIORITY', env.TINY_CODE_PRIORITY, PRIORITIES) ?? file.priority ?? 'balanced';
@@ -204,6 +212,7 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c
   const effort = (env.TINY_CODE_EFFORT as Effort | undefined) ?? file.effort ?? 'high';
 
   const ollamaBaseUrl = env.TINY_CODE_OLLAMA_URL ?? file.ollamaBaseUrl ?? DEFAULT_OLLAMA_URL;
+  const openaiBaseUrl = env.TINY_CODE_OPENAI_URL ?? file.openaiBaseUrl ?? undefined;
   const deepseekBaseUrl = env.TINY_CODE_DEEPSEEK_URL ?? file.deepseekBaseUrl;
   const qwenBaseUrl = env.TINY_CODE_QWEN_URL ?? file.qwenBaseUrl;
 
@@ -223,9 +232,11 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c
     priority,
     anthropicApiKey,
     geminiApiKey,
+    openaiApiKey,
     deepseekApiKey,
     qwenApiKey,
     ollamaBaseUrl,
+    openaiBaseUrl,
     deepseekBaseUrl,
     qwenBaseUrl,
     maxTokens,

diff --git a/src/models/catalog.ts b/src/models/catalog.ts
@@ -53,6 +53,15 @@ export const MODEL_CATALOG: ModelInfo[] = [
   { id: 'gemini-2.5-flash', provider: 'gemini', label: 'Gemini 2.5 Flash', inputPricePerMTok: 0.3, outputPricePerMTok: 2.5, contextWindow: 1_048_576, codingScore: 72 },
   { id: 'gemini-2.5-flash-lite', provider: 'gemini', label: 'Gemini 2.5 Flash-Lite', inputPricePerMTok: 0.1, outputPricePerMTok: 0.4, contextWindow: 1_048_576, codingScore: 55 },
 
+  // OpenAI — pricing from OpenAI's published API rates (June 2026).
+  { id: 'o3', provider: 'openai', label: 'OpenAI o3', inputPricePerMTok: 2, outputPricePerMTok: 8, contextWindow: 200_000, codingScore: 94 },
+  { id: 'gpt-4.1', provider: 'openai', label: 'GPT-4.1', inputPricePerMTok: 2, outputPricePerMTok: 8, contextWindow: 1_000_000, codingScore: 88 },
+  { id: 'o4-mini', provider: 'openai', label: 'OpenAI o4-mini', inputPricePerMTok: 1.1, outputPricePerMTok: 4.4, contextWindow: 200_000, codingScore: 85 },
+  { id: 'gpt-4o', provider: 'openai', label: 'GPT-4o', inputPricePerMTok: 2.5, outputPricePerMTok: 10, contextWindow: 128_000, codingScore: 82 },
+  { id: 'gpt-4.1-mini', provider: 'openai', label: 'GPT-4.1 Mini', inputPricePerMTok: 0.4, outputPricePerMTok: 1.6, contextWindow: 1_000_000, codingScore: 72 },
+  { id: 'gpt-4o-mini', provider: 'openai', label: 'GPT-4o Mini', inputPricePerMTok: 0.15, outputPricePerMTok: 0.6, contextWindow: 128_000, codingScore: 65 },
+  { id: 'gpt-4.1-nano', provider: 'openai', label: 'GPT-4.1 Nano', inputPricePerMTok: 0.1, outputPricePerMTok: 0.4, contextWindow: 1_000_000, codingScore: 50 },
+
   // DeepSeek — DeepSeek API (cache-miss) pricing. The V4 family carries DeepSeek's
   // coding capability; the legacy "deepseek-coder" model is retired.
   { id: 'deepseek-v4-pro', provider: 'deepseek', label: 'DeepSeek V4 Pro', inputPricePerMTok: 1.74, outputPricePerMTok: 3.48, contextWindow: 1_048_576, codingScore: 91 },

diff --git a/src/providers/index.ts b/src/providers/index.ts
@@ -3,13 +3,15 @@ import type { ResolvedConfig } from '../config/load.js';
 import { AnthropicProvider } from './anthropic.js';
 import { GeminiProvider } from './gemini.js';
 import { OllamaProvider } from './ollama.js';
+import { OpenAIProvider } from './openai.js';
 import { DeepSeekProvider } from './deepseek.js';
 import { QwenProvider } from './qwen.js';
 
 export type { ModelProvider, ProviderEvent, SendRequest, ToolSchema, Usage } from './types.js';
 export { AnthropicProvider } from './anthropic.js';
 export { GeminiProvider } from './gemini.js';
 export { OllamaProvider } from './ollama.js';
+export { OpenAIProvider } from './openai.js';
 export { DeepSeekProvider } from './deepseek.js';
 export { QwenProvider } from './qwen.js';
 export { OpenAiCompatibleProvider } from './openai-compatible.js';
@@ -38,6 +40,18 @@ export function createProvider(config: ResolvedConfig): ModelProvider {
     });
   }
 
+  if (config.provider === 'openai') {
+    if (!config.openaiApiKey) {
+      throw new Error('OPENAI_API_KEY is not set. Export it or switch providers with --provider anthropic.');
+    }
+    return new OpenAIProvider({
+      apiKey: config.openaiApiKey,
+      model: config.model,
+      maxTokens: config.maxTokens,
+      baseUrl: config.openaiBaseUrl,
+    });
+  }
+
   if (config.provider === 'deepseek') {
     if (!config.deepseekApiKey) {
       throw new Error('DEEPSEEK_API_KEY is not set. Export it or switch providers with --provider anthropic.');

diff --git a/src/providers/openai-compatible.ts b/src/providers/openai-compatible.ts
@@ -17,7 +17,7 @@ export interface OpenAiCompatibleOptions {
   timeoutMs?: number;
 }
 
-interface OpenAiMessage {
+export interface OpenAiMessage {
   role: 'system' | 'user' | 'assistant' | 'tool';
   content: string;
   tool_calls?: { id: string; type: 'function'; function: { name: string; arguments: string } }[];
@@ -126,13 +126,7 @@ export abstract class OpenAiCompatibleProvider implements ModelProvider {
       ...toOpenAiMessages(req.messages),
     ];
 
-    const body = {
-      model: this.model,
-      messages,
-      tools: req.tools.length > 0 ? toOpenAiTools(req.tools) : undefined,
-      stream: true,
-      max_tokens: this.maxTokens,
-    };
+    const body = this.buildBody(messages, req);
 
     // Idle-timeout guard: abort if the server goes silent for `timeoutMs`. The
     // raw fetch (unlike the cloud SDKs) has no built-in timeout, so without this
@@ -216,6 +210,22 @@ export abstract class OpenAiCompatibleProvider implements ModelProvider {
     }
   }
 
+  /**
+   * Build the `/chat/completions` request body. Subclasses override to adjust
+   * provider-specific fields — e.g. OpenAI's newer/reasoning models reject
+   * `max_tokens` and take `max_completion_tokens` instead. `stream_options` is
+   * added by {@link send} (with a no-`stream_options` retry), so it isn't set here.
+   */
+  protected buildBody(messages: OpenAiMessage[], req: SendRequest): Record<string, unknown> {
+    return {
+      model: this.model,
+      messages,
+      tools: req.tools.length > 0 ? toOpenAiTools(req.tools) : undefined,
+      stream: true,
+      max_tokens: this.maxTokens,
+    };
+  }
+
   /** Human-readable provider name used in error messages. */
   protected label(): string {
     return this.name;

diff --git a/src/providers/openai.ts b/src/providers/openai.ts
@@ -0,0 +1,43 @@
+import type { SendRequest } from './types.js';
+import {
+  OpenAiCompatibleProvider,
+  type OpenAiCompatibleOptions,
+  type OpenAiMessage,
+} from './openai-compatible.js';
+
+/** Default base URL of OpenAI's hosted API, used when no override is supplied. */
+export const DEFAULT_OPENAI_URL = 'https://api.openai.com/v1';
+
+export interface OpenAIProviderOptions extends Omit<OpenAiCompatibleOptions, 'baseUrl'> {
+  apiKey: string;
+  /** Override the base URL, e.g. for Azure OpenAI or a compatible proxy. Defaults to {@link DEFAULT_OPENAI_URL}. */
+  baseUrl?: string | undefined;
+}
+
+/**
+ * OpenAI's hosted models (GPT-4.1, o3, o4-mini, …) over the OpenAI-compatible
+ * Chat Completions API. Extends the shared base — same streaming, tool-call
+ * accumulation, and idle-timeout guard — and differs in three ways: it defaults
+ * `baseUrl` to {@link DEFAULT_OPENAI_URL}, sends `max_completion_tokens` (newer/
+ * reasoning models reject `max_tokens`), and labels its errors "OpenAI".
+ */
+export class OpenAIProvider extends OpenAiCompatibleProvider {
+  readonly name = 'openai' as const;
+
+  constructor(opts: OpenAIProviderOptions) {
+    super({ ...opts, baseUrl: opts.baseUrl ?? DEFAULT_OPENAI_URL });
+  }
+
+  protected override buildBody(messages: OpenAiMessage[], req: SendRequest): Record<string, unknown> {
+    const body = super.buildBody(messages, req);
+    // The hosted API uses `max_completion_tokens`; `max_tokens` is rejected on
+    // newer/reasoning models. Swap the field the base set.
+    delete body.max_tokens;
+    if (this.maxTokens !== undefined) body.max_completion_tokens = this.maxTokens;
+    return body;
+  }
+
+  protected override label(): string {
+    return 'OpenAI';
+  }
+}
diff --git a/src/providers/types.ts b/src/providers/types.ts
@@ -34,7 +34,7 @@ export interface SendRequest {
  * {@link ProviderEvent}.
  */
 export interface ModelProvider {
-  readonly name: 'anthropic' | 'gemini' | 'ollama' | 'deepseek' | 'qwen';
+  readonly name: 'anthropic' | 'gemini' | 'ollama' | 'openai' | 'deepseek' | 'qwen';
   readonly model: string;
   send(req: SendRequest): AsyncIterable<ProviderEvent>;
 }