web/src/aiassistant/controller.ts at develop · DataLab-Platform/web · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
/**
 * Conversation controller orchestrating the LLM tool-call loop.
 *
 * Mirrors ``DataLab/datalab/aiassistant/controller.py``:
 *
 * 1. Append the user message to the history.
 * 2. Send the history + tool schemas to the provider.
 * 3. If the assistant emits tool calls, ask the host to confirm each
 *    mutating call, run them, append the results to the history, loop.
 * 4. Stop when the assistant returns plain prose with no tool calls
 *    (or when ``MAX_ITERATIONS`` is reached as a safety net).
 *
 * The controller is GUI-agnostic: it talks to the UI through a
 * :type:`ControllerListener` and a :type:`ConfirmToolCallback`.
 */

import type { RuntimeApi } from "../runtime/runtime";
import { REMOTE_MODEL_CHANGED_EVENT } from "../runtime/remoteBridge";
import { chatCompletions } from "./provider";
import { callTool, indexTools } from "./tools";
import type {
  ChatMessage,
  ConfirmToolCallback,
  ControllerListener,
  ProviderSettings,
  TokenUsage,
  Tool,
  ToolCall,
  ToolResult,
} from "./types";

/** Hard cap on the tool-call loop to avoid runaway iterations when a
 *  badly-tuned model keeps requesting tools. One iteration is one provider
 *  request plus the execution of the tool calls it returned; when the cap
 *  is reached without a tool-free answer the loop reports an error through
 *  ``listener.onError`` and throws it. */
const MAX_ITERATIONS = 10;

/** Fallback ``onModelChanged`` handler used when the host does not inject
 *  one through :type:`ControllerOptions`. It announces the change by
 *  dispatching a ``CustomEvent`` named ``REMOTE_MODEL_CHANGED_EVENT`` on
 *  ``window`` with ``detail: { panel }``, so DOM listeners (the React App)
 *  can refresh their view of the workspace.
 *
 *  Outside a DOM (``window`` undefined, e.g. unit tests) the call does
 *  nothing, and any exception raised while building or dispatching the
 *  event is swallowed on purpose: the refresh is best-effort. */
export function dispatchRemoteModelChanged(panel: string | null): void {
  const hasDom = typeof window !== "undefined";
  if (hasDom) {
    try {
      const event = new CustomEvent(REMOTE_MODEL_CHANGED_EVENT, {
        detail: { panel },
      });
      window.dispatchEvent(event);
    } catch {
      // Best-effort notification: a failing dispatch must never break the
      // caller, so the refresh is simply skipped.
    }
  }
}

/** Error raised when :meth:`AIController.abort` interrupts the running
 *  loop. Its ``name`` is ``"AbortError"`` — the same name carried by the
 *  browser's ``DOMException`` that ``fetch`` raises when an abort signal
 *  fires — so callers can recognise both with a single
 *  ``error.name === "AbortError"`` check. */
export class AbortError extends Error {
  /** Discriminator shared with the platform's own abort exceptions. */
  override name = "AbortError";

  /** @param message Human-readable reason; defaults to a generic
   *  "aborted by user" text. */
  constructor(message = "Aborted by user.") {
    super(message);
  }
}

/** Tell whether *err* represents an abort: any non-null object exposing a
 *  ``name`` property (own or inherited) equal to ``"AbortError"``. That
 *  covers both our :class:`AbortError` and the ``AbortError`` the browser
 *  raises from ``fetch``. Primitives, ``null``/``undefined`` and functions
 *  are never considered aborts. */
export function isAbortError(err: unknown): boolean {
  if (typeof err !== "object" || err === null) {
    return false;
  }
  if (!("name" in err)) {
    return false;
  }
  return (err as { name: unknown }).name === "AbortError";
}

/** Construction-time dependencies of :class:`AIController`. */
export interface ControllerOptions {
  /** Runtime handle forwarded to every tool invocation. */
  runtime: RuntimeApi;
  /** Tools advertised to the provider on each request and looked up by
   *  name when the assistant asks to call one. */
  tools: Tool[];
  /** Content of the ``role:"system"`` message that opens the history. */
  systemPrompt: string;
  /** Resolved at request time, so the user can edit settings between
   *  turns and the next call picks them up immediately. */
  getSettings: () => ProviderSettings;
  /** Asked before running any tool that is not ``readonly``; a falsy
   *  answer turns the call into a "Rejected by user." tool result. */
  confirmTool: ConfirmToolCallback;
  /** Optional observer of the conversation (appended messages, streamed
   *  deltas, tool start/finish, usage, errors). When omitted, no
   *  notifications are sent. */
  listener?: ControllerListener;
  /** Invoked after a mutating tool succeeds so the host can refresh its
   *  view of the workspace. Defaults to :func:`dispatchRemoteModelChanged`
   *  (a DOM ``CustomEvent`` broadcast); inject a callback to keep the
   *  controller free of any DOM dependency. */
  onModelChanged?: (panel: string | null) => void;
}

/**
 * Owns one conversation transcript and drives the provider/tool loop for it.
 *
 * The first history entry is always the system prompt; everything after it
 * is the conversation proper. The class never touches the UI directly: it
 * reports progress through the optional :type:`ControllerListener` and asks
 * for permission through the :type:`ConfirmToolCallback`.
 *
 * Invariant maintained by this class: every assistant message carrying
 * ``tool_calls`` is followed by one ``role:"tool"`` message per call, even
 * when the turn is aborted or a tool call throws.
 */
export class AIController {
  private readonly runtime: RuntimeApi;
  private readonly tools: Tool[];
  private readonly toolIndex: Map<string, Tool>;
  private readonly getSettings: () => ProviderSettings;
  private readonly confirmTool: ConfirmToolCallback;
  private readonly listener: ControllerListener;
  private readonly onModelChanged: (panel: string | null) => void;
  private readonly history: ChatMessage[];
  /** Set while :meth:`runLoop` is in flight; ``null`` otherwise. */
  private abortController: AbortController | null = null;
  /** Running token-usage total across the conversation. Reset on
   *  :meth:`reset` and :meth:`setMessages`. */
  private cumulativeUsage: TokenUsage = {};
  /** Tool calls that already received their ``role:"tool"`` reply. Lets
   *  the failure path add only the replies that are still missing. */
  private readonly answeredCalls = new WeakSet<ToolCall>();

  constructor(opts: ControllerOptions) {
    this.runtime = opts.runtime;
    this.tools = opts.tools;
    this.toolIndex = indexTools(opts.tools);
    this.getSettings = opts.getSettings;
    this.confirmTool = opts.confirmTool;
    this.listener = opts.listener ?? {};
    this.onModelChanged = opts.onModelChanged ?? dispatchRemoteModelChanged;
    this.history = [{ role: "system", content: opts.systemPrompt }];
  }

  /** Snapshot of the current transcript (system message included). The
   *  array is a copy; the message objects themselves are shared. */
  getHistory(): ChatMessage[] {
    return [...this.history];
  }

  /** Conversation messages, excluding the system prompt — suitable for
   *  persistence (the system prompt is recomputed at load time so any
   *  prompt change between sessions is picked up). */
  getMessages(): ChatMessage[] {
    return this.history.slice(1);
  }

  /** Replace the current history with *messages* (system prompt kept).
   *  Used when restoring a persisted conversation. Any ``system`` message
   *  found in *messages* is skipped so the live prompt stays the only one.
   *  *initialUsage* seeds the cumulative counter so a restored conversation
   *  displays the same total it had when it was last saved. */
  setMessages(messages: ChatMessage[], initialUsage?: TokenUsage): void {
    this.history.splice(1);
    for (const msg of messages) {
      if (msg.role === "system") continue;
      this.history.push(msg);
    }
    this.cumulativeUsage = initialUsage ? { ...initialUsage } : {};
  }

  /** Drop every message except the system prompt and zero the usage
   *  counter. */
  reset(): void {
    this.history.splice(1);
    this.cumulativeUsage = {};
  }

  /** Cancel the in-flight ``sendUserMessage`` call (if any). The pending
   *  promise rejects with :class:`AbortError`. Idempotent — a no-op when
   *  the controller is idle. */
  abort(): void {
    this.abortController?.abort();
  }

  /** True while a ``sendUserMessage`` call is in flight. */
  get isRunning(): boolean {
    return this.abortController !== null;
  }

  /** Snapshot of the cumulative token usage across the conversation. */
  getUsage(): TokenUsage {
    return { ...this.cumulativeUsage };
  }

  /** Return the (possibly truncated) message window sent to the
   *  provider on the next request. Mirrors DataLab Qt's
   *  ``_messages_for_provider``: when *cap* (``maxHistoryMessages``) is
   *  positive, only the latest *cap* non-system messages are kept, then
   *  leading messages are trimmed so the window always starts on a
   *  ``user`` message — this avoids leaving an orphan ``tool`` reply
   *  or an assistant ``tool_calls`` whose responses were dropped,
   *  which most providers reject. If trimming empties the window, it
   *  falls back to everything from the most recent ``user`` message on
   *  (which may exceed *cap*). A non-positive *cap* disables truncation. */
  messagesForProvider(cap: number): ChatMessage[] {
    if (cap <= 0) return [...this.history];
    const system = this.history[0];
    const nonSystem = this.history.slice(1);
    const tail = nonSystem.slice(-cap);
    const firstUser = tail.findIndex((msg) => msg.role === "user");
    let recent = firstUser === -1 ? [] : tail.slice(firstUser);
    if (recent.length === 0) {
      // No user message inside the cap: widen the window back to the
      // latest user turn so the provider still sees a well-formed exchange.
      for (let idx = nonSystem.length - 1; idx >= 0; idx -= 1) {
        if (nonSystem[idx].role === "user") {
          recent = nonSystem.slice(idx);
          break;
        }
      }
    }
    return system ? [system, ...recent] : recent;
  }

  /** Push the user message, drive the tool-call loop, return the final
   *  assistant prose. May be ``null`` if the assistant only emitted
   *  tool calls and produced no closing message — rare but possible.
   *
   *  Rejects with :class:`AbortError` when :meth:`abort` is called, with a
   *  plain ``Error`` when ``MAX_ITERATIONS`` is exceeded, and with whatever
   *  the provider, a tool or a host callback throws. */
  async sendUserMessage(text: string): Promise<string | null> {
    const userMessage: ChatMessage = { role: "user", content: text };
    this.appendMessage(userMessage);
    return this.runLoop();
  }

  // ------------------------------------------------------------------
  // Internals
  // ------------------------------------------------------------------

  /** Add *message* to the history, then notify the listener. The push
   *  happens first, so the transcript is updated even if the listener
   *  throws. */
  private appendMessage(message: ChatMessage): void {
    this.history.push(message);
    this.listener.onMessageAppended?.(message);
  }

  /** Request/execute loop behind :meth:`sendUserMessage`: ask the provider,
   *  record its answer, run the requested tools, repeat until an answer
   *  arrives without tool calls or ``MAX_ITERATIONS`` is hit. */
  private async runLoop(): Promise<string | null> {
    const controller = new AbortController();
    this.abortController = controller;
    const signal = controller.signal;
    try {
      for (let iter = 0; iter < MAX_ITERATIONS; iter += 1) {
        if (signal.aborted) throw new AbortError();
        const settings = this.getSettings();
        let streamedText = "";
        // Only opt into streaming when the host actually listens for
        // deltas — otherwise the underlying provider stays on the
        // simpler non-streamed path (cheaper, and keeps unit tests that
        // stub the raw ``fetch`` response unaffected).
        const onDelta = this.listener.onAssistantDelta
          ? (delta: { contentDelta?: string }) => {
              if (delta.contentDelta) {
                streamedText += delta.contentDelta;
                this.listener.onAssistantDelta?.(
                  delta.contentDelta,
                  streamedText,
                );
              }
            }
          : undefined;
        const response = await chatCompletions(
          settings,
          this.messagesForProvider(settings.maxHistoryMessages ?? 0),
          this.tools,
          { signal, ...(onDelta ? { onDelta } : {}) },
        );
        const assistantMessage: ChatMessage = {
          role: "assistant",
          // OpenAI accepts ``content: null`` only when ``tool_calls`` is
          // also present. Some local servers (LM Studio, llama.cpp) emit
          // ``content: null`` even on plain prose turns — coerce to ""
          // so replaying the transcript against a strict provider works.
          content:
            response.content ?? (response.toolCalls.length > 0 ? null : ""),
          ...(response.toolCalls.length > 0
            ? { tool_calls: response.toolCalls }
            : {}),
        };
        this.appendMessage(assistantMessage);

        if (response.usage) {
          this.cumulativeUsage = sumUsage(this.cumulativeUsage, response.usage);
          this.listener.onUsage?.(response.usage, this.getUsage());
        }

        if (response.toolCalls.length === 0) {
          return response.content;
        }

        try {
          for (const call of response.toolCalls) {
            if (signal.aborted) {
              // Keep the transcript valid — every assistant ``tool_calls``
              // entry must have a matching ``role:"tool"`` response, even
              // when the user aborted before we could run the tool.
              this.appendToolResultMessage(call, {
                ok: false,
                error: "Aborted by user.",
              });
              continue;
            }
            await this.handleToolCall(call);
          }
        } catch (err) {
          // A confirm callback, tool or listener threw mid-batch. The
          // error still propagates, but the calls left without a reply
          // get one first so the next turn does not replay a transcript
          // with orphan ``tool_calls``.
          this.backfillToolResults(response.toolCalls, err);
          throw err;
        }
        if (signal.aborted) throw new AbortError();
      }
      const error = new Error(
        `Aborted: more than ${MAX_ITERATIONS} tool-call iterations without ` +
          `a final answer.`,
      );
      this.listener.onError?.(error);
      throw error;
    } finally {
      // Clear only our own controller: if another loop started in the
      // meantime, it owns the field and must stay abortable.
      if (this.abortController === controller) {
        this.abortController = null;
      }
    }
  }

  /** Execute one tool call end to end: resolve the tool, decode its
   *  arguments, ask for confirmation when the tool mutates state, run it
   *  and record the outcome in the transcript. Failures that the model can
   *  react to (unknown tool, bad arguments, user rejection) become
   *  ``ok:false`` tool results rather than exceptions. */
  private async handleToolCall(call: ToolCall): Promise<void> {
    this.listener.onToolStarted?.(call);
    const tool = this.toolIndex.get(call.function.name);
    if (!tool) {
      this.finishToolCall(call, {
        ok: false,
        error: `Unknown tool '${call.function.name}'.`,
      });
      return;
    }

    const parsed = AIController.parseToolArguments(call.function.arguments);
    if ("error" in parsed) {
      this.finishToolCall(call, { ok: false, error: parsed.error });
      return;
    }
    const args = parsed.args;

    if (!tool.readonly) {
      const approved = await this.confirmTool(tool, args);
      if (!approved) {
        this.finishToolCall(call, { ok: false, error: "Rejected by user." });
        return;
      }
    }

    const result: ToolResult = await callTool(tool, this.runtime, args);
    // Mutating tools changed the workspace — notify the host so the React
    // App refreshes the panel tree (parity with the remote-bridge fan-out
    // used by the macro proxy).
    if (!tool.readonly && result.ok) {
      this.onModelChanged(null);
    }
    this.appendToolResultMessage(call, result);
    // Multimodal tools (e.g. ``capture_view``) attach extra messages
    // that must be appended *after* the tool-result message — typically
    // a synthetic ``role:"user"`` message carrying an inline image so
    // vision models can see the captured plot in their next turn.
    if (result.followupMessages) {
      for (const msg of result.followupMessages) {
        this.appendMessage(msg);
      }
    }
    this.listener.onToolFinished?.(call, result);
  }

  /** Decode the JSON argument string of a tool call.
   *
   *  An empty/missing string and a literal ``null`` both mean "no
   *  arguments" and yield ``{}``. Anything that is not a JSON object
   *  (array, number, string, boolean) is reported as an error instead of
   *  being passed to the tool under a false ``Record`` type.
   *
   *  @returns ``{ args }`` on success, ``{ error }`` with a message meant
   *  for the model otherwise. */
  private static parseToolArguments(
    raw: string | null | undefined,
  ): { args: Record<string, unknown> } | { error: string } {
    if (!raw) return { args: {} };
    let value: unknown;
    try {
      value = JSON.parse(raw);
    } catch (err) {
      return {
        error:
          "Failed to parse tool arguments as JSON: " +
          (err instanceof Error ? err.message : String(err)),
      };
    }
    if (value === null) return { args: {} };
    if (typeof value !== "object" || Array.isArray(value)) {
      const got = Array.isArray(value) ? "array" : typeof value;
      return { error: `Tool arguments must be a JSON object, got ${got}.` };
    }
    return { args: value as Record<string, unknown> };
  }

  /** Record *result* as the reply to *call* and tell the listener the call
   *  is finished. Used by the paths that end a call without running it. */
  private finishToolCall(call: ToolCall, result: ToolResult): void {
    this.appendToolResultMessage(call, result);
    this.listener.onToolFinished?.(call, result);
  }

  /** After a failure in the middle of a batch, append an ``ok:false`` reply
   *  for every call of *calls* that has none yet. *cause* is the error being
   *  propagated; its message is included so the model knows what happened. */
  private backfillToolResults(
    calls: readonly ToolCall[],
    cause: unknown,
  ): void {
    const reason = cause instanceof Error ? cause.message : String(cause);
    for (const call of calls) {
      if (this.answeredCalls.has(call)) continue;
      try {
        this.appendToolResultMessage(call, {
          ok: false,
          error: `Tool call failed: ${reason}`,
        });
      } catch {
        // Ignored on purpose: the reply is pushed to the history before
        // the listener runs, and the caller must see the original error,
        // not a secondary listener failure.
      }
    }
  }

  /** Serialise *result* into the ``role:"tool"`` message answering *call*
   *  and append it to the transcript. Successful results are encoded as
   *  ``{ok:true, result}``, failures as ``{ok:false, error}``. */
  private appendToolResultMessage(call: ToolCall, result: ToolResult): void {
    let content: string;
    try {
      content = JSON.stringify(
        result.ok
          ? { ok: true, result: result.data ?? null }
          : { ok: false, error: result.error ?? "unknown error" },
      );
    } catch {
      // ``result.data`` was not JSON-serialisable (e.g. circular): fall
      // back to its string form so the model still gets an answer.
      content = JSON.stringify({
        ok: result.ok,
        error: result.ok ? undefined : (result.error ?? "unknown error"),
        result: result.ok ? String(result.data) : undefined,
      });
    }
    // Mark before appending: the history push cannot fail, whereas the
    // listener notification that follows it may throw.
    this.answeredCalls.add(call);
    this.appendMessage({
      role: "tool",
      tool_call_id: call.id,
      name: call.function.name,
      content,
    });
  }
}

/** Add two :type:`TokenUsage` records counter by counter and return the
 *  result as a new object (neither input is modified). A counter that is
 *  ``undefined`` on both sides is left out of the result, so the total
 *  never shows a zero for something the provider did not report; when only
 *  one side has a value, the missing side counts as 0. */
function sumUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
  const total: TokenUsage = {};
  const counters = ["promptTokens", "completionTokens", "totalTokens"] as const;
  for (const counter of counters) {
    const left = a[counter];
    const right = b[counter];
    if (left === undefined && right === undefined) {
      continue;
    }
    total[counter] = (left ?? 0) + (right ?? 0);
  }
  return total;
}