From 79a4f848eb2135a7404105c5b61503ecb83e2ef1 Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 16:22:45 +1000 Subject: [PATCH 01/10] feat(models): add getMaxOutputTokens() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaces a model's `maxOutputTokens` value from the cached models list, or NULL when unknown. Callers can use this to clamp request `maxTokens` to the destination model's actual cap — Bedrock rejects oversize values with a 400 (Nova Lite is 5000 tokens, Claude Sonnet 4.6 is 64K, etc.), so a model-aware clamp avoids opaque upstream failures. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Service/ModelsService.php | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/Service/ModelsService.php b/src/Service/ModelsService.php index 29c2c52..b078972 100644 --- a/src/Service/ModelsService.php +++ b/src/Service/ModelsService.php @@ -128,6 +128,45 @@ public function getModelDetails(string $model_id, bool $bypass_cache = FALSE): ? return NULL; } + /** + * Get the maximum output tokens supported by a given model. + * + * Used by the HTTP clients to clamp user-configured maxTokens down to the + * model's hard cap before sending a request upstream. Returning NULL means + * "unknown — do not clamp". + * + * @param string $model_id + * Model identifier. + * + * @return int|null + * Positive integer cap, or NULL when unknown. + */ + public function getMaxOutputTokens(string $model_id): ?int { + try { + $all_models = $this->getModels(); + } + catch (\Exception $e) { + // Defensive: getModels() already swallows API errors internally, but if + // the client itself is misconfigured (e.g. missing organisation ID at + // construct of the cache lookup), don't propagate — callers treat NULL + // as "unknown, pass through". + $this->logger->warning('Could not resolve max output tokens for @model: @message', [ + '@model' => $model_id, + '@message' => $e->getMessage(), + ]); + return NULL; + } + + foreach ($all_models as $model) { + if (($model['id'] ?? NULL) === $model_id) { + $cap = (int) ($model['maxOutputTokens'] ?? 0); + return $cap > 0 ? $cap : NULL; + } + } + + return NULL; + } + /** * Get models filtered by operation type (for Drupal AI compatibility). * From e7fc0ff8f561461b77485a7e96b8fdb1486f6b0d Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 16:24:57 +1000 Subject: [PATCH 02/10] fix(logging): include upstream response body on HTTP error Dashboard 4xx/5xx responses carry validation details in the body (e.g. "Model X does not support function calling"). Previously the error path only logged status code + reason phrase, leaving operators to guess at the actual cause. Now we capture the first 2KB of the response body so the failure is immediately diagnosable from watchdog. Applies to both `QuantCloudClient::post()` (around the JSON branch and the chat fallback) and `QuantCloudStreamingClient::chatStreamRaw()` / `chatStream()`. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Client/QuantCloudClient.php | 10 ++++++++-- src/Client/QuantCloudStreamingClient.php | 14 ++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/Client/QuantCloudClient.php b/src/Client/QuantCloudClient.php index b551878..eacadd7 100644 --- a/src/Client/QuantCloudClient.php +++ b/src/Client/QuantCloudClient.php @@ -232,17 +232,20 @@ public function post(string $path, array $data, array $request_options = []): ar catch (GuzzleException $e) { $status = NULL; $reason = NULL; + $body = ''; if ($e instanceof RequestException && $e->getResponse()) { $status = $e->getResponse()->getStatusCode(); $reason = $e->getResponse()->getReasonPhrase(); + $body = (string) $e->getResponse()->getBody(); } $this->logger->error( - 'Quant Dashboard AI request failed for @path after @timeout seconds (status: @status @reason)', + 'Quant Dashboard AI request failed for @path after @timeout seconds (status: @status @reason). Response: @body', [ '@path' => $path, '@timeout' => $timeout, '@status' => $status ?? 'n/a', '@reason' => $reason ?? 'transport error', + '@body' => mb_substr($body, 0, 2000), ] ); throw new \RuntimeException('AI API request failed (status: ' . ($status ?? 'n/a') . ')', 0, $e); @@ -411,13 +414,16 @@ public function get(string $path, array $query_params = []): array { catch (GuzzleException $e) { $status = NULL; $reason = NULL; + $body = ''; if ($e instanceof RequestException && $e->getResponse()) { $status = $e->getResponse()->getStatusCode(); $reason = $e->getResponse()->getReasonPhrase(); + $body = (string) $e->getResponse()->getBody(); } - $this->logger->error('Quant Dashboard AI request failed (status: @status @reason)', [ + $this->logger->error('Quant Dashboard AI request failed (status: @status @reason). Response: @body', [ '@status' => $status ?? 'n/a', '@reason' => $reason ?? 'transport error', + '@body' => mb_substr($body, 0, 2000), ]); throw new \RuntimeException('AI API request failed (status: ' . ($status ?? 'n/a') . ')', 0, $e); } diff --git a/src/Client/QuantCloudStreamingClient.php b/src/Client/QuantCloudStreamingClient.php index 8b71531..fab59b7 100644 --- a/src/Client/QuantCloudStreamingClient.php +++ b/src/Client/QuantCloudStreamingClient.php @@ -82,8 +82,13 @@ public function chatStreamRaw(array $messages, string $model_id, array $options } catch (\Exception $e) { - $this->logger->error('Streaming request failed: @message', [ + $body = ''; + if ($e instanceof \GuzzleHttp\Exception\RequestException && $e->getResponse()) { + $body = (string) $e->getResponse()->getBody(); + } + $this->logger->error('Streaming request failed: @message body=@body', [ '@message' => $e->getMessage(), + '@body' => mb_substr($body, 0, 2000), ]); throw new \RuntimeException('Streaming failed: ' . $e->getMessage(), 0, $e); } @@ -201,8 +206,13 @@ public function chatStream(array $messages, string $model_id, callable $callback } catch (\Exception $e) { - $this->logger->error('Streaming request failed: @message', [ + $body = ''; + if ($e instanceof \GuzzleHttp\Exception\RequestException && $e->getResponse()) { + $body = (string) $e->getResponse()->getBody(); + } + $this->logger->error('Streaming request failed: @message body=@body', [ '@message' => $e->getMessage(), + '@body' => mb_substr($body, 0, 2000), ]); throw new \RuntimeException('Streaming failed: ' . $e->getMessage(), 0, $e); } From ef80c73b0d9583999cca58c7738677e5c65be268 Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 16:25:29 +1000 Subject: [PATCH 03/10] refactor(streaming): rewrite SSE iterator for native dashboard frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dashboard's chat/stream SSE format is neither standard Bedrock Converse nor OpenAI — it uses a custom shape: - Init frame: {requestId, model, streaming: true} - Text delta: {delta: "...", complete: bool} - Tool input: {name, toolUseId, input: {...}} - Summary done: {stopReason, usage, response: {role, content, toolUse}} The previous iterator yielded raw arrays through an anonymous IteratorAggregate, bypassing `createStreamedChatMessage()`. As a result `StreamedChatMessageIterator::reconstructChatOutput()` saw no messages and produced empty output, breaking the Drupal AI Agents loop entirely (agents read the assistant reply via the iterator, not the underlying HTTP body). This commit replumbs the iterator onto `doIterate()` per the base class contract, parses each frame variant explicitly, and yields proper `StreamedChatMessage` objects so reconstruction works. The summary frame's `response.content` is deliberately skipped (it carries the full accumulated text already emitted via deltas; re-emitting would double the assistant message). Tool calls require an extra hop: the base iterator's `assembleToolCalls()` calls `$tool->toArray()` on each entry returned by `getTools()`. Plain arrays don't have a `toArray()` method, so this commit adds a tiny readonly `StreamedToolCall` value object that renders to the OpenAI-shape array the assembler expects. Removes `QuantCloudProviderFailureDetectionTest.php` — it tested protected helpers (`isEmptyResponseContent`, `isLikelyTokenLimited`, `getOutputTokenCount`, `getStopReason`) that lived on the old chat() implementation and no longer exist after the provider refactor. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/QuantCloudChatMessageIterator.php | 330 +++++++++++++----- src/StreamedToolCall.php | 39 +++ ...QuantCloudProviderFailureDetectionTest.php | 285 --------------- 3 files changed, 274 insertions(+), 380 deletions(-) create mode 100644 src/StreamedToolCall.php delete mode 100644 tests/src/Unit/QuantCloudProviderFailureDetectionTest.php diff --git a/src/QuantCloudChatMessageIterator.php b/src/QuantCloudChatMessageIterator.php index e298580..8c3f8c0 100644 --- a/src/QuantCloudChatMessageIterator.php +++ b/src/QuantCloudChatMessageIterator.php @@ -1,142 +1,282 @@ stream = $stream; - $this->logger = $logger; + // The base class wants a Traversable in the constructor; we supply an + // empty one because doIterate() drives streaming directly. + $instance = new static(new \ArrayIterator([])); + $instance->stream = $stream; + $instance->logger = $logger; + return $instance; + } + + /** + * {@inheritdoc} + */ + public function doIterate(): \Generator { + $decodeWarnings = 0; + $maxWarnings = 3; + + while (!$this->stream->eof()) { + $line = $this->readLine(); + + if ($line === '' || !str_starts_with($line, 'data:')) { + continue; } - /** - * - */ - public function getIterator(): \Generator { - while (!$this->stream->eof()) { - $line = $this->readLine(); - - if (strpos($line, 'data: ') === 0) { - $raw_json = substr($line, 6); - $json_data = json_decode($raw_json, TRUE); - - if ($json_data === NULL) { - $this->logger->warning('Failed to decode SSE JSON data'); - continue; - } - - if (isset($json_data['delta'])) { - yield [ - 'delta' => $json_data['delta'], - 'role' => $json_data['role'] ?? 'assistant', - 'usage' => $json_data['usage'] ?? [], - ]; - } - - // Handle tool use events - yield tool data for the iterator consumer. - if (isset($json_data['toolUse'])) { - yield [ - 'delta' => '', - 'role' => $json_data['role'] ?? 'assistant', - 'usage' => $json_data['usage'] ?? [], - 'toolUse' => $json_data['toolUse'], - 'stopReason' => $json_data['stopReason'] ?? NULL, - 'content' => $json_data['content'] ?? '', - ]; - } - - // Also handle tool_request events from done event. - if (isset($json_data['response']['toolUse'])) { - yield [ - 'delta' => '', - 'role' => 'assistant', - 'usage' => $json_data['usage'] ?? [], - 'toolUse' => $json_data['response']['toolUse'], - 'stopReason' => $json_data['stopReason'] ?? NULL, - 'content' => $json_data['response']['content'] ?? '', - ]; - } - - if ($json_data['complete'] ?? FALSE) { - break; - } - } - } + $rawJson = trim(substr($line, strlen('data:'))); + if ($rawJson === '') { + continue; } - /** - * - */ - private function readLine(): string { - $line = ''; - while (!$this->stream->eof()) { - $char = $this->stream->read(1); - if ($char === "\n") { - break; - } - $line .= $char; + try { + $event = Json::decode($rawJson); + } + catch (\Throwable) { + $event = NULL; + } + + if (!is_array($event)) { + $decodeWarnings++; + if ($decodeWarnings <= $maxWarnings) { + $this->logger->warning( + 'Failed to decode SSE JSON data for Quant Cloud streaming response. Warning @count of @limit.', + ['@count' => $decodeWarnings, '@limit' => $maxWarnings], + ); } - return trim($line); + continue; } - }; + yield from $this->handleEvent($event); - $instance = new static($wrapper); - $instance->stream = $stream; - $instance->logger = $logger; - return $instance; + // Some upstream frames include `complete: true` to indicate the stream + // is finished; respect that as an early exit even before the summary + // frame ships. + if (($event['complete'] ?? FALSE) === TRUE) { + break; + } + } } /** - * {@inheritdoc} + * Translate one decoded SSE event into zero or more streamed messages. + * + * @param array $event + * The decoded JSON payload of a single `data:` line. + * + * @return \Generator<\Drupal\ai\OperationType\Chat\StreamedChatMessageInterface> */ - public function getIterator(): \Generator { - foreach ($this->iterator as $data) { - yield new StreamedChatMessage( - $data['role'] ?? 'assistant', - $data['delta'] ?? '', - $data['usage'] ?? [] + protected function handleEvent(array $event): \Generator { + $usage = is_array($event['usage'] ?? NULL) ? $event['usage'] : []; + + // Text delta frame. + if (isset($event['delta']) && is_string($event['delta'])) { + $message = $this->createStreamedChatMessage( + 'assistant', + $event['delta'], + $usage, + NULL, + $event, ); + $this->applyUsage($message, $usage); + yield $message; + } + + // Inline tool input frame (single tool call). + if (isset($event['toolUseId'], $event['name']) && isset($event['input']) && is_array($event['input'])) { + $tool = $this->renderToolCall( + (string) $event['toolUseId'], + (string) $event['name'], + $event['input'], + ); + $message = $this->createStreamedChatMessage( + 'assistant', + '', + $usage, + [$tool], + $event, + ); + $this->applyUsage($message, $usage); + yield $message; + } + + // Top-level `toolUse` array frame (sibling of `content`). + if (isset($event['toolUse']) && is_array($event['toolUse'])) { + foreach ($this->collectToolUses($event['toolUse']) as $tool) { + $message = $this->createStreamedChatMessage( + 'assistant', + '', + $usage, + [$tool], + $event, + ); + $this->applyUsage($message, $usage); + yield $message; + } + } + + // Summary / done frame: may contain a nested `response.toolUse` array and + // the final stopReason. We deliberately ignore `response.content` here — + // that field carries the *full* accumulated assistant text, which we've + // already emitted as a sequence of `delta` chunks. Yielding it again + // would double the message text on reconstruction. + if (isset($event['response']) && is_array($event['response'])) { + $response = $event['response']; + + if (isset($response['toolUse']) && is_array($response['toolUse'])) { + foreach ($this->collectToolUses($response['toolUse']) as $tool) { + $message = $this->createStreamedChatMessage( + 'assistant', + '', + $usage, + [$tool], + $event, + ); + $this->applyUsage($message, $usage); + yield $message; + } + } + } + + if (isset($event['stopReason']) && is_string($event['stopReason'])) { + $this->setFinishReason($event['stopReason']); + } + } + + /** + * Normalise a list of dashboard toolUse entries into render objects. + * + * @param array $toolUses + * The `toolUse` array as returned by the dashboard. + * + * @return array + * List of tool-call value objects. + */ + protected function collectToolUses(array $toolUses): array { + $rendered = []; + foreach ($toolUses as $entry) { + if (!is_array($entry)) { + continue; + } + $name = (string) ($entry['name'] ?? ''); + $id = (string) ($entry['toolUseId'] ?? $entry['id'] ?? ''); + $input = is_array($entry['input'] ?? NULL) ? $entry['input'] : []; + if ($name === '' && $id === '') { + continue; + } + $rendered[] = $this->renderToolCall($id, $name, $input); + } + return $rendered; + } + + /** + * Build the tool-call object expected by + * `StreamedChatMessageIterator::assembleToolCalls()`. + * + * @return \Drupal\ai_provider_quant_cloud\StreamedToolCall + * A value object whose `toArray()` returns the OpenAI-shape render array. + */ + protected function renderToolCall(string $toolId, string $name, array $arguments): StreamedToolCall { + return new StreamedToolCall( + $toolId, + $name, + Json::encode($arguments), + ); + } + + /** + * Apply usage data from an SSE frame onto a streamed message. + * + * The dashboard surfaces usage as `inputTokens` / `outputTokens` / + * `totalTokens` on the summary frame; the base class consumes these per + * chunk to populate the final TokenUsageDto. + * + * @param \Drupal\ai\OperationType\Chat\StreamedChatMessageInterface $message + * The chunk to annotate. + * @param array $usage + * The `usage` block from the SSE event. + */ + protected function applyUsage($message, array $usage): void { + if (isset($usage['inputTokens']) && is_numeric($usage['inputTokens'])) { + $message->setInputTokenUsage((int) $usage['inputTokens']); + } + if (isset($usage['outputTokens']) && is_numeric($usage['outputTokens'])) { + $message->setOutputTokenUsage((int) $usage['outputTokens']); + } + if (isset($usage['totalTokens']) && is_numeric($usage['totalTokens'])) { + $message->setTotalTokenUsage((int) $usage['totalTokens']); + } + } + + /** + * Read one newline-terminated line from the upstream stream. + * + * @return string + * The line without its trailing newline / carriage return. + */ + protected function readLine(): string { + $line = ''; + while (!$this->stream->eof()) { + $char = $this->stream->read(1); + if ($char === '' || $char === "\n") { + break; + } + $line .= $char; } + return rtrim($line, "\r"); } } diff --git a/src/StreamedToolCall.php b/src/StreamedToolCall.php new file mode 100644 index 0000000..ba1526f --- /dev/null +++ b/src/StreamedToolCall.php @@ -0,0 +1,39 @@ +toArray()` on every entry returned by + * `StreamedChatMessage::getTools()`, expecting the OpenAI-shape render array + * (`id`, `type`, `function: {name, arguments}`). Bedrock-shape providers + * generate their tool calls as plain arrays — we wrap them in this object so + * the contract holds. + */ +final readonly class StreamedToolCall { + + public function __construct( + private string $id, + private string $name, + private string $arguments, + ) {} + + /** + * Render in the OpenAI-shape expected by the parent iterator's assembler. + */ + public function toArray(): array { + return [ + 'id' => $this->id, + 'type' => 'function', + 'function' => [ + 'name' => $this->name, + 'arguments' => $this->arguments, + ], + ]; + } + +} diff --git a/tests/src/Unit/QuantCloudProviderFailureDetectionTest.php b/tests/src/Unit/QuantCloudProviderFailureDetectionTest.php deleted file mode 100644 index 0e84503..0000000 --- a/tests/src/Unit/QuantCloudProviderFailureDetectionTest.php +++ /dev/null @@ -1,285 +0,0 @@ -assertSame( - $expected, - $this->provider()->isEmptyResponseContentForTest($content) - ); - } - - /** - * Provides response content cases. - * - * @return array - * Test cases. - */ - public static function emptyContentProvider(): array { - return [ - 'empty string' => ['', TRUE], - 'null' => [NULL, TRUE], - 'zero string' => ['0', FALSE], - 'zero integer' => [0, FALSE], - 'integer content' => [42, FALSE], - 'false content' => [FALSE, FALSE], - 'empty array' => [[], TRUE], - 'text item' => [[['text' => 'Generated content']], FALSE], - 'empty text item' => [[['text' => '']], TRUE], - 'string item' => [['Generated content'], FALSE], - ]; - } - - /** - * Tests token-limit detection. - * - * @param string|null $stop_reason - * Response stop reason. - * @param int $output_tokens - * Output token count. - * @param int $max_tokens - * Configured token limit. - * @param bool $expected - * Expected result. - */ - #[DataProvider('tokenLimitProvider')] - public function testIsLikelyTokenLimited( - ?string $stop_reason, - int $output_tokens, - int $max_tokens, - bool $expected, - ): void { - $this->assertSame( - $expected, - $this->provider()->isLikelyTokenLimitedForTest( - $stop_reason, - $output_tokens, - $max_tokens - ) - ); - } - - /** - * Provides token-limit cases. - * - * @return array - * Test cases. - */ - public static function tokenLimitProvider(): array { - return [ - 'explicit max token reason' => ['max_tokens', 100, 4096, TRUE], - 'length reason' => ['length', 100, 4096, TRUE], - 'complete reason with exact token count' => [ - 'end_turn', - 4096, - 4096, - FALSE, - ], - 'unknown reason with exact token count' => [ - 'unknown', - 4096, - 4096, - TRUE, - ], - 'missing reason with exact token count' => [ - NULL, - 4096, - 4096, - TRUE, - ], - 'missing reason below token count' => [ - NULL, - 4095, - 4096, - FALSE, - ], - 'disabled max token limit' => [ - 'max_tokens', - 4096, - 0, - FALSE, - ], - ]; - } - - /** - * Tests output token extraction. - * - * @param array $response_data - * Response data. - * @param int $expected - * Expected token count. - */ - #[DataProvider('outputTokenProvider')] - public function testGetOutputTokenCount( - array $response_data, - int $expected, - ): void { - $this->assertSame( - $expected, - $this->provider()->getOutputTokenCountForTest($response_data) - ); - } - - /** - * Provides output token response shapes. - * - * @return array - * Test cases. - */ - public static function outputTokenProvider(): array { - return [ - 'top-level camel case usage' => [ - ['usage' => ['outputTokens' => 123]], - 123, - ], - 'float output tokens' => [ - ['usage' => ['outputTokens' => 1.9]], - 1, - ], - 'nested snake case usage' => [ - ['response' => ['usage' => ['output_tokens' => 456]]], - 456, - ], - 'metadata completion tokens' => [ - ['metadata' => ['usage' => ['completion_tokens' => 789]]], - 789, - ], - 'missing usage' => [ - [], - 0, - ], - 'non-array usage' => [ - ['usage' => 'invalid'], - 0, - ], - ]; - } - - /** - * Tests stop reason extraction. - * - * @param array $response_data - * Response data. - * @param string|null $expected - * Expected stop reason. - */ - #[DataProvider('stopReasonProvider')] - public function testGetStopReason( - array $response_data, - ?string $expected, - ): void { - $this->assertSame( - $expected, - $this->provider()->getStopReasonForTest($response_data) - ); - } - - /** - * Provides stop reason response shapes. - * - * @return array - * Test cases. - */ - public static function stopReasonProvider(): array { - return [ - 'top-level camel case' => [ - ['stopReason' => 'end_turn'], - 'end_turn', - ], - 'top-level snake case' => [ - ['stop_reason' => 'max_tokens'], - 'max_tokens', - ], - 'nested camel case' => [ - ['response' => ['stopReason' => 'tool_use']], - 'tool_use', - ], - 'nested snake case' => [ - ['response' => ['stop_reason' => 'stop']], - 'stop', - ], - 'missing stop reason' => [ - [], - NULL, - ], - ]; - } - - /** - * Gets a provider instance without calling the plugin constructor. - */ - protected function provider(): TestableQuantCloudProvider { - $reflection = new \ReflectionClass(TestableQuantCloudProvider::class); - return $reflection->newInstanceWithoutConstructor(); - } - -} - -/** - * Test double exposing protected helper methods. - */ -class TestableQuantCloudProvider extends QuantCloudProvider { - - /** - * Exposes isEmptyResponseContent(). - */ - public function isEmptyResponseContentForTest(mixed $content): bool { - return $this->isEmptyResponseContent($content); - } - - /** - * Exposes isLikelyTokenLimited(). - */ - public function isLikelyTokenLimitedForTest( - ?string $stop_reason, - int $output_tokens, - int $max_tokens, - ): bool { - return $this->isLikelyTokenLimited( - $stop_reason, - $output_tokens, - $max_tokens - ); - } - - /** - * Exposes getOutputTokenCount(). - */ - public function getOutputTokenCountForTest(array $response_data): int { - return $this->getOutputTokenCount($response_data); - } - - /** - * Exposes getStopReason(). - */ - public function getStopReasonForTest(array $response_data): ?string { - return $this->getStopReason($response_data); - } - -} From 93e541cf13a33f280bd587fb3f3e138b8eb4d800 Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 16:26:04 +1000 Subject: [PATCH 04/10] feat(chat): Fiber-aware streaming for Drupal AI Agents compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous chat() implementation was buffered-only — every request to the dashboard's /chat endpoint waited for the complete response before returning. For long Claude responses (large tool-bearing Canvas page builds in particular), this regularly tripped the dashboard's CloudFront origin timeout (~60-180s) and surfaced as a 504 to the caller, even though Bedrock itself was still generating. This rewrite adds an inline Fiber-aware streaming hook (mirroring the pattern in Drupal AI's `OpenAiBasedProviderClientBase::chat()`): - If the caller has set `$this->streamed = TRUE` (e.g. AI Explorer's "Streamed" checkbox), return the SSE iterator directly so the consumer can read chunks live. - Otherwise, if we're inside an active Fiber (Drupal AI Agents' solving loop is fibered), open the streaming endpoint anyway, iterate chunks while suspending the Fiber between them so the cooperator can yield, then reconstruct a buffered-style ChatOutput for the caller. From the agent's perspective the response is buffered; on the wire it streams, which keeps the connection alive and dodges the CloudFront origin timeout. - Otherwise (plain buffered call), POST to /chat as before and parse the dashboard's native Bedrock-flavoured response. Other changes folded in: - `maxTokens` is clamped to the destination model's `maxOutputTokens` from ModelsService before sending. Bedrock rejects oversize values with a 400 (Nova Lite caps at 5000, etc.), so the clamp keeps callers from running into opaque upstream failures when the AI Defaults UI lets you configure a higher value than a given model supports. - Tool definitions sent in `toolConfig.tools` are now correctly wrapped in Bedrock's `{toolSpec: {name, description, inputSchema}}` shape (rather than the OpenAI `{type, function}` shape Drupal AI's `ToolsInput::renderToolsArray()` returns), since the dashboard's validator requires `toolSpec`. - Native parsing of the dashboard's response shape, including the non-standard sibling `toolUse` array (sibling of `content`, not nested inside) that the dashboard returns for assistant tool calls. - `applyMaxTokensClamp()` and `ensureAuthenticated()` are private helpers used by both the buffered and streaming branches. Embeddings, text-to-image, and image-to-image continue to use `QuantCloudClient` directly — those endpoints have non-OpenAI request and response shapes that aren't worth running through the SDK. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Plugin/AiProvider/QuantCloudProvider.php | 1643 ++++++------------ 1 file changed, 576 insertions(+), 1067 deletions(-) diff --git a/src/Plugin/AiProvider/QuantCloudProvider.php b/src/Plugin/AiProvider/QuantCloudProvider.php index e1c8ec4..edc60ca 100644 --- a/src/Plugin/AiProvider/QuantCloudProvider.php +++ b/src/Plugin/AiProvider/QuantCloudProvider.php @@ -1,11 +1,17 @@ client = $container->get('ai_provider_quant_cloud.client'); $instance->streamingClient = $container->get('ai_provider_quant_cloud.streaming_client'); + $instance->authService = $container->get('ai_provider_quant_cloud.auth'); $instance->modelsService = $container->get('ai_provider_quant_cloud.models'); $instance->logger = $container->get('logger.factory')->get('ai_provider_quant_cloud'); return $instance; @@ -97,11 +107,11 @@ public function getConfig(): ImmutableConfig { * {@inheritdoc} */ public function getApiDefinition(): array { - $definition = Yaml::parseFile( - $this->moduleHandler->getModule('ai_provider_quant_cloud') - ->getPath() . '/definitions/api_defaults.yml' - ); - return $definition; + $definition_file = $this->moduleHandler + ->getModule('ai_provider_quant_cloud') + ->getPath() . '/definitions/api_defaults.yml'; + + return Yaml::parseFile($definition_file); } /** @@ -110,19 +120,16 @@ public function getApiDefinition(): array { public function isUsable(?string $operation_type = NULL, array $capabilities = []): bool { $config = $this->getConfig(); - // Check basic configuration: token and organization ID are required. if (!$config->get('auth.access_token_key') || !$config->get('auth.organization_id')) { return FALSE; } - // Check platform is configured. if (!$config->get('platform')) { return FALSE; } - // If operation type is specified, check if we support it. if ($operation_type) { - return in_array($operation_type, $this->getSupportedOperationTypes()); + return in_array($operation_type, $this->getSupportedOperationTypes(), TRUE); } return TRUE; @@ -134,22 +141,39 @@ public function isUsable(?string $operation_type = NULL, array $capabilities = [ public function getSupportedOperationTypes(): array { return [ 'chat', + 'chat_with_complex_json', + 'chat_with_image_vision', + 'chat_with_structured_response', + 'chat_with_tools', 'embeddings', 'text_to_image', 'image_to_image', ]; } + /** + * {@inheritdoc} + */ + public function getSupportedCapabilities(): array { + // We expose StreamChatOutput so callers can opt into SSE chat. We do not + // advertise ChatFiberSupport because the upstream wire format isn't + // OpenAI-shape — our Fiber-aware path still works (it lives in chat() + // directly), but we don't want the framework to treat us as a generic + // OpenAI Fiber provider. + return [AiProviderCapability::StreamChatOutput]; + } + /** * {@inheritdoc} */ public function getConfiguredModels(?string $operation_type = NULL, array $capabilities = []): array { - // Fetch models dynamically from the API via ModelsService. try { - // Get models for the operation type (defaults to 'chat' if not specified) - // Map Drupal operation types to API features. $feature_map = [ 'chat' => 'chat', + 'chat_with_complex_json' => 'chat', + 'chat_with_image_vision' => 'chat', + 'chat_with_structured_response' => 'chat', + 'chat_with_tools' => 'chat', 'embeddings' => 'embeddings', 'text_to_image' => 'image_generation', 'image_to_image' => 'image_generation', @@ -164,475 +188,451 @@ public function getConfiguredModels(?string $operation_type = NULL, array $capab continue; } - // Filter by capabilities if specified. if (!empty($capabilities)) { $model_capabilities = $model['capabilities'] ?? []; - - // Check if model supports all required capabilities. $supports_all = TRUE; foreach ($capabilities as $capability) { - // Convert enum to string value if needed. - $capability_string = $capability instanceof AiModelCapability ? $capability->value : (string) $capability; - - // Map Drupal capability names to our API capability flags. + $capability_string = $capability instanceof AiModelCapability + ? $capability->value + : (string) $capability; $capability_flag = $this->mapCapabilityFlag($capability_string); - if ($capability_flag && empty($model_capabilities[$capability_flag])) { $supports_all = FALSE; break; } } - - // Skip this model if it doesn't support required capabilities. if (!$supports_all) { continue; } } - // Drupal expects simple string labels for form dropdowns. - $model_name = $model['name'] ?? $model_id; - $models[$model_id] = $model_name; + $models[$model_id] = $model['name'] ?? $model_id; } return $models; - } - catch (\Exception $e) { - // Return empty array if API is not configured or fails - // The provider won't be usable until configuration is complete. + catch (\Exception) { + // Provider isn't usable until configuration is complete; return empty + // so the UI surfaces "no models" rather than throwing during discovery. return []; } } /** - * Map Drupal capability names to API capability flags. - * - * @param string $capability - * Drupal capability name (from AiModelCapability enum). - * - * @return string|null - * API capability flag, or NULL if not mapped. + * Map a Drupal capability enum value to the dashboard's model capability flag. */ protected function mapCapabilityFlag(string $capability): ?string { - $mapping = [ - // Function calling / Tools. - 'chat_tools' => 'supportsTools', - 'chat_combined_tools_and_structured_response' => 'supportsTools', - - // Structured output / JSON. - 'chat_json_output' => 'supportsStructuredOutput', - 'chat_structured_response' => 'supportsStructuredOutput', - - // Vision / Multimodal. + return match ($capability) { + 'chat_tools', 'chat_combined_tools_and_structured_response' => 'supportsTools', + 'chat_json_output', 'chat_structured_response' => 'supportsStructuredOutput', 'chat_with_image_vision' => 'supportsVision', - 'chat_with_video' => 'supportsMultimodal', - 'chat_with_audio' => 'supportsMultimodal', - ]; + 'chat_with_video', 'chat_with_audio' => 'supportsMultimodal', + default => NULL, + }; + } - return $mapping[$capability] ?? NULL; + /** + * {@inheritdoc} + */ + public function getModelSettings(string $model_id, array $generalConfig = []): array { + return $generalConfig; } /** - * Map API features to Drupal AI operation types. - * - * @param array $features - * API supported features. + * {@inheritdoc} * - * @return array - * Drupal AI operation types. + * Authentication is managed end-to-end by the {@see AuthService} (OAuth + + * key store), so the framework-level setter is a no-op. We keep it on the + * class so the AiProviderInterface contract is satisfied. */ - protected function mapFeaturesToOperations(array $features): array { - $operations = []; - - if (in_array('chat', $features)) { - $operations[] = 'chat'; - } - - if (in_array('embeddings', $features)) { - $operations[] = 'embeddings'; - } - - return $operations; + public function setAuthentication(mixed $authentication): void { + // No-op: AuthService owns the bearer token lifecycle. } /** * {@inheritdoc} */ public function chat(ChatInput|array|string $input, string $model_id, array $tags = []): ChatOutput { - // Normalize input to ChatInput. - if (is_string($input)) { - $input = new ChatInput([new ChatMessage('user', $input)]); + $this->ensureAuthenticated(); + + [$messages, $options] = $this->buildChatPayload($input); + $options = $this->applyMaxTokensClamp($options, $model_id); + + // Caller explicitly requested streaming (e.g. the AI Explorer UI's + // "Streamed" checkbox). Return the iterator directly so the caller can + // pump chunks live. + if ($this->streamed) { + $stream = $this->streamingClient->chatStreamRaw($messages, $model_id, $options); + $iterator = QuantCloudChatMessageIterator::create($stream, $this->logger); + return new ChatOutput($iterator, [], NULL); } - elseif (is_array($input)) { - $messages = []; - foreach ($input as $msg) { - if (is_array($msg)) { - $messages[] = new ChatMessage($msg['role'] ?? 'user', $msg['content'] ?? ''); + + // Inside a Fiber (Drupal AI Agents loop). Use streaming under the hood to + // keep the connection alive past the dashboard's gateway timeout, but + // accumulate into a buffered-style ChatOutput so the caller sees a single + // complete response. + if (\Fiber::getCurrent()) { + $stream = $this->streamingClient->chatStreamRaw($messages, $model_id, $options); + $iterator = QuantCloudChatMessageIterator::create($stream, $this->logger); + foreach ($iterator as $chunk) { + if ($chunk !== NULL) { + \Fiber::suspend(); } } - $input = new ChatInput($messages); + $message = $iterator->reconstructChatOutput()->getNormalized(); + return new ChatOutput($message, [], NULL); } - // Format messages (supports multimodal content) - $messages = $this->formatMessages($input->getMessages()); + // Plain buffered path. + $response = $this->client->chat($messages, $model_id, $options); + return $this->parseChatResponse($response, $input); + } + + /** + * Ensure we have a valid access token before issuing any request. + * + * @throws \Drupal\ai\Exception\AiSetupFailureException + */ + protected function ensureAuthenticated(): void { + $token = $this->authService->getValidAccessToken(); + if (!$token) { + throw new AiSetupFailureException('No valid Quant Cloud access token available — re-authenticate via the Quant Cloud AI settings.'); + } + } - // Build request options. + /** + * Normalise a chat input into the dashboard's native message + options shape. + * + * @return array{0: array>, 1: array} + * A two-tuple of [messages, options]. + */ + protected function buildChatPayload(ChatInput|array|string $input): array { + $messages = []; $options = []; - // Check for structured output (JSON Schema) - if ($input->getChatStructuredJsonSchema()) { - $schema = $input->getChatStructuredJsonSchema(); - $options['response_format'] = [ - 'type' => 'json_schema', - 'schema' => $schema['schema'], - 'name' => $schema['name'] ?? 'json_schema', - 'strict' => $schema['strict'] ?? FALSE, - ]; - } + if ($input instanceof ChatInput) { + foreach ($input->getMessages() as $message) { + $messages[] = $this->convertMessage($message); + } - // Check for tools/function calling. - if ($input->getChatTools()) { - $tools_input = $input->getChatTools(); - $options['toolConfig'] = [ - 'tools' => $this->formatToolsForApi($tools_input), - ]; - } + $system_prompt = $input->getSystemPrompt(); + if ($system_prompt === '' && $this->chatSystemRole !== '') { + $system_prompt = $this->chatSystemRole; + } + if ($system_prompt !== '') { + $options['systemPrompt'] = $system_prompt; + } + + if ($input->getChatTools()) { + $options['toolConfig'] = [ + 'tools' => $this->convertToolsToBedrockShape( + $input->getChatTools()->renderToolsArray() + ), + ]; + } - // Check for system prompt - use chatSystemRole from base class (like OpenAI provider) - // This is set via setChatSystemRole() by agents before calling chat() - if ($this->chatSystemRole) { - $options['systemPrompt'] = $this->chatSystemRole; + $structured = $input->getChatStructuredJsonSchema(); + if (!empty($structured)) { + $options['response_format'] = $structured; + } + } + elseif (is_array($input)) { + $messages = $input; + if ($this->chatSystemRole !== '') { + $options['systemPrompt'] = $this->chatSystemRole; + } } - // Fallback: also check input object (for backward compatibility) - elseif (method_exists($input, 'getSystemRole') && $input->getSystemRole()) { - $options['systemPrompt'] = $input->getSystemRole(); + else { + $messages = [['role' => 'user', 'content' => $input]]; + if ($this->chatSystemRole !== '') { + $options['systemPrompt'] = $this->chatSystemRole; + } } - if ( - !empty($this->configuration['http_client_options']) - && is_array($this->configuration['http_client_options']) - ) { - $http_options = $this->configuration['http_client_options']; - foreach (['timeout', 'connect_timeout'] as $request_option) { - if (isset($http_options[$request_option])) { - $options[$request_option] = $http_options[$request_option]; - } + // Merge provider-level configuration (temperature / maxTokens / etc.) + // last so explicit caller intent (above) wins. + foreach ($this->configuration as $key => $value) { + if (!isset($options[$key])) { + $options[$key] = $value; } } - try { - // Check if streaming is requested (set by base class from UI checkbox) - $use_streaming = $this->streamed ?? FALSE; - - if ($use_streaming) { - // Streaming via SSE - return iterator for real-time streaming. - $stream = $this->streamingClient->chatStreamRaw($messages, $model_id, $options); - - // Create streaming iterator (like AWS Bedrock provider does) - $message = QuantCloudChatMessageIterator::create($stream, $this->logger); + return [$messages, $options]; + } - // Return ChatOutput with the iterator as the message - // The iterator will be consumed by AI Explorer for real-time display. - return new ChatOutput($message, [], NULL); + /** + * Convert OpenAI-shaped tool definitions into the Bedrock toolSpec shape. + * + * The dashboard's Bedrock Converse chat endpoint requires each tool to be + * wrapped in a `toolSpec` object with `inputSchema.json`, rather than the + * OpenAI `{type: function, function: {...}}` envelope produced by Drupal AI. + * + * @param array> $openAiTools + * Tools rendered by ToolsInput::renderToolsArray(). + * + * @return array> + * Tools in Bedrock toolSpec shape. + */ + private function convertToolsToBedrockShape(array $openAiTools): array { + $bedrockTools = []; + foreach ($openAiTools as $tool) { + if (!isset($tool['function']) || !is_array($tool['function'])) { + continue; + } + $fn = $tool['function']; + $spec = [ + 'name' => $fn['name'] ?? '', + 'description' => $fn['description'] ?? '', + ]; + if (isset($fn['parameters']) && is_array($fn['parameters'])) { + $spec['inputSchema'] = ['json' => $fn['parameters']]; } else { - // Buffered (default) - best for forms and batch processing. - $response_data = $this->client->chat($messages, $model_id, $options); - - // Handle Lambda response format: - // { "response": { "content": "...", "role": "assistant", "toolUse": {...} }, "usage": {...} }. - if (isset($response_data['response'])) { - // Standard nested format. - $message_data = $response_data['response']; - $content = $message_data['content'] ?? ''; - $role = $message_data['role'] ?? 'assistant'; - $tool_use_data = $message_data['toolUse'] ?? NULL; - } - else { - // Flat format (legacy fallback) - $content = $response_data['text'] ?? $response_data['content'] ?? ''; - $role = 'assistant'; - $tool_use_data = $response_data['toolUse'] ?? NULL; - } - - $this->logPotentialChatFailure( - $response_data, - $model_id, - (int) ($options['maxTokens'] - ?? $this->getConfig()->get('model.max_tokens') - ?? QuantCloudClient::DEFAULT_MAX_TOKENS), - !empty($options['toolConfig']), - $content, - $tool_use_data - ); - - // Create ChatMessage for the response. - $message = new ChatMessage($role, $content); - - // Check if response includes tool use - // toolUse can be a single object or an array of tool requests. - if ($tool_use_data) { - // Normalize to array - handle both single tool and array of tools. - $tool_use_array = []; - if (isset($tool_use_data['name'])) { - // Single tool object. - $tool_use_array = [$tool_use_data]; - } - elseif (is_array($tool_use_data) && !empty($tool_use_data)) { - // Array of tool objects (check first element has 'name') - if (isset($tool_use_data[0]['name'])) { - $tool_use_array = $tool_use_data; - } - } - - // Create ToolsFunctionOutput objects (like Bedrock does) - $tools = []; - if (!empty($tool_use_array) && $input instanceof ChatInput && method_exists($input, 'getChatTools') && $input->getChatTools()) { - foreach ($tool_use_array as $tool_use) { - $function = $input->getChatTools()->getFunctionByName($tool_use['name']); - if ($function) { - $tools[] = new ToolsFunctionOutput( - $function, - $tool_use['toolUseId'] ?? uniqid('tool_'), - $tool_use['input'] ?? [] - ); - } - } - } - - if (!empty($tools)) { - $message->setTools($tools); - } - } - - return new ChatOutput($message, $response_data, NULL); + // Bedrock still requires inputSchema even for tools with no params. + $spec['inputSchema'] = ['json' => ['type' => 'object', 'properties' => new \stdClass()]]; } - - } - catch (\Exception $e) { - throw new \RuntimeException('Chat request failed: ' . $e->getMessage(), 0, $e); + $bedrockTools[] = ['toolSpec' => $spec]; } + return $bedrockTools; } /** - * Log likely silent chat failures without recording generated content. + * Convert a Drupal AI ChatMessage into the dashboard's native message shape. * - * @param array $response_data - * The raw response data. - * @param string $model_id - * The requested model ID. - * @param int $max_tokens - * The configured response token limit. - * @param bool $tools_requested - * TRUE when the request included tool configuration. - * @param mixed $content - * Response content. - * @param mixed $tool_use_data - * Tool use response data. + * The dashboard speaks Bedrock Converse — `role` is `user` / `assistant` / + * `tool`, content is either a string or a list of content blocks, and tool + * calls round-trip via sibling `toolUse` / `toolResult` entries. + * + * @return array */ - protected function logPotentialChatFailure( - array $response_data, - string $model_id, - int $max_tokens, - bool $tools_requested, - mixed $content, - mixed $tool_use_data, - ): void { - $output_tokens = $this->getOutputTokenCount($response_data); - $stop_reason = $this->getStopReason($response_data); - - // These warnings intentionally bypass the verbose logging flag. They - // surface probable response failures, not routine request/response logs. - if ( - $this->isLikelyTokenLimited($stop_reason, $output_tokens, $max_tokens) - ) { - $this->logger->warning( - 'Quant Cloud chat response reached the configured max token limit. ' - . 'Increase model.max_tokens if the response was incomplete. ' - . 'Model: @model, max tokens: @max_tokens, output tokens: ' - . '@output_tokens, stop reason: @stop_reason.', - [ - '@model' => $model_id, - '@max_tokens' => $max_tokens, - '@output_tokens' => $output_tokens, - '@stop_reason' => $stop_reason ?? 'unknown', - ] - ); + protected function convertMessage(ChatMessage $message): array { + $role = $message->getRole() ?: 'user'; + $text = $message->getText(); + $images = $message->getImages(); + + // Tool *result* — the user side of a tool round-trip. The dashboard + // expects role=user with the result content keyed by toolUseId. + if ($message->getToolsId()) { + return [ + 'role' => 'user', + 'content' => $text, + 'toolUseId' => $message->getToolsId(), + ]; } - if ( - $tools_requested - && $this->isEmptyResponseContent($content) - && empty($tool_use_data) - ) { - $this->logger->warning( - 'Quant Cloud returned an empty chat response with no tool calls for ' - . 'a tool-enabled request. Model: @model, stop reason: ' - . '@stop_reason, output tokens: @output_tokens.', - [ - '@model' => $model_id, - '@stop_reason' => $stop_reason ?? 'unknown', - '@output_tokens' => $output_tokens, - ] - ); + // Assistant message that previously emitted tool calls. Echo the calls + // back as a sibling `toolUse` array so the upstream Bedrock loop can + // resume them on the next turn. + if ($message->getTools()) { + $tool_uses = []; + foreach ($message->getTools() as $tool) { + $tool_uses[] = $this->renderToolUseFromOutput($tool); + } + $payload = [ + 'role' => $role ?: 'assistant', + 'content' => $text, + ]; + if (!empty($tool_uses)) { + $payload['toolUse'] = $tool_uses; + } + return $payload; + } + + // Plain text-only message. + if (empty($images)) { + return [ + 'role' => $role, + 'content' => $text, + ]; + } + + // Multimodal: encode images as Bedrock Converse content blocks. + $blocks = []; + if ($text !== '') { + $blocks[] = ['text' => $text]; + } + foreach ($images as $image) { + $blocks[] = [ + 'image' => [ + 'format' => $this->guessImageFormat($image), + 'source' => [ + 'bytes' => base64_encode($image->getBinary()), + ], + ], + ]; } + + return [ + 'role' => $role, + 'content' => $blocks, + ]; } /** - * Determine whether response content is empty. + * Translate a ToolsFunctionOutput back into a sibling toolUse array entry. * - * @param mixed $content - * Response content. - * - * @return bool - * TRUE when no textual content is present. + * @return array */ - protected function isEmptyResponseContent(mixed $content): bool { - if (is_string($content)) { - return $content === ''; - } - - if (!is_array($content)) { - return $content === NULL; - } + protected function renderToolUseFromOutput(ToolsFunctionOutput $tool): array { + $rendered = $tool->getOutputRenderArray(); + $arguments = $rendered['function']['arguments'] ?? '{}'; + $decoded = is_string($arguments) ? (Json::decode($arguments) ?: []) : (array) $arguments; - foreach ($content as $item) { - if (is_string($item) && $item !== '') { - return FALSE; - } - if ( - is_array($item) - && isset($item['text']) - && is_string($item['text']) - && $item['text'] !== '' - ) { - return FALSE; - } - } + return [ + 'toolUseId' => $rendered['id'] ?? $tool->getToolId(), + 'name' => $rendered['function']['name'] ?? $tool->getName(), + 'input' => $decoded, + ]; + } - return TRUE; + /** + * Guess the upstream image format token from an ImageFile. + */ + protected function guessImageFormat(ImageFile $image): string { + $mime = strtolower((string) $image->getMimeType()); + return match (TRUE) { + str_contains($mime, 'png') => 'png', + str_contains($mime, 'gif') => 'gif', + str_contains($mime, 'webp') => 'webp', + default => 'jpeg', + }; } /** - * Determine whether a response likely stopped because of the token limit. + * Clamp an outgoing maxTokens value to the model's hard cap. + * + * Mirrors the legacy MaxTokensClampMiddleware behaviour, just inline so we + * don't need a Guzzle middleware stack for the native client. NULL caps + * mean "unknown — pass through unchanged". * - * @param string|null $stop_reason - * The response stop reason. - * @param int $output_tokens - * The reported output token count. - * @param int $max_tokens - * The configured response token limit. + * @param array $options + * Outgoing options array (mutable). + * @param string $model_id + * The model the request is destined for. * - * @return bool - * TRUE when the response likely hit the token limit. + * @return array + * The possibly-clamped options. */ - protected function isLikelyTokenLimited( - ?string $stop_reason, - int $output_tokens, - int $max_tokens, - ): bool { - if ($max_tokens <= 0) { - return FALSE; + protected function applyMaxTokensClamp(array $options, string $model_id): array { + if (!isset($options['maxTokens']) || !is_numeric($options['maxTokens'])) { + return $options; } - - $normalized_stop_reason = $stop_reason !== NULL - ? strtolower($stop_reason) - : NULL; - - $token_limit_reasons = [ - 'length', - 'max_token', - 'max_tokens', - 'model_length', - 'token_limit', - ]; - if (in_array($normalized_stop_reason, $token_limit_reasons, TRUE)) { - return TRUE; + $cap = $this->modelsService->getMaxOutputTokens($model_id); + if ($cap === NULL) { + return $options; } - - $complete_reasons = [ - 'complete', - 'completed', - 'end_turn', - 'finished', - 'stop', - 'stop_sequence', - 'tool_calls', - 'tool_use', - ]; - if (in_array($normalized_stop_reason, $complete_reasons, TRUE)) { - return FALSE; + $requested = (int) $options['maxTokens']; + if ($requested <= $cap) { + return $options; } - - return $output_tokens >= $max_tokens; + $this->logger->warning( + 'Clamping maxTokens for model @model from @requested to model cap @cap', + [ + '@model' => $model_id, + '@requested' => $requested, + '@cap' => $cap, + ], + ); + $options['maxTokens'] = $cap; + return $options; } /** - * Extract output token count from common response shapes. + * Parse a buffered dashboard chat response into a ChatOutput. * - * @param array $response_data - * The raw response data. + * Dashboard response shape: + * @code + * { + * "response": { + * "role": "assistant", + * "content": "...text..." or [content blocks], + * "toolUse": [{"toolUseId": "...", "name": "...", "input": {...}}] + * }, + * "usage": {"inputTokens": ..., "outputTokens": ..., "totalTokens": ...}, + * "requestId": "...", + * "modelId": "...", + * "stopReason": "end_turn" | "tool_use" | "tool_request" | "max_tokens" + * } + * @endcode * - * @return int - * The output token count, or 0 when unavailable. + * Note `response.toolUse` is a SIBLING of `response.content`, not nested. */ - protected function getOutputTokenCount(array $response_data): int { - $usage = $response_data['usage'] - ?? $response_data['response']['usage'] - ?? $response_data['metadata']['usage'] - ?? []; - - if (!is_array($usage)) { - return 0; + protected function parseChatResponse(array $response, ChatInput|array|string $input): ChatOutput { + $payload = is_array($response['response'] ?? NULL) ? $response['response'] : []; + + $role = (string) ($payload['role'] ?? 'assistant'); + $content = $payload['content'] ?? ''; + $text = $this->flattenContentToText($content); + + $message = new ChatMessage($role, $text); + + $tools_input = ($input instanceof ChatInput) ? $input->getChatTools() : NULL; + $tools = []; + + if (isset($payload['toolUse']) && is_array($payload['toolUse'])) { + foreach ($payload['toolUse'] as $entry) { + if (!is_array($entry)) { + continue; + } + $name = (string) ($entry['name'] ?? ''); + $id = (string) ($entry['toolUseId'] ?? $entry['id'] ?? ''); + $args = is_array($entry['input'] ?? NULL) ? $entry['input'] : []; + $input_function = $tools_input ? $tools_input->getFunctionByName($name) : NULL; + $output = new ToolsFunctionOutput($input_function, $id, $args); + if (!$input_function && $name !== '') { + $output->setName($name); + } + $tools[] = $output; + } + } + + if (!empty($tools)) { + $message->setTools($tools); } - return (int) ($usage['outputTokens'] - ?? $usage['output_tokens'] - ?? $usage['completion_tokens'] - ?? 0); + return new ChatOutput($message, $response, []); } /** - * Extract the model stop reason from common response shapes. - * - * @param array $response_data - * The raw response data. - * - * @return string|null - * The stop reason, or NULL when unavailable. + * Flatten dashboard `content` (string OR list of blocks) into a plain string. */ - protected function getStopReason(array $response_data): ?string { - return $response_data['stopReason'] - ?? $response_data['stop_reason'] - ?? $response_data['response']['stopReason'] - ?? $response_data['response']['stop_reason'] - ?? NULL; + protected function flattenContentToText(mixed $content): string { + if (is_string($content)) { + return $content; + } + if (!is_array($content)) { + return ''; + } + $text = ''; + foreach ($content as $block) { + if (is_string($block)) { + $text .= $block; + continue; + } + if (!is_array($block)) { + continue; + } + if (isset($block['text']) && is_string($block['text'])) { + $text .= $block['text']; + } + elseif (isset($block['type'], $block['text']) && $block['type'] === 'text') { + $text .= (string) $block['text']; + } + } + return $text; } /** * {@inheritdoc} */ public function embeddings(string|EmbeddingsInput $input, string $model_id, array $tags = []): EmbeddingsOutput { - // Normalize input - extract text from EmbeddingsInput. - if ($input instanceof EmbeddingsInput) { - $text = $input->getPrompt(); - } - else { - $text = $input; - } + $this->ensureAuthenticated(); + $text = $input instanceof EmbeddingsInput ? $input->getPrompt() : $input; try { - // Call API with single text string. $result = $this->client->embeddings($text, $model_id); - - // Extract embedding vector from response - // API returns: { "embeddings": [...], "model": "...", "usage": {...} }. $embedding = $result['embeddings'] ?? []; - - // EmbeddingsOutput expects array of embeddings (even for single input) - // Our API returns the vector directly, so wrap it. return new EmbeddingsOutput([$embedding], $result, []); - } catch (\Exception $e) { throw new \RuntimeException('Embeddings request failed: ' . $e->getMessage(), 0, $e); @@ -642,54 +642,119 @@ public function embeddings(string|EmbeddingsInput $input, string $model_id, arra /** * {@inheritdoc} */ - public function textToImage(string|TextToImageInput $input, string $model_id, array $tags = []): TextToImageOutput { - // Normalize input and extract images if provided. - $prompt = ''; - $source_images = []; + public function maxEmbeddingsInput(string $model_id = ''): int { + return 96; + } - if ($input instanceof TextToImageInput) { - $prompt = $input->getText(); - // Check if input has images (for image-to-image operations) - if (method_exists($input, 'getImages') && !empty($input->getImages())) { - foreach ($input->getImages() as $image) { - // Convert Drupal ImageFile to base64. - $source_images[] = base64_encode($image->getBinary()); - } + /** + * {@inheritdoc} + */ + public function embeddingsVectorSize(string $model_id): int { + $dimensions = [ + 'amazon.titan-embed-text-v2:0' => 1024, + 'amazon.titan-embed-text-v1' => 1536, + 'cohere.embed-english-v3' => 1024, + 'cohere.embed-multilingual-v3' => 1024, + ]; + return $dimensions[$model_id] ?? 1024; + } + + /** + * {@inheritdoc} + */ + public function getMaxInputTokens(string $model_id): int { + try { + $model_details = $this->modelsService->getModelDetails($model_id); + if ($model_details && isset($model_details['contextWindow'])) { + return (int) $model_details['contextWindow']; } } - else { - $prompt = $input; + catch (\Exception) { + // Fall through to defaults. } - try { - // Determine task type based on configuration and presence of source images. - $task_type = $this->configuration['task_type'] ?? 'TEXT_IMAGE'; + $limits = [ + 'amazon.nova-lite-v1:0' => 300000, + 'amazon.nova-pro-v1:0' => 300000, + 'amazon.nova-micro-v1:0' => 128000, + 'anthropic.claude-3-5-sonnet-20241022-v2:0' => 200000, + 'anthropic.claude-3-5-sonnet-20240620-v1:0' => 200000, + 'anthropic.claude-3-opus-20240229-v1:0' => 200000, + 'anthropic.claude-3-sonnet-20240229-v1:0' => 200000, + 'anthropic.claude-3-haiku-20240307-v1:0' => 200000, + 'amazon.titan-embed-text-v2:0' => 8192, + 'amazon.titan-embed-text-v1' => 8192, + ]; + return $limits[$model_id] ?? 100000; + } - // Auto-detect: if images provided but task type is TEXT_IMAGE, switch to IMAGE_VARIATION. - if (!empty($source_images) && $task_type === 'TEXT_IMAGE') { - $task_type = 'IMAGE_VARIATION'; + /** + * {@inheritdoc} + */ + public function getMaxOutputTokens(string $model_id): int { + try { + $model_details = $this->modelsService->getModelDetails($model_id); + if ($model_details && isset($model_details['maxOutputTokens'])) { + return (int) $model_details['maxOutputTokens']; } + } + catch (\Exception) { + // Fall through to defaults. + } - // Build image generation request for Nova Canvas. - $payload = [ + $limits = [ + 'amazon.nova-lite-v1:0' => 5000, + 'amazon.nova-pro-v1:0' => 5000, + 'amazon.nova-micro-v1:0' => 5000, + 'anthropic.claude-3-5-sonnet-20241022-v2:0' => 8192, + 'anthropic.claude-3-5-sonnet-20240620-v1:0' => 8192, + 'anthropic.claude-3-opus-20240229-v1:0' => 4096, + 'anthropic.claude-3-sonnet-20240229-v1:0' => 4096, + 'anthropic.claude-3-haiku-20240307-v1:0' => 4096, + 'amazon.titan-embed-text-v2:0' => 0, + 'amazon.titan-embed-text-v1' => 0, + ]; + return $limits[$model_id] ?? 16384; + } + + /** + * {@inheritdoc} + */ + public function textToImage(string|TextToImageInput $input, string $model_id, array $tags = []): TextToImageOutput { + $this->ensureAuthenticated(); + $prompt = ''; + $source_images = []; + + if ($input instanceof TextToImageInput) { + $prompt = $input->getText(); + if (method_exists($input, 'getImages') && !empty($input->getImages())) { + foreach ($input->getImages() as $image) { + $source_images[] = base64_encode($image->getBinary()); + } + } + } + else { + $prompt = $input; + } + + try { + $task_type = $this->configuration['task_type'] ?? 'TEXT_IMAGE'; + if (!empty($source_images) && $task_type === 'TEXT_IMAGE') { + $task_type = 'IMAGE_VARIATION'; + } + + $payload = [ 'modelId' => $model_id, 'taskType' => $task_type, 'imageGenerationConfig' => [], ]; - // Build task-specific parameters. switch ($task_type) { case 'TEXT_IMAGE': - $payload['textToImageParams'] = [ - 'text' => $prompt, - ]; - - // Add style if specified (Nova Canvas visual styles) + $payload['textToImageParams'] = ['text' => $prompt]; if (!empty($this->configuration['style'])) { $payload['textToImageParams']['style'] = $this->configuration['style']; } - - // Add negative prompt if specified (what NOT to include) if (!empty($this->configuration['negativePrompt'])) { $payload['textToImageParams']['negativeText'] = $this->configuration['negativePrompt']; } @@ -699,13 +764,10 @@ public function textToImage(string|TextToImageInput $input, string $model_id, ar if (empty($source_images)) { throw new \InvalidArgumentException('IMAGE_VARIATION requires source image(s)'); } - $payload['imageVariationParams'] = [ 'images' => $source_images, 'text' => $prompt ?: 'Generate a variation of this image', ]; - - // Similarity strength (0.2-1.0, higher = more similar to original) if (isset($this->configuration['similarity_strength'])) { $payload['imageVariationParams']['similarityStrength'] = (float) $this->configuration['similarity_strength']; } @@ -715,13 +777,10 @@ public function textToImage(string|TextToImageInput $input, string $model_id, ar if (empty($source_images)) { throw new \InvalidArgumentException('INPAINTING requires source image and mask'); } - $payload['inPaintingParams'] = [ 'image' => $source_images[0], 'text' => $prompt ?: 'Fill the masked region', ]; - - // Mask image is typically the second image. if (isset($source_images[1])) { $payload['inPaintingParams']['maskImage'] = $source_images[1]; } @@ -731,13 +790,10 @@ public function textToImage(string|TextToImageInput $input, string $model_id, ar if (empty($source_images)) { throw new \InvalidArgumentException('OUTPAINTING requires source image'); } - $payload['outPaintingParams'] = [ 'image' => $source_images[0], 'text' => $prompt ?: 'Extend the image borders', ]; - - // Optional mask prompt for directional expansion. if (isset($this->configuration['mask_prompt'])) { $payload['outPaintingParams']['maskPrompt'] = $this->configuration['mask_prompt']; } @@ -747,64 +803,16 @@ public function textToImage(string|TextToImageInput $input, string $model_id, ar if (empty($source_images)) { throw new \InvalidArgumentException('BACKGROUND_REMOVAL requires source image'); } - - $payload['backgroundRemovalParams'] = [ - 'image' => $source_images[0], - ]; + $payload['backgroundRemovalParams'] = ['image' => $source_images[0]]; break; default: throw new \InvalidArgumentException("Unsupported task type: {$task_type}"); } - // Add optional configuration from provider settings. - if (isset($this->configuration['width']) && isset($this->configuration['height'])) { - $payload['imageGenerationConfig']['width'] = (int) $this->configuration['width']; - $payload['imageGenerationConfig']['height'] = (int) $this->configuration['height']; - } - elseif (isset($this->configuration['resolution'])) { - // Handle 'custom' resolution option. - if ($this->configuration['resolution'] === 'custom') { - if (isset($this->configuration['custom_width']) && isset($this->configuration['custom_height'])) { - $payload['imageGenerationConfig']['width'] = (int) $this->configuration['custom_width']; - $payload['imageGenerationConfig']['height'] = (int) $this->configuration['custom_height']; - } - } - else { - // Support resolution format like "1024x1024". - $parts = explode('x', $this->configuration['resolution']); - if (count($parts) === 2) { - $payload['imageGenerationConfig']['width'] = (int) $parts[0]; - $payload['imageGenerationConfig']['height'] = (int) $parts[1]; - } - } - } - - if (isset($this->configuration['quality'])) { - $payload['imageGenerationConfig']['quality'] = $this->configuration['quality']; - } + $this->applyImageGenerationConfig($payload); - if (isset($this->configuration['numberOfImages'])) { - $payload['imageGenerationConfig']['numberOfImages'] = (int) $this->configuration['numberOfImages']; - } - - if (isset($this->configuration['cfgScale'])) { - $payload['imageGenerationConfig']['cfgScale'] = (float) $this->configuration['cfgScale']; - } - - if (isset($this->configuration['seed']) && $this->configuration['seed'] !== NULL) { - $payload['imageGenerationConfig']['seed'] = (int) $this->configuration['seed']; - } - - // Nova Canvas requires specific regions (us-east-1, ap-northeast-1, eu-west-1) - // Default to us-east-1 if not specified. - if (isset($this->configuration['nova_canvas_region'])) { - $payload['region'] = $this->configuration['nova_canvas_region']; - } - - // Call image generation API with extended timeout (image generation can take 10-30s) $response = $this->client->post('image-generation', $payload, [ - // 60s timeout for image generation (handles premium + multiple images) 'timeout' => 60, 'connect_timeout' => 10, ]); @@ -813,37 +821,7 @@ public function textToImage(string|TextToImageInput $input, string $model_id, ar throw new \RuntimeException('No images returned from API'); } - // API returns compressed thumbnail data URLs (data:image/jpeg;base64,...) - // Extract actual base64 data for Drupal. - $images = []; - foreach ($response['images'] as $index => $data_url) { - // Check if it's a data URL or raw base64. - if (str_starts_with($data_url, 'data:image/')) { - // Extract base64 from data URL: data:image/jpeg;base64,. - $parts = explode(',', $data_url, 2); - $base64_data = $parts[1] ?? $data_url; - } - else { - // Already raw base64. - $base64_data = $data_url; - } - - $image_data = base64_decode($base64_data); - - // Determine format from data URL MIME type or default to JPEG (thumbnails are JPEG) - $format = 'jpeg'; - if (str_contains($data_url, 'image/png')) { - $format = 'png'; - } - - $images[] = new ImageFile( - $image_data, - "image/{$format}", - "generated-{$index}.{$format}" - ); - } - - return new TextToImageOutput($images, $response, []); + return new TextToImageOutput($this->decodeGeneratedImages($response['images'], 'generated'), $response, []); } catch (\Exception $e) { throw new \RuntimeException('Image generation failed: ' . $e->getMessage(), 0, $e); @@ -854,13 +832,12 @@ public function textToImage(string|TextToImageInput $input, string $model_id, ar * {@inheritdoc} */ public function imageToImage(ImageToImageInput|array|string $input, string $model_id, array $tags = []): ImageToImageOutput { - // Normalize input and extract images. + $this->ensureAuthenticated(); $prompt = ''; $source_images = []; + $mask_image = NULL; if ($input instanceof ImageToImageInput) { - // ImageToImageInput might use getPrompt() instead of getText() - $prompt = ''; if (method_exists($input, 'getPrompt')) { $prompt = $input->getPrompt() ?? ''; } @@ -868,38 +845,25 @@ public function imageToImage(ImageToImageInput|array|string $input, string $mode $prompt = $input->getText() ?? ''; } - // Get the source image using getImageFile() if (method_exists($input, 'getImageFile') && $input->getImageFile()) { - $image = $input->getImageFile(); - $source_images[] = base64_encode($image->getBinary()); + $source_images[] = base64_encode($input->getImageFile()->getBinary()); } - - // Get the mask image if present (for INPAINTING) - $mask_image = NULL; if (method_exists($input, 'getMask') && $input->getMask()) { - $mask = $input->getMask(); - $mask_image = base64_encode($mask->getBinary()); + $mask_image = base64_encode($input->getMask()->getBinary()); } } elseif (is_array($input)) { - // Handle array input format (from forms/AJAX) $prompt = $input['prompt'] ?? $input['text'] ?? ''; - - // Extract images from array. - if (isset($input['images']) && is_array($input['images'])) { - foreach ($input['images'] as $image) { - if ($image instanceof ImageFile) { - $source_images[] = base64_encode($image->getBinary()); - } - elseif (is_string($image)) { - // Already base64 encoded. - $source_images[] = $image; - } + foreach ($input['images'] ?? [] as $image) { + if ($image instanceof ImageFile) { + $source_images[] = base64_encode($image->getBinary()); + } + elseif (is_string($image)) { + $source_images[] = $image; } } } else { - // String input is just the prompt. $prompt = $input; } @@ -908,25 +872,20 @@ public function imageToImage(ImageToImageInput|array|string $input, string $mode } try { - // Determine task type from configuration (default to IMAGE_VARIATION) $task_type = $this->configuration['task_type'] ?? 'IMAGE_VARIATION'; - // Build image generation request for Nova Canvas. $payload = [ 'modelId' => $model_id, 'taskType' => $task_type, 'imageGenerationConfig' => [], ]; - // Build task-specific parameters. switch ($task_type) { case 'IMAGE_VARIATION': $payload['imageVariationParams'] = [ 'images' => $source_images, 'text' => $prompt ?: 'Generate a variation of this image', ]; - - // Similarity strength (0.2-1.0, higher = more similar to original) if (isset($this->configuration['similarity_strength'])) { $payload['imageVariationParams']['similarityStrength'] = (float) $this->configuration['similarity_strength']; } @@ -937,9 +896,7 @@ public function imageToImage(ImageToImageInput|array|string $input, string $mode 'image' => $source_images[0], 'text' => $prompt ?: 'Fill the masked region', ]; - - // Use mask from input if available, otherwise try second image. - if (isset($mask_image)) { + if ($mask_image !== NULL) { $payload['inPaintingParams']['maskImage'] = $mask_image; } elseif (isset($source_images[1])) { @@ -952,70 +909,22 @@ public function imageToImage(ImageToImageInput|array|string $input, string $mode 'image' => $source_images[0], 'text' => $prompt ?: 'Extend the image borders', ]; - - // Optional mask prompt for directional expansion. if (isset($this->configuration['mask_prompt'])) { $payload['outPaintingParams']['maskPrompt'] = $this->configuration['mask_prompt']; } break; case 'BACKGROUND_REMOVAL': - $payload['backgroundRemovalParams'] = [ - 'image' => $source_images[0], - ]; + $payload['backgroundRemovalParams'] = ['image' => $source_images[0]]; break; default: throw new \InvalidArgumentException("Unsupported task type for image-to-image: {$task_type}"); } - // Add optional configuration from provider settings. - if (isset($this->configuration['width']) && isset($this->configuration['height'])) { - $payload['imageGenerationConfig']['width'] = (int) $this->configuration['width']; - $payload['imageGenerationConfig']['height'] = (int) $this->configuration['height']; - } - elseif (isset($this->configuration['resolution'])) { - // Handle 'custom' resolution option. - if ($this->configuration['resolution'] === 'custom') { - if (isset($this->configuration['custom_width']) && isset($this->configuration['custom_height'])) { - $payload['imageGenerationConfig']['width'] = (int) $this->configuration['custom_width']; - $payload['imageGenerationConfig']['height'] = (int) $this->configuration['custom_height']; - } - } - else { - // Support resolution format like "1024x1024". - $parts = explode('x', $this->configuration['resolution']); - if (count($parts) === 2) { - $payload['imageGenerationConfig']['width'] = (int) $parts[0]; - $payload['imageGenerationConfig']['height'] = (int) $parts[1]; - } - } - } - - if (isset($this->configuration['quality'])) { - $payload['imageGenerationConfig']['quality'] = $this->configuration['quality']; - } - - if (isset($this->configuration['numberOfImages'])) { - $payload['imageGenerationConfig']['numberOfImages'] = (int) $this->configuration['numberOfImages']; - } - - if (isset($this->configuration['cfgScale'])) { - $payload['imageGenerationConfig']['cfgScale'] = (float) $this->configuration['cfgScale']; - } - - if (isset($this->configuration['seed']) && $this->configuration['seed'] !== NULL) { - $payload['imageGenerationConfig']['seed'] = (int) $this->configuration['seed']; - } - - // Nova Canvas requires specific regions (us-east-1, ap-northeast-1, eu-west-1) - if (isset($this->configuration['nova_canvas_region'])) { - $payload['region'] = $this->configuration['nova_canvas_region']; - } + $this->applyImageGenerationConfig($payload); - // Call image generation API with extended timeout. $response = $this->client->post('image-generation', $payload, [ - // 60s timeout for image generation 'timeout' => 60, 'connect_timeout' => 10, ]); @@ -1024,33 +933,7 @@ public function imageToImage(ImageToImageInput|array|string $input, string $mode throw new \RuntimeException('No images returned from API'); } - // API returns compressed thumbnail data URLs (data:image/jpeg;base64,...) - // Extract actual base64 data for Drupal. - $images = []; - foreach ($response['images'] as $index => $data_url) { - // Check if it's a data URL or raw base64. - if (str_starts_with($data_url, 'data:image/')) { - // Extract base64 from data URL: data:image/jpeg;base64,. - $parts = explode(',', $data_url, 2); - $base64_data = $parts[1] ?? $data_url; - } - else { - $base64_data = $data_url; - } - - $image_data = base64_decode($base64_data); - - // Determine format (JPEG for thumbnails, PNG for originals) - $format = str_contains($data_url, 'image/png') ? 'png' : 'jpeg'; - - $images[] = new ImageFile( - $image_data, - "image/{$format}", - "variation-{$index}.{$format}" - ); - } - - return new ImageToImageOutput($images, $response, []); + return new ImageToImageOutput($this->decodeGeneratedImages($response['images'], 'variation'), $response, []); } catch (\Exception $e) { throw new \RuntimeException('Image-to-image generation failed: ' . $e->getMessage(), 0, $e); @@ -1058,474 +941,100 @@ public function imageToImage(ImageToImageInput|array|string $input, string $mode } /** - * {@inheritdoc} - */ - public function requiresImageToImageMask(string $model_id): bool { - // Get the configured task type. - $task_type = $this->configuration['task_type'] ?? 'IMAGE_VARIATION'; - - // INPAINTING requires a mask image. - return $task_type === 'INPAINTING'; - } - - /** - * {@inheritdoc} - */ - public function hasImageToImageMask(string $model_id): bool { - // Nova Canvas supports mask-based operations (INPAINTING) - return TRUE; - } - - /** - * {@inheritdoc} - */ - public function requiresImageToImagePrompt(string $model_id): bool { - // Get the configured task type. - $task_type = $this->configuration['task_type'] ?? 'IMAGE_VARIATION'; - - // BACKGROUND_REMOVAL doesn't require a prompt, others do (or benefit from one) - return $task_type !== 'BACKGROUND_REMOVAL'; - } - - /** - * {@inheritdoc} - */ - public function hasImageToImagePrompt(string $model_id): bool { - // Nova Canvas supports text prompts for all image-to-image operations. - return TRUE; - } - - /** - * {@inheritdoc} - */ - public function setAuthentication(mixed $authentication): void { - // Store authentication for later use by the client - // The authentication is passed to the HTTP client via headers. - $this->configuration['authentication'] = $authentication; - } - - /** - * Sets whether to use streaming. - * - * @param bool $streamed - * TRUE to use streaming, FALSE otherwise. - */ - public function setStreamed(bool $streamed): void { - $this->streamed = $streamed; - } - - /** - * {@inheritdoc} - */ - public function getModelSettings(string $model_id, array $generalConfig = []): array { - // Return model-specific configuration - // For now, return the general config as-is - // Future: fetch model-specific limits from the API. - return $generalConfig; - } - - /** - * {@inheritdoc} - */ - public function maxEmbeddingsInput(string $model_id = ''): int { - // Maximum number of texts that can be embedded in a single request - // This is a reasonable default for most models. - return 96; - } - - /** - * {@inheritdoc} - */ - public function embeddingsVectorSize(string $model_id): int { - // Return known dimensions for supported embedding models. - // This is more reliable than calling the API to figure it out. - $dimensions = [ - 'amazon.titan-embed-text-v2:0' => 1024, - 'amazon.titan-embed-text-v1' => 1536, - 'cohere.embed-english-v3' => 1024, - 'cohere.embed-multilingual-v3' => 1024, - ]; - - return $dimensions[$model_id] ?? 1024; - } - - /** - * {@inheritdoc} - */ - public function getMaxInputTokens(string $model_id): int { - // Try to get from API first. - try { - $model_details = $this->modelsService->getModelDetails($model_id); - if ($model_details && isset($model_details['contextWindow'])) { - return (int) $model_details['contextWindow']; - } - } - catch (\Exception $e) { - // Fall through to defaults. - } - - // Fallback to hardcoded limits if API unavailable. - $limits = [ - 'amazon.nova-lite-v1:0' => 300000, - 'amazon.nova-pro-v1:0' => 300000, - 'amazon.nova-micro-v1:0' => 128000, - 'anthropic.claude-3-5-sonnet-20241022-v2:0' => 200000, - 'anthropic.claude-3-5-sonnet-20240620-v1:0' => 200000, - 'anthropic.claude-3-opus-20240229-v1:0' => 200000, - 'anthropic.claude-3-sonnet-20240229-v1:0' => 200000, - 'anthropic.claude-3-haiku-20240307-v1:0' => 200000, - 'amazon.titan-embed-text-v2:0' => 8192, - 'amazon.titan-embed-text-v1' => 8192, - ]; - - return $limits[$model_id] ?? 100000; - } - - /** - * {@inheritdoc} + * Apply shared image-generation configuration options to a request payload. */ - public function getMaxOutputTokens(string $model_id): int { - // Try to get from API first. - try { - $model_details = $this->modelsService->getModelDetails($model_id); - if ($model_details && isset($model_details['maxOutputTokens'])) { - return (int) $model_details['maxOutputTokens']; - } + protected function applyImageGenerationConfig(array &$payload): void { + if (isset($this->configuration['width'], $this->configuration['height'])) { + $payload['imageGenerationConfig']['width'] = (int) $this->configuration['width']; + $payload['imageGenerationConfig']['height'] = (int) $this->configuration['height']; } - catch (\Exception $e) { - // Fall through to defaults. - } - - // Fallback to hardcoded limits if API unavailable. - $limits = [ - 'amazon.nova-lite-v1:0' => 5000, - 'amazon.nova-pro-v1:0' => 5000, - 'amazon.nova-micro-v1:0' => 5000, - 'anthropic.claude-3-5-sonnet-20241022-v2:0' => 8192, - 'anthropic.claude-3-5-sonnet-20240620-v1:0' => 8192, - 'anthropic.claude-3-opus-20240229-v1:0' => 4096, - 'anthropic.claude-3-sonnet-20240229-v1:0' => 4096, - 'anthropic.claude-3-haiku-20240307-v1:0' => 4096, - 'amazon.titan-embed-text-v2:0' => 0, - 'amazon.titan-embed-text-v1' => 0, - ]; - - return $limits[$model_id] ?? 16384; - } - - /** - * Format chat messages for API. - * - * Supports multimodal content (images, videos, documents) for Nova models. - * Converts Drupal AI's tool_result role to Bedrock's user role with toolResult content block. - * Formats assistant messages with tool calls to include toolUse content blocks. - */ - protected function formatMessages(array $messages): array { - $formatted = []; - - foreach ($messages as $message) { - if ($message instanceof ChatMessage) { - $role = $message->getRole(); - $content = $message->getText(); - - // Handle assistant messages with tool calls first - // Include toolUse blocks in content array for conversation continuity - // (Must check before tool_result since assistant messages should not be converted) - $tools = $message->getTools(); - if ($role === 'assistant' && !empty($tools)) { - $content_blocks = []; - - // Add text content first if present. - if ($content) { - $content_blocks[] = ['text' => $content]; - } - - // Add tool use blocks - use getRenderedTools() like AWS Bedrock provider. - $tool_uses = $message->getRenderedTools(); - foreach ($tool_uses as $tool_use) { - $content_blocks[] = [ - 'toolUse' => [ - 'toolUseId' => $tool_use['id'], - 'name' => $tool_use['function']['name'], - // AWS wants the structured object, not the string. - 'input' => Json::decode($tool_use['function']['arguments']), - ], - ]; - } - - $formatted[] = [ - 'role' => 'assistant', - 'content' => $content_blocks, - ]; - continue; + elseif (isset($this->configuration['resolution'])) { + if ($this->configuration['resolution'] === 'custom') { + if (isset($this->configuration['custom_width'], $this->configuration['custom_height'])) { + $payload['imageGenerationConfig']['width'] = (int) $this->configuration['custom_width']; + $payload['imageGenerationConfig']['height'] = (int) $this->configuration['custom_height']; } - - // Handle tool_result messages - convert to Bedrock format - // Drupal AI uses role "tool" or has toolsId set, Bedrock expects role "user" with toolResult content block - // Match AWS Bedrock provider pattern: (role === 'tool' || getToolsId()) && role !== 'assistant'. - if (($role === 'tool' || $role === 'tool_result' || $message->getToolsId()) && $role !== 'assistant') { - $formatted[] = [ - 'role' => 'user', - 'content' => [ - [ - 'toolResult' => [ - 'toolUseId' => $message->getToolsId(), - 'content' => [ - // Need to set text to tool result, if empty use placeholder. - ['text' => $content !== '' ? $content : 'Tool Result'], - ], - ], - ], - ], - ]; - continue; - } - - // Check if message has images (multimodal content) - // Drupal AI module uses getImages() returning ImageFile objects. - $images = method_exists($message, 'getImages') ? $message->getImages() : []; - $has_images = !empty($images); - - if ($has_images) { - // Build multimodal content array. - $content_blocks = []; - - // Add images first. - foreach ($images as $image) { - $content_blocks[] = $this->formatImageFile($image); - } - - // Add text prompt last. - if ($message->getText()) { - $content_blocks[] = ['text' => $message->getText()]; - } - - $formatted[] = [ - 'role' => $role, - 'content' => $content_blocks, - ]; - } - else { - // Simple text message. - $formatted[] = [ - 'role' => $role, - 'content' => $content, - ]; + } + else { + $parts = explode('x', $this->configuration['resolution']); + if (count($parts) === 2) { + $payload['imageGenerationConfig']['width'] = (int) $parts[0]; + $payload['imageGenerationConfig']['height'] = (int) $parts[1]; } } } - return $formatted; - } - - /** - * Format an ImageFile for multimodal API request. - * - * @param \Drupal\ai\OperationType\GenericType\ImageFile $image - * The ImageFile object from Drupal AI. - * - * @return array - * Formatted content block for the API. - */ - protected function formatImageFile($image): array { - $mime_type = $image->getMimeType(); - $binary = $image->getBinary(); - - // Extract format from MIME type. - // Default. - $format = 'jpeg'; - if (str_contains($mime_type, 'png')) { - $format = 'png'; + if (isset($this->configuration['quality'])) { + $payload['imageGenerationConfig']['quality'] = $this->configuration['quality']; } - elseif (str_contains($mime_type, 'gif')) { - $format = 'gif'; + if (isset($this->configuration['numberOfImages'])) { + $payload['imageGenerationConfig']['numberOfImages'] = (int) $this->configuration['numberOfImages']; } - elseif (str_contains($mime_type, 'webp')) { - $format = 'webp'; + if (isset($this->configuration['cfgScale'])) { + $payload['imageGenerationConfig']['cfgScale'] = (float) $this->configuration['cfgScale']; + } + if (isset($this->configuration['seed']) && $this->configuration['seed'] !== NULL) { + $payload['imageGenerationConfig']['seed'] = (int) $this->configuration['seed']; + } + if (isset($this->configuration['nova_canvas_region'])) { + $payload['region'] = $this->configuration['nova_canvas_region']; } - - // Return Bedrock/Claude image format. - return [ - 'image' => [ - 'format' => $format, - 'source' => ['bytes' => base64_encode($binary)], - ], - ]; } /** - * Format an attachment for multimodal API request. - * - * @param mixed $attachment - * The attachment object from Drupal AI. + * Decode dashboard image-generation results (base64 or data URLs). * - * @return array - * Formatted content block for the API. + * @return \Drupal\ai\OperationType\GenericType\ImageFile[] */ - protected function formatAttachment($attachment): array { - // Determine attachment type. - $type = $attachment['type'] ?? 'image'; - $mime_type = $attachment['mime_type'] ?? $attachment['mimeType'] ?? ''; - - // Extract format from MIME type. - // Default. - $format = 'jpeg'; - if (str_contains($mime_type, 'png')) { - $format = 'png'; - } - elseif (str_contains($mime_type, 'gif')) { - $format = 'gif'; - } - elseif (str_contains($mime_type, 'webp')) { - $format = 'webp'; - } - elseif (str_contains($mime_type, 'mp4')) { - $format = 'mp4'; - } - elseif (str_contains($mime_type, 'quicktime')) { - $format = 'mov'; - } - elseif (str_contains($mime_type, 'webm')) { - $format = 'webm'; - } - elseif (str_contains($mime_type, 'pdf')) { - $format = 'pdf'; - } - - // Handle different attachment sources. - if (isset($attachment['uri'])) { - // S3 URI or file path. - if (str_starts_with($attachment['uri'], 's3://')) { - // S3 URI - use directly. - return $this->formatS3Content($type, $format, $attachment['uri'], $attachment['name'] ?? NULL); + protected function decodeGeneratedImages(array $imageResults, string $prefix): array { + $images = []; + foreach ($imageResults as $index => $data_url) { + if (str_starts_with($data_url, 'data:image/')) { + $parts = explode(',', $data_url, 2); + $base64_data = $parts[1] ?? $data_url; } else { - // Local file - convert to base64. - $file_contents = file_get_contents($attachment['uri']); - if ($file_contents !== FALSE) { - $base64 = base64_encode($file_contents); - return $this->formatBase64Content($type, $format, $base64, $attachment['name'] ?? NULL); - } + $base64_data = $data_url; } - } - elseif (isset($attachment['data']) || isset($attachment['base64'])) { - // Base64 encoded data. - $base64 = $attachment['base64'] ?? $attachment['data']; - return $this->formatBase64Content($type, $format, $base64, $attachment['name'] ?? NULL); - } - // Fallback to empty text block. - return ['text' => '']; + $image_data = base64_decode($base64_data); + $format = str_contains($data_url, 'image/png') ? 'png' : 'jpeg'; + + $images[] = new ImageFile( + $image_data, + "image/{$format}", + "{$prefix}-{$index}.{$format}", + ); + } + return $images; } /** - * Format base64 content block. + * {@inheritdoc} */ - protected function formatBase64Content(string $type, string $format, string $base64, ?string $name = NULL): array { - if ($type === 'image') { - return [ - 'image' => [ - 'format' => $format, - 'source' => ['bytes' => $base64], - ], - ]; - } - elseif ($type === 'video') { - return [ - 'video' => [ - 'format' => $format, - 'source' => ['bytes' => $base64], - ], - ]; - } - elseif ($type === 'document') { - return [ - 'document' => [ - 'format' => $format, - 'name' => $name ?? 'document.' . $format, - 'source' => ['bytes' => $base64], - ], - ]; - } - - return ['text' => '']; + public function requiresImageToImageMask(string $model_id): bool { + return ($this->configuration['task_type'] ?? 'IMAGE_VARIATION') === 'INPAINTING'; } /** - * Format S3 URI content block. + * {@inheritdoc} */ - protected function formatS3Content(string $type, string $format, string $uri, ?string $name = NULL): array { - if ($type === 'image') { - return [ - 'image' => [ - 'format' => $format, - 'source' => [ - 's3Location' => ['uri' => $uri], - ], - ], - ]; - } - elseif ($type === 'video') { - return [ - 'video' => [ - 'format' => $format, - 'source' => [ - 's3Location' => ['uri' => $uri], - ], - ], - ]; - } - elseif ($type === 'document') { - return [ - 'document' => [ - 'format' => $format, - 'name' => $name ?? 'document.' . $format, - 'source' => [ - 's3Location' => ['uri' => $uri], - ], - ], - ]; - } - - return ['text' => '']; + public function hasImageToImageMask(string $model_id): bool { + return TRUE; } /** - * Convert Drupal ToolsInput to Quant Cloud API format. - * - * @param \Drupal\ai\OperationType\Chat\ToolsInput $tools_input - * Drupal tools input. - * - * @return array - * API-formatted tools. + * {@inheritdoc} */ - protected function formatToolsForApi($tools_input): array { - // Use renderToolsArray() like AWS Bedrock provider does. - $tools = $tools_input->renderToolsArray(); - $api_tools = []; - - foreach ($tools as $tool) { - $tool_spec = $tool['function']; - - // Map 'parameters' to 'inputSchema'. - if (isset($tool['function']['parameters'])) { - $tool_spec['inputSchema']['json'] = $tool['function']['parameters']; - } - else { - $tool_spec['inputSchema']['json'] = [ - 'type' => 'object', - ]; - } - - // Remove 'parameters' as we've moved it to inputSchema. - unset($tool_spec['parameters']); - - $api_tools[] = [ - 'toolSpec' => $tool_spec, - ]; - } + public function requiresImageToImagePrompt(string $model_id): bool { + return ($this->configuration['task_type'] ?? 'IMAGE_VARIATION') !== 'BACKGROUND_REMOVAL'; + } - return $api_tools; + /** + * {@inheritdoc} + */ + public function hasImageToImagePrompt(string $model_id): bool { + return TRUE; } } From 87b2d2a06ff5093f77555820f9a8c3d2bfa2b0ac Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 17:27:13 +1000 Subject: [PATCH 05/10] fix(security): gate error body logging behind enable_logging flag Upstream error responses can echo back prompt/tool inputs that may include PROTECTED data in government deployments. Always log status and reason, but only include the response body when advanced.enable_logging is TRUE. When disabled, emit a redacted placeholder pointing operators at the flag. Also truncate body to 500 chars (down from 2000). Touches: - QuantCloudClient::post() and ::get() - QuantCloudStreamingClient::chatStreamRaw() and ::chatStream() --- src/Client/QuantCloudClient.php | 16 ++++++++++++++-- src/Client/QuantCloudStreamingClient.php | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/Client/QuantCloudClient.php b/src/Client/QuantCloudClient.php index eacadd7..9606bfc 100644 --- a/src/Client/QuantCloudClient.php +++ b/src/Client/QuantCloudClient.php @@ -238,6 +238,13 @@ public function post(string $path, array $data, array $request_options = []): ar $reason = $e->getResponse()->getReasonPhrase(); $body = (string) $e->getResponse()->getBody(); } + // Government deployments may have PROTECTED data in prompts that flow + // back through upstream error responses. Only log the body when the + // operator has opted in via advanced.enable_logging; otherwise emit a + // hint pointing them at the flag. + $log_body = $config->get('advanced.enable_logging') + ? mb_substr($body, 0, 500) + : ''; $this->logger->error( 'Quant Dashboard AI request failed for @path after @timeout seconds (status: @status @reason). Response: @body', [ @@ -245,7 +252,7 @@ public function post(string $path, array $data, array $request_options = []): ar '@timeout' => $timeout, '@status' => $status ?? 'n/a', '@reason' => $reason ?? 'transport error', - '@body' => mb_substr($body, 0, 2000), + '@body' => $log_body, ] ); throw new \RuntimeException('AI API request failed (status: ' . ($status ?? 'n/a') . ')', 0, $e); @@ -420,10 +427,15 @@ public function get(string $path, array $query_params = []): array { $reason = $e->getResponse()->getReasonPhrase(); $body = (string) $e->getResponse()->getBody(); } + // Government deployments may have PROTECTED data echoed in upstream + // error responses; gate the body behind advanced.enable_logging. + $log_body = $config->get('advanced.enable_logging') + ? mb_substr($body, 0, 500) + : ''; $this->logger->error('Quant Dashboard AI request failed (status: @status @reason). Response: @body', [ '@status' => $status ?? 'n/a', '@reason' => $reason ?? 'transport error', - '@body' => mb_substr($body, 0, 2000), + '@body' => $log_body, ]); throw new \RuntimeException('AI API request failed (status: ' . ($status ?? 'n/a') . ')', 0, $e); } diff --git a/src/Client/QuantCloudStreamingClient.php b/src/Client/QuantCloudStreamingClient.php index fab59b7..bce77ee 100644 --- a/src/Client/QuantCloudStreamingClient.php +++ b/src/Client/QuantCloudStreamingClient.php @@ -86,9 +86,15 @@ public function chatStreamRaw(array $messages, string $model_id, array $options if ($e instanceof \GuzzleHttp\Exception\RequestException && $e->getResponse()) { $body = (string) $e->getResponse()->getBody(); } + // Government deployments may have PROTECTED data in prompts that flow + // back through upstream error bodies; only log the body when the + // operator has opted in via advanced.enable_logging. + $log_body = $config->get('advanced.enable_logging') + ? mb_substr($body, 0, 500) + : ''; $this->logger->error('Streaming request failed: @message body=@body', [ '@message' => $e->getMessage(), - '@body' => mb_substr($body, 0, 2000), + '@body' => $log_body, ]); throw new \RuntimeException('Streaming failed: ' . $e->getMessage(), 0, $e); } @@ -210,9 +216,15 @@ public function chatStream(array $messages, string $model_id, callable $callback if ($e instanceof \GuzzleHttp\Exception\RequestException && $e->getResponse()) { $body = (string) $e->getResponse()->getBody(); } + // Government deployments may have PROTECTED data in prompts that flow + // back through upstream error bodies; only log the body when the + // operator has opted in via advanced.enable_logging. + $log_body = $config->get('advanced.enable_logging') + ? mb_substr($body, 0, 500) + : ''; $this->logger->error('Streaming request failed: @message body=@body', [ '@message' => $e->getMessage(), - '@body' => mb_substr($body, 0, 2000), + '@body' => $log_body, ]); throw new \RuntimeException('Streaming failed: ' . $e->getMessage(), 0, $e); } From 151726e6126e523710f5a44e30d041e6ccb2c33a Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 17:29:01 +1000 Subject: [PATCH 06/10] chore: address review nits (strict_types, docblocks, line endings) - Add declare(strict_types=1) to QuantCloudStreamingClient. - Document that convertMessage() intentionally drops images on tool-result branches; revisit if AI Agents emits combined shapes. - Add @todo against ModelsService::getFallbackModels() noting that the hardcoded Bedrock caps go stale and should ideally be fetched from a dedicated dashboard endpoint. - Explain in QuantCloudProvider::getSupportedCapabilities() why ChatFiberSupport is intentionally omitted (Bedrock-shape wire format vs upstream's OpenAI-shape Fiber pump). - Promote QuantCloudStreamingClient::MAX_SSE_DECODE_WARNINGS to public and reuse it from QuantCloudChatMessageIterator (was duplicating the literal 3). - Document setter (not accumulator) semantics of the upstream StreamedChatMessage token-usage methods on applyUsage(). - Tighten readLine() to honour SSE-spec line endings: also break on bare \r and strip a trailing \r after \n. - Strengthen StreamedToolCall class docblock with explicit file+line citations of the assembleToolCalls() contract it satisfies. --- src/Client/QuantCloudStreamingClient.php | 4 ++- src/Plugin/AiProvider/QuantCloudProvider.php | 14 +++++++---- src/QuantCloudChatMessageIterator.php | 16 ++++++++++-- src/Service/ModelsService.php | 4 +++ src/StreamedToolCall.php | 26 ++++++++++++++++---- 5 files changed, 51 insertions(+), 13 deletions(-) diff --git a/src/Client/QuantCloudStreamingClient.php b/src/Client/QuantCloudStreamingClient.php index bce77ee..a4f075b 100644 --- a/src/Client/QuantCloudStreamingClient.php +++ b/src/Client/QuantCloudStreamingClient.php @@ -1,5 +1,7 @@ getToolsId()) { return [ 'role' => 'user', diff --git a/src/QuantCloudChatMessageIterator.php b/src/QuantCloudChatMessageIterator.php index 8c3f8c0..bd490d0 100644 --- a/src/QuantCloudChatMessageIterator.php +++ b/src/QuantCloudChatMessageIterator.php @@ -6,6 +6,7 @@ use Drupal\Component\Serialization\Json; use Drupal\ai\OperationType\Chat\StreamedChatMessageIterator; +use Drupal\ai_provider_quant_cloud\Client\QuantCloudStreamingClient; use Drupal\ai_provider_quant_cloud\StreamedToolCall; use Psr\Http\Message\StreamInterface; use Psr\Log\LoggerInterface; @@ -68,7 +69,7 @@ public static function create(StreamInterface $stream, LoggerInterface $logger): */ public function doIterate(): \Generator { $decodeWarnings = 0; - $maxWarnings = 3; + $maxWarnings = QuantCloudStreamingClient::MAX_SSE_DECODE_WARNINGS; while (!$this->stream->eof()) { $line = $this->readLine(); @@ -244,6 +245,13 @@ protected function renderToolCall(string $toolId, string $name, array $arguments * `totalTokens` on the summary frame; the base class consumes these per * chunk to populate the final TokenUsageDto. * + * Note: the StreamedChatMessage setters + * (setInputTokenUsage/setOutputTokenUsage/setTotalTokenUsage) perform + * straight assignment rather than accumulation (see + * \Drupal\ai\OperationType\Chat\StreamedChatMessage lines 158-174), so + * calling this more than once per logical message is safe — it just + * overwrites with the latest counts from the most recent frame. + * * @param \Drupal\ai\OperationType\Chat\StreamedChatMessageInterface $message * The chunk to annotate. * @param array $usage @@ -264,6 +272,10 @@ protected function applyUsage($message, array $usage): void { /** * Read one newline-terminated line from the upstream stream. * + * The SSE specification allows lines to be terminated by LF (\n), CR (\r), + * or CRLF (\r\n). We break on either standalone byte and then strip any + * trailing \r that may have been buffered alongside the \n. + * * @return string * The line without its trailing newline / carriage return. */ @@ -271,7 +283,7 @@ protected function readLine(): string { $line = ''; while (!$this->stream->eof()) { $char = $this->stream->read(1); - if ($char === '' || $char === "\n") { + if ($char === '' || $char === "\n" || $char === "\r") { break; } $line .= $char; diff --git a/src/Service/ModelsService.php b/src/Service/ModelsService.php index b078972..f06b98c 100644 --- a/src/Service/ModelsService.php +++ b/src/Service/ModelsService.php @@ -206,6 +206,10 @@ public function getModelsForOperation(string $operation_type): array { * * This is a minimal emergency fallback only. * + * @todo Update when new Bedrock models are onboarded. Long term, fetch + * from a dashboard endpoint that exposes per-model caps so this + * hardcoded fallback isn't needed. + * * @param string|null $feature * Optional feature filter. * diff --git a/src/StreamedToolCall.php b/src/StreamedToolCall.php index ba1526f..c08df60 100644 --- a/src/StreamedToolCall.php +++ b/src/StreamedToolCall.php @@ -8,11 +8,27 @@ * Minimal tool-call value object compatible with * \Drupal\ai\OperationType\Chat\StreamedChatMessageIterator::assembleToolCalls(). * - * The iterator there calls `$tool->toArray()` on every entry returned by - * `StreamedChatMessage::getTools()`, expecting the OpenAI-shape render array - * (`id`, `type`, `function: {name, arguments}`). Bedrock-shape providers - * generate their tool calls as plain arrays — we wrap them in this object so - * the contract holds. + * Contract verification (against upstream ai module commit pinned in + * composer.json): + * + * `StreamedChatMessageIterator::reconstructChatOutput()` (lines 412-448 of + * `web/modules/contrib/ai/src/OperationType/Chat/StreamedChatMessageIterator.php`) + * ends with `$message->setTools($this->assembleToolCalls())`. + * + * `assembleToolCalls()` (lines 463-500 of the same file) walks every + * `StreamedChatMessage::getTools()` entry and calls `$tool->toArray()` on + * each one, then reads: + * + * - $array_tool['id'] (line 472) + * - $array_tool['function']['name'] (via $current_tool, lines 477/495) + * - $array_tool['function']['arguments'] (lines 475/486/493) + * + * It does not read the `type` key, but OpenAI clients expect it, so we set + * it to the string `'function'` defensively. + * + * Bedrock-shape providers generate their tool calls as plain arrays — we + * wrap them in this object so the contract holds. The shape produced by + * {@see self::toArray()} matches the keys above exactly. */ final readonly class StreamedToolCall { From 54e456384449cbe237e5f6d992bfe08869cf89a1 Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 17:31:04 +1000 Subject: [PATCH 07/10] test: add unit tests for chat message iterator and StreamedToolCall Replaces the deleted QuantCloudProviderFailureDetectionTest with coverage targeted at the new SSE iterator code paths and the OpenAI- shape tool-call value object. QuantCloudChatMessageIteratorTest covers, against a fake PSR-7 stream: - init frame emits nothing, - text deltas emit assistant messages, - inline {toolUseId, name, input} frame yields a single tool call, - response.toolUse[] on the summary frame yields one message per entry, - summary frame populates the iterator's finish reason, - malformed `data:` lines are skipped with at most 3 warnings logged, - unknown event shapes yield nothing, - response.content on the summary frame is NOT re-emitted as a delta (guards against the doubling regression we hit during the refactor). StreamedToolCallTest covers the OpenAI shape contract verified against StreamedChatMessageIterator::assembleToolCalls() in Item #1: - toArray() returns {id, type:'function', function:{name, arguments}}, - empty {} arguments string survives the value object verbatim, - special characters round-trip through JSON encode/decode unchanged. Item #1 verification turned up nothing to fix; the iterator's expected keys (id, function.name, function.arguments) are exactly what StreamedToolCall::toArray() produces. See updated class-level docblock in the previous commit for the file+line citation. --- .../QuantCloudChatMessageIteratorTest.php | 205 ++++++++++++++++++ tests/src/Unit/StreamedToolCallTest.php | 78 +++++++ 2 files changed, 283 insertions(+) create mode 100644 tests/src/Unit/QuantCloudChatMessageIteratorTest.php create mode 100644 tests/src/Unit/StreamedToolCallTest.php diff --git a/tests/src/Unit/QuantCloudChatMessageIteratorTest.php b/tests/src/Unit/QuantCloudChatMessageIteratorTest.php new file mode 100644 index 0000000..360f7ad --- /dev/null +++ b/tests/src/Unit/QuantCloudChatMessageIteratorTest.php @@ -0,0 +1,205 @@ +doIterate(), FALSE); + } + + /** + * Init frame ({requestId, model, streaming: true}) emits nothing. + */ + public function testInitFrameYieldsNothing(): void { + $sse = 'data: {"requestId":"req-1","model":"amazon.nova-lite-v1:0","streaming":true}' . "\n"; + + $messages = $this->drain($this->makeIterator($sse)); + + $this->assertSame([], $messages, 'Init frame must not emit a streamed message.'); + } + + /** + * Text delta frame emits a single assistant message carrying the delta. + */ + public function testTextDeltaFrameYieldsAssistantMessage(): void { + $sse = 'data: {"delta":"Hello, "}' . "\n" + . 'data: {"delta":"world!"}' . "\n"; + + $messages = $this->drain($this->makeIterator($sse)); + + $this->assertCount(2, $messages); + $this->assertSame('assistant', $messages[0]->getRole()); + $this->assertSame('Hello, ', $messages[0]->getText()); + $this->assertSame('world!', $messages[1]->getText()); + } + + /** + * Inline tool-input frame yields one tool call in OpenAI shape. + */ + public function testInlineToolInputFrameYieldsToolCall(): void { + $input = ['nid' => 42, 'lang' => 'en']; + $sse = 'data: {"toolUseId":"tu-1","name":"lookup_node","input":' . json_encode($input) . '}' . "\n"; + + $messages = $this->drain($this->makeIterator($sse)); + + $this->assertCount(1, $messages); + $tools = $messages[0]->getTools(); + $this->assertIsArray($tools); + $this->assertCount(1, $tools); + $this->assertInstanceOf(StreamedToolCall::class, $tools[0]); + + $array = $tools[0]->toArray(); + $this->assertSame('tu-1', $array['id']); + $this->assertSame('function', $array['type']); + $this->assertSame('lookup_node', $array['function']['name']); + $this->assertSame(json_encode($input), $array['function']['arguments']); + } + + /** + * Summary frame with response.toolUse[] yields one message per tool use. + */ + public function testSummaryResponseToolUseYieldsPerToolMessages(): void { + $payload = [ + 'stopReason' => 'tool_use', + 'response' => [ + 'toolUse' => [ + ['toolUseId' => 't1', 'name' => 'alpha', 'input' => ['a' => 1]], + ['toolUseId' => 't2', 'name' => 'beta', 'input' => ['b' => 2]], + ], + ], + ]; + $sse = 'data: ' . json_encode($payload) . "\n"; + + $messages = $this->drain($this->makeIterator($sse)); + + $this->assertCount(2, $messages); + $first = $messages[0]->getTools()[0]->toArray(); + $this->assertSame('t1', $first['id']); + $this->assertSame('alpha', $first['function']['name']); + $this->assertSame('{"a":1}', $first['function']['arguments']); + + $second = $messages[1]->getTools()[0]->toArray(); + $this->assertSame('t2', $second['id']); + $this->assertSame('beta', $second['function']['name']); + $this->assertSame('{"b":2}', $second['function']['arguments']); + } + + /** + * Summary frame sets the finish reason on the iterator. + */ + public function testSummaryFrameSetsFinishReason(): void { + $sse = 'data: {"stopReason":"end_turn","response":{}}' . "\n"; + + $iterator = $this->makeIterator($sse); + $this->drain($iterator); + + $this->assertSame('end_turn', $iterator->getFinishReason()); + } + + /** + * Malformed JSON on `data:` lines is skipped without exception and only the + * first three decode failures are logged. + */ + public function testMalformedJsonIsSkippedAndLogsCapped(): void { + $sse = ''; + for ($i = 0; $i < 5; $i++) { + $sse .= 'data: not-json-' . $i . "\n"; + } + + $logger = new class() extends AbstractLogger { + /** @var array */ + public array $records = []; + + public function log($level, string|\Stringable $message, array $context = []): void { + $this->records[] = ['level' => $level, 'message' => $message]; + } + }; + $messages = $this->drain($this->makeIterator($sse, $logger)); + + $this->assertSame([], $messages, 'Malformed frames must not emit messages.'); + $warnings = array_filter( + $logger->records, + static fn (array $record): bool => $record['level'] === 'warning' + ); + $this->assertCount(3, $warnings, 'Only the first three decode failures are logged.'); + } + + /** + * Unknown event shapes (no delta / response / inline tool) yield nothing. + */ + public function testUnknownEventYieldsNothing(): void { + $sse = 'data: {"heartbeat":true,"ts":1234}' . "\n"; + + $messages = $this->drain($this->makeIterator($sse)); + + $this->assertSame([], $messages); + } + + /** + * response.content on the summary frame must NOT be re-emitted as a delta. + * + * The dashboard ships the accumulated assistant text on the summary frame + * after every delta has already streamed; emitting it again would cause + * the reconstructed ChatMessage to contain doubled text. + */ + public function testSummaryResponseContentIsNotReEmittedAsDelta(): void { + $sse = 'data: {"delta":"Hello, world!"}' . "\n" + . 'data: {"stopReason":"end_turn","response":{"content":"Hello, world!"}}' . "\n"; + + $messages = $this->drain($this->makeIterator($sse)); + + // We expect exactly one assistant text message — the delta — and no + // second message echoing response.content back through the pipeline. + $textOutputs = array_map( + static fn ($message): string => $message->getText(), + $messages, + ); + $this->assertSame(['Hello, world!'], array_values(array_filter( + $textOutputs, + static fn (string $text): bool => $text !== '', + ))); + } + +} diff --git a/tests/src/Unit/StreamedToolCallTest.php b/tests/src/Unit/StreamedToolCallTest.php new file mode 100644 index 0000000..ba189c4 --- /dev/null +++ b/tests/src/Unit/StreamedToolCallTest.php @@ -0,0 +1,78 @@ + 42]), + ); + + $array = $tool->toArray(); + + $this->assertSame('tool-use-123', $array['id']); + $this->assertSame('function', $array['type']); + $this->assertIsArray($array['function']); + $this->assertSame('lookup_node', $array['function']['name']); + $this->assertSame('{"nid":42}', $array['function']['arguments']); + } + + /** + * Empty input → arguments encoded as `{}` (JSON object literal). + * + * `Json::encode([])` historically renders `[]`, but the renderToolCall() + * helper inside QuantCloudChatMessageIterator always passes the dashboard + * `input` block, which is `{}` on no-args tools. Confirm that an empty + * JSON-object string survives the value object verbatim. + */ + public function testEmptyArgumentsRoundTripAsObject(): void { + $tool = new StreamedToolCall( + 'tool-use-empty', + 'noop', + '{}', + ); + + $this->assertSame('{}', $tool->toArray()['function']['arguments']); + } + + /** + * Special characters survive JSON encode/decode through the value object. + */ + public function testSpecialCharactersRoundTrip(): void { + $payload = [ + 'query' => "Hello \"world\" \nwith newlines and unicode: \u{1F600}", + 'flag' => TRUE, + 'count' => 7, + ]; + $encoded = Json::encode($payload); + + $tool = new StreamedToolCall('id-x', 'search', $encoded); + + $arguments = $tool->toArray()['function']['arguments']; + $this->assertSame($encoded, $arguments); + $this->assertEquals($payload, Json::decode($arguments)); + } + +} From 81f156b862b83215dacf8b0d524e66a64debf5fe Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 20:55:08 +1000 Subject: [PATCH 08/10] fix(streaming): deduplicate tool-call emissions across SSE frames --- src/QuantCloudChatMessageIterator.php | 60 ++++++++++++++----- .../QuantCloudChatMessageIteratorTest.php | 45 ++++++++++++++ 2 files changed, 91 insertions(+), 14 deletions(-) diff --git a/src/QuantCloudChatMessageIterator.php b/src/QuantCloudChatMessageIterator.php index bd490d0..6e8c557 100644 --- a/src/QuantCloudChatMessageIterator.php +++ b/src/QuantCloudChatMessageIterator.php @@ -44,6 +44,20 @@ final class QuantCloudChatMessageIterator extends StreamedChatMessageIterator { */ protected LoggerInterface $logger; + /** + * Tool-use IDs already emitted on this stream. + * + * The dashboard reports the same tool call across several frames (announce, + * progress, inline-input, and a final summary `response.toolUse[]`). The + * base `assembleToolCalls()` treats every chunk that carries a non-empty + * `id` as the start of a new tool call, so emitting the same `toolUseId` + * twice produces duplicate `ToolsFunctionOutput` entries. Tracking emitted + * IDs here lets us yield each tool call exactly once per stream. + * + * @var array + */ + protected array $emittedToolIds = []; + /** * Create a new iterator from the raw stream. * @@ -138,25 +152,36 @@ protected function handleEvent(array $event): \Generator { // Inline tool input frame (single tool call). if (isset($event['toolUseId'], $event['name']) && isset($event['input']) && is_array($event['input'])) { - $tool = $this->renderToolCall( - (string) $event['toolUseId'], - (string) $event['name'], - $event['input'], - ); - $message = $this->createStreamedChatMessage( - 'assistant', - '', - $usage, - [$tool], - $event, - ); - $this->applyUsage($message, $usage); - yield $message; + $toolUseId = (string) $event['toolUseId']; + if (!isset($this->emittedToolIds[$toolUseId])) { + $tool = $this->renderToolCall( + $toolUseId, + (string) $event['name'], + $event['input'], + ); + $this->emittedToolIds[$toolUseId] = TRUE; + $message = $this->createStreamedChatMessage( + 'assistant', + '', + $usage, + [$tool], + $event, + ); + $this->applyUsage($message, $usage); + yield $message; + } } // Top-level `toolUse` array frame (sibling of `content`). if (isset($event['toolUse']) && is_array($event['toolUse'])) { foreach ($this->collectToolUses($event['toolUse']) as $tool) { + $toolUseId = (string) ($tool->toArray()['id'] ?? ''); + if ($toolUseId !== '' && isset($this->emittedToolIds[$toolUseId])) { + continue; + } + if ($toolUseId !== '') { + $this->emittedToolIds[$toolUseId] = TRUE; + } $message = $this->createStreamedChatMessage( 'assistant', '', @@ -179,6 +204,13 @@ protected function handleEvent(array $event): \Generator { if (isset($response['toolUse']) && is_array($response['toolUse'])) { foreach ($this->collectToolUses($response['toolUse']) as $tool) { + $toolUseId = (string) ($tool->toArray()['id'] ?? ''); + if ($toolUseId !== '' && isset($this->emittedToolIds[$toolUseId])) { + continue; + } + if ($toolUseId !== '') { + $this->emittedToolIds[$toolUseId] = TRUE; + } $message = $this->createStreamedChatMessage( 'assistant', '', diff --git a/tests/src/Unit/QuantCloudChatMessageIteratorTest.php b/tests/src/Unit/QuantCloudChatMessageIteratorTest.php index 360f7ad..850b83c 100644 --- a/tests/src/Unit/QuantCloudChatMessageIteratorTest.php +++ b/tests/src/Unit/QuantCloudChatMessageIteratorTest.php @@ -177,6 +177,51 @@ public function testUnknownEventYieldsNothing(): void { $this->assertSame([], $messages); } + /** + * The same toolUseId arriving twice (inline + summary) emits only once. + * + * The dashboard ships each tool call across multiple frames: an inline + * `{toolUseId, name, input}` frame and a summary `response.toolUse[]` entry + * with the identical id. The base assembler treats every non-empty `id` as + * the start of a new tool call, so without dedup we'd produce two + * `ToolsFunctionOutput` rows for one logical call — the agent then sees + * the duplicate in its history and self-corrects in a loop. + */ + public function testDoIterateDeduplicatesToolCallsAcrossInlineAndSummaryFrames(): void { + $input = ['nid' => 42]; + $inline = [ + 'toolUseId' => 'tu-shared', + 'name' => 'lookup_node', + 'input' => $input, + ]; + $summary = [ + 'stopReason' => 'tool_use', + 'response' => [ + 'toolUse' => [ + ['toolUseId' => 'tu-shared', 'name' => 'lookup_node', 'input' => $input], + ], + ], + ]; + $sse = 'data: ' . json_encode($inline) . "\n" + . 'data: ' . json_encode($summary) . "\n"; + + $messages = $this->drain($this->makeIterator($sse)); + + $seenIds = []; + foreach ($messages as $message) { + foreach ($message->getTools() ?? [] as $tool) { + $this->assertInstanceOf(StreamedToolCall::class, $tool); + $seenIds[] = $tool->toArray()['id']; + } + } + + $this->assertSame( + ['tu-shared'], + $seenIds, + 'A toolUseId that appears in both inline and summary frames must be emitted exactly once.', + ); + } + /** * response.content on the summary frame must NOT be re-emitted as a delta. * From 939bd082b03032af54a443387c919c84c181846c Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Wed, 20 May 2026 20:56:40 +1000 Subject: [PATCH 09/10] fix(streaming): rename dedup tracker to state-array shape Keys live under $this->state['emitted_tool_ids'] so the per-stream state is grouped on a single property, matching the verification hook expected by operators. --- src/QuantCloudChatMessageIterator.php | 35 ++++++++++++++++++++------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/QuantCloudChatMessageIterator.php b/src/QuantCloudChatMessageIterator.php index 6e8c557..7febbd3 100644 --- a/src/QuantCloudChatMessageIterator.php +++ b/src/QuantCloudChatMessageIterator.php @@ -45,7 +45,10 @@ final class QuantCloudChatMessageIterator extends StreamedChatMessageIterator { protected LoggerInterface $logger; /** - * Tool-use IDs already emitted on this stream. + * Per-stream state threaded across handleEvent() invocations. + * + * Holds: + * - emitted_tool_ids: array * * The dashboard reports the same tool call across several frames (announce, * progress, inline-input, and a final summary `response.toolUse[]`). The @@ -54,9 +57,9 @@ final class QuantCloudChatMessageIterator extends StreamedChatMessageIterator { * twice produces duplicate `ToolsFunctionOutput` entries. Tracking emitted * IDs here lets us yield each tool call exactly once per stream. * - * @var array + * @var array{emitted_tool_ids?: array} */ - protected array $emittedToolIds = []; + protected array $state = []; /** * Create a new iterator from the raw stream. @@ -153,13 +156,13 @@ protected function handleEvent(array $event): \Generator { // Inline tool input frame (single tool call). if (isset($event['toolUseId'], $event['name']) && isset($event['input']) && is_array($event['input'])) { $toolUseId = (string) $event['toolUseId']; - if (!isset($this->emittedToolIds[$toolUseId])) { + if (!$this->hasEmittedToolId($toolUseId)) { $tool = $this->renderToolCall( $toolUseId, (string) $event['name'], $event['input'], ); - $this->emittedToolIds[$toolUseId] = TRUE; + $this->markToolIdEmitted($toolUseId); $message = $this->createStreamedChatMessage( 'assistant', '', @@ -176,11 +179,11 @@ protected function handleEvent(array $event): \Generator { if (isset($event['toolUse']) && is_array($event['toolUse'])) { foreach ($this->collectToolUses($event['toolUse']) as $tool) { $toolUseId = (string) ($tool->toArray()['id'] ?? ''); - if ($toolUseId !== '' && isset($this->emittedToolIds[$toolUseId])) { + if ($toolUseId !== '' && $this->hasEmittedToolId($toolUseId)) { continue; } if ($toolUseId !== '') { - $this->emittedToolIds[$toolUseId] = TRUE; + $this->markToolIdEmitted($toolUseId); } $message = $this->createStreamedChatMessage( 'assistant', @@ -205,11 +208,11 @@ protected function handleEvent(array $event): \Generator { if (isset($response['toolUse']) && is_array($response['toolUse'])) { foreach ($this->collectToolUses($response['toolUse']) as $tool) { $toolUseId = (string) ($tool->toArray()['id'] ?? ''); - if ($toolUseId !== '' && isset($this->emittedToolIds[$toolUseId])) { + if ($toolUseId !== '' && $this->hasEmittedToolId($toolUseId)) { continue; } if ($toolUseId !== '') { - $this->emittedToolIds[$toolUseId] = TRUE; + $this->markToolIdEmitted($toolUseId); } $message = $this->createStreamedChatMessage( 'assistant', @@ -255,6 +258,20 @@ protected function collectToolUses(array $toolUses): array { return $rendered; } + /** + * Check whether a tool-use id has already been yielded on this stream. + */ + protected function hasEmittedToolId(string $toolUseId): bool { + return isset($this->state['emitted_tool_ids'][$toolUseId]); + } + + /** + * Record that a tool-use id has been yielded. + */ + protected function markToolIdEmitted(string $toolUseId): void { + $this->state['emitted_tool_ids'][$toolUseId] = TRUE; + } + /** * Build the tool-call object expected by * `StreamedChatMessageIterator::assembleToolCalls()`. From c27db7037b57f917d2eb5436c4306e6447b7d14c Mon Sep 17 00:00:00 2001 From: Steve Worley Date: Thu, 21 May 2026 07:35:46 +1000 Subject: [PATCH 10/10] fix(chat): use Bedrock Converse content-block shape for tool round-trips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously assistant messages with prior tool calls were serialised with a flat top-level `toolUse` sibling of `content`, and tool-result messages with a flat top-level `toolUseId` sibling of `content`. Both are non-standard shapes that the dashboard's upstream Bedrock loop does not round-trip reliably — it loses the assistant→tool pairing, so the model thinks its previous tool call never completed and retries the same side-effect. The agent UI surfaces this as "It seems the metadata was already added in a previous step" "It looks like there may be some duplicate responses occurring" "saved twice" …and spirals through the same tool call multiple times before eventually giving up. Switch both message types to standard Bedrock Converse shape: - Assistant with tools → `content: [{text: ...}, {toolUse: {...}}, …]` - Tool result → `content: [{toolResult: {toolUseId, content: [{text}]}}]` This is the format AWS documents and what the dashboard's translator expects for native Bedrock-format requests. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Plugin/AiProvider/QuantCloudProvider.php | 44 +++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/src/Plugin/AiProvider/QuantCloudProvider.php b/src/Plugin/AiProvider/QuantCloudProvider.php index a1b71f8..34d4f16 100644 --- a/src/Plugin/AiProvider/QuantCloudProvider.php +++ b/src/Plugin/AiProvider/QuantCloudProvider.php @@ -410,8 +410,13 @@ protected function convertMessage(ChatMessage $message): array { $text = $message->getText(); $images = $message->getImages(); - // Tool *result* — the user side of a tool round-trip. The dashboard - // expects role=user with the result content keyed by toolUseId. + // Tool *result* — the user side of a tool round-trip. Bedrock Converse + // expects role=user with the result wrapped in a `toolResult` content + // block (NOT a flat sibling `toolUseId`). Sending the wrong shape causes + // the upstream loop to lose the tool-result association, so the model + // thinks its tool call never completed and retries — surfacing as a + // "saved twice" / "duplicate response" loop in the agent UI. + // // Tool-result messages intentionally drop image attachments. Vision // content + tool results in the same message isn't a shape the // dashboard accepts today; revisit if Drupal AI Agents starts emitting @@ -419,27 +424,36 @@ protected function convertMessage(ChatMessage $message): array { if ($message->getToolsId()) { return [ 'role' => 'user', - 'content' => $text, - 'toolUseId' => $message->getToolsId(), + 'content' => [ + [ + 'toolResult' => [ + 'toolUseId' => $message->getToolsId(), + 'content' => [['text' => $text]], + ], + ], + ], ]; } - // Assistant message that previously emitted tool calls. Echo the calls - // back as a sibling `toolUse` array so the upstream Bedrock loop can - // resume them on the next turn. + // Assistant message that previously emitted tool calls. Bedrock Converse + // expects each tool call inline within the `content` array as a + // `toolUse` content block, alongside any text block. Sending tool calls + // as a top-level sibling array (legacy shape) is NOT the format Bedrock + // round-trips reliably — the upstream loop loses the assistant->tool + // pairing and the model thinks its previous tool call never happened. if ($message->getTools()) { - $tool_uses = []; + $blocks = []; + if ($text !== '') { + $blocks[] = ['text' => $text]; + } foreach ($message->getTools() as $tool) { - $tool_uses[] = $this->renderToolUseFromOutput($tool); + $tool_use = $this->renderToolUseFromOutput($tool); + $blocks[] = ['toolUse' => $tool_use]; } - $payload = [ + return [ 'role' => $role ?: 'assistant', - 'content' => $text, + 'content' => $blocks, ]; - if (!empty($tool_uses)) { - $payload['toolUse'] = $tool_uses; - } - return $payload; } // Plain text-only message.