From 03b8aa54018d5bf66a33aae6ee17192e3e1bd739 Mon Sep 17 00:00:00 2001 From: "smalruby3-editor-bot[bot]" <297607354+smalruby3-editor-bot[bot]@users.noreply.github.com> Date: Wed, 1 Jul 2026 10:12:48 +0000 Subject: [PATCH 1/3] fix(text2speech): route synthesis through Smalruby CORS proxy (#859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scratch's synthesis service (synthesis-service.scratch.mit.edu) is CORS-locked to scratch.mit.edu, so smalruby.app is blocked when calling it directly — same root cause as translate (#857). Route text2speech through a new Smalruby proxy endpoint. infra/smalruby-api: add scratch-api-synthesis Lambda + GET /scratch-api-proxy/synth route. Unlike translate (text), synthesis returns binary audio (mp3), so the proxy Base64-encodes it (isBase64Encoded: true) and API Gateway decodes it for the client (same approach as cors-proxy binary handling). Adds mocked-fetch unit tests. scratch-vm: override SERVER_HOST in scratch3_text2speech (upstream file) with Smalruby markers so ${SERVER_HOST}/synth targets the proxy. Guard test + smalruby-markers-vm.md entry so future upstream merges detect the override (as happened to translate). CDK deploy and real-device audio playback are left to a human (HITL) per the issue. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/infra/smalruby-api.md | 13 ++- docs/maintenance/smalruby-markers-vm.md | 2 + .../lambda/scratch-api-synthesis.ts | 70 ++++++++++++++++ .../tests/scratch-api-synthesis.test.ts | 82 +++++++++++++++++++ infra/smalruby-api/lib/smalruby-api-stack.ts | 12 +++ packages/scratch-vm/.prettierignore | 1 + .../extensions/scratch3_text2speech/index.js | 12 ++- .../test/unit/extension_text2speech_proxy.js | 62 ++++++++++++++ 8 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 infra/smalruby-api/lambda/scratch-api-synthesis.ts create mode 100644 infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts create mode 100644 packages/scratch-vm/test/unit/extension_text2speech_proxy.js diff --git a/docs/infra/smalruby-api.md b/docs/infra/smalruby-api.md index 70eb90b5ac0..b7693d22ce0 100644 --- a/docs/infra/smalruby-api.md +++ b/docs/infra/smalruby-api.md @@ -4,7 +4,7 @@ ## 概要 -`infra/smalruby-api/` は Smalruby のフロントエンド (smalruby.app) が利用する**汎用バックエンド機能**を提供する 4 つの Lambda を束ねた **HTTP API v2** スタック。 +`infra/smalruby-api/` は Smalruby のフロントエンド (smalruby.app) が利用する**汎用バックエンド機能**を提供する 5 つの Lambda を束ねた **HTTP API v2** スタック。 | エンドポイント | Lambda | 用途 | |---|---|---| @@ -12,6 +12,7 @@ | `GET /mesh-domain` | `smalruby-api-mesh-zone{stageSuffix}` | クライアント IP から Mesh ドメイン (CRC32 ハッシュ) を生成 | | `GET /scratch-api-proxy/projects/{projectId}` | `smalruby-api-scratch-projects{stageSuffix}` | Scratch 公式 API (project info) のステータス透過プロキシ | | `GET /scratch-api-proxy/translate` | `smalruby-api-scratch-translate{stageSuffix}` | Scratch 公式翻訳サービスのプロキシ | +| `GET /scratch-api-proxy/synth` | `smalruby-api-scratch-synthesis{stageSuffix}` | Scratch 公式音声合成サービスのプロキシ (バイナリ音声を Base64 返却) | `OPTIONS` (preflight) は HTTP API v2 の **built-in CORS** が自動処理。旧 SAM の `cors-for-smalruby` Lambda は不要。 @@ -66,6 +67,16 @@ Scratch translate サービス (`https://translate-service.scratch.mit.edu/trans 実装: `infra/smalruby-api/lambda/scratch-api-translate.ts` +### `GET 
/scratch-api-proxy/synth` + +Scratch 音声合成サービス (`https://synthesis-service.scratch.mit.edu/synth`) のプロキシ。text2speech (「音声で話す」) 拡張が利用する。 + +**入力**: `?locale=<>&gender=<>&text=<>` + +**処理**: サーバ側で synthesis サービスを fetch し、**バイナリ音声 (mp3) を Base64 エンコードして返す** (`isBase64Encoded: true` + `Content-Type: audio/mpeg`)。API Gateway が Base64 をデコードしてクライアントにはバイナリで届く。translate (テキスト) と違い応答がバイナリな点が最大の差 (cors-proxy のバイナリ Base64 化と同じ考え方)。 + +実装: `infra/smalruby-api/lambda/scratch-api-synthesis.ts` + ## 環境変数 `.env.example` 参照。主要なもの: diff --git a/docs/maintenance/smalruby-markers-vm.md b/docs/maintenance/smalruby-markers-vm.md index f109efa9685..af81b3a5412 100644 --- a/docs/maintenance/smalruby-markers-vm.md +++ b/docs/maintenance/smalruby-markers-vm.md @@ -21,6 +21,7 @@ scratch-vm の **upstream ファイルに埋め込んだ Smalruby マーカー** | `src/engine/blocks.js` | XML coords guard | `blockToXML` で x/y が finite number のときだけ XML 属性を出力。Ruby → blocks 変換の x/y 未指定 (undefined) を scratch-blocks v2 に正しく伝え、`fromRuby` 再レイアウト経路を維持する | | `src/engine/blocks.js` | orphaned-parent guard | `getTopLevelScript` で `block.parent` が this._blocks に存在しない場合に停止。Ruby → blocks 変換中の孤立 parent id で `undefined.parent` 参照クラッシュを防ぐ | | `src/extensions/scratch3_translate/index.js` | translate CORS proxy | `serverURL` を Smalruby プロキシ (`https://api.smalruby.app/scratch-api-proxy/`) に上書き。Scratch の翻訳サービスは CORS を scratch.mit.edu 限定にしたため smalruby.app からの直叩きが失敗する。マーカー無しで上書きしていた過去の版が v13.7.2 upstream マージで静かに revert された (#857) ため、次回以降のマージで検知できるようマーカーで囲む | +| `src/extensions/scratch3_text2speech/index.js` | synthesis CORS proxy | `SERVER_HOST` を Smalruby プロキシ (`https://api.smalruby.app/scratch-api-proxy`) に上書き。Scratch の音声合成サービスは CORS を scratch.mit.edu 限定にしたため smalruby.app からの直叩きが失敗する。拡張が `${SERVER_HOST}/synth?...` を組むため base を差し替えるだけで proxy 経由になる。プロキシはバイナリ音声を Base64 返却する (`infra/smalruby-api` scratch-api-synthesis)。translate (#857) と同じ根本原因なので同様にマーカーで囲む (#859) | ## 関連ファイル @@ -28,3 +29,4 @@ scratch-vm の **upstream ファイルに埋め込んだ Smalruby マーカー** - 
`src/extension-support/smalruby-extensions.js` — extension-manager.js のマーカーから参照 - `test/unit/blocks_operators_regex.js` — scratch3_operators.js の regex support のテスト - `test/unit/extension_translate_proxy.js` — scratch3_translate/index.js の translate CORS proxy のテスト +- `test/unit/extension_text2speech_proxy.js` — scratch3_text2speech/index.js の synthesis CORS proxy のテスト diff --git a/infra/smalruby-api/lambda/scratch-api-synthesis.ts b/infra/smalruby-api/lambda/scratch-api-synthesis.ts new file mode 100644 index 00000000000..4610ecdb150 --- /dev/null +++ b/infra/smalruby-api/lambda/scratch-api-synthesis.ts @@ -0,0 +1,70 @@ +import type { + APIGatewayProxyEventV2, + APIGatewayProxyStructuredResultV2, +} from 'aws-lambda'; + +const API_HOST = 'https://synthesis-service.scratch.mit.edu'; + +/** + * Proxy for Scratch's text-to-speech synthesis service. + * + * Unlike the translate proxy (which forwards text), the synthesis service + * returns a binary audio file (mp3). API Gateway HTTP API v2 can only carry + * binary payloads when the Lambda sets `isBase64Encoded: true`, so we Base64 + * encode the audio and let API Gateway decode it back to bytes for the client. + */ +export const handler = async ( + event: APIGatewayProxyEventV2, +): Promise => { + const locale = (event.queryStringParameters?.locale ?? '').trim(); + const gender = (event.queryStringParameters?.gender ?? '').trim(); + const text = event.queryStringParameters?.text ?? ''; + + if (!locale || !gender) { + return { + statusCode: 400, + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + code: 'BadRequest', + message: 'locale and gender are required', + }), + isBase64Encoded: false, + }; + } + + try { + const params = new URLSearchParams({ locale, gender, text }); + const res = await fetch(`${API_HOST}/synth?${params.toString()}`); + + if (res.status < 200 || res.status >= 300) { + // Pass upstream failures through as JSON so the client sees the status. 
+ const body = await res.text(); + return { + statusCode: res.status, + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + code: 'UpstreamError', + message: `HTTP ${res.status}: ${body}`, + }), + isBase64Encoded: false, + }; + } + + const contentType = res.headers.get('content-type') || 'audio/mpeg'; + const buffer = Buffer.from(await res.arrayBuffer()); + return { + statusCode: 200, + headers: { 'Content-Type': contentType }, + body: buffer.toString('base64'), + isBase64Encoded: true, + }; + } catch (e) { + const message = e instanceof Error ? e.message : String(e); + return { + statusCode: 502, + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ code: 'BadGateway', message }), + isBase64Encoded: false, + }; + } +}; diff --git a/infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts b/infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts new file mode 100644 index 00000000000..5dd9161bdd4 --- /dev/null +++ b/infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts @@ -0,0 +1,82 @@ +import { handler } from '../scratch-api-synthesis'; + +const mockFetch = jest.fn(); +global.fetch = mockFetch as unknown as typeof fetch; + +const event = (qs?: Record) => + ({ + queryStringParameters: qs, + requestContext: { http: { sourceIp: '1.2.3.4' } }, + }) as never; + +beforeEach(() => { + mockFetch.mockReset(); +}); + +describe('scratch-api-synthesis handler', () => { + test('returns 400 when locale is missing', async () => { + const res = await handler(event({ gender: 'female', text: 'hi' })); + expect(res.statusCode).toBe(400); + const body = JSON.parse(res.body as string); + expect(body.code).toBe('BadRequest'); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + test('returns 400 when gender is missing', async () => { + const res = await handler(event({ locale: 'ja-JP', text: 'hi' })); + expect(res.statusCode).toBe(400); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + test('proxies synth request and 
returns Base64-encoded audio', async () => { + const audio = Buffer.from([0x49, 0x44, 0x33, 0x04]); // fake mp3 bytes + mockFetch.mockResolvedValueOnce( + new Response(audio, { + status: 200, + headers: { 'content-type': 'audio/mpeg' }, + }), + ); + + const res = await handler(event({ locale: 'ja-JP', gender: 'female', text: 'こんにちは' })); + + expect(res.statusCode).toBe(200); + expect(res.isBase64Encoded).toBe(true); + expect(res.headers?.['Content-Type']).toBe('audio/mpeg'); + expect(res.body).toBe(audio.toString('base64')); + + expect(mockFetch).toHaveBeenCalledTimes(1); + const calledUrl = mockFetch.mock.calls[0][0] as string; + expect(calledUrl).toMatch(/^https:\/\/synthesis-service\.scratch\.mit\.edu\/synth\?/); + expect(calledUrl).toContain('locale=ja-JP'); + expect(calledUrl).toContain('gender=female'); + }); + + test('url-encodes special characters in text', async () => { + mockFetch.mockResolvedValueOnce( + new Response(Buffer.from([0]), { + status: 200, + headers: { 'content-type': 'audio/mpeg' }, + }), + ); + await handler(event({ locale: 'en-US', gender: 'male', text: 'hello world & goodbye' })); + const calledUrl = mockFetch.mock.calls[0][0] as string; + expect(calledUrl).toContain('text=hello+world+%26+goodbye'); + }); + + test('passes through upstream errors as JSON', async () => { + mockFetch.mockResolvedValueOnce(new Response('boom', { status: 503 })); + const res = await handler(event({ locale: 'ja-JP', gender: 'female', text: 'x' })); + expect(res.statusCode).toBe(503); + expect(res.isBase64Encoded).toBe(false); + const body = JSON.parse(res.body as string); + expect(body.code).toBe('UpstreamError'); + }); + + test('returns 502 on network error', async () => { + mockFetch.mockRejectedValueOnce(new Error('ETIMEDOUT')); + const res = await handler(event({ locale: 'ja-JP', gender: 'female', text: 'x' })); + expect(res.statusCode).toBe(502); + const body = JSON.parse(res.body as string); + expect(body.code).toBe('BadGateway'); + }); +}); diff --git 
a/infra/smalruby-api/lib/smalruby-api-stack.ts b/infra/smalruby-api/lib/smalruby-api-stack.ts index 84a0fbfe550..4ed49e4a433 100644 --- a/infra/smalruby-api/lib/smalruby-api-stack.ts +++ b/infra/smalruby-api/lib/smalruby-api-stack.ts @@ -104,6 +104,12 @@ export class SmalrubyApiStack extends cdk.Stack { 'scratch-api-translate.ts', ); + const scratchSynthesisFn = makeLambda( + 'ScratchApiSynthesis', + `smalruby-api-scratch-synthesis${stageSuffix}`, + 'scratch-api-synthesis.ts', + ); + // --- Custom Domain --- const parentZoneName = process.env.ROUTE53_PARENT_ZONE_NAME || 'api.smalruby.app'; @@ -201,6 +207,12 @@ export class SmalrubyApiStack extends cdk.Stack { integration: integrationFor('ScratchApiTranslateIntegration', scratchTranslateFn), }); + this.api.addRoutes({ + path: '/scratch-api-proxy/synth', + methods: [apigatewayv2.HttpMethod.GET], + integration: integrationFor('ScratchApiSynthesisIntegration', scratchSynthesisFn), + }); + // Throttling const defaultStage = this.api.defaultStage?.node.defaultChild as apigatewayv2.CfnStage; if (defaultStage) { diff --git a/packages/scratch-vm/.prettierignore b/packages/scratch-vm/.prettierignore index 4fb279c2878..2b92aec003f 100644 --- a/packages/scratch-vm/.prettierignore +++ b/packages/scratch-vm/.prettierignore @@ -90,6 +90,7 @@ test/unit/* !test/unit/extension_mesh_v2.js !test/unit/extension_smalrubot_s1.js !test/unit/extension_translate_proxy.js +!test/unit/extension_text2speech_proxy.js !test/unit/extension_smalruby_ruby_each.js !test/unit/mesh_service_v2_cost.js !test/unit/mesh_service_v2_global_vars.js diff --git a/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js b/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js index 7a324dcb9ee..d4ccda2baa7 100644 --- a/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js +++ b/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js @@ -27,7 +27,17 @@ const blockIconURI = 
'data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNv * The url of the synthesis server. * @type {string} */ -const SERVER_HOST = 'https://synthesis-service.scratch.mit.edu'; +// === Smalruby: Start of synthesis CORS proxy === +// Scratch's synthesis service is CORS-locked to scratch.mit.edu, so calling it +// directly from smalruby.app fails ('Access-Control-Allow-Origin' mismatch). +// Route through the Smalruby proxy (infra/smalruby-api scratch-api-proxy/synth) +// which forwards to synthesis-service.scratch.mit.edu server-side and returns the +// binary audio (Base64-decoded by API Gateway) with permissive CORS headers. +// The extension builds `${SERVER_HOST}/synth?...`, so pointing SERVER_HOST at the +// proxy base yields `https://api.smalruby.app/scratch-api-proxy/synth?...`. +// Keep this override across upstream merges (same root cause as translate #857). +const SERVER_HOST = 'https://api.smalruby.app/scratch-api-proxy'; +// === Smalruby: End of synthesis CORS proxy === /** * How long to wait in ms before timing out requests to synthesis server. diff --git a/packages/scratch-vm/test/unit/extension_text2speech_proxy.js b/packages/scratch-vm/test/unit/extension_text2speech_proxy.js new file mode 100644 index 00000000000..e9dfe7343c6 --- /dev/null +++ b/packages/scratch-vm/test/unit/extension_text2speech_proxy.js @@ -0,0 +1,62 @@ +const { test } = require('tap'); + +const fetchModulePath = require.resolve('../../src/util/fetch-with-timeout'); +const extPath = require.resolve('../../src/extensions/scratch3_text2speech'); + +// The Text2Speech extension is an upstream Scratch file whose synthesis service is +// CORS-locked to scratch.mit.edu. Smalruby must route requests through its own +// proxy (api.smalruby.app/scratch-api-proxy/synth) so smalruby.app is not blocked +// by CORS. This test guards that the SERVER_HOST override is not silently reverted +// by an upstream merge (same root cause as translate; see issue #859 / #857). 
+test('text2speech extension routes fetch through the Smalruby CORS proxy', (t) => { + // Stub fetchWithTimeout before the extension captures it via destructuring at + // module load time, then fresh-require the extension so it picks up the stub. + const fetchModule = require(fetchModulePath); + const originalFetch = fetchModule.fetchWithTimeout; + let capturedUrl = null; + fetchModule.fetchWithTimeout = (url) => { + capturedUrl = url; + // Short-circuit before the audio-engine path by rejecting. + return Promise.reject(new Error('stubbed')); + }; + + delete require.cache[extPath]; + const Scratch3Text2SpeechBlocks = require(extPath); + // Minimal runtime: constructor subscribes via .on, and getCurrentLanguage + // calls getTargetForStage (null stage => falls back to DEFAULT_LANGUAGE). + const runtime = { + on: () => {}, + getTargetForStage: () => null, + }; + const ext = new Scratch3Text2SpeechBlocks(runtime); + + // Minimal target providing custom-state storage used by _getState. + const customState = {}; + const target = { + getCustomState: (key) => customState[key], + setCustomState: (key, value) => { + customState[key] = value; + }, + }; + + // speakAndWait always resolves (it swallows fetch errors in a .catch and logs + // a warning), so we assert on the captured URL after it settles. 
+ return ext.speakAndWait({ WORDS: 'hello' }, { target }).then(() => { + // restore before assertions so a failure does not leak the stub + fetchModule.fetchWithTimeout = originalFetch; + delete require.cache[extPath]; + + t.ok(capturedUrl, 'fetchWithTimeout was called'); + t.match( + capturedUrl, + /^https:\/\/api\.smalruby\.app\/scratch-api-proxy\/synth\?/, + 'SERVER_HOST points to the Smalruby CORS proxy', + ); + t.notMatch( + capturedUrl, + /synthesis-service\.scratch\.mit\.edu/, + 'does not call the CORS-locked Scratch synthesis service directly', + ); + t.end(); + }); +}); From 39bee15bb9e1b8d964801ea6f437927c66eb427c Mon Sep 17 00:00:00 2001 From: "smalruby3-editor-bot[bot]" <297607354+smalruby3-editor-bot[bot]@users.noreply.github.com> Date: Wed, 1 Jul 2026 10:31:41 +0000 Subject: [PATCH 2/3] docs(text2speech): mark CORS proxy override as upstream modification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The synthesis proxy override (#859) turns text2speech into an upstream modification, but its feature doc still declared it upstream-as-is. Update the badge to 🔧 and document the CORS proxy routing, mirroring translate (#857) so the DoD upstream-divergence rule is satisfied. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/extension-text2speech/README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/extension-text2speech/README.md b/docs/extension-text2speech/README.md index 582b7e50a63..5d4b9f0f543 100644 --- a/docs/extension-text2speech/README.md +++ b/docs/extension-text2speech/README.md @@ -1,6 +1,7 @@ # 拡張機能: 音声合成 (Text to Speech) -> **⬆️ upstream そのまま** — upstream の実装をほぼそのまま利用 +> **🔧 upstream 改良** — upstream にあるが Smalruby で機能を改良・拡張している +> 改良点: 音声合成リクエストの送信先を Smalruby の CORS 回避プロキシ (`api.smalruby.app/scratch-api-proxy/synth`) に上書き。Scratch の音声合成サービスは CORS を scratch.mit.edu 限定にしたため、smalruby.app からの直叩きが CORS で失敗する。 - **Smalruby ランタイム対応**: ❌(smalruby3 gem 未対応。AWS Polly API + ブラウザ音声再生) - **デフォルト表示**: ✅(拡張機能ライブラリにデフォルトで表示される) @@ -10,6 +11,17 @@ 入力したテキストを**音声で読み上げる**拡張機能。AWS Polly を使った音声合成(複数の声・言語対応)。upstream Scratch 標準。 +### Smalruby 独自: CORS 回避プロキシ経由 + +Scratch の音声合成サービス (`synthesis-service.scratch.mit.edu`) は `Access-Control-Allow-Origin` を +`scratch.mit.edu` 限定に締めたため、`smalruby.app` から直接叩くと CORS でブロックされる。 +そこで VM 実装 `scratch3_text2speech/index.js` の `SERVER_HOST` を Smalruby プロキシ +`https://api.smalruby.app/scratch-api-proxy` に上書きしている(`=== Smalruby: Start/End of synthesis CORS proxy ===` +マーカーで囲む)。拡張は `${SERVER_HOST}/synth?...` を組むため base の差し替えだけで proxy 経由になる。 +プロキシはサーバ側で `synthesis-service.scratch.mit.edu` を叩き、バイナリ音声 (mp3) を Base64 で返却し +(API Gateway HTTP API v2 がクライアントへバイト列にデコード)、built-in CORS でレスポンスを返す +(`infra/smalruby-api` の `GET /scratch-api-proxy/synth`)。translate (#857) と同じ根本原因・方針。 + ## ユーザーストーリー - **小学生**として、自分の作ったキャラクターに「しゃべらせたい」 From 5894bb0e60a5580e4fc5567ea984be5098066968 Mon Sep 17 00:00:00 2001 From: "smalruby3-editor-bot[bot]" <297607354+smalruby3-editor-bot[bot]@users.noreply.github.com> Date: Wed, 1 Jul 2026 11:16:25 +0000 Subject: [PATCH 3/3] refactor(text2speech): reuse generic CORS proxy instead of a dedicated synth Lambda MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit レビュー指摘 (#861) を受けて、音声合成 (text2speech) の CORS 回避を専用 Lambda では なく Smalruby の**汎用** cors-proxy (`GET /cors-proxy?url=`) の再利用に 切り替える。汎用 cors-proxy は既に `audio/*` をバイナリ判定して Base64 返却するため、 専用 `scratch-api-synthesis` Lambda・ルート・integration の追加もインフラの再デプロイも 不要になる。 - `scratch3_text2speech/index.js`: `SERVER_HOST` を upstream 値に戻し、合成 URL 組み立て 箇所だけを generic cors-proxy でラップ (Smalruby マーカーで囲む)。upstream 差分が最小化。 - `infra/smalruby-api`: 追加した `scratch-api-synthesis.ts` / そのテスト / stack のルートを削除。 - docs (extension-text2speech / smalruby-api / markers-vm) を汎用プロキシ方式に更新。 - ガードテスト `test/unit/extension_text2speech_proxy.js` を cors-proxy 経路の検証に更新。 Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/extension-text2speech/README.md | 27 ++++-- docs/infra/smalruby-api.md | 15 +--- docs/maintenance/smalruby-markers-vm.md | 2 +- .../lambda/scratch-api-synthesis.ts | 70 ---------------- .../tests/scratch-api-synthesis.test.ts | 82 ------------------- infra/smalruby-api/lib/smalruby-api-stack.ts | 12 --- .../extensions/scratch3_text2speech/index.js | 24 ++++-- .../test/unit/extension_text2speech_proxy.js | 23 ++++-- 8 files changed, 55 insertions(+), 200 deletions(-) delete mode 100644 infra/smalruby-api/lambda/scratch-api-synthesis.ts delete mode 100644 infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts diff --git a/docs/extension-text2speech/README.md b/docs/extension-text2speech/README.md index 5d4b9f0f543..424d11f5c5d 100644 --- a/docs/extension-text2speech/README.md +++ b/docs/extension-text2speech/README.md @@ -1,7 +1,7 @@ # 拡張機能: 音声合成 (Text to Speech) > **🔧 upstream 改良** — upstream にあるが Smalruby で機能を改良・拡張している -> 改良点: 音声合成リクエストの送信先を Smalruby の CORS 回避プロキシ (`api.smalruby.app/scratch-api-proxy/synth`) に上書き。Scratch の音声合成サービスは CORS を scratch.mit.edu 限定にしたため、smalruby.app からの直叩きが CORS で失敗する。 +> 改良点: 音声合成リクエストを Smalruby の**汎用** CORS 回避プロキシ (`api.smalruby.app/cors-proxy`) 経由に切り替え。Scratch の音声合成サービスは CORS を scratch.mit.edu 限定にしたため、smalruby.app 
からの直叩きが CORS で失敗する。 - **Smalruby ランタイム対応**: ❌(smalruby3 gem 未対応。AWS Polly API + ブラウザ音声再生) - **デフォルト表示**: ✅(拡張機能ライブラリにデフォルトで表示される) @@ -11,16 +11,27 @@ 入力したテキストを**音声で読み上げる**拡張機能。AWS Polly を使った音声合成(複数の声・言語対応)。upstream Scratch 標準。 -### Smalruby 独自: CORS 回避プロキシ経由 +### Smalruby 独自: 汎用 CORS 回避プロキシ経由 Scratch の音声合成サービス (`synthesis-service.scratch.mit.edu`) は `Access-Control-Allow-Origin` を `scratch.mit.edu` 限定に締めたため、`smalruby.app` から直接叩くと CORS でブロックされる。 -そこで VM 実装 `scratch3_text2speech/index.js` の `SERVER_HOST` を Smalruby プロキシ -`https://api.smalruby.app/scratch-api-proxy` に上書きしている(`=== Smalruby: Start/End of synthesis CORS proxy ===` -マーカーで囲む)。拡張は `${SERVER_HOST}/synth?...` を組むため base の差し替えだけで proxy 経由になる。 -プロキシはサーバ側で `synthesis-service.scratch.mit.edu` を叩き、バイナリ音声 (mp3) を Base64 で返却し -(API Gateway HTTP API v2 がクライアントへバイト列にデコード)、built-in CORS でレスポンスを返す -(`infra/smalruby-api` の `GET /scratch-api-proxy/synth`)。translate (#857) と同じ根本原因・方針。 +そこで VM 実装 `scratch3_text2speech/index.js` では、合成 URL (`${SERVER_HOST}/synth?...`) を +Smalruby の**汎用** CORS プロキシで包む(`=== Smalruby: Start/End of synthesis CORS proxy ===` +マーカーで囲む): + +``` +https://api.smalruby.app/cors-proxy?url={encoded-synth-url} +``` + +`SERVER_HOST` は upstream の値 (`https://synthesis-service.scratch.mit.edu`) のまま維持し、URL 組み立て +箇所だけをラップするので upstream との差分が最小になる(upstream マージ時の silent revert 検知は +`test/unit/extension_text2speech_proxy.js` が担保)。 + +汎用 `cors-proxy` はサーバ側で対象 URL を fetch し、`audio/*` をバイナリと判定して mp3 を Base64 で返却 +(`isBase64Encoded: true`。API Gateway HTTP API v2 がクライアントへバイト列にデコード)、built-in CORS で +レスポンスを返す(`infra/smalruby-api` の `GET /cors-proxy`)。translate (#857) と同じ根本原因だが、 +translate は専用 Lambda、音声合成は**既存の汎用プロキシを再利用**する点が異なり、専用 Lambda の追加も +インフラの再デプロイも不要(#859)。 ## ユーザーストーリー diff --git a/docs/infra/smalruby-api.md b/docs/infra/smalruby-api.md index b7693d22ce0..51cbe64f821 100644 --- a/docs/infra/smalruby-api.md +++ b/docs/infra/smalruby-api.md @@ -4,15 +4,14 @@ ## 概要 -`infra/smalruby-api/` は Smalruby のフロントエンド (smalruby.app) 
が利用する**汎用バックエンド機能**を提供する 5 つの Lambda を束ねた **HTTP API v2** スタック。 +`infra/smalruby-api/` は Smalruby のフロントエンド (smalruby.app) が利用する**汎用バックエンド機能**を提供する 4 つの Lambda を束ねた **HTTP API v2** スタック。 | エンドポイント | Lambda | 用途 | |---|---|---| -| `GET /cors-proxy` | `smalruby-api-cors-proxy{stageSuffix}` | 任意 URL の CORS フリーフェッチ + Google Drive URL 変換 + バイナリ Base64 化 | +| `GET /cors-proxy` | `smalruby-api-cors-proxy{stageSuffix}` | 任意 URL の CORS フリーフェッチ + Google Drive URL 変換 + バイナリ Base64 化。音声合成 (text2speech) もこの汎用プロキシ経由で `synthesis-service.scratch.mit.edu` を叩く | | `GET /mesh-domain` | `smalruby-api-mesh-zone{stageSuffix}` | クライアント IP から Mesh ドメイン (CRC32 ハッシュ) を生成 | | `GET /scratch-api-proxy/projects/{projectId}` | `smalruby-api-scratch-projects{stageSuffix}` | Scratch 公式 API (project info) のステータス透過プロキシ | | `GET /scratch-api-proxy/translate` | `smalruby-api-scratch-translate{stageSuffix}` | Scratch 公式翻訳サービスのプロキシ | -| `GET /scratch-api-proxy/synth` | `smalruby-api-scratch-synthesis{stageSuffix}` | Scratch 公式音声合成サービスのプロキシ (バイナリ音声を Base64 返却) | `OPTIONS` (preflight) は HTTP API v2 の **built-in CORS** が自動処理。旧 SAM の `cors-for-smalruby` Lambda は不要。 @@ -67,15 +66,7 @@ Scratch translate サービス (`https://translate-service.scratch.mit.edu/trans 実装: `infra/smalruby-api/lambda/scratch-api-translate.ts` -### `GET /scratch-api-proxy/synth` - -Scratch 音声合成サービス (`https://synthesis-service.scratch.mit.edu/synth`) のプロキシ。text2speech (「音声で話す」) 拡張が利用する。 - -**入力**: `?locale=<>&gender=<>&text=<>` - -**処理**: サーバ側で synthesis サービスを fetch し、**バイナリ音声 (mp3) を Base64 エンコードして返す** (`isBase64Encoded: true` + `Content-Type: audio/mpeg`)。API Gateway が Base64 をデコードしてクライアントにはバイナリで届く。translate (テキスト) と違い応答がバイナリな点が最大の差 (cors-proxy のバイナリ Base64 化と同じ考え方)。 - -実装: `infra/smalruby-api/lambda/scratch-api-synthesis.ts` +> **音声合成 (text2speech) について**: 音声合成サービス (`https://synthesis-service.scratch.mit.edu/synth`) も同じ CORS 制約があるが、専用 Lambda は作らず**汎用 `GET /cors-proxy?url=`** を再利用する。`cors-proxy` は `audio/*` をバイナリと判定して Base64 返却 
(`isBase64Encoded: true`) するため、API Gateway 側でバイト列にデコードされ、拡張の `arrayBuffer()` がそのまま音声を得られる。フロント側の実装は `packages/scratch-vm/src/extensions/scratch3_text2speech/index.js` を参照 (#859)。 ## 環境変数 diff --git a/docs/maintenance/smalruby-markers-vm.md b/docs/maintenance/smalruby-markers-vm.md index af81b3a5412..ac712c36ba6 100644 --- a/docs/maintenance/smalruby-markers-vm.md +++ b/docs/maintenance/smalruby-markers-vm.md @@ -21,7 +21,7 @@ scratch-vm の **upstream ファイルに埋め込んだ Smalruby マーカー** | `src/engine/blocks.js` | XML coords guard | `blockToXML` で x/y が finite number のときだけ XML 属性を出力。Ruby → blocks 変換の x/y 未指定 (undefined) を scratch-blocks v2 に正しく伝え、`fromRuby` 再レイアウト経路を維持する | | `src/engine/blocks.js` | orphaned-parent guard | `getTopLevelScript` で `block.parent` が this._blocks に存在しない場合に停止。Ruby → blocks 変換中の孤立 parent id で `undefined.parent` 参照クラッシュを防ぐ | | `src/extensions/scratch3_translate/index.js` | translate CORS proxy | `serverURL` を Smalruby プロキシ (`https://api.smalruby.app/scratch-api-proxy/`) に上書き。Scratch の翻訳サービスは CORS を scratch.mit.edu 限定にしたため smalruby.app からの直叩きが失敗する。マーカー無しで上書きしていた過去の版が v13.7.2 upstream マージで静かに revert された (#857) ため、次回以降のマージで検知できるようマーカーで囲む | -| `src/extensions/scratch3_text2speech/index.js` | synthesis CORS proxy | `SERVER_HOST` を Smalruby プロキシ (`https://api.smalruby.app/scratch-api-proxy`) に上書き。Scratch の音声合成サービスは CORS を scratch.mit.edu 限定にしたため smalruby.app からの直叩きが失敗する。拡張が `${SERVER_HOST}/synth?...` を組むため base を差し替えるだけで proxy 経由になる。プロキシはバイナリ音声を Base64 返却する (`infra/smalruby-api` scratch-api-synthesis)。translate (#857) と同じ根本原因なので同様にマーカーで囲む (#859) | +| `src/extensions/scratch3_text2speech/index.js` | synthesis CORS proxy | 音声合成 URL を Smalruby の**汎用** CORS プロキシ (`https://api.smalruby.app/cors-proxy?url=`) で包む。Scratch の音声合成サービスは CORS を scratch.mit.edu 限定にしたため smalruby.app からの直叩きが失敗する。汎用 cors-proxy はバイナリ音声を Base64 で返却する (API Gateway がバイト列にデコード) ので専用 Lambda は不要。`SERVER_HOST` は upstream の値のまま維持し、URL 組み立て箇所だけラップするので upstream 差分が最小。translate (#857) と同じ根本原因 (#859) | 
## 関連ファイル diff --git a/infra/smalruby-api/lambda/scratch-api-synthesis.ts b/infra/smalruby-api/lambda/scratch-api-synthesis.ts deleted file mode 100644 index 4610ecdb150..00000000000 --- a/infra/smalruby-api/lambda/scratch-api-synthesis.ts +++ /dev/null @@ -1,70 +0,0 @@ -import type { - APIGatewayProxyEventV2, - APIGatewayProxyStructuredResultV2, -} from 'aws-lambda'; - -const API_HOST = 'https://synthesis-service.scratch.mit.edu'; - -/** - * Proxy for Scratch's text-to-speech synthesis service. - * - * Unlike the translate proxy (which forwards text), the synthesis service - * returns a binary audio file (mp3). API Gateway HTTP API v2 can only carry - * binary payloads when the Lambda sets `isBase64Encoded: true`, so we Base64 - * encode the audio and let API Gateway decode it back to bytes for the client. - */ -export const handler = async ( - event: APIGatewayProxyEventV2, -): Promise => { - const locale = (event.queryStringParameters?.locale ?? '').trim(); - const gender = (event.queryStringParameters?.gender ?? '').trim(); - const text = event.queryStringParameters?.text ?? ''; - - if (!locale || !gender) { - return { - statusCode: 400, - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - code: 'BadRequest', - message: 'locale and gender are required', - }), - isBase64Encoded: false, - }; - } - - try { - const params = new URLSearchParams({ locale, gender, text }); - const res = await fetch(`${API_HOST}/synth?${params.toString()}`); - - if (res.status < 200 || res.status >= 300) { - // Pass upstream failures through as JSON so the client sees the status. 
- const body = await res.text(); - return { - statusCode: res.status, - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - code: 'UpstreamError', - message: `HTTP ${res.status}: ${body}`, - }), - isBase64Encoded: false, - }; - } - - const contentType = res.headers.get('content-type') || 'audio/mpeg'; - const buffer = Buffer.from(await res.arrayBuffer()); - return { - statusCode: 200, - headers: { 'Content-Type': contentType }, - body: buffer.toString('base64'), - isBase64Encoded: true, - }; - } catch (e) { - const message = e instanceof Error ? e.message : String(e); - return { - statusCode: 502, - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ code: 'BadGateway', message }), - isBase64Encoded: false, - }; - } -}; diff --git a/infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts b/infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts deleted file mode 100644 index 5dd9161bdd4..00000000000 --- a/infra/smalruby-api/lambda/tests/scratch-api-synthesis.test.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { handler } from '../scratch-api-synthesis'; - -const mockFetch = jest.fn(); -global.fetch = mockFetch as unknown as typeof fetch; - -const event = (qs?: Record) => - ({ - queryStringParameters: qs, - requestContext: { http: { sourceIp: '1.2.3.4' } }, - }) as never; - -beforeEach(() => { - mockFetch.mockReset(); -}); - -describe('scratch-api-synthesis handler', () => { - test('returns 400 when locale is missing', async () => { - const res = await handler(event({ gender: 'female', text: 'hi' })); - expect(res.statusCode).toBe(400); - const body = JSON.parse(res.body as string); - expect(body.code).toBe('BadRequest'); - expect(mockFetch).not.toHaveBeenCalled(); - }); - - test('returns 400 when gender is missing', async () => { - const res = await handler(event({ locale: 'ja-JP', text: 'hi' })); - expect(res.statusCode).toBe(400); - expect(mockFetch).not.toHaveBeenCalled(); - }); - - test('proxies synth request 
and returns Base64-encoded audio', async () => { - const audio = Buffer.from([0x49, 0x44, 0x33, 0x04]); // fake mp3 bytes - mockFetch.mockResolvedValueOnce( - new Response(audio, { - status: 200, - headers: { 'content-type': 'audio/mpeg' }, - }), - ); - - const res = await handler(event({ locale: 'ja-JP', gender: 'female', text: 'こんにちは' })); - - expect(res.statusCode).toBe(200); - expect(res.isBase64Encoded).toBe(true); - expect(res.headers?.['Content-Type']).toBe('audio/mpeg'); - expect(res.body).toBe(audio.toString('base64')); - - expect(mockFetch).toHaveBeenCalledTimes(1); - const calledUrl = mockFetch.mock.calls[0][0] as string; - expect(calledUrl).toMatch(/^https:\/\/synthesis-service\.scratch\.mit\.edu\/synth\?/); - expect(calledUrl).toContain('locale=ja-JP'); - expect(calledUrl).toContain('gender=female'); - }); - - test('url-encodes special characters in text', async () => { - mockFetch.mockResolvedValueOnce( - new Response(Buffer.from([0]), { - status: 200, - headers: { 'content-type': 'audio/mpeg' }, - }), - ); - await handler(event({ locale: 'en-US', gender: 'male', text: 'hello world & goodbye' })); - const calledUrl = mockFetch.mock.calls[0][0] as string; - expect(calledUrl).toContain('text=hello+world+%26+goodbye'); - }); - - test('passes through upstream errors as JSON', async () => { - mockFetch.mockResolvedValueOnce(new Response('boom', { status: 503 })); - const res = await handler(event({ locale: 'ja-JP', gender: 'female', text: 'x' })); - expect(res.statusCode).toBe(503); - expect(res.isBase64Encoded).toBe(false); - const body = JSON.parse(res.body as string); - expect(body.code).toBe('UpstreamError'); - }); - - test('returns 502 on network error', async () => { - mockFetch.mockRejectedValueOnce(new Error('ETIMEDOUT')); - const res = await handler(event({ locale: 'ja-JP', gender: 'female', text: 'x' })); - expect(res.statusCode).toBe(502); - const body = JSON.parse(res.body as string); - expect(body.code).toBe('BadGateway'); - }); -}); diff 
--git a/infra/smalruby-api/lib/smalruby-api-stack.ts b/infra/smalruby-api/lib/smalruby-api-stack.ts index 4ed49e4a433..84a0fbfe550 100644 --- a/infra/smalruby-api/lib/smalruby-api-stack.ts +++ b/infra/smalruby-api/lib/smalruby-api-stack.ts @@ -104,12 +104,6 @@ export class SmalrubyApiStack extends cdk.Stack { 'scratch-api-translate.ts', ); - const scratchSynthesisFn = makeLambda( - 'ScratchApiSynthesis', - `smalruby-api-scratch-synthesis${stageSuffix}`, - 'scratch-api-synthesis.ts', - ); - // --- Custom Domain --- const parentZoneName = process.env.ROUTE53_PARENT_ZONE_NAME || 'api.smalruby.app'; @@ -207,12 +201,6 @@ export class SmalrubyApiStack extends cdk.Stack { integration: integrationFor('ScratchApiTranslateIntegration', scratchTranslateFn), }); - this.api.addRoutes({ - path: '/scratch-api-proxy/synth', - methods: [apigatewayv2.HttpMethod.GET], - integration: integrationFor('ScratchApiSynthesisIntegration', scratchSynthesisFn), - }); - // Throttling const defaultStage = this.api.defaultStage?.node.defaultChild as apigatewayv2.CfnStage; if (defaultStage) { diff --git a/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js b/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js index d4ccda2baa7..7ea50ded2bb 100644 --- a/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js +++ b/packages/scratch-vm/src/extensions/scratch3_text2speech/index.js @@ -27,16 +27,19 @@ const blockIconURI = 'data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNv * The url of the synthesis server. * @type {string} */ +const SERVER_HOST = 'https://synthesis-service.scratch.mit.edu'; + // === Smalruby: Start of synthesis CORS proxy === // Scratch's synthesis service is CORS-locked to scratch.mit.edu, so calling it // directly from smalruby.app fails ('Access-Control-Allow-Origin' mismatch). 
-// Route through the Smalruby proxy (infra/smalruby-api scratch-api-proxy/synth) -// which forwards to synthesis-service.scratch.mit.edu server-side and returns the -// binary audio (Base64-decoded by API Gateway) with permissive CORS headers. -// The extension builds `${SERVER_HOST}/synth?...`, so pointing SERVER_HOST at the -// proxy base yields `https://api.smalruby.app/scratch-api-proxy/synth?...`. -// Keep this override across upstream merges (same root cause as translate #857). -const SERVER_HOST = 'https://api.smalruby.app/scratch-api-proxy'; +// Instead of a dedicated per-service Lambda, we reuse Smalruby's *generic* CORS +// proxy (infra/smalruby-api `GET /cors-proxy?url=<encoded-target-url>`). It fetches +// the target server-side and Base64-encodes binary audio (API Gateway decodes it +// back to bytes for the client), returning permissive CORS headers. See where the +// synth request URL is built (search for CORS_PROXY_HOST). SERVER_HOST stays at the +// upstream value so upstream merges only touch the URL-build site (guarded by +// test/unit/extension_text2speech_proxy.js). Same root cause as translate (#857/#859). +const CORS_PROXY_HOST = 'https://api.smalruby.app/cors-proxy'; // === Smalruby: End of synthesis CORS proxy === /** @@ -731,6 +734,13 @@ class Scratch3Text2SpeechBlocks { path += `&gender=${gender}`; path += `&text=${encodeURIComponent(words.substring(0, 128))}`; + // === Smalruby: Start of synthesis CORS proxy === + // Wrap the synthesis URL in the generic Smalruby CORS proxy so smalruby.app + // is not blocked by the CORS-locked Scratch service. The whole synth URL + // (including its query string) becomes the encoded `url` param. 
+ path = `${CORS_PROXY_HOST}?url=${encodeURIComponent(path)}`; + // === Smalruby: End of synthesis CORS proxy === + // Perform HTTP request to get audio file return fetchWithTimeout(path, {}, SERVER_TIMEOUT) .then(res => { diff --git a/packages/scratch-vm/test/unit/extension_text2speech_proxy.js b/packages/scratch-vm/test/unit/extension_text2speech_proxy.js index e9dfe7343c6..3fae35050ee 100644 --- a/packages/scratch-vm/test/unit/extension_text2speech_proxy.js +++ b/packages/scratch-vm/test/unit/extension_text2speech_proxy.js @@ -5,10 +5,11 @@ const extPath = require.resolve('../../src/extensions/scratch3_text2speech'); // The Text2Speech extension is an upstream Scratch file whose synthesis service is // CORS-locked to scratch.mit.edu. Smalruby must route requests through its own -// proxy (api.smalruby.app/scratch-api-proxy/synth) so smalruby.app is not blocked -// by CORS. This test guards that the SERVER_HOST override is not silently reverted -// by an upstream merge (same root cause as translate; see issue #859 / #857). -test('text2speech extension routes fetch through the Smalruby CORS proxy', (t) => { +// generic CORS proxy (api.smalruby.app/cors-proxy?url=<encoded-target-url>) so +// smalruby.app is not blocked by CORS. This test guards that the proxy wrapping is +// not silently reverted by an upstream merge (same root cause as translate; see +// issue #859 / #857). +test('text2speech extension routes fetch through the generic Smalruby CORS proxy', (t) => { // Stub fetchWithTimeout before the extension captures it via destructuring at // module load time, then fresh-require the extension so it picks up the stub. 
const fetchModule = require(fetchModulePath); @@ -49,13 +50,19 @@ test('text2speech extension routes fetch through the Smalruby CORS proxy', (t) = t.ok(capturedUrl, 'fetchWithTimeout was called'); t.match( capturedUrl, - /^https:\/\/api\.smalruby\.app\/scratch-api-proxy\/synth\?/, - 'SERVER_HOST points to the Smalruby CORS proxy', + /^https:\/\/api\.smalruby\.app\/cors-proxy\?url=/, + 'request goes to the generic Smalruby CORS proxy', + ); + // The synth URL (with its query) is carried as the encoded `url` param. + t.match( + capturedUrl, + /url=https%3A%2F%2Fsynthesis-service\.scratch\.mit\.edu%2Fsynth/, + 'the CORS-locked synthesis URL is wrapped as the proxy `url` param', ); t.notMatch( capturedUrl, - /synthesis-service\.scratch\.mit\.edu/, - 'does not call the CORS-locked Scratch synthesis service directly', + /^https:\/\/synthesis-service\.scratch\.mit\.edu/, + 'the browser does not call the CORS-locked Scratch service directly', ); t.end(); });