Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/main/ipc/speech.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ vi.mock('electron', () => ({
app: { getPath: vi.fn(() => '/tmp/orca-speech-test') },
BrowserWindow: { fromWebContents: fromWebContentsMock },
ipcMain: { handle: handleMock },
safeStorage: {
decryptString: vi.fn(),
encryptString: vi.fn(() => Buffer.from('encrypted')),
isEncryptionAvailable: vi.fn(() => true)
},
systemPreferences: {
getMediaAccessStatus: vi.fn(() => 'granted'),
askForMediaAccess: vi.fn(() => Promise.resolve(true))
Expand Down
19 changes: 19 additions & 0 deletions src/main/ipc/speech.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ import { writeFile, unlink } from 'fs/promises'
import { createHash } from 'crypto'
import { SPEECH_MODEL_CATALOG, getCatalogModel } from '../speech/model-catalog'
import { getSpeechModelManager, getSpeechSttService } from '../speech/speech-runtime-service'
import {
clearOpenAiSpeechApiKey,
hasOpenAiSpeechApiKey,
saveOpenAiSpeechApiKey
} from '../speech/openai-api-key-store'
import type { Store } from '../persistence'

export function registerSpeechHandlers(store: Store): void {
Expand All @@ -15,6 +20,20 @@ export function registerSpeechHandlers(store: Store): void {
return getSpeechModelManager(store).getModelStates()
})

ipcMain.handle('speech:getOpenAiApiKeyStatus', async () => {
return { configured: hasOpenAiSpeechApiKey() }
})

ipcMain.handle('speech:saveOpenAiApiKey', async (_event, apiKey: string) => {
saveOpenAiSpeechApiKey(apiKey)
return { configured: true }
})

ipcMain.handle('speech:clearOpenAiApiKey', async () => {
clearOpenAiSpeechApiKey()
return { configured: false }
})

ipcMain.handle('speech:downloadModel', async (event, modelId: string) => {
const manager = getSpeechModelManager(store)
const window = BrowserWindow.fromWebContents(event.sender)
Expand Down
32 changes: 32 additions & 0 deletions src/main/speech/model-catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
description:
'Highest accuracy for 25 European languages. Punctuation, capitalization, and word-level timestamps.',
type: 'transducer',
provider: 'local',
language: 'multilingual',
sizeBytes: 180_000_000,
downloadUrl:
Expand All @@ -25,6 +26,7 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
description:
'English only. Faster than v3 with similar accuracy. Punctuation and capitalization.',
type: 'transducer',
provider: 'local',
language: 'en',
sizeBytes: 170_000_000,
downloadUrl:
Expand All @@ -41,6 +43,7 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
label: 'Zipformer Bilingual',
description: 'Chinese + English with code-switching. Low-latency real-time streaming.',
type: 'transducer',
provider: 'local',
language: 'zh-en',
sizeBytes: 130_000_000,
downloadUrl:
Expand All @@ -63,6 +66,7 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
description:
'Chinese (Mandarin + dialects) + English. Strong on accented and regional Chinese.',
type: 'paraformer',
provider: 'local',
language: 'zh-en',
sizeBytes: 115_000_000,
downloadUrl:
Expand All @@ -78,6 +82,7 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
label: 'Zipformer Streaming EN',
description: 'English only. Lightweight 20M-param model, good balance of speed and size.',
type: 'transducer',
provider: 'local',
language: 'en',
sizeBytes: 128_000_000,
downloadUrl:
Expand All @@ -99,6 +104,7 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
label: 'Zipformer Streaming ZH',
description: 'Chinese only. Ultra-lightweight 14M-param model, ideal for low-resource devices.',
type: 'transducer',
provider: 'local',
language: 'zh',
sizeBytes: 74_000_000,
downloadUrl:
Expand All @@ -120,6 +126,7 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
label: 'Whisper Tiny',
description: '90+ languages. Lower accuracy than Parakeet but broadest language coverage.',
type: 'whisper',
provider: 'local',
language: 'multilingual',
sizeBytes: 116_000_000,
downloadUrl:
Expand All @@ -129,9 +136,34 @@ export const SPEECH_MODEL_CATALOG: SpeechModelManifest[] = [
files: ['tiny-encoder.onnx', 'tiny-decoder.onnx', 'tiny-tokens.txt'],
sampleRate: 16000,
streaming: false
},
{
id: 'openai-gpt-4o-mini-transcribe',
label: 'GPT-4o mini Transcribe',
description:
'Cloud transcription with strong accuracy and low cost. Requires an OpenAI API key.',
type: 'openai',
provider: 'openai',
language: 'multilingual',
sampleRate: 16000,
streaming: false
},
{
id: 'openai-gpt-4o-transcribe',
label: 'GPT-4o Transcribe',
description: 'Cloud transcription with higher accuracy. Requires an OpenAI API key.',
type: 'openai',
provider: 'openai',
language: 'multilingual',
sampleRate: 16000,
streaming: false
}
]

/**
 * Looks up a speech model manifest by its catalog id.
 *
 * @param id - Model identifier to search for.
 * @returns The first catalog entry whose `id` matches, or `undefined` when
 *   no entry does.
 */
export function getCatalogModel(id: string): SpeechModelManifest | undefined {
  for (const manifest of SPEECH_MODEL_CATALOG) {
    if (manifest.id === id) {
      return manifest
    }
  }
  return undefined
}

/**
 * Tells whether a manifest's `provider` is `'local'`.
 *
 * @param manifest - Catalog entry to inspect.
 * @returns `true` only when `manifest.provider` is exactly `'local'`.
 */
export function isLocalSpeechModel(manifest: SpeechModelManifest): boolean {
  const { provider } = manifest
  return provider === 'local'
}
33 changes: 32 additions & 1 deletion src/main/speech/model-manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
import { SPEECH_MODEL_CATALOG } from './model-catalog'
import { ModelManager } from './model-manager'

const { httpsGetMock, spawnMock } = vi.hoisted(() => ({
const { hasOpenAiSpeechApiKeyMock, httpsGetMock, spawnMock } = vi.hoisted(() => ({
hasOpenAiSpeechApiKeyMock: vi.fn(),
httpsGetMock: vi.fn(),
spawnMock: vi.fn()
}))
Expand All @@ -27,6 +28,10 @@ vi.mock('https', async () => {
return { ...(actual as Record<string, unknown>), get: httpsGetMock }
})

vi.mock('./openai-api-key-store', () => ({
hasOpenAiSpeechApiKey: hasOpenAiSpeechApiKeyMock
}))

type ModelManagerInternals = {
verifyArchiveSha256: (archivePath: string, expectedSha256: string) => Promise<void>
downloadFile: (
Expand All @@ -48,11 +53,16 @@ type ModelManagerInternals = {
describe('ModelManager', () => {
beforeEach(() => {
httpsGetMock.mockReset()
hasOpenAiSpeechApiKeyMock.mockReset()
hasOpenAiSpeechApiKeyMock.mockReturnValue(false)
spawnMock.mockReset()
})

it('requires pinned SHA-256 hashes for every catalog archive', () => {
for (const manifest of SPEECH_MODEL_CATALOG) {
if (manifest.provider !== 'local') {
continue
}
expect(manifest.archiveSha256).toMatch(/^[a-f0-9]{64}$/)
}
})
Expand Down Expand Up @@ -93,6 +103,27 @@ describe('ModelManager', () => {
}
})

it('marks OpenAI transcription models ready only when an API key is configured', async () => {
const dir = mkdtempSync(join(tmpdir(), 'orca-model-manager-'))
try {
const manager = new ModelManager(dir)

await expect(manager.getModelState('openai-gpt-4o-mini-transcribe')).resolves.toEqual({
id: 'openai-gpt-4o-mini-transcribe',
status: 'not-downloaded'
})

hasOpenAiSpeechApiKeyMock.mockReturnValue(true)

await expect(manager.getModelState('openai-gpt-4o-mini-transcribe')).resolves.toEqual({
id: 'openai-gpt-4o-mini-transcribe',
status: 'ready'
})
} finally {
rmSync(dir, { recursive: true, force: true })
}
})

it('aborts an in-flight model download request when cancelled', async () => {
const dir = mkdtempSync(join(tmpdir(), 'orca-model-manager-'))
try {
Expand Down
26 changes: 25 additions & 1 deletion src/main/speech/model-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ import type {
SpeechModelState,
SpeechModelStatus
} from '../../shared/speech-types'
import { SPEECH_MODEL_CATALOG, getCatalogModel } from './model-catalog'
import { SPEECH_MODEL_CATALOG, getCatalogModel, isLocalSpeechModel } from './model-catalog'
import { hasOpenAiSpeechApiKey } from './openai-api-key-store'
import { resolveTarExecutable } from './tar-executable'

type DownloadHandle = {
Expand Down Expand Up @@ -68,6 +69,13 @@ export class ModelManager {
return { id: modelId, status: 'error', error: 'Unknown model' }
}

if (manifest.provider === 'openai') {
return {
id: modelId,
status: hasOpenAiSpeechApiKey() ? 'ready' : 'not-downloaded'
}
}

const modelDir = this.getModelDir(modelId)
if (existsSync(modelDir) && this.validateModelFiles(manifest, modelDir)) {
const state: SpeechModelState = { id: modelId, status: 'ready' }
Expand Down Expand Up @@ -97,6 +105,9 @@ export class ModelManager {
}

/**
 * Checks that every file listed in the manifest exists inside `modelDir`.
 *
 * @param manifest - Catalog entry whose `files` list is checked.
 * @param modelDir - Directory expected to contain the files.
 * @returns `false` when the manifest has no `files` list or any listed file
 *   is missing; `true` otherwise.
 */
private validateModelFiles(manifest: SpeechModelManifest, modelDir: string): boolean {
  const requiredFiles = manifest.files
  if (!requiredFiles) {
    return false
  }
  // Valid exactly when no listed file is absent from the directory.
  const hasMissingFile = requiredFiles.some((fileName) => !existsSync(join(modelDir, fileName)))
  return !hasMissingFile
}

Expand All @@ -109,6 +120,12 @@ export class ModelManager {
if (!manifest) {
throw new Error(`Unknown model: ${modelId}`)
}
if (!isLocalSpeechModel(manifest)) {
throw new Error(`Model does not support downloads: ${modelId}`)
}
if (!manifest.downloadUrl || !manifest.archiveSha256 || !manifest.sizeBytes) {
throw new Error(`Model download metadata missing: ${modelId}`)
}

const modelDir = this.getModelDir(modelId)
if (existsSync(modelDir) && this.validateModelFiles(manifest, modelDir)) {
Expand Down Expand Up @@ -213,6 +230,10 @@ export class ModelManager {
if (!getCatalogModel(modelId)) {
throw new Error(`Unknown model: ${modelId}`)
}
const manifest = getCatalogModel(modelId)
if (!manifest || !isLocalSpeechModel(manifest)) {
throw new Error(`Model does not support deletion: ${modelId}`)
}
this.cancelDownload(modelId)
const modelDir = this.getModelDir(modelId)
if (existsSync(modelDir)) {
Expand Down Expand Up @@ -542,6 +563,9 @@ export class ModelManager {
}

private async flattenNestedDir(modelDir: string, manifest: SpeechModelManifest): Promise<void> {
if (!manifest.files) {
return
}
const entries = await readdir(modelDir, { withFileTypes: true })
for (const entry of entries) {
if (entry.isDirectory()) {
Expand Down
88 changes: 88 additions & 0 deletions src/main/speech/openai-api-key-store.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs'
import { tmpdir } from 'os'
import type * as Os from 'os'
import { join } from 'path'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

// Stand-in for Electron's `safeStorage`, defined with `vi.hoisted` and handed to
// the `electron` mock that `loadStoreModule` registers.
// "Encryption" here is just a string <-> UTF-8 Buffer conversion, so the value a
// test writes to disk is the same value `decryptString` hands back.
const safeStorageMock = vi.hoisted(() => ({
decryptString: vi.fn((value: Buffer) => value.toString('utf8')),
encryptString: vi.fn((value: string) => Buffer.from(value)),
isEncryptionAvailable: vi.fn(() => true)
}))

// Home directory reported by the mocked `os.homedir()`; reassigned per test.
let tempHome = ''

/**
 * Imports `./openai-api-key-store` after resetting the module registry and
 * registering mocks for `electron` and `os`.
 *
 * The `os` mock keeps the real module's exports but makes `homedir()` return
 * the current value of `tempHome` at call time.
 *
 * @returns The dynamically imported store module.
 */
async function loadStoreModule() {
  vi.resetModules()

  const electronFactory = () => ({
    safeStorage: safeStorageMock
  })
  const osFactory = async () => {
    const realOs = await vi.importActual<typeof Os>('os')
    return { ...realOs, homedir: () => tempHome }
  }

  vi.doMock('electron', electronFactory)
  vi.doMock('os', osFactory)

  return import('./openai-api-key-store')
}

// Give every test a fresh temporary directory (the value the mocked
// `os.homedir()` returns) and clear recorded calls on the safeStorage mock.
beforeEach(() => {
  tempHome = mkdtempLike('orca-openai-key-store-')
  safeStorageMock.decryptString.mockClear()
  safeStorageMock.encryptString.mockClear()
  safeStorageMock.isEncryptionAvailable.mockClear()
  // Re-apply the default return value in case a test overrode it.
  safeStorageMock.isEncryptionAvailable.mockReturnValue(true)
})

// Remove the per-test directory again; without this every run leaves
// `orca-openai-key-store-*` directories behind in the OS temp dir.
afterEach(() => {
  if (tempHome) {
    rmSync(tempHome, { recursive: true, force: true })
    tempHome = ''
  }
})

/**
 * Creates a uniquely named directory inside the OS temp directory.
 *
 * @param prefix - Leading part of the new directory's name.
 * @returns Path of the directory that was created.
 */
function mkdtempLike(prefix: string): string {
  const template = join(tmpdir(), prefix)
  const createdDir = mkdtempSync(template)
  return createdDir
}

/**
 * Writes `value` to `<tempHome>/.orca/openai-speech-token.enc`, creating the
 * `.orca` directory first if it does not exist.
 *
 * @param value - Content to store in the key file.
 */
function writeStoredOpenAiKey(value: string): void {
  const storageDir = join(tempHome, '.orca')
  const keyFilePath = join(storageDir, 'openai-speech-token.enc')
  mkdirSync(storageDir, { recursive: true })
  writeFileSync(keyFilePath, value)
}

// Tests for the key store module, run against the mocked `safeStorage` and a
// temporary home directory. Each test imports the module through
// `loadStoreModule()`.
describe('OpenAI speech API key store', () => {
// A key file on disk is enough for "configured"; no safeStorage call is expected.
it('checks configured status without decrypting or touching safeStorage', async () => {
writeStoredOpenAiKey('encrypted-key')
const store = await loadStoreModule()

expect(store.hasOpenAiSpeechApiKey()).toBe(true)
expect(safeStorageMock.isEncryptionAvailable).not.toHaveBeenCalled()
expect(safeStorageMock.decryptString).not.toHaveBeenCalled()
})

// The mock's decryptString returns the stored bytes as UTF-8, so the value read
// back equals the file contents written above.
it('decrypts only when the key is read for an API request', async () => {
writeStoredOpenAiKey('encrypted-key')
const store = await loadStoreModule()

expect(store.readOpenAiSpeechApiKey()).toBe('encrypted-key')
expect(safeStorageMock.decryptString).toHaveBeenCalledOnce()
})

// Two reads, one decrypt: the second read must not call decryptString again.
it('caches the decrypted key so repeated dictations do not repeatedly touch safeStorage', async () => {
writeStoredOpenAiKey('encrypted-key')
const store = await loadStoreModule()

expect(store.readOpenAiSpeechApiKey()).toBe('encrypted-key')
expect(store.readOpenAiSpeechApiKey()).toBe('encrypted-key')
expect(safeStorageMock.decryptString).toHaveBeenCalledOnce()
})

// No key file is written up front; the key enters only through save.
it('uses the in-memory key after save without decrypting from safeStorage', async () => {
const store = await loadStoreModule()

store.saveOpenAiSpeechApiKey('saved-key')

expect(store.readOpenAiSpeechApiKey()).toBe('saved-key')
expect(safeStorageMock.decryptString).not.toHaveBeenCalled()
})

// The status check must not create the `.orca` directory as a side effect.
it('reports missing status without creating storage files', async () => {
const store = await loadStoreModule()

expect(store.hasOpenAiSpeechApiKey()).toBe(false)
expect(existsSync(join(tempHome, '.orca'))).toBe(false)
expect(safeStorageMock.decryptString).not.toHaveBeenCalled()
})
})
Loading
Loading