diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..79f6177 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,54 @@ +name: CodeQL + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + - cron: "27 3 * * 1" + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + runs-on: ubuntu-latest + permissions: + security-events: write + packages: read + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + - language: go + build-mode: autobuild + - language: actions + build-mode: none + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Go + if: matrix.language == 'go' + uses: actions/setup-go@v5 + with: + go-version: "1.22" + cache: true + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + + - name: Autobuild + if: matrix.build-mode == 'autobuild' + uses: github/codeql-action/autobuild@v4 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e88c17c..06aa47d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,36 +1,27 @@ -name: PyInstaller +name: Build on: push: branches: - main + pull_request: + branches: + - main jobs: build: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 + - name: Set up Go + uses: actions/setup-go@v5 with: - python-version: 3.8 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install pyinstaller click requests prettytable + go-version: "1.22" - - name: Build executable - run: | - pyinstaller --onefile gladia_cli.py --hidden-import=click --hidden-import=requests 
--hidden-import=prettytable - - - name: Upload release - uses: ncipollo/release-action@v1 - with: - artifacts: "./dist/gladia_cli" - bodyFile: "release_note.md" - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Build + run: make build + - name: Test + run: make test diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..471251b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,33 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +jobs: + release: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Run GoReleaser + uses: goreleaser/goreleaser-action@v6 + with: + distribution: goreleaser + version: "~> v2" + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 064bc68..dfd95ad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,3 @@ -# -# PYTHON -# - -# Ignore build files -build/ -*.egg-info/ - -# Ignore Python cache and compiled files -__pycache__/ -*.pyc -*.pyo -*.pyd -*.pyz - -# Ignore virtual environments -venv/ - # # GOLANG # @@ -52,4 +34,5 @@ cover.out cover.html .vscode -.idea/ \ No newline at end of file +.idea/ +gladia diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 0000000..8a4601e --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,67 @@ +version: 2 + +project_name: gladia + +before: + hooks: + - go mod tidy + +builds: + - main: ./cmd/ + binary: gladia + ldflags: + - -s -w -X main.version={{.Version}} + env: + - CGO_ENABLED=0 + goos: + - linux + - windows + - darwin + goarch: + - amd64 + - arm64 + - "386" + - arm + goarm: + - "7" + ignore: + - goos: windows + goarch: arm64 + - goos: windows + goarch: arm + - goos: darwin + goarch: "386" + - goos: darwin + goarch: arm + +archives: + 
- formats: + - tar.gz + name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" + format_overrides: + - goos: windows + formats: + - zip + +checksum: + name_template: "checksums.txt" + +changelog: + sort: asc + filters: + exclude: + - "^docs:" + - "^test:" + - "^chore:" + - Merge pull request + - Merge branch + +release: + github: + owner: gladiaio + name: gladia-cli + draft: false + prerelease: auto + extra_files: + - glob: install.sh + - glob: install.ps1 diff --git a/Makefile b/Makefile index 5320df3..8cfd916 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,25 @@ build: - go build -o gladia -v ./cmd/*.go + go build -o gladia -v ./cmd/ .PHONY: dist dist: - GOOS=linux GOARCH=arm GOARM=7 go build -o dist/gladia-linux-arm7 -v ./cmd/*.go - GOOS=linux GOARCH=arm64 go build -o dist/gladia-linux-arm64 -v ./cmd/*.go - GOOS=linux GOARCH=amd64 go build -o dist/gladia-linux-x86_64 -v ./cmd/*.go - GOOS=linux GOARCH=386 go build -o dist/gladia-linux-i386 -v ./cmd/*.go - GOOS=windows GOARCH=amd64 go build -o dist/gladia-windows-x86_64.exe -v ./cmd/*.go - GOOS=darwin GOARCH=amd64 go build -o dist/gladia-darwin-x86_64 -v ./cmd/*.go - GOOS=darwin GOARCH=arm64 go build -o dist/gladia-darwin-arm64 -v ./cmd/*.go + mkdir -p dist + GOOS=linux GOARCH=arm GOARM=7 go build -o dist/gladia-linux-arm7 -v ./cmd/ + GOOS=linux GOARCH=arm64 go build -o dist/gladia-linux-arm64 -v ./cmd/ + GOOS=linux GOARCH=amd64 go build -o dist/gladia-linux-amd64 -v ./cmd/ + GOOS=linux GOARCH=386 go build -o dist/gladia-linux-i386 -v ./cmd/ + GOOS=windows GOARCH=amd64 go build -o dist/gladia-windows-amd64.exe -v ./cmd/ + GOOS=darwin GOARCH=amd64 go build -o dist/gladia-darwin-amd64 -v ./cmd/ + GOOS=darwin GOARCH=arm64 go build -o dist/gladia-darwin-arm64 -v ./cmd/ @echo "Dist files:" @ls -l dist dev: - go run -x ./cmd/*.go -audio-file split_infinity.wav + go run ./cmd/ transcribe split_infinity.wav watch-dev: - go run -x ./cmd/*.go -audio-file split_infinity.wav 
+ go run ./cmd/ transcribe split_infinity.wav test: go test -race -v ./... diff --git a/README.md b/README.md index 06b46cb..27c120d 100644 --- a/README.md +++ b/README.md @@ -1,124 +1,110 @@ # gladia-cli -## Go Based CLI (New, Faster but alpha) +## Install -### Direct install +### macOS & Linux -Linux AMD (For Linux running on 64-bit AMD or Intel processors (x86_64 architecture)) - -``` -wget https://github.com/gladiaio/gladia-cli/raw/main/dist/gladia-linux-amd64 +```bash +curl -fsSL https://github.com/gladiaio/gladia-cli/releases/latest/download/install.sh | sh ``` -Linux ARM 8 (For Linux running on 64-bit ARM processors (ARMv8 architecture)). +### Windows +```powershell +powershell -c "irm https://github.com/gladiaio/gladia-cli/releases/latest/download/install.ps1 | iex" ``` -wget https://github.com/gladiaio/gladia-cli/raw/main/dist/gladia-linux-arm64 -``` -Linux ARM 7 (For Linux running on 32-bit ARM processors (ARMv7 architecture)). +Other platforms: [GitHub releases](https://github.com/gladiaio/gladia-cli/releases). + +## Install (from source) ```bash -wget https://github.com/gladiaio/gladia-cli/raw/main/dist/gladia-linux-arm7 +make build # → ./gladia ``` +## Setup -MacOS Intel (For macOS running on 64-bit AMD or Intel processors (x86_64 architecture)). +Get an API key at [app.gladia.io/account](https://app.gladia.io/account), then either: ```bash -wget https://github.com/gladiaio/gladia-cli/raw/main/dist/gladia-darwin-amd64 +export GLADIA_API_KEY=your_key +# or +./gladia auth set your_key # saves to ~/.gladia (mode 0600) ``` -MacOS ARM (For macOS running on ARM64 architecture (like Apple's M1, M2 or M3 chips)). +**Credential order:** `GLADIA_API_KEY` → `~/.gladia` → `--gladia-key` + +## Usage ```bash -wget https://github.com/gladiaio/gladia-cli/raw/main/dist/gladia-darwin-arm64 +./gladia transcribe [flags] ``` -Windows (For Windows running on 64-bit AMD or Intel processors (x86_64 architecture)). 
+**Examples** ```bash -wget https://github.com/gladiaio/gladia-cli/raw/main/dist/gladia-windows-amd64.exe +./gladia transcribe meeting.wav +./gladia transcribe https://example.com/audio.mp3 -o json +./gladia transcribe podcast.mp3 --language en,fr,de +./gladia transcribe mixed.mp3 --code-switching --language en,fr +./gladia transcribe call.wav --diarize -o srt +./gladia transcribe podcast.mp3 --model solaria-3 ``` -### Build from source +## Commands -```bash -make build -``` +| Command | Description | +|---------|-------------| +| `transcribe <source>` | Transcribe a local audio file or URL | +| `auth set <api-key>` | Save API key to `~/.gladia` | +| `languages` | List supported ISO 639-1 codes | -## Usage +## Flags (`transcribe`) -here is the usage: +| Flag | Default | Description | +|------|---------|-------------| +| `-o`, `--output` | `text` | Output: `text`, `json`, `json-full`, `srt`, `vtt` | +| `--language` | — | Expected language(s), comma-separated (`en` or `en,fr,de`) | +| `--code-switching`, `--code-switch` | off | Detect language per utterance | +| `--diarize` | off | **Optional.** Identify speakers in the transcript | +| `--model` | — | STT model: `solaria-1` or `solaria-3` (default: API default) | +| `-v`, `--verbose` | off | Show progress while polling | -```bash -Usage of ./gladia: - -audio-file string - Path to the audio file - -audio-url string - URL of the audio file - -diarization - Enable diarization - -diarization-max-speakers int - Maximum number of speakers for diarization - -direct-translate - Enable direct translation - -direct-translate-language string - Language for direct translation - -gladia-key string - Gladia API key - -language string - Language for transcription (default "english") - -language-behaviour string - Language behavior (manual, automatic single language, automatic multiple languages) (default "automatic multiple languages") - -noise-reduction - Enable noise reduction - -output-format string - Output format (table, csv, json, srt, vtt, txt) (default 
"table") - -save-gladia-key - Save Gladia API key - -transcription-hint string - Transcription hint - -transcription-language-list - List available languages for transcription - -translation-language-list - List available languages for translation - -verbose - Enable verbose printing (default=true) -``` +**Global flag** (any command): `--gladia-key` — API key if not in env or `~/.gladia` -Authentication: +## Language -1. get you Gladia key here: https://app.gladia.io/account -2. save the key if needed using -3. or use it inline for each request +| Goal | What to run | +|------|-------------| +| Auto-detect | `transcribe ` | +| Constrain detection | `--language en,fr,de` (no code switching) | +| Code switching | `--code-switching` (+ optional `--language` hints) | -Basic Example: +- **`--language`** — tells Gladia which language(s) to expect. Several codes (`en,fr,de`) narrow detection; they do **not** turn on code switching. +- **`--code-switching`** — separate option: re-detect language on each utterance. Combine with `--language` when you know which languages may appear. ```bash -./gladia_cli --audio-url http://files.gladia.io/example/audio-transcription/split_infinity.wav - -+------------+----------+----------+-----------------------+--------------------------------+ -| TIME BEGIN | TIME END | LANGUAGE | SPEAKER | TRANSCRIPTION | -+------------+----------+----------+-----------------------+--------------------------------+ -| 0.18 | 4.68 | en | speaker_not_activated | Split infinity in a time when | -| | | | | less is more, | -| 5.52 | 7.76 | en | speaker_not_activated | where too much is never | -| | | | | enough. | -| 8.51 | 10.79 | en | speaker_not_activated | There is always hope for the | -| | | | | future. | -| 11.71 | 14.11 | en | speaker_not_activated | The future can be read from | -| | | | | the past. | -| 14.57 | 19.91 | en | speaker_not_activated | The past foreshadows the | -| | | | | present and the present hasn't | -| | | | | been written yet. 
| -+------------+----------+----------+-----------------------+--------------------------------+ +./gladia languages # list valid codes ``` +## Diarization (optional) + +Use **`--diarize`** when you need **who spoke when**. Off by default. + +- Works with any output format; most useful with `-o text`, `srt`, or `vtt`. +- Speaker labels are included in the output (e.g. `Speaker 0: …`). + ```bash -./gladia --gladia-key MY_GLADIA_KEY --OTHER_OPTIONS ... +./gladia transcribe meeting.wav --diarize +./gladia transcribe panel.mp3 --diarize -o srt ``` +## Develop + ```bash -./gladia --gladia-key MY_GLADIA_KEY --save-gladia-key +make build && make test && make dist ``` + +## License + +[MIT](LICENSE) diff --git a/cmd/auth.go b/cmd/auth.go new file mode 100644 index 0000000..c02eb56 --- /dev/null +++ b/cmd/auth.go @@ -0,0 +1,28 @@ +package main + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +func newAuthCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "auth", + Short: "Manage Gladia API credentials", + } + + cmd.AddCommand(&cobra.Command{ + Use: "set [api-key]", + Short: "Save your API key to ~/.gladia", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + if err := SaveGladiaKeyToFile(args[0]); err != nil { + return fmt.Errorf("save API key: %w", err) + } + return nil + }, + }) + + return cmd +} diff --git a/cmd/config.go b/cmd/config.go index 64c737d..d75d6a9 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -2,21 +2,24 @@ package main import ( "bufio" + "errors" "fmt" "os" "path/filepath" "strings" ) -const CONFIG_FILENAME = ".gladia" +const ( + configFilename = ".gladia" + envGladiaAPIKey = "GLADIA_API_KEY" +) func GetGladiaConfigFilePath() (string, error) { homeDir, err := os.UserHomeDir() if err != nil { return "", err } - - return filepath.Join(homeDir, CONFIG_FILENAME), nil + return filepath.Join(homeDir, configFilename), nil } func SaveGladiaKeyToFile(gladiaKey string) error { @@ -25,20 +28,20 @@ func 
SaveGladiaKeyToFile(gladiaKey string) error { return err } - file, err := os.Create(configPath) + file, err := os.OpenFile(configPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) if err != nil { return err } defer file.Close() writer := bufio.NewWriter(file) - _, err = writer.WriteString(gladiaKey + "\n") - if err != nil { + if _, err := writer.WriteString(strings.TrimSpace(gladiaKey) + "\n"); err != nil { return err } - - err = writer.Flush() - if err != nil { + if err := writer.Flush(); err != nil { + return err + } + if err := os.Chmod(configPath, 0o600); err != nil { return err } @@ -66,3 +69,28 @@ func GetGladiaKeyFromFile() (string, error) { return strings.TrimSpace(gladiaKey), nil } + +// ResolveAPIKey returns a key from GLADIA_API_KEY, then ~/.gladia, then flagKey. +func ResolveAPIKey(flagKey string) (string, error) { + if k := strings.TrimSpace(os.Getenv(envGladiaAPIKey)); k != "" { + return k, nil + } + if k, err := GetGladiaKeyFromFile(); err == nil && k != "" { + return k, nil + } + if k := strings.TrimSpace(flagKey); k != "" { + return k, nil + } + return "", errors.New(missingAPIKeyMessage()) +} + +func missingAPIKeyMessage() string { + configPath, _ := GetGladiaConfigFilePath() + return fmt.Sprintf(`no Gladia API key found. 
+ + • export GLADIA_API_KEY=<your-key> + • gladia auth set <your-key> (writes %s) + • gladia transcribe <source> --gladia-key <your-key> + +Get a key at https://app.gladia.io/account`, configPath) +} diff --git a/cmd/config_test.go b/cmd/config_test.go new file mode 100644 index 0000000..32910ca --- /dev/null +++ b/cmd/config_test.go @@ -0,0 +1,136 @@ +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestResolveAPIKey_fromEnv(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "env-key") + + key, err := ResolveAPIKey("") + if err != nil { + t.Fatal(err) + } + if key != "env-key" { + t.Fatalf("got %q, want env-key", key) + } +} + +func TestResolveAPIKey_fromFile(t *testing.T) { + home := withTempHome(t) + t.Setenv(envGladiaAPIKey, "") + path := filepath.Join(home, configFilename) + if err := os.WriteFile(path, []byte("file-key\n"), 0o600); err != nil { + t.Fatal(err) + } + + key, err := ResolveAPIKey("") + if err != nil { + t.Fatal(err) + } + if key != "file-key" { + t.Fatalf("got %q, want file-key", key) + } +} + +func TestResolveAPIKey_fromFlag(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "") + + key, err := ResolveAPIKey("flag-key") + if err != nil { + t.Fatal(err) + } + if key != "flag-key" { + t.Fatalf("got %q, want flag-key", key) + } +} + +func TestResolveAPIKey_priorityEnvOverFileAndFlag(t *testing.T) { + home := withTempHome(t) + t.Setenv(envGladiaAPIKey, "env-wins") + path := filepath.Join(home, configFilename) + if err := os.WriteFile(path, []byte("file-key\n"), 0o600); err != nil { + t.Fatal(err) + } + + key, err := ResolveAPIKey("flag-key") + if err != nil { + t.Fatal(err) + } + if key != "env-wins" { + t.Fatalf("got %q, want env-wins", key) + } +} + +func TestResolveAPIKey_priorityFileOverFlag(t *testing.T) { + home := withTempHome(t) + t.Setenv(envGladiaAPIKey, "") + path := filepath.Join(home, configFilename) + if err := os.WriteFile(path, []byte("file-wins\n"), 0o600); err 
!= nil { + t.Fatal(err) + } + + key, err := ResolveAPIKey("flag-key") + if err != nil { + t.Fatal(err) + } + if key != "file-wins" { + t.Fatalf("got %q, want file-wins", key) + } +} + +func TestResolveAPIKey_missing(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "") + + _, err := ResolveAPIKey("") + if err == nil { + t.Fatal("expected error when no API key configured") + } + msg := err.Error() + for _, part := range []string{"GLADIA_API_KEY", "gladia auth set", "app.gladia.io"} { + if !strings.Contains(msg, part) { + t.Fatalf("error %q missing %q", msg, part) + } + } +} + +func TestSaveGladiaKeyToFile_contentAndPermissions(t *testing.T) { + home := withTempHome(t) + + captureStdout(t, func() { + if err := SaveGladiaKeyToFile(" secret-key \n"); err != nil { + t.Fatal(err) + } + }) + + path := filepath.Join(home, configFilename) + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if string(data) != "secret-key\n" { + t.Fatalf("file content = %q, want %q", data, "secret-key\n") + } + + info, err := os.Stat(path) + if err != nil { + t.Fatal(err) + } + if info.Mode().Perm() != 0o600 { + t.Fatalf("file mode = %o, want 0600", info.Mode().Perm()) + } +} + +func TestGetGladiaKeyFromFile_missing(t *testing.T) { + withTempHome(t) + _, err := GetGladiaKeyFromFile() + if err == nil { + t.Fatal("expected error for missing config file") + } +} diff --git a/cmd/formatter.go b/cmd/formatter.go index 6df9c75..9b56e1e 100644 --- a/cmd/formatter.go +++ b/cmd/formatter.go @@ -10,7 +10,6 @@ import ( ) func PrintTXTTranscription(response gladia.TranscriptionResult) { - println() fmt.Println(response.Result.Transcription.FullTranscript) } diff --git a/cmd/formatter_test.go b/cmd/formatter_test.go new file mode 100644 index 0000000..ce996b2 --- /dev/null +++ b/cmd/formatter_test.go @@ -0,0 +1,160 @@ +package main + +import ( + "encoding/json" + "strings" + "testing" + + gladia "github.com/gladiaio/gladia-cli/pkg/client" +) + +func 
TestPrintTXTTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintTXTTranscription(sampleTranscriptionResult()) + }) + if strings.TrimSpace(out) != "hello world" { + t.Fatalf("got %q", out) + } +} + +func TestPrintTXTDiarizedTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintTXTDiarizedTranscription(sampleTranscriptionResult()) + }) + if !strings.Contains(out, "Speaker 0: hello world") { + t.Fatalf("got %q", out) + } + if !strings.Contains(out, "Speaker 1: second line") { + t.Fatalf("got %q", out) + } +} + +func TestPrintJSONSimplifiedTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintJSONSimplifiedTranscription(sampleTranscriptionResult()) + }) + var items []map[string]interface{} + if err := json.Unmarshal([]byte(out), &items); err != nil { + t.Fatal(err) + } + if len(items) != 2 { + t.Fatalf("len = %d", len(items)) + } + if items[0]["transcription"] != "hello world" { + t.Fatalf("first item = %v", items[0]) + } +} + +func TestPrintJSONSimplifiedTranscription_empty(t *testing.T) { + var empty gladia.TranscriptionResult + out := captureStdout(t, func() { + PrintJSONSimplifiedTranscription(empty) + }) + if !strings.Contains(out, "No transcriptions available") { + t.Fatalf("got %q", out) + } +} + +func TestPrintJSONTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintJSONTranscription(sampleTranscriptionResult()) + }) + var parsed map[string]interface{} + if err := json.Unmarshal([]byte(out), &parsed); err != nil { + t.Fatal(err) + } + if _, ok := parsed["result"]; !ok { + t.Fatalf("missing result key in %v", parsed) + } +} + +func TestPrintSRTTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintSRTTranscription(sampleTranscriptionResult()) + }) + if !strings.Contains(out, "1\n00:00:00,500 --> 00:00:02,250\nhello world") { + t.Fatalf("got %q", out) + } +} + +func TestPrintSRTDiarizedTranscription(t *testing.T) { + out := captureStdout(t, func() { + 
PrintSRTDiarizedTranscription(sampleTranscriptionResult()) + }) + if !strings.Contains(out, "Speaker 0: hello world") { + t.Fatalf("got %q", out) + } +} + +func TestPrintVTTTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintVTTTranscription(sampleTranscriptionResult()) + }) + if !strings.HasPrefix(out, "WEBVTT\n") { + t.Fatalf("got %q", out) + } + if !strings.Contains(out, "00:00:00.500 --> 00:00:02.250") { + t.Fatalf("got %q", out) + } +} + +func TestPrintVTTDiarizedTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintVTTDiarizedTranscription(sampleTranscriptionResult()) + }) + if !strings.Contains(out, "Speaker 1: second line") { + t.Fatalf("got %q", out) + } +} + +func TestSecondsToSRTTimeFormat(t *testing.T) { + tests := []struct { + sec float64 + want string + }{ + {0.5, "00:00:00,500"}, + {65.25, "00:01:05,250"}, + {3661.001, "01:01:01,001"}, + } + for _, tc := range tests { + if got := secondsToSRTTimeFormat(tc.sec); got != tc.want { + t.Errorf("secondsToSRTTimeFormat(%v) = %q, want %q", tc.sec, got, tc.want) + } + } +} + +func TestSecondsToVTTTimeFormat(t *testing.T) { + if got := secondsToVTTTimeFormat(65.25); got != "00:01:05.250" { + t.Fatalf("got %q", got) + } +} + +func TestPrintCSVTranscription(t *testing.T) { + out := captureStdout(t, func() { + PrintCSVTranscription(sampleTranscriptionResult()) + }) + if !strings.HasPrefix(out, "time_begin, time_end, language, speaker, transcription\n") { + t.Fatalf("got %q", out) + } +} + +func TestPrintSummarization_withResult(t *testing.T) { + result := sampleTranscriptionResult() + summary := "short summary" + result.Result.Summarization.Results = &summary + out := captureStdout(t, func() { + PrintSummarization(result) + }) + if !strings.Contains(out, "short summary") { + t.Fatalf("got %q", out) + } +} + +func TestPrintSummarization_empty(t *testing.T) { + out := captureStdout(t, func() { + PrintSummarization(sampleTranscriptionResult()) + }) + if !strings.Contains(out, 
"No summarization results available") { + t.Fatalf("got %q", out) + } +} diff --git a/cmd/languages.go b/cmd/languages.go new file mode 100644 index 0000000..6dc6259 --- /dev/null +++ b/cmd/languages.go @@ -0,0 +1,17 @@ +package main + +import ( + "github.com/gladiaio/gladia-cli/pkg/client/types" + "github.com/spf13/cobra" +) + +func newLanguagesCmd() *cobra.Command { + return &cobra.Command{ + Use: "languages", + Short: "List supported transcription language codes", + RunE: func(cmd *cobra.Command, args []string) error { + _, err := types.DisplayAllInputLanguagesNames() + return err + }, + } +} diff --git a/cmd/main.go b/cmd/main.go index 81aeeed..6144242 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,174 +1,37 @@ package main import ( - "flag" "fmt" - "strings" + "os" - gladia "github.com/gladiaio/gladia-cli/pkg/client" - types "github.com/gladiaio/gladia-cli/pkg/client/types" + "github.com/spf13/cobra" ) -func main() { - audioURLPtr := flag.String("audio-url", "", "URL of the audio file") - audioFilePtr := flag.String("audio-file", "", "Path to the audio file") - - diarizationPtr := flag.Bool("diarization", false, "Enable diarization") - diarizationMinSpeakersPtr := flag.Int("diarization-min-speakers", 1, "Minimum number of speakers") - diarizationMaxSpeakersPtr := flag.Int("diarization-max-speakers", 8, "Maximum number of speakers") - diarizationNumberOfSpeakersPtr := flag.Int("diarization-number-of-speakers", 4, "Number of speakers") - - enableCodeSwitchingPtr := flag.Bool("enable-code-switching", false, "Enable code switching") - detectLanguagePtr := flag.Bool("detect-language", true, "Enable language detection") - - summarizationPtr := flag.Bool("summarization", false, "Enable summarization") - summarizationTypePtr := flag.String("summarization-type", "general", "Summarization type") - - customVocabularyPtr := flag.String("custom-vocabulary", "", "Comma-separated list of custom vocabulary words") - - outputFormatPtr := flag.String("output-format", "table", 
"Output format (table, csv, json, etc.)") - - languageListPtr := flag.Bool("transcription-language-list", false, "List available languages") - translationListPtr := flag.Bool("translation-language-list", false, "List translation languages") - - gladiaKeyPtr := flag.String("gladia-key", "", "Gladia API key") - saveGladiaKeyPtr := flag.Bool("save-gladia-key", false, "Save Gladia API key") - - verbosePtr := flag.Bool("verbose", true, "Enable verbose printing (default=true)") - - flag.Parse() - - // 1) If we only intend to save the key (and no audio is passed), do so and skip the rest - if *saveGladiaKeyPtr && *gladiaKeyPtr != "" && *audioURLPtr == "" && *audioFilePtr == "" { - err := SaveGladiaKeyToFile(*gladiaKeyPtr) - if err != nil { - fmt.Printf("Error saving Gladia API key: %s\n", err) - } else { - fmt.Printf("Gladia API key saved successfully.\n") - } - // Immediately return so we don't prompt for audio - return - } - - // 2) Otherwise, if user also provided audio but wants to save the key, save it but continue - if *saveGladiaKeyPtr && *gladiaKeyPtr != "" { - err := SaveGladiaKeyToFile(*gladiaKeyPtr) - if err != nil { - fmt.Printf("Error saving Gladia API key: %s\n", err) - return - } - fmt.Printf("Gladia API key saved successfully.\n") - } +// version is set at build time via ldflags: -X main.version= +var version = "dev" - // 3) If user did not provide --gladia-key, try reading from a stored file - if *gladiaKeyPtr == "" { - apiKey, err := GetGladiaKeyFromFile() - if err != nil { - fmt.Printf("Missing Gladia API key: %s\n", err) - return - } - *gladiaKeyPtr = apiKey - } - - client := gladia.NewGladiaClient(*gladiaKeyPtr, *verbosePtr) - - // 4) If just listing languages, do that and return - if *languageListPtr { - if _, err := types.DisplayAllInputLanguagesNames(); err != nil { - fmt.Printf("Error getting languages: %s\n", err) - } - return - } - - if *translationListPtr { - if _, err := types.DisplayAllTargetLanguagesNames(); err != nil { - fmt.Printf("Error 
getting languages: %s\n", err) - } - return - } - - // 5) If no file or URL is provided, prompt for audio - if *audioURLPtr == "" && *audioFilePtr == "" { - fmt.Println("Please provide an audio URL or file path") - return - } +var rootGladiaKey string - // 6) Upload if there's a file, otherwise we'll use the provided URL - var audioURL string - var err error - if *audioFilePtr != "" { - audioURL, err = client.UploadFile(*audioFilePtr) - if err != nil { - fmt.Printf("Error uploading file: %s\n", err) - return - } - } else { - audioURL = *audioURLPtr +func newRootCmd() *cobra.Command { + rootCmd := &cobra.Command{ + Use: "gladia", + Short: "Gladia speech-to-text CLI", + Long: "Transcribe audio files and URLs with the Gladia API.", + Version: version, } - // 7) Build the transcription request - transcriptionReq := gladia.TranscriptionRequest{ - Diarization: *diarizationPtr, - DiarizationConfig: struct { - MinSpeakers int `json:"min_speakers"` - MaxSpeakers int `json:"max_speakers"` - NumberOfSpeakers int `json:"number_of_speakers"` - }{ - MinSpeakers: *diarizationMinSpeakersPtr, - MaxSpeakers: *diarizationMaxSpeakersPtr, - NumberOfSpeakers: *diarizationNumberOfSpeakersPtr, - }, - EnableCodeSwitching: *enableCodeSwitchingPtr, - DetectLanguage: *detectLanguagePtr, - Summarization: *summarizationPtr, - SummarizationConfig: &gladia.SummarizationConfig{ - Type: *summarizationTypePtr, - }, - CustomVocabulary: strings.Split(*customVocabularyPtr, ","), - } + rootCmd.PersistentFlags().StringVar(&rootGladiaKey, "gladia-key", "", "Gladia API key (used when GLADIA_API_KEY and ~/.gladia are unset)") - // 8) Transcribe and handle the result - transcriptionResult, err := client.TranscribeAudioURL(audioURL, transcriptionReq) - if err != nil { - fmt.Printf("Transcription error: %s\n", err) - return - } + rootCmd.AddCommand(newTranscribeCmd()) + rootCmd.AddCommand(newAuthCmd()) + rootCmd.AddCommand(newLanguagesCmd()) - if *verbosePtr { - fmt.Println("Final transcription result:") - } - 
if *verbosePtr { - fmt.Println(transcriptionResult.Result.Transcription.FullTranscript) - } else { - // If user doesn't want the final line, skip it or just print the bare transcript - // e.g., fmt.Println(transcriptionResult.Result.Transcription.FullTranscript) - } + return rootCmd +} - // 9) Format the output - switch *outputFormatPtr { - case "table": - PrintTableTranscription(*transcriptionResult) - case "csv": - PrintCSVTranscription(*transcriptionResult) - case "json": - PrintJSONTranscription(*transcriptionResult) - case "json-simplified": - PrintJSONSimplifiedTranscription(*transcriptionResult) - case "srt": - PrintSRTTranscription(*transcriptionResult) - case "srt-diarized": - PrintSRTDiarizedTranscription(*transcriptionResult) - case "vtt": - PrintVTTTranscription(*transcriptionResult) - case "vtt-diarized": - PrintVTTDiarizedTranscription(*transcriptionResult) - case "txt": - PrintTXTTranscription(*transcriptionResult) - case "txt-diarized": - PrintTXTDiarizedTranscription(*transcriptionResult) - case "summary": - PrintSummarization(*transcriptionResult) - default: - PrintTableTranscription(*transcriptionResult) +func main() { + if err := newRootCmd().Execute(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) } } diff --git a/cmd/print_output_test.go b/cmd/print_output_test.go new file mode 100644 index 0000000..bf0f8bb --- /dev/null +++ b/cmd/print_output_test.go @@ -0,0 +1,40 @@ +package main + +import ( + "bytes" + "testing" +) + +func TestPrintTranscriptionResult_formats(t *testing.T) { + result := sampleTranscriptionResult() + formats := []struct { + format string + diarize bool + check func(string) bool + }{ + {"text", false, func(s string) bool { return bytes.Contains([]byte(s), []byte("hello world")) }}, + {"txt", false, func(s string) bool { return bytes.Contains([]byte(s), []byte("hello world")) }}, + {"text", true, func(s string) bool { return bytes.Contains([]byte(s), []byte("Speaker 0:")) }}, + {"json", false, func(s string) bool { 
return bytes.Contains([]byte(s), []byte(`"transcription"`)) }}, + {"json-full", false, func(s string) bool { return bytes.Contains([]byte(s), []byte(`"full_transcript"`)) }}, + {"srt", false, func(s string) bool { return bytes.Contains([]byte(s), []byte("-->")) }}, + {"srt", true, func(s string) bool { return bytes.Contains([]byte(s), []byte("Speaker 0:")) }}, + {"vtt", false, func(s string) bool { return bytes.HasPrefix([]byte(s), []byte("WEBVTT")) }}, + {"vtt", true, func(s string) bool { return bytes.Contains([]byte(s), []byte("Speaker 1:")) }}, + } + + for _, tc := range formats { + name := tc.format + if tc.diarize { + name += "+diarize" + } + t.Run(name, func(t *testing.T) { + out := captureStdout(t, func() { + printTranscriptionResult(result, tc.format, tc.diarize) + }) + if !tc.check(out) { + t.Fatalf("unexpected output: %q", out) + } + }) + } +} diff --git a/cmd/root_test.go b/cmd/root_test.go new file mode 100644 index 0000000..80bfdc1 --- /dev/null +++ b/cmd/root_test.go @@ -0,0 +1,75 @@ +package main + +import ( + "bytes" + "io" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestRootCommand_help(t *testing.T) { + cmd := newRootCmd() + buf := &bytes.Buffer{} + cmd.SetOut(buf) + cmd.SetErr(buf) + cmd.SetArgs([]string{"--help"}) + + if err := cmd.Execute(); err != nil { + t.Fatal(err) + } + out := buf.String() + for _, sub := range []string{"transcribe", "auth", "languages"} { + if !strings.Contains(out, sub) { + t.Fatalf("help missing %q:\n%s", sub, out) + } + } +} + +func TestAuthSet_writesConfig(t *testing.T) { + home := withTempHome(t) + t.Setenv(envGladiaAPIKey, "") + + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"auth", "set", "my-secret"}) + + captureStdout(t, func() { + if err := cmd.Execute(); err != nil { + t.Fatal(err) + } + }) + + key, err := GetGladiaKeyFromFile() + if err != nil { + t.Fatal(err) + } + if key != "my-secret" { + t.Fatalf("got %q", key) + } + + info, err := 
os.Stat(filepath.Join(home, configFilename)) + if err != nil { + t.Fatal(err) + } + if info.Mode().Perm() != 0o600 { + t.Fatalf("mode = %o", info.Mode().Perm()) + } +} + +func TestLanguagesCommand(t *testing.T) { + cmd := newRootCmd() + cmd.SetArgs([]string{"languages"}) + out := captureStdout(t, func() { + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + if err := cmd.Execute(); err != nil { + t.Fatal(err) + } + }) + if !strings.Contains(out, "en:") { + t.Fatalf("expected language listing, got %q", out) + } +} diff --git a/cmd/testhelpers_test.go b/cmd/testhelpers_test.go new file mode 100644 index 0000000..d3227d9 --- /dev/null +++ b/cmd/testhelpers_test.go @@ -0,0 +1,53 @@ +package main + +import ( + "os" + "testing" + + gladia "github.com/gladiaio/gladia-cli/pkg/client" +) + +func withTempHome(t *testing.T) string { + t.Helper() + dir := t.TempDir() + t.Setenv("HOME", dir) + return dir +} + +func captureStdout(t *testing.T, fn func()) string { + t.Helper() + r, w, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + old := os.Stdout + os.Stdout = w + fn() + w.Close() + os.Stdout = old + buf := make([]byte, 1<<20) + n, _ := r.Read(buf) + return string(buf[:n]) +} + +func sampleTranscriptionResult() gladia.TranscriptionResult { + var result gladia.TranscriptionResult + result.Result.Transcription.FullTranscript = "hello world" + result.Result.Transcription.Utterances = gladia.Utterances{ + { + Start: 0.5, + End: 2.25, + Language: "en", + Speaker: 0, + Text: "hello world", + }, + { + Start: 3.0, + End: 5.5, + Language: "en", + Speaker: 1, + Text: "second line", + }, + } + return result +} diff --git a/cmd/transcribe.go b/cmd/transcribe.go new file mode 100644 index 0000000..4a2267d --- /dev/null +++ b/cmd/transcribe.go @@ -0,0 +1,193 @@ +package main + +import ( + "fmt" + "os" + "strings" + + gladia "github.com/gladiaio/gladia-cli/pkg/client" + "github.com/gladiaio/gladia-cli/pkg/client/types" + "github.com/spf13/cobra" +) + +func newTranscribeCmd() 
*cobra.Command { + var ( + outputFormat string + languageFlag string + codeSwitching bool + verbose bool + diarization bool + modelFlag string + ) + + cmd := &cobra.Command{ + Use: "transcribe [source]", + Short: "Transcribe a local audio file or URL", + Long: `Transcribe pre-recorded audio from a file path or http(s) URL. + +Examples: + gladia transcribe meeting.wav + gladia transcribe audio.mp3 -o text + gladia transcribe podcast.mp3 --language en + gladia transcribe interview.mp3 --code-switching + gladia transcribe interview.mp3 --language en,fr,de + gladia transcribe call.wav --code-switch --language en -o json + gladia transcribe call.wav --diarize -o srt + gladia transcribe podcast.mp3 --model solaria-3 + gladia transcribe https://example.com/audio.mp3 -o json`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + if err := validateOutputFormat(outputFormat); err != nil { + return err + } + + if err := validateModel(modelFlag); err != nil { + return err + } + + langs, err := types.ParseLanguages(languageFlag) + if err != nil { + return err + } + + codeSwitchSet := cmd.Flags().Changed("code-switching") || cmd.Flags().Changed("code-switch") + langConfig, err := buildLanguageConfig(langs, codeSwitching, codeSwitchSet) + if err != nil { + return err + } + + key, err := ResolveAPIKey(rootGladiaKey) + if err != nil { + return err + } + + client := gladia.NewGladiaClient(key, verbose) + + audioURL, err := resolveAudioSource(client, args[0]) + if err != nil { + return err + } + + transcriptionReq := gladia.TranscriptionRequest{ + Model: modelFlag, + LanguageConfig: langConfig, + Diarization: diarization, + } + if diarization { + transcriptionReq.DiarizationConfig = &gladia.DiarizationConfig{ + MinSpeakers: 1, + MaxSpeakers: 8, + } + } + + result, err := client.TranscribeAudioURL(audioURL, transcriptionReq) + if err != nil { + return fmt.Errorf("transcription failed: %w", err) + } + + printTranscriptionResult(*result, outputFormat, 
diarization) + return nil + }, + } + + cmd.Flags().StringVarP(&outputFormat, "output", "o", "text", "Output format: text, json, json-full, srt, vtt") + cmd.Flags().StringVar(&languageFlag, "language", "", "Optional ISO 639-1 code(s), comma-separated (e.g. en or en,fr,de)") + cmd.Flags().BoolVar(&codeSwitching, "code-switching", false, "Enable code switching (detect language per utterance; independent of --language)") + cmd.Flags().BoolVar(&codeSwitching, "code-switch", false, "Alias for --code-switching") + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show progress while transcribing") + cmd.Flags().BoolVar(&diarization, "diarize", false, "Enable speaker diarization") + cmd.Flags().StringVar(&modelFlag, "model", "", "STT model: solaria-1 or solaria-3 (default: API default)") + + return cmd +} + +func buildLanguageConfig(langs []types.Language, codeSwitching, codeSwitchSet bool) (*gladia.LanguageConfig, error) { + if len(langs) == 0 && !codeSwitchSet { + return nil, nil + } + + codes := make([]string, len(langs)) + for i, lang := range langs { + codes[i] = string(lang) + } + + cfg := &gladia.LanguageConfig{Languages: codes} + + if codeSwitchSet { + cfg.CodeSwitching = codeSwitching + } + + return cfg, nil +} + +func validateOutputFormat(format string) error { + switch format { + case "text", "txt", "json", "json-full", "srt", "vtt": + return nil + default: + return fmt.Errorf("unknown output format %q (use text, json, json-full, srt, or vtt)", format) + } +} + +func validateModel(model string) error { + if model == "" { + return nil + } + switch model { + case "solaria-1", "solaria-3": + return nil + default: + return fmt.Errorf("unknown model %q (use solaria-1 or solaria-3)", model) + } +} + +func isHTTPURL(s string) bool { + lower := strings.ToLower(s) + return strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") +} + +func resolveAudioSource(client *gladia.GladiaClient, source string) (string, error) { + if isHTTPURL(source) { + 
return source, nil + } + + if _, err := os.Stat(source); err != nil { + return "", fmt.Errorf("%q is not a URL and not a readable file: %w", source, err) + } + + audioURL, err := client.UploadFile(source) + if err != nil { + return "", fmt.Errorf("upload file: %w", err) + } + return audioURL, nil +} + +func printTranscriptionResult(result gladia.TranscriptionResult, format string, diarize bool) { + switch format { + case "text", "txt": + if diarize { + PrintTXTDiarizedTranscription(result) + } else { + PrintTXTTranscription(result) + } + case "json": + PrintJSONSimplifiedTranscription(result) + case "json-full": + PrintJSONTranscription(result) + case "srt": + if diarize { + PrintSRTDiarizedTranscription(result) + } else { + PrintSRTTranscription(result) + } + case "vtt": + if diarize { + PrintVTTDiarizedTranscription(result) + } else { + PrintVTTTranscription(result) + } + default: + fmt.Fprintf(os.Stderr, "unknown output format %q\n", format) + os.Exit(1) + } +} diff --git a/cmd/transcribe_test.go b/cmd/transcribe_test.go new file mode 100644 index 0000000..9ff8b15 --- /dev/null +++ b/cmd/transcribe_test.go @@ -0,0 +1,462 @@ +package main + +import ( + "bytes" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + gladia "github.com/gladiaio/gladia-cli/pkg/client" + "github.com/gladiaio/gladia-cli/pkg/client/types" +) + +func TestValidateModel(t *testing.T) { + for _, model := range []string{"", "solaria-1", "solaria-3"} { + if err := validateModel(model); err != nil { + t.Errorf("model %q: %v", model, err) + } + } + if err := validateModel("solaria-2"); err == nil { + t.Fatal("expected error for unknown model") + } +} + +func TestValidateOutputFormat(t *testing.T) { + valid := []string{"text", "txt", "json", "json-full", "srt", "vtt"} + for _, format := range valid { + if err := validateOutputFormat(format); err != nil { + t.Errorf("format %q: %v", format, err) + } + } + if err := 
validateOutputFormat("table"); err == nil { + t.Fatal("expected error for unknown format") + } +} + +func TestIsHTTPURL(t *testing.T) { + tests := []struct { + in string + want bool + }{ + {"https://example.com/a.wav", true}, + {"http://example.com/a.wav", true}, + {"HTTPS://X.COM", true}, + {"/local/file.wav", false}, + {"file.wav", false}, + {"ftp://example.com", false}, + } + for _, tc := range tests { + if got := isHTTPURL(tc.in); got != tc.want { + t.Errorf("isHTTPURL(%q) = %v, want %v", tc.in, got, tc.want) + } + } +} + +func TestBuildLanguageConfig(t *testing.T) { + en := types.Language("en") + fr := types.Language("fr") + de := types.Language("de") + + tests := []struct { + name string + langs []types.Language + codeSwitching bool + codeSwitchSet bool + wantNil bool + wantCodes []string + wantCS bool + }{ + {"empty", nil, false, false, true, nil, false}, + {"code switch only", nil, true, true, false, []string{}, true}, + {"code switch off explicit", nil, false, true, false, []string{}, false}, + {"single en", []types.Language{en}, false, false, false, []string{"en"}, false}, + {"single en + code switch flag", []types.Language{en}, true, true, false, []string{"en"}, true}, + {"multi languages no code switch", []types.Language{en, fr, de}, false, false, false, []string{"en", "fr", "de"}, false}, + {"multi explicit off", []types.Language{en, fr}, false, true, false, []string{"en", "fr"}, false}, + {"multi explicit on", []types.Language{en, fr}, true, true, false, []string{"en", "fr"}, true}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + cfg, err := buildLanguageConfig(tc.langs, tc.codeSwitching, tc.codeSwitchSet) + if err != nil { + t.Fatal(err) + } + if tc.wantNil { + if cfg != nil { + t.Fatalf("expected nil config, got %+v", cfg) + } + return + } + if cfg == nil { + t.Fatal("expected non-nil config") + } + if strings.Join(cfg.Languages, ",") != strings.Join(tc.wantCodes, ",") { + t.Fatalf("languages = %v, want %v", cfg.Languages, 
tc.wantCodes) + } + if cfg.CodeSwitching != tc.wantCS { + t.Fatalf("code_switching = %v, want %v", cfg.CodeSwitching, tc.wantCS) + } + }) + } +} + +func TestResolveAudioSource_URL(t *testing.T) { + client := gladia.NewGladiaClient("key", false) + url := "https://cdn.example.com/audio.wav" + got, err := resolveAudioSource(client, url) + if err != nil { + t.Fatal(err) + } + if got != url { + t.Fatalf("got %q, want %q", got, url) + } +} + +func TestResolveAudioSource_missingFile(t *testing.T) { + client := gladia.NewGladiaClient("key", false) + _, err := resolveAudioSource(client, filepath.Join(t.TempDir(), "nope.wav")) + if err == nil { + t.Fatal("expected error for missing file") + } +} + +func TestResolveAudioSource_upload(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v2/upload/" { + t.Fatalf("path = %s", r.URL.Path) + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"audio_url": "https://api.gladia.io/audio/123"}) + })) + defer server.Close() + + client := gladia.NewGladiaClient("test-key", false) + client.GladiaEndpoint = server.URL + + dir := t.TempDir() + path := filepath.Join(dir, "sample.wav") + if err := os.WriteFile(path, []byte("RIFF"), 0o644); err != nil { + t.Fatal(err) + } + + got, err := resolveAudioSource(client, path) + if err != nil { + t.Fatal(err) + } + if got != "https://api.gladia.io/audio/123" { + t.Fatalf("got %q", got) + } +} + +func TestTranscribeCommand_invalidOutputFormat(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "k") + + cmd := newRootCmd() + buf := &bytes.Buffer{} + cmd.SetOut(buf) + cmd.SetErr(buf) + cmd.SetArgs([]string{"transcribe", "https://example.com/a.wav", "-o", "table"}) + + if err := cmd.Execute(); err == nil { + t.Fatal("expected error for invalid output format") + } +} + +func TestTranscribeCommand_invalidModel(t *testing.T) { + withTempHome(t) + 
t.Setenv(envGladiaAPIKey, "k") + + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"transcribe", "https://example.com/a.wav", "--model", "solaria-2"}) + + if err := cmd.Execute(); err == nil { + t.Fatal("expected error for invalid model") + } +} + +func TestTranscribeCommand_invalidLanguage(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "k") + + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"transcribe", "https://example.com/a.wav", "--language", "notalang"}) + + if err := cmd.Execute(); err == nil { + t.Fatal("expected error for invalid language") + } +} + +func TestTranscribeCommand_URLTextOutput(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "test-key") + + donePayload := sampleTranscriptionResult() + donePayload.Status = "done" + doneBody, _ := json.Marshal(donePayload) + + server := httptest.NewServer(nil) + base := server.URL + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPost && r.URL.Path == "/v2/transcription/": + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(map[string]string{"result_url": base + "/result/1"}) + case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/result/"): + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write(doneBody) + default: + t.Fatalf("unexpected %s %s", r.Method, r.URL.Path) + } + }) + defer server.Close() + + oldEndpoint := gladia.GladiaApiEndpoint + gladia.GladiaApiEndpoint = server.URL + t.Cleanup(func() { gladia.GladiaApiEndpoint = oldEndpoint }) + + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"transcribe", "https://example.com/audio.wav", "-o", "text"}) + + out := captureStdout(t, func() { + if err := cmd.Execute(); err != nil { + t.Fatalf("execute: %v", err) + } + }) + if got := strings.TrimSpace(out); got != "hello world" { 
+ t.Fatalf("stdout = %q, want hello world", got) + } +} + +func TestTranscribeCommand_codeSwitchingWithoutLanguages(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "test-key") + + var postedBody map[string]interface{} + donePayload := sampleTranscriptionResult() + donePayload.Status = "done" + doneBody, _ := json.Marshal(donePayload) + + server := httptest.NewServer(nil) + base := server.URL + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPost && r.URL.Path == "/v2/transcription/": + _ = json.NewDecoder(r.Body).Decode(&postedBody) + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(map[string]string{"result_url": base + "/result/1"}) + case r.Method == http.MethodGet: + _, _ = w.Write(doneBody) + } + }) + defer server.Close() + + oldEndpoint := gladia.GladiaApiEndpoint + gladia.GladiaApiEndpoint = server.URL + t.Cleanup(func() { gladia.GladiaApiEndpoint = oldEndpoint }) + + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{"transcribe", "https://example.com/audio.wav", "--code-switching"}) + + if err := cmd.Execute(); err != nil { + t.Fatal(err) + } + + lc := postedBody["language_config"].(map[string]interface{}) + if lc["code_switching"] != true { + t.Fatalf("code_switching = %v", lc["code_switching"]) + } + langs, _ := lc["languages"].([]interface{}) + if len(langs) != 0 { + t.Fatalf("languages = %v, want empty slice", lc["languages"]) + } +} + +func TestTranscribeCommand_modelRequestBody(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "test-key") + + var postedBody map[string]interface{} + donePayload := sampleTranscriptionResult() + donePayload.Status = "done" + doneBody, _ := json.Marshal(donePayload) + + server := httptest.NewServer(nil) + base := server.URL + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPost 
&& r.URL.Path == "/v2/transcription/": + _ = json.NewDecoder(r.Body).Decode(&postedBody) + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(map[string]string{"result_url": base + "/result/1"}) + case r.Method == http.MethodGet: + _, _ = w.Write(doneBody) + } + }) + defer server.Close() + + oldEndpoint := gladia.GladiaApiEndpoint + gladia.GladiaApiEndpoint = server.URL + t.Cleanup(func() { gladia.GladiaApiEndpoint = oldEndpoint }) + + run := func(args ...string) { + t.Helper() + postedBody = nil + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs(append([]string{"transcribe", "https://example.com/audio.wav"}, args...)) + if err := cmd.Execute(); err != nil { + t.Fatalf("execute: %v", err) + } + } + + t.Run("without --model", func(t *testing.T) { + run() + if _, ok := postedBody["model"]; ok { + t.Fatalf("model present: %#v", postedBody) + } + }) + + t.Run("with --model solaria-3", func(t *testing.T) { + run("--model", "solaria-3") + if postedBody["model"] != "solaria-3" { + t.Fatalf("model = %v", postedBody["model"]) + } + }) +} + +func TestTranscribeCommand_diarizationRequestBody(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "test-key") + + var postedBody map[string]interface{} + donePayload := sampleTranscriptionResult() + donePayload.Status = "done" + doneBody, _ := json.Marshal(donePayload) + + server := httptest.NewServer(nil) + base := server.URL + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPost && r.URL.Path == "/v2/transcription/": + _ = json.NewDecoder(r.Body).Decode(&postedBody) + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(map[string]string{"result_url": base + "/result/1"}) + case r.Method == http.MethodGet: + _, _ = w.Write(doneBody) + } + }) + defer server.Close() + + oldEndpoint := gladia.GladiaApiEndpoint + gladia.GladiaApiEndpoint = server.URL + t.Cleanup(func() { 
gladia.GladiaApiEndpoint = oldEndpoint }) + + run := func(args ...string) { + t.Helper() + postedBody = nil + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs(append([]string{"transcribe", "https://example.com/audio.wav"}, args...)) + if err := cmd.Execute(); err != nil { + t.Fatalf("execute: %v", err) + } + } + + t.Run("without --diarize", func(t *testing.T) { + run() + if _, ok := postedBody["diarization"]; ok { + t.Fatalf("diarization present: %#v", postedBody) + } + if _, ok := postedBody["diarization_config"]; ok { + t.Fatalf("diarization_config present: %#v", postedBody) + } + }) + + t.Run("with --diarize", func(t *testing.T) { + run("--diarize") + if postedBody["diarization"] != true { + t.Fatalf("diarization = %v", postedBody["diarization"]) + } + cfg, ok := postedBody["diarization_config"].(map[string]interface{}) + if !ok { + t.Fatalf("diarization_config missing: %#v", postedBody) + } + if cfg["min_speakers"] != float64(1) || cfg["max_speakers"] != float64(8) { + t.Fatalf("speaker range = %v", cfg) + } + if _, ok := cfg["number_of_speakers"]; ok { + t.Fatalf("number_of_speakers should be omitted: %v", cfg) + } + }) +} + +func TestTranscribeCommand_languageAndCodeSwitching(t *testing.T) { + withTempHome(t) + t.Setenv(envGladiaAPIKey, "test-key") + + var postedBody map[string]interface{} + donePayload := sampleTranscriptionResult() + donePayload.Status = "done" + doneBody, _ := json.Marshal(donePayload) + + server := httptest.NewServer(nil) + base := server.URL + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPost && r.URL.Path == "/v2/transcription/": + _ = json.NewDecoder(r.Body).Decode(&postedBody) + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(map[string]string{"result_url": base + "/result/1"}) + case r.Method == http.MethodGet: + _, _ = w.Write(doneBody) + } + }) + defer server.Close() + + oldEndpoint := 
gladia.GladiaApiEndpoint + gladia.GladiaApiEndpoint = server.URL + t.Cleanup(func() { gladia.GladiaApiEndpoint = oldEndpoint }) + + cmd := newRootCmd() + cmd.SetOut(io.Discard) + cmd.SetErr(io.Discard) + cmd.SetArgs([]string{ + "transcribe", "https://example.com/audio.wav", + "--language", "en,fr", + "--code-switching", + "-o", "json", + }) + + if err := cmd.Execute(); err != nil { + t.Fatal(err) + } + + lc, ok := postedBody["language_config"].(map[string]interface{}) + if !ok { + t.Fatalf("language_config missing in %#v", postedBody) + } + if lc["code_switching"] != true { + t.Fatalf("code_switching = %v, want true when --code-switching is set", lc["code_switching"]) + } + langs, ok := lc["languages"].([]interface{}) + if !ok || len(langs) != 2 { + t.Fatalf("languages = %v", lc["languages"]) + } +} diff --git a/dist/gladia-darwin-arm64 b/dist/gladia-darwin-arm64 deleted file mode 100755 index 73700e4..0000000 Binary files a/dist/gladia-darwin-arm64 and /dev/null differ diff --git a/dist/gladia-darwin-x86_64 b/dist/gladia-darwin-x86_64 deleted file mode 100755 index 081d761..0000000 Binary files a/dist/gladia-darwin-x86_64 and /dev/null differ diff --git a/dist/gladia-linux-arm64 b/dist/gladia-linux-arm64 deleted file mode 100755 index 50026b1..0000000 Binary files a/dist/gladia-linux-arm64 and /dev/null differ diff --git a/dist/gladia-linux-arm7 b/dist/gladia-linux-arm7 deleted file mode 100755 index 2a32948..0000000 Binary files a/dist/gladia-linux-arm7 and /dev/null differ diff --git a/dist/gladia-linux-armhf b/dist/gladia-linux-armhf deleted file mode 100755 index 2a32948..0000000 Binary files a/dist/gladia-linux-armhf and /dev/null differ diff --git a/dist/gladia-linux-i386 b/dist/gladia-linux-i386 deleted file mode 100755 index c73968c..0000000 Binary files a/dist/gladia-linux-i386 and /dev/null differ diff --git a/dist/gladia-linux-x86_64 b/dist/gladia-linux-x86_64 deleted file mode 100755 index cd5379e..0000000 Binary files a/dist/gladia-linux-x86_64 and 
/dev/null differ diff --git a/go.mod b/go.mod index 82d5c95..c532377 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,12 @@ go 1.18 require ( github.com/olekukonko/tablewriter v0.0.5 + github.com/spf13/cobra v1.8.1 golang.org/x/text v0.14.0 ) -require github.com/mattn/go-runewidth v0.0.9 // indirect +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/mattn/go-runewidth v0.0.9 // indirect + github.com/spf13/pflag v1.0.5 // indirect +) diff --git a/go.sum b/go.sum index 05f7a89..e569e75 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,16 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git 
a/install.ps1 b/install.ps1 new file mode 100644 index 0000000..0627f82 --- /dev/null +++ b/install.ps1 @@ -0,0 +1,78 @@ +#Requires -Version 5.1 +$ErrorActionPreference = 'Stop' + +$Repo = 'gladiaio/gladia-cli' +$BinaryName = 'gladia.exe' + +function Get-Arch { + if ([Environment]::Is64BitOperatingSystem) { + return 'amd64' + } + return '386' +} + +function Get-LatestTag { + $headers = @{ Accept = 'application/vnd.github+json' } + if ($env:GITHUB_TOKEN) { + $headers['Authorization'] = "Bearer $($env:GITHUB_TOKEN)" + } + + $release = Invoke-RestMethod -Uri "https://api.github.com/repos/$Repo/releases/latest" -Headers $headers + return $release.tag_name +} + +function Add-ToUserPath { + param([string]$Directory) + + $userPath = [Environment]::GetEnvironmentVariable('Path', 'User') + if ($null -eq $userPath) { + $userPath = '' + } + + $parts = $userPath -split ';' | Where-Object { $_ -and $_ -ne $Directory } + $newPath = ($parts + $Directory) -join ';' + [Environment]::SetEnvironmentVariable('Path', $newPath, 'User') + + if ($env:Path -notlike "*$Directory*") { + $env:Path = "$env:Path;$Directory" + } +} + +$arch = Get-Arch +$tag = Get-LatestTag + +if (-not $tag) { + throw 'Could not determine latest release' +} + +$version = $tag.TrimStart('v') +$archive = "gladia_${version}_windows_${arch}.zip" +$url = "https://github.com/$Repo/releases/download/$tag/$archive" +$installDir = if ($env:GLADIA_INSTALL_DIR) { $env:GLADIA_INSTALL_DIR } else { Join-Path $env:LOCALAPPDATA 'Programs\gladia-cli\bin' } + +$tmpdir = Join-Path $env:TEMP ("gladia-install-{0}" -f [guid]::NewGuid().ToString()) +New-Item -ItemType Directory -Path $tmpdir -Force | Out-Null + +try { + Write-Host "Installing gladia $tag (windows/$arch)..." 
+ + $zipPath = Join-Path $tmpdir $archive + Invoke-WebRequest -Uri $url -OutFile $zipPath -UseBasicParsing + Expand-Archive -Path $zipPath -DestinationPath $tmpdir -Force + + $binaryPath = Join-Path $tmpdir $BinaryName + if (-not (Test-Path $binaryPath)) { + throw "$BinaryName not found in archive" + } + + New-Item -ItemType Directory -Path $installDir -Force | Out-Null + Copy-Item -Path $binaryPath -Destination (Join-Path $installDir $BinaryName) -Force + Add-ToUserPath -Directory $installDir + + $installed = Join-Path $installDir $BinaryName + Write-Host "Installed gladia to $installed" + Write-Host "Restart your terminal if gladia is not found" +} +finally { + Remove-Item -Recurse -Force $tmpdir -ErrorAction SilentlyContinue +} diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..45488a7 --- /dev/null +++ b/install.sh @@ -0,0 +1,100 @@ +#!/bin/sh +set -eu + +REPO="gladiaio/gladia-cli" +BINARY="gladia" + +detect_os() { + case "$(uname -s)" in + Darwin) printf '%s\n' darwin ;; + Linux) printf '%s\n' linux ;; + *) + echo "error: unsupported OS: $(uname -s) (macOS and Linux only)" >&2 + exit 1 + ;; + esac +} + +detect_arch() { + case "$(uname -m)" in + x86_64 | amd64) printf '%s\n' amd64 ;; + aarch64 | arm64) printf '%s\n' arm64 ;; + armv7l | armv6l) printf '%s\n' armv7 ;; + i386 | i686) printf '%s\n' 386 ;; + *) + echo "error: unsupported architecture: $(uname -m)" >&2 + exit 1 + ;; + esac +} + +fetch_latest_tag() { + if [ -n "${GITHUB_TOKEN:-}" ]; then + curl -fsSL \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/${REPO}/releases/latest" \ + | sed -n 's/.*"tag_name"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' \ + | head -n 1 + else + curl -fsSL \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/${REPO}/releases/latest" \ + | sed -n 's/.*"tag_name"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' \ + | head -n 1 + fi +} + +install_binary() { + 
src="$1" + dest_dir="$2" + + if [ -w "$dest_dir" ]; then + install -m 755 "$src" "$dest_dir/" + else + if ! command -v sudo >/dev/null 2>&1; then + echo "error: cannot write to ${dest_dir} and sudo is not available" >&2 + echo "hint: set GLADIA_INSTALL_DIR to a writable directory (e.g. \$HOME/.local/bin)" >&2 + exit 1 + fi + sudo install -m 755 "$src" "$dest_dir/" + fi +} + +os="$(detect_os)" +arch="$(detect_arch)" +tag="$(fetch_latest_tag)" + +if [ -z "$tag" ]; then + echo "error: could not determine latest release" >&2 + exit 1 +fi + +version="${tag#v}" +archive="${BINARY}_${version}_${os}_${arch}.tar.gz" +url="https://github.com/${REPO}/releases/download/${tag}/${archive}" +install_dir="${GLADIA_INSTALL_DIR:-/usr/local/bin}" + +tmpdir="$(mktemp -d)" +trap 'rm -rf "$tmpdir"' EXIT INT HUP TERM + +echo "Installing ${BINARY} ${tag} (${os}/${arch})..." + +if ! curl -fsSL "$url" | tar -xz -C "$tmpdir"; then + echo "error: failed to download ${url}" >&2 + exit 1 +fi + +if [ ! -f "${tmpdir}/${BINARY}" ]; then + echo "error: ${BINARY} binary not found in archive" >&2 + exit 1 +fi + +install_binary "${tmpdir}/${BINARY}" "$install_dir" + +if command -v "${BINARY}" >/dev/null 2>&1; then + echo "Installed ${BINARY} $(command -v "${BINARY}") ($("${BINARY}" --version 2>/dev/null || true))" +else + echo "Installed ${BINARY} to ${install_dir}/${BINARY}" + echo "Ensure ${install_dir} is in your PATH" +fi diff --git a/pkg/client/client.go b/pkg/client/client.go index e7e773d..8da1187 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -1,8 +1,11 @@ package client -import "net/http" +import ( + "net/http" + "strings" +) -// Can be override by the developer, before initializing the client. +// GladiaApiEndpoint is the default Gladia API base URL. 
var GladiaApiEndpoint = "https://api.gladia.io" type GladiaClient struct { @@ -20,3 +23,7 @@ func NewGladiaClient(apiKey string, verbose bool) *GladiaClient { Verbose: verbose, } } + +func (c *GladiaClient) apiURL(path string) string { + return strings.TrimSuffix(c.GladiaEndpoint, "/") + path +} diff --git a/pkg/client/client_test.go b/pkg/client/client_test.go new file mode 100644 index 0000000..92c3eca --- /dev/null +++ b/pkg/client/client_test.go @@ -0,0 +1,22 @@ +package client + +import "testing" + +func TestGladiaClient_apiURL(t *testing.T) { + c := &GladiaClient{GladiaEndpoint: "https://api.gladia.io"} + if got := c.apiURL("/v2/upload/"); got != "https://api.gladia.io/v2/upload/" { + t.Fatalf("got %q", got) + } + + c.GladiaEndpoint = "https://api.gladia.io/" + if got := c.apiURL("/v2/transcription/"); got != "https://api.gladia.io/v2/transcription/" { + t.Fatalf("got %q", got) + } +} + +func TestNewGladiaClient_defaults(t *testing.T) { + c := NewGladiaClient("key", true) + if c.ApiKey != "key" || !c.Verbose || c.GladiaEndpoint != GladiaApiEndpoint { + t.Fatalf("unexpected client: %+v", c) + } +} diff --git a/pkg/client/transcribe.go b/pkg/client/transcribe.go index 1fb5601..742ad57 100644 --- a/pkg/client/transcribe.go +++ b/pkg/client/transcribe.go @@ -9,6 +9,7 @@ import ( "net/http" "os" "path/filepath" + "strings" "time" ) @@ -25,16 +26,23 @@ type UploadResponse struct { } `json:"audio_metadata"` } +type LanguageConfig struct { + Languages []string `json:"languages,omitempty"` + CodeSwitching bool `json:"code_switching,omitempty"` +} + +type DiarizationConfig struct { + MinSpeakers int `json:"min_speakers,omitempty"` + MaxSpeakers int `json:"max_speakers,omitempty"` + NumberOfSpeakers int `json:"number_of_speakers,omitempty"` +} + type TranscriptionRequest struct { AudioURL string `json:"audio_url"` - Diarization bool `json:"diarization"` - DiarizationConfig struct { - MinSpeakers int `json:"min_speakers"` - MaxSpeakers int `json:"max_speakers"` - 
NumberOfSpeakers int `json:"number_of_speakers"` - } `json:"diarization_config"` - EnableCodeSwitching bool `json:"enable_code_switching"` - DetectLanguage bool `json:"detect_language"` + Model string `json:"model,omitempty"` + LanguageConfig *LanguageConfig `json:"language_config,omitempty"` + Diarization bool `json:"diarization,omitempty"` + DiarizationConfig *DiarizationConfig `json:"diarization_config,omitempty"` Summarization bool `json:"summarization"` SummarizationConfig *SummarizationConfig `json:"summarization_config"` Translation bool `json:"translation"` @@ -195,7 +203,7 @@ func (c *GladiaClient) UploadFile(filePath string) (string, error) { return "", fmt.Errorf("failed to close multipart writer: %w", err) } - req, err := http.NewRequest("POST", "https://api.gladia.io/v2/upload/", body) + req, err := http.NewRequest("POST", c.apiURL("/v2/upload/"), body) if err != nil { return "", fmt.Errorf("failed to create request for upload: %w", err) } @@ -233,27 +241,17 @@ func (c *GladiaClient) TranscribeAudioURL(audioURL string, reqBody Transcription return nil, fmt.Errorf("failed to marshal transcription request: %w", err) } - httpReq, err := http.NewRequest("POST", "https://api.gladia.io/v2/transcription/", bytes.NewReader(requestData)) - if err != nil { - return nil, fmt.Errorf("failed to create transcription request: %w", err) - } - - httpReq.Header.Set("Content-Type", "application/json") - httpReq.Header.Set("x-gladia-key", c.ApiKey) - - resp, err := c.httpClient.Do(httpReq) + resp, err := c.createAndExecuteRequest("POST", c.apiURL("/v2/transcription/"), bytes.NewReader(requestData)) if err != nil { return nil, fmt.Errorf("transcription request failed: %w", err) } defer resp.Body.Close() - if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { - return nil, fmt.Errorf("transcription API returned non-OK status: %s", resp.Status) + if resp.StatusCode >= 300 { + return nil, c.decodeAPIError(resp) } - var transResp struct { - ResultURL 
string `json:"result_url"`
-	}
+	var transResp TranscriptionResponse
 	if err := json.NewDecoder(resp.Body).Decode(&transResp); err != nil {
 		return nil, fmt.Errorf("failed to decode transcription response: %w", err)
 	}
@@ -330,41 +328,26 @@ func (c *GladiaClient) pollForTranscriptionResult(resultURL string) (*Transcript
 	}
 }
 
-func (c *GladiaClient) GetTranscription(transcriptionRequest TranscriptionRequest) (*TranscriptionResult, error) {
-	requestBody, err := json.Marshal(transcriptionRequest)
-	if err != nil {
-		return nil, err
-	}
-
-	resp, err := c.createAndExecuteRequest("POST", c.GladiaEndpoint+"/v2/transcription", bytes.NewBuffer(requestBody))
-	if err != nil {
-		return nil, err
+// decodeAPIError builds an error from the JSON body of a failed API response.
+// The text is the body's "message" field, followed by any "validation_errors"
+// joined with "; ". When the body is not valid JSON, or is valid JSON without
+// a message, the HTTP status line is reported instead so the caller never
+// receives an empty error string.
+func (c *GladiaClient) decodeAPIError(resp *http.Response) error {
+	var respError struct {
+		Message          string   `json:"message"`
+		ValidationErrors []string `json:"validation_errors"`
 	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode >= 300 {
-		var respError struct {
-			Message          string   `json:"message"`
-			Path             string   `json:"path"`
-			RequestID        string   `json:"request_id"`
-			StatusCode       int      `json:"statusCode"`
-			Timestamp        string   `json:"timestamp"`
-			ValidationErrors []string `json:"validation_errors"`
-		}
-
-		if err := json.NewDecoder(resp.Body).Decode(&respError); err != nil {
-			return nil, fmt.Errorf("failed to decode error response: %v", err)
-		}
-
-		errorMessage := fmt.Sprintf("Error message: %s \n Validation errors: %v", respError.Message, respError.ValidationErrors)
-		println(errorMessage)
-		return nil, fmt.Errorf("failed to request transcription, status code: %d %s", resp.StatusCode, respError.Message)
+	if err := json.NewDecoder(resp.Body).Decode(&respError); err != nil {
+		return fmt.Errorf("API error: %s", resp.Status)
 	}
-
-	var transcriptionResponse TranscriptionResponse
-	if err := json.NewDecoder(resp.Body).Decode(&transcriptionResponse); err != nil {
-		return nil, err
+	message := respError.Message
+	if message == "" {
+		// Valid JSON without a message (e.g. "{}"): fall back to the status.
+		message = fmt.Sprintf("API error: %s", resp.Status)
+	}
+	if len(respError.ValidationErrors) > 0 {
+		return fmt.Errorf("%s (%s)", message, strings.Join(respError.ValidationErrors, "; "))
 	}
-
-	return c.pollForTranscriptionResult(transcriptionResponse.ResultURL)
+	return fmt.Errorf("%s", message)
 }
diff --git a/pkg/client/transcribe_test.go b/pkg/client/transcribe_test.go
new file mode 100644
index 0000000..4981a82
--- /dev/null
+++ b/pkg/client/transcribe_test.go
@@ -0,0 +1,214 @@
+package client
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestUploadFile_success(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/v2/upload/" {
+			t.Fatalf("path = %s", r.URL.Path)
+		}
+		if r.Header.Get("x-gladia-key") != "secret" {
+			t.Fatalf("api key header = %q", r.Header.Get("x-gladia-key"))
+		}
+		_ = json.NewEncoder(w).Encode(map[string]string{"audio_url": "https://cdn/audio.wav"})
+	}))
+	defer server.Close()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "test.wav")
+	if err := os.WriteFile(path, []byte("data"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	c := NewGladiaClient("secret", false)
+	c.GladiaEndpoint = server.URL
+
+	url, err := c.UploadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if url != "https://cdn/audio.wav" {
+		t.Fatalf("got %q", url)
+	}
+}
+
+func TestUploadFile_apiError(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusBadRequest)
+	}))
+	defer server.Close()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "test.wav")
+	if err := os.WriteFile(path, []byte("x"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	c := NewGladiaClient("k", false)
+	c.GladiaEndpoint = server.URL
+	if _, err := c.UploadFile(path); err == nil {
+		t.Fatal("expected error")
+	}
+}
+
+func TestTranscribeAudioURL_success(t *testing.T) {
+	done := TranscriptionResult{Status: "done"}
+	done.Result.Transcription.FullTranscript = "done text"
+	doneJSON, _ :=
json.Marshal(done) + + var posted TranscriptionRequest + server := httptest.NewServer(nil) + base := server.URL + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodPost && r.URL.Path == "/v2/transcription/": + _ = json.NewDecoder(r.Body).Decode(&posted) + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(TranscriptionResponse{ResultURL: base + "/poll"}) + case r.Method == http.MethodGet: + _, _ = w.Write(doneJSON) + default: + t.Fatalf("%s %s", r.Method, r.URL.Path) + } + }) + defer server.Close() + + c := NewGladiaClient("k", false) + c.GladiaEndpoint = server.URL + + req := TranscriptionRequest{ + LanguageConfig: &LanguageConfig{ + Languages: []string{"en", "fr"}, + CodeSwitching: true, + }, + } + result, err := c.TranscribeAudioURL("https://audio.example/x.wav", req) + if err != nil { + t.Fatal(err) + } + if result.Result.Transcription.FullTranscript != "done text" { + t.Fatalf("got %q", result.Result.Transcription.FullTranscript) + } + if posted.AudioURL != "https://audio.example/x.wav" { + t.Fatalf("posted audio_url = %q", posted.AudioURL) + } + if posted.LanguageConfig == nil || !posted.LanguageConfig.CodeSwitching { + t.Fatalf("language_config = %+v", posted.LanguageConfig) + } +} + +func TestTranscribeAudioURL_apiValidationError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnprocessableEntity) + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "message": "validation failed", + "validation_errors": []string{"bad field"}, + }) + })) + defer server.Close() + + c := NewGladiaClient("k", false) + c.GladiaEndpoint = server.URL + _, err := c.TranscribeAudioURL("https://a", TranscriptionRequest{}) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "validation failed") { + t.Fatalf("got %v", err) + } +} + +func 
TestPollForTranscriptionResult_errorStatus(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`{"status":"error","result":{"transcription":{"full_transcript":"boom"}}}`)) + })) + defer server.Close() + + c := NewGladiaClient("k", false) + c.GladiaEndpoint = server.URL + _, err := c.pollForTranscriptionResult(server.URL + "/status") + if err == nil || !strings.Contains(err.Error(), "boom") { + t.Fatalf("got %v", err) + } +} + +func TestDecodeAPIError(t *testing.T) { + c := NewGladiaClient("k", false) + rec := httptest.NewRecorder() + rec.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(rec).Encode(map[string]interface{}{ + "message": "bad request", + "validation_errors": []string{"a", "b"}, + }) + err := c.decodeAPIError(rec.Result()) + if err == nil || !strings.Contains(err.Error(), "bad request") || !strings.Contains(err.Error(), "a; b") { + t.Fatalf("got %v", err) + } +} + +func TestTranscriptionRequest_marshalDiarization(t *testing.T) { + t.Run("omits diarization when disabled", func(t *testing.T) { + data, err := json.Marshal(TranscriptionRequest{AudioURL: "https://a"}) + if err != nil { + t.Fatal(err) + } + var body map[string]interface{} + if err := json.Unmarshal(data, &body); err != nil { + t.Fatal(err) + } + if _, ok := body["diarization"]; ok { + t.Fatalf("diarization present: %v", body) + } + if _, ok := body["diarization_config"]; ok { + t.Fatalf("diarization_config present: %v", body) + } + }) + + t.Run("includes range without number_of_speakers", func(t *testing.T) { + data, err := json.Marshal(TranscriptionRequest{ + AudioURL: "https://a", + Diarization: true, + DiarizationConfig: &DiarizationConfig{ + MinSpeakers: 1, + MaxSpeakers: 8, + }, + }) + if err != nil { + t.Fatal(err) + } + var body map[string]interface{} + if err := json.Unmarshal(data, &body); err != nil { + t.Fatal(err) + } + if body["diarization"] != true { + t.Fatalf("diarization = %v", 
body["diarization"])
+		}
+		cfg, ok := body["diarization_config"].(map[string]interface{})
+		if !ok {
+			t.Fatalf("diarization_config = %v", body["diarization_config"])
+		}
+		if cfg["min_speakers"] != float64(1) || cfg["max_speakers"] != float64(8) {
+			t.Fatalf("speaker range = %v", cfg)
+		}
+		if _, ok := cfg["number_of_speakers"]; ok {
+			t.Fatalf("number_of_speakers should be omitted: %v", cfg)
+		}
+	})
+}
+
+func TestSummarizationConfig_validate(t *testing.T) {
+	if err := (&SummarizationConfig{Type: "general"}).ValidateSummarizationType(); err != nil {
+		t.Fatal(err)
+	}
+	if err := (&SummarizationConfig{Type: "nope"}).ValidateSummarizationType(); err == nil {
+		t.Fatal("expected error")
+	}
+}
diff --git a/pkg/client/transcription.go b/pkg/client/transcription.go
index db1bcea..af4bead 100644
--- a/pkg/client/transcription.go
+++ b/pkg/client/transcription.go
@@ -66,7 +66,7 @@ type TranscriptionListResponse struct {
 
 // ListTranscriptions fetches the list of transcriptions from the Gladia API.
 func (c *GladiaClient) ListTranscriptions(offset, limit int, status, kind, date, beforeDate, afterDate string) (*TranscriptionListResponse, error) {
-	req, err := http.NewRequest("GET", c.GladiaEndpoint+"/v2/transcription", nil)
+	req, err := http.NewRequest("GET", c.apiURL("/v2/transcription"), nil)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/client/types/languages.go b/pkg/client/types/languages.go
index 6855bf8..13fd92a 100644
--- a/pkg/client/types/languages.go
+++ b/pkg/client/types/languages.go
@@ -2,6 +2,7 @@ package types
 
 import (
 	"fmt"
+	"strings"
 
 	"golang.org/x/text/language"
 	"golang.org/x/text/language/display"
@@ -220,6 +221,86 @@ func (l Language) String() string {
 	return string(l)
 }
 
+// ParseLanguage validates a single ISO 639-1 language code.
+// An empty string means no explicit language (auto-detect).
+func ParseLanguage(s string) (Language, error) {
+	langs, err := ParseLanguages(s)
+	if err != nil {
+		return "", err
+	}
+	if len(langs) == 0 {
+		return "", nil
+	}
+	if len(langs) > 1 {
+		return "", fmt.Errorf("multiple languages %q: use ParseLanguages or comma-separated --language en,fr", s)
+	}
+	return langs[0], nil
+}
+
+// ParseLanguages validates comma-separated ISO 639-1 codes (e.g. "en,fr,de").
+// An empty string means no explicit languages (auto-detect).
+// Codes are trimmed and lower-cased, empty parts are skipped, and duplicates
+// are dropped while keeping first-seen order.
+func ParseLanguages(s string) ([]Language, error) {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return nil, nil
+	}
+	// Build the list once: allInputLanguages allocates a new slice on every
+	// call, so it must not be called inside the per-part loop.
+	supported := allInputLanguages()
+	parts := strings.Split(s, ",")
+	langs := make([]Language, 0, len(parts))
+	seen := make(map[Language]struct{}, len(parts))
+	for _, part := range parts {
+		code := strings.TrimSpace(strings.ToLower(part))
+		if code == "" {
+			continue
+		}
+		lang := Language(code)
+		if _, ok := seen[lang]; ok {
+			continue
+		}
+		valid := false
+		for _, allowed := range supported {
+			if allowed == lang {
+				valid = true
+				break
+			}
+		}
+		if !valid {
+			return nil, fmt.Errorf("unknown language %q (use ISO 639-1 codes, e.g. en,fr; run: gladia languages)", code)
+		}
+		seen[lang] = struct{}{}
+		langs = append(langs, lang)
+	}
+	if len(langs) == 0 {
+		return nil, fmt.Errorf("no valid language codes in %q", s)
+	}
+	return langs, nil
+}
+
+// allInputLanguages returns the language codes accepted by ParseLanguages.
+func allInputLanguages() []Language {
+	return []Language{
+		LanguageAf, LanguageSq, LanguageAm, LanguageAr, LanguageHy, LanguageAs, LanguageAz,
+		LanguageBa, LanguageEu, LanguageBe, LanguageBn, LanguageBs, LanguageBr, LanguageBg,
+		LanguageCa, LanguageZh, LanguageHr, LanguageCs, LanguageDa, LanguageNl, LanguageEn,
+		LanguageAt, LanguageFo, LanguageFi, LanguageFr, LanguageGl, LanguageKa, LanguageDe,
+		LanguageEl, LanguageGu, LanguageHt, LanguageHa, LanguageHaw, LanguageHe, LanguageHi,
+		LanguageHu, LanguageIs, LanguageId, LanguageIt, LanguageJp, LanguageJv, LanguageKn,
+		LanguageKk, LanguageKm, LanguageKo, LanguageLo, LanguageLa, LanguageLv, LanguageLn,
+		LanguageLt, LanguageLb, LanguageMk, LanguageMg, LanguageMs, LanguageMl, LanguageMt,
+		LanguageMi, LanguageMr, LanguageMn, LanguageMymr, LanguageNe, LanguageNo, LanguageNn,
+		LanguageOc, LanguagePs, LanguageFa, LanguagePl, LanguagePt, LanguagePa, LanguageRo,
+		LanguageRu, LanguageSa, LanguageSr, LanguageSn, LanguageSd, LanguageSi, LanguageSk,
+		LanguageSl, LanguageSo, LanguageEs, LanguageSu, LanguageSw, LanguageSv, LanguageTl,
+		LanguageTg, LanguageTa, LanguageTt, LanguageTe, LanguageTh, LanguageBo, LanguageTr,
+		LanguageTk, LanguageUk, LanguageUr, LanguageUz, LanguageVi, LanguageCy, LanguageYi,
+		LanguageYo,
+	}
+}
+
 func DisplayAllInputLanguagesNames() (string, error) {
 	// Slice of all TargetLanguage constants
 	allLanguages := []TargetLanguage{
diff --git a/pkg/client/types/languages_test.go b/pkg/client/types/languages_test.go
new file mode 100644
index 0000000..cff0d90
--- /dev/null
+++ b/pkg/client/types/languages_test.go
@@ -0,0 +1,83 @@
+package types
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestParseLanguages_empty(t *testing.T) {
+	langs, err := ParseLanguages("")
+	if err != nil {
+
t.Fatal(err) + } + if langs != nil { + t.Fatalf("got %v, want nil", langs) + } +} + +func TestParseLanguages_single(t *testing.T) { + langs, err := ParseLanguages("en") + if err != nil { + t.Fatal(err) + } + if len(langs) != 1 || langs[0] != LanguageEn { + t.Fatalf("got %v", langs) + } +} + +func TestParseLanguages_multiple(t *testing.T) { + langs, err := ParseLanguages(" en , FR , de ") + if err != nil { + t.Fatal(err) + } + if len(langs) != 3 { + t.Fatalf("got %v", langs) + } + if langs[0] != LanguageEn || langs[1] != LanguageFr || langs[2] != LanguageDe { + t.Fatalf("got %v", langs) + } +} + +func TestParseLanguages_dedupes(t *testing.T) { + langs, err := ParseLanguages("en,en,fr") + if err != nil { + t.Fatal(err) + } + if len(langs) != 2 { + t.Fatalf("got %v", langs) + } +} + +func TestParseLanguages_invalid(t *testing.T) { + _, err := ParseLanguages("english") + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "unknown language") { + t.Fatalf("got %v", err) + } +} + +func TestParseLanguages_onlyCommas(t *testing.T) { + _, err := ParseLanguages(",,,") + if err == nil { + t.Fatal("expected error") + } +} + +func TestParseLanguage_single(t *testing.T) { + lang, err := ParseLanguage("fr") + if err != nil { + t.Fatal(err) + } + if lang != LanguageFr { + t.Fatalf("got %v", lang) + } +} + +func TestParseLanguage_multipleRejected(t *testing.T) { + _, err := ParseLanguage("en,fr") + if err == nil { + t.Fatal("expected error") + } +} diff --git a/pkg/dist/gladia-darwin-arm64 b/pkg/dist/gladia-darwin-arm64 deleted file mode 100755 index 792727e..0000000 Binary files a/pkg/dist/gladia-darwin-arm64 and /dev/null differ diff --git a/pkg/dist/gladia-darwin-x86_64 b/pkg/dist/gladia-darwin-x86_64 deleted file mode 100755 index a52693a..0000000 Binary files a/pkg/dist/gladia-darwin-x86_64 and /dev/null differ diff --git a/pkg/dist/gladia-linux-arm64 b/pkg/dist/gladia-linux-arm64 deleted file mode 100755 index 62e0cec..0000000 Binary files 
a/pkg/dist/gladia-linux-arm64 and /dev/null differ diff --git a/pkg/dist/gladia-linux-armhf b/pkg/dist/gladia-linux-armhf deleted file mode 100755 index 7a9f5c9..0000000 Binary files a/pkg/dist/gladia-linux-armhf and /dev/null differ diff --git a/pkg/dist/gladia-linux-i386 b/pkg/dist/gladia-linux-i386 deleted file mode 100755 index 079ec48..0000000 Binary files a/pkg/dist/gladia-linux-i386 and /dev/null differ diff --git a/pkg/dist/gladia-linux-x86_64 b/pkg/dist/gladia-linux-x86_64 deleted file mode 100755 index 0b0d82c..0000000 Binary files a/pkg/dist/gladia-linux-x86_64 and /dev/null differ diff --git a/python/build.bat b/python/build.bat deleted file mode 100755 index 14bf8a5..0000000 --- a/python/build.bat +++ /dev/null @@ -1,12 +0,0 @@ -@echo off - -setlocal enabledelayedexpansion - -set "cmd_opt=" -for /F %%G in (requirements.txt) do ( - set "cmd_opt=!cmd_opt! --hidden-import=%%G" -) - -echo py -m PyInstaller --onefile gladia_cli.py !cmd_opt! -py -m PyInstaller --onefile gladia_cli.py !cmd_opt! 
- diff --git a/python/build.sh b/python/build.sh deleted file mode 100755 index 57b0907..0000000 --- a/python/build.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -cmd_opt="" -while read package; do - cmd_opt+=" --hidden-import=$package" -done < requirements.txt - -echo "pyinstaller --onefile gladia_cli.py $cmd_opt" -pyinstaller --onefile gladia_cli.py $cmd_opt - diff --git a/python/dist/gladia_cli.exe b/python/dist/gladia_cli.exe deleted file mode 100755 index 096c25c..0000000 Binary files a/python/dist/gladia_cli.exe and /dev/null differ diff --git a/python/dist/linux_x64_gladia b/python/dist/linux_x64_gladia deleted file mode 100755 index d168f24..0000000 Binary files a/python/dist/linux_x64_gladia and /dev/null differ diff --git a/python/dist/macos_arm64_gladia b/python/dist/macos_arm64_gladia deleted file mode 100755 index 05d22c1..0000000 Binary files a/python/dist/macos_arm64_gladia and /dev/null differ diff --git a/python/gladia_cli.py b/python/gladia_cli.py deleted file mode 100644 index 8f67f14..0000000 --- a/python/gladia_cli.py +++ /dev/null @@ -1,422 +0,0 @@ -import os -import click - -import mimetypes -from prettytable import PrettyTable - -import requests -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry - -mimetypes.init() - -def get_file_type(file_path): - file_extension = file_path.split(".")[-1] - mime_type = mimetypes.types_map["." 
+ file_extension] - return mime_type - -GLADIA_API_URL = "https://api.gladia.io/audio/text/audio-transcription/" - -CONFIG_PATH = os.path.join(os.path.expanduser("~"), ".gladia") - -class Color: - PURPLE = '\033[95m' - CYAN = '\033[96m' - DARKCYAN = '\033[36m' - BLUE = '\033[94m' - GREEN = '\033[92m' - YELLOW = '\033[93m' - RED = '\033[91m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - END = '\033[0m' - -def save_gladia_key_to_file(gladia_key): - with open(CONFIG_PATH, "w") as f: - f.write(gladia_key) - click.echo("Gladia API key saved to {}".format(CONFIG_PATH)) - -def get_gladia_key(): - try: - with open(CONFIG_PATH, "r") as f: - return f.read().strip() - except FileNotFoundError: - click.echo("please provide your gladia key using --gladia-key or --save-gladia-key") - return None - - -@click.command() -@click.option("--audio-url", help="URL of the audio file to be transcribed.") -@click.option("--audio-file", help="Path to the audio file to be transcribed.") -@click.option("--language-behaviour", default="automatic multiple languages", help="Determines how to handle multi-language audio. Possible values: manual, automatic single language, automatic multiple languages.") -@click.option("--language", default="english", help="Language spoken in the audio file. 
Choose from the following options:\n" - "afrikaans\n" - "- albanian\n" - "- amharic\n" - "- arabic\n" - "- armenian\n" - "- assamese\n" - "- azerbaijani\n" - "- bashkir\n" - "- basque\n" - "- belarusian\n" - "- bengali\n" - "- bosnian\n" - "- breton\n" - "- bulgarian\n" - "- catalan\n" - "- chinese\n" - "- croatian\n" - "- czech\n" - "- danish\n" - "- dutch\n" - "- english\n" - "- estonian\n" - "- faroese\n" - "- finnish\n" - "- french\n" - "- galician\n" - "- georgian\n" - "- german\n" - "- greek\n" - "- gujarati\n" - "- haitian creole\n" - "- hausa\n" - "- hawaiian\n" - "- hebrew\n" - "- hindi\n" - "- hungarian\n" - "- icelandic\n" - "- indonesian\n" - "- italian\n" - "- japanese\n" - "- javanese\n" - "- kannada\n" - "- kazakh\n" - "- khmer\n" - "- korean\n" - "- lao\n" - "- latin\n" - "- latvian\n" - "- lingala\n" - "- lithuanian\n" - "- luxembourgish\n" - "- macedonian\n" - "- malagasy\n" - "- malay\n" - "- malayalam\n" - "- maltese\n" - "- maori\n" - "- marathi\n" - "- mongolian\n" - "- myanmar\n" - "- nepali\n" - "- norwegian\n" - "- nynorsk\n" - "- occitan\n" - "- pashto\n" - "- persian\n" - "- polish\n" - "- portuguese\n" - "- punjabi\n" - "- romanian\n" - "- russian\n" - "- sanskrit\n" - "- serbian\n" - "- shona\n" - "- sindhi\n" - "- sinhala\n" - "- slovak\n" - "- slovenian\n" - "- somali\n" - "- spanish\n" - "- sundanese\n" - "- swahili\n" - "- swedish\n" - "- tagalog\n" - "- tajik\n" - "- tamil\n" - "- tatar\n" - "- telugu\n" - "- thai\n" - "- tibetan\n" - "- turkish\n" - "- turkmen\n" - "- ukrainian\n" - "- urdu\n" - "- uzbek\n" - "- vietnamese\n" - "- welsh\n" - "- wolof\n" - "- yiddish\n" - "- yoruba") -@click.option("--transcription-hint", default="general", help="Hint to the transcription model. 
You can pass names, topics, custom vocabulary, etc.") -@click.option("--noise-reduction", is_flag=True, help="Apply noise reduction to the audio.") -@click.option("--diarization", is_flag=True, help="Perform speaker diarization.") -@click.option("--diarization-max-speakers", default="3", help="Determines the maximum number of speakers to be detected.") -@click.option("--direct-translate", is_flag=True, help="Activate direct translation to the specified language.") -@click.option("--direct-translate-language", default="english", help="Language to which to translate the transcription, need to activate the direct translation using --direct-translate. Choose from the following options:\n" -"afrikaans\n" -"- albanian\n" -"- amharic\n" -"- arabic\n" -"- armenian\n" -"- asturian\n" -"- azerbaijani\n" -"- bashkir\n" -"- belarusian\n" -"- bengali\n" -"- bosnian\n" -"- breton\n" -"- bulgarian\n" -"- burmese\n" -"- catalan\n" -"- cebuano\n" -"- chinese\n" -"- croatian\n" -"- czech\n" -"- danish\n" -"- dutch\n" -"- english\n" -"- estonian\n" -"- finnish\n" -"- flemish\n" -"- french\n" -"- western frisian\n" -"- fulah\n" -"- gaelic\n" -"- galician\n" -"- ganda\n" -"- georgian\n" -"- german\n" -"- greek\n" -"- gujarati\n" -"- haitian\n" -"- haitian creole\n" -"- hausa\n" -"- hebrew\n" -"- hindi\n" -"- hungarian\n" -"- icelandic\n" -"- igbo\n" -"- iloko\n" -"- indonesian\n" -"- irish\n" -"- italian\n" -"- japanese\n" -"- javanese\n" -"- kannada\n" -"- kazakh\n" -"- khmer\n" -"- korean\n" -"- lao\n" -"- latvian\n" -"- lingala\n" -"- lithuanian\n" -"- luxembourgish\n" -"- macedonian\n" -"- malagasy\n" -"- malay\n" -"- malayalam\n" -"- marathi\n" -"- moldavian\n" -"- moldovan\n" -"- mongolian\n" -"- nepali\n" -"- norwegian\n" -"- occitan\n" -"- oriya\n" -"- panjabi\n" -"- pashto\n" -"- persian\n" -"- polish\n" -"- portuguese\n" -"- pushto\n" -"- romanian\n" -"- russian\n" -"- serbian\n" -"- sindhi\n" -"- sinhala\n" -"- slovak\n" -"- slovenian\n" -"- somali\n" -"- spanish\n" -"- 
sundanese\n" -"- swahili\n" -"- swati\n" -"- swedish\n" -"- tagalog\n" -"- tamil\n" -"- thai\n" -"- tswana\n" -"- turkish\n" -"- ukrainian\n" -"- urdu\n" -"- uzbek\n" -"- valencian\n" -"- vietnamese\n" -"- welsh\n" -"- wolof\n" -"- xhosa\n" -"- yiddish\n" -"- yoruba" -) -@click.option("--text-emotion", is_flag=True, help="Activate text emotion recognition.") -@click.option("--summarization", is_flag=True, help="Activate summarization.") -@click.option("--output-format", default="table", help="Format in which to return the transcription results. Possible values: table, json, text, srt, vtt, plain.") -@click.option("--gladia-key", help="API key for Gladia. Get it at https://app.gladia.io/account") -@click.option("--save-gladia-key", is_flag=True, help="Save the API key to a configuration file.") -def transcribe( - audio_url: str, - audio_file: str, - language_behaviour: str, - language: str, - transcription_hint: str, - noise_reduction: bool, - diarization: bool, - diarization_max_speakers: int, - direct_translate: bool, - direct_translate_language: str, - text_emotion: bool, - summarization: bool, - output_format: str, - gladia_key: str, - save_gladia_key: bool - ): - """ - Transcribe an audio file or an audio url using the Gladia API. 
- """ - if gladia_key is None: - gladia_key = get_gladia_key() - - if save_gladia_key is True: - save_gladia_key_to_file(gladia_key) - - if gladia_key is None and not save_gladia_key: - click.echo("Error: Gladia API key not found.") - return - - if save_gladia_key is None and audio_url is None and audio_file is None: - click.echo("Error: --audio-url or --audio-file is required.") - return - - if not save_gladia_key: - if gladia_key != "": - - if direct_translate and direct_translate_language is None: - click.echo("Error: --direct-translate-language is required when using --direct-translate.") - return - - if diarization and diarization_max_speakers is None: - click.echo("Error: --diarization-max-speakers is required when using --diarization.") - return - - if audio_url is None and audio_file is None: - click.echo("Error: --audio-url or --audio-file is required.") - return - else: - click.echo("Transcribing audio file...") - click.echo("This may take a few seconds, please wait...") - headers = { - "accept": "application/json", - "x-gladia-key": gladia_key, - } - - if output_format == "table": - this_output_format = "json" - else: - this_output_format = output_format - - files = { - "language_behaviour": (None, language_behaviour), - "language": (None, language), - "toggle_noise_reduction": (None, "true" if noise_reduction else "false"), - "toggle_diarization": (None, "true" if diarization else "false"), - "diarization_max_speakers": (None, str(diarization_max_speakers)), - "toggle_direct_translate": (None, "true" if direct_translate else "false"), - "target_translation_language": (None, direct_translate_language), - "output_format": (None, this_output_format), - "transcription_hint": (None, transcription_hint), - } - - if audio_url: - files["audio_url"] = (None, audio_url) - else: - file_type = get_file_type(audio_file) - files["audio"] = (audio_file, open(audio_file, "rb"), file_type) - - # Use the session to make the POST request - response = 
requests.post(GLADIA_API_URL, headers=headers, files=files) - - - if response.status_code != 200: - click.echo(f"Error: {response.status_code} - {response.text}") - return - - click.echo(f"{Color.BOLD}Transcript{Color.END}\n") - - if output_format == "table": - table = PrettyTable() - table.align = "l" - table.padding_width = 1 - table.border = False - field_names = ["time_begin", "time_end", "confidence", "language"] - - if diarization: - field_names.append("speaker") - - if text_emotion: - field_names.append("emotion") - - field_names.append("transcription") - - table.field_names = field_names - - for sentence in response.json()["prediction"]: - confidences = [] - for words in sentence["words"]: - confidences.append(float(words["confidence"])) - - # calculate the average - - confidence = round(sum(confidences) / len(confidences), 2) - - - - row = [ - Color.GREEN + str("{:.3f}".format(sentence['time_begin'])) + Color.END, - Color.GREEN + str("{:.3f}".format(sentence['time_end'])) + Color.END, - Color.BLUE + str("{:.2f}".format(confidence)) + Color.END, - Color.CYAN + sentence['language'] + Color.END, - ] - - if diarization: - row.append(Color.YELLOW + sentence['speaker'] + Color.END) - - if text_emotion: - row.append(Color.YELLOW + sentence['emotion'] + Color.END) - - row.append(sentence['transcription']) - table.add_row(row) - - click.echo(table) - - if summarization: - click.echo("") - click.echo("=======") - click.echo("Summary") - click.echo("=======") - click.echo(response.json()["prediction_raw"]["summarization"]) - - elif output_format == "json": - click.echo(response.json()) - else: - click.echo(response.json()["prediction"]) - else: - click.echo("Error: Gladia API key not found.") - click.echo("Please provide your Gladia API key using --gladia-key or save it using --save-gladia-key.") - return - -if __name__ == "__main__": - transcribe() - diff --git a/python/gladia_cli.spec b/python/gladia_cli.spec deleted file mode 100644 index ceeb2ae..0000000 --- 
a/python/gladia_cli.spec +++ /dev/null @@ -1,44 +0,0 @@ -# -*- mode: python ; coding: utf-8 -*- - - -block_cipher = None - - -a = Analysis( - ['gladia_cli.py'], - pathex=[], - binaries=[], - datas=[], - hiddenimports=['click', 'pyinstaller', 'requests', 'prettytable', 'chardet'], - hookspath=[], - hooksconfig={}, - runtime_hooks=[], - excludes=[], - win_no_prefer_redirects=False, - win_private_assemblies=False, - cipher=block_cipher, - noarchive=False, -) -pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) - -exe = EXE( - pyz, - a.scripts, - a.binaries, - a.zipfiles, - a.datas, - [], - name='gladia_cli', - debug=False, - bootloader_ignore_signals=False, - strip=False, - upx=True, - upx_exclude=[], - runtime_tmpdir=None, - console=True, - disable_windowed_traceback=False, - argv_emulation=False, - target_arch=None, - codesign_identity=None, - entitlements_file=None, -) diff --git a/python/requirements.txt b/python/requirements.txt deleted file mode 100644 index 6d1d415..0000000 --- a/python/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -click -pyinstaller -requests -prettytable -chardet -charset-normalizer \ No newline at end of file diff --git a/python/setup.py b/python/setup.py deleted file mode 100644 index 342f7e0..0000000 --- a/python/setup.py +++ /dev/null @@ -1,21 +0,0 @@ -from setuptools import setup - -setup( - name='gladia-transcriber', - version='0.1.0', - author='Jean-Louis Queguiner', - author_email='jlqueguiner@gladia.io', - description='Transcribe audio files using the Gladia API', - packages=['gladia_transcriber', 'gladia_transcriber.config'], - package_data={'gladia_transcriber.config': ['config.ini']}, - install_requires=[ - 'click', - 'requests', - 'prettytable', - ], - entry_points={ - 'console_scripts': [ - 'gladia-transcriber = gladia_transcriber.transcribe:transcribe', - ], - }, -)