Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export GLADIA_API_KEY=your_key
./gladia transcribe podcast.mp3 --language en,fr,de
./gladia transcribe mixed.mp3 --code-switching --language en,fr
./gladia transcribe call.wav --diarize -o srt
./gladia transcribe podcast.mp3 --model solaria-3
./gladia transcribe podcast.mp3 --model solaria-3 --language en
```

## Commands
Expand All @@ -64,10 +64,10 @@ export GLADIA_API_KEY=your_key
| Flag | Default | Description |
|------|---------|-------------|
| `-o`, `--output` | `text` | Output: `text`, `json`, `json-full`, `srt`, `vtt` |
| `--language` | — | Expected language(s), comma-separated (`en` or `en,fr,de`) |
| `--code-switching`, `--code-switch` | off | Detect language per utterance |
| `--language` | — | Expected language(s), comma-separated (`en` or `en,fr,de`); narrows detection, does not enable code switching |
| `--cs`, `--code-switching` | off | Re-detect language on each utterance (mixed-language audio; solaria-1 only) |

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it should be `-cs` instead, wdyt?

| `--diarize` | off | **Optional.** Identify speakers in the transcript |
| `--model` | — | STT model: `solaria-1` or `solaria-3` (default: API default) |
| `--model` | — | STT model: `solaria-1` or `solaria-3`. Solaria-3 accepts at most one `--language` (`en`, `fr`, `de`, `es`, or `it`) and does not support code switching. |
| `-v`, `--verbose` | off | Show progress while polling |

**Global flag** (any command): `--gladia-key` — API key if not in env or `~/.gladia`
Expand All @@ -78,10 +78,10 @@ export GLADIA_API_KEY=your_key
|------|-------------|
| Auto-detect | `transcribe <source>` |
| Constrain detection | `--language en,fr,de` (no code switching) |
| Code switching | `--code-switching` (+ optional `--language` hints) |
| Code switching | `--cs` or `--code-switching` (+ optional `--language` hints) |

- **`--language`** — tells Gladia which language(s) to expect. Several codes (`en,fr,de`) narrow detection; they do **not** turn on code switching.
- **`--code-switching`** — separate option: re-detect language on each utterance. Combine with `--language` when you know which languages may appear.
- **`--language`** — limits which language(s) Gladia considers (`en,fr,de` is a hint list, not per-utterance switching).
- **`--cs`** / **`--code-switching`** — turns on per-utterance language detection. Add `--language` to restrict which languages may appear. Not available with `solaria-3`.

```bash
./gladia languages # list valid codes
Expand Down
170 changes: 161 additions & 9 deletions cmd/transcribe.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ Examples:
gladia transcribe podcast.mp3 --language en
gladia transcribe interview.mp3 --code-switching
gladia transcribe interview.mp3 --language en,fr,de
gladia transcribe call.wav --code-switch --language en -o json
gladia transcribe call.wav --cs --language en -o json
gladia transcribe call.wav --diarize -o srt
gladia transcribe podcast.mp3 --model solaria-3
gladia transcribe podcast.mp3 --model solaria-3 --language en
gladia transcribe https://example.com/audio.mp3 -o json`,
Args: cobra.ExactArgs(1),
Args: validateTranscribeArgs,
RunE: func(cmd *cobra.Command, args []string) error {
if err := validateOutputFormat(outputFormat); err != nil {
return err
Expand All @@ -45,12 +45,19 @@ Examples:
return err
}

if err := validateLanguageFlag(languageFlag); err != nil {
return err
}

langs, err := types.ParseLanguages(languageFlag)
if err != nil {
return err
}

codeSwitchSet := cmd.Flags().Changed("code-switching") || cmd.Flags().Changed("code-switch")
codeSwitchSet := cmd.Flags().Changed("code-switching") || cmd.Flags().Changed("cs")
if err := validateModelConfig(modelFlag, langs, codeSwitchSet, codeSwitching); err != nil {
return err
}
langConfig, err := buildLanguageConfig(langs, codeSwitching, codeSwitchSet)
if err != nil {
return err
Expand All @@ -69,7 +76,7 @@ Examples:
}

transcriptionReq := gladia.TranscriptionRequest{
Model: modelFlag,
Model: normalizeModel(modelFlag),
LanguageConfig: langConfig,
Diarization: diarization,
}
Expand All @@ -91,12 +98,37 @@ Examples:
}

cmd.Flags().StringVarP(&outputFormat, "output", "o", "text", "Output format: text, json, json-full, srt, vtt")
cmd.Flags().StringVar(&languageFlag, "language", "", "Optional ISO 639-1 code(s), comma-separated (e.g. en or en,fr,de)")
cmd.Flags().BoolVar(&codeSwitching, "code-switching", false, "Enable code switching (detect language per utterance; independent of --language)")
cmd.Flags().BoolVar(&codeSwitching, "code-switch", false, "Alias for --code-switching")
cmd.Flags().StringVar(&languageFlag, "language", "", "Expected language(s), comma-separated (e.g. en or en,fr,de); does not enable code switching")
const codeSwitchingUsage = "Re-detect language on each utterance (for mixed-language audio; solaria-1 only)"
cmd.Flags().BoolVar(&codeSwitching, "cs", false, codeSwitchingUsage)
cmd.Flags().BoolVar(&codeSwitching, "code-switching", false, codeSwitchingUsage)
cmd.Flags().Lookup("cs").Hidden = true
cmd.Flags().Lookup("code-switching").Hidden = true
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show progress while transcribing")
cmd.Flags().BoolVar(&diarization, "diarize", false, "Enable speaker diarization")
cmd.Flags().StringVar(&modelFlag, "model", "", "STT model: solaria-1 or solaria-3 (default: API default)")
cmd.Flags().StringVar(&modelFlag, "model", "", "STT model: solaria-1 or solaria-3 (solaria-3 accepts at most one --language: en, fr, de, es, or it)")

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not at most, it accepts only one


cmd.SetUsageTemplate(`Usage:{{if .Runnable}}
{{.UseLine}}{{end}}{{if .HasAvailableSubCommands}}
{{.CommandPath}} [command]{{end}}{{if gt (len .Aliases) 0}}

Aliases:
{{.NameAndAliases}}{{end}}{{if .HasExample}}

Examples:
{{.Example}}{{end}}{{if .HasAvailableLocalFlags}}

Flags:
--cs, --code-switching — ` + codeSwitchingUsage + `
{{.LocalFlags.FlagUsages | trimTrailingWhitespaces}}{{end}}{{if .HasAvailableInheritedFlags}}

Global Flags:
{{.InheritedFlags.FlagUsages | trimTrailingWhitespaces}}{{end}}{{if .HasHelpSubCommands}}

Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}}
{{rpad .CommandPath .CommandPathPadding}} {{.Short}}{{end}}{{end}}{{end}}{{if .HasAvailableSubCommands}}

Use "{{.CommandPath}} [command] --help" for more information about a command.{{end}}`)

return cmd
}
Expand Down Expand Up @@ -130,6 +162,7 @@ func validateOutputFormat(format string) error {
}

func validateModel(model string) error {
model = normalizeModel(model)
if model == "" {
return nil
}
Expand All @@ -141,6 +174,125 @@ func validateModel(model string) error {
}
}

// solaria3Languages is the set of language codes that validateModelConfig
// accepts when the selected model is solaria-3.
var solaria3Languages = map[types.Language]bool{
	types.LanguageDe: true,
	types.LanguageEn: true,
	types.LanguageEs: true,
	types.LanguageFr: true,
	types.LanguageIt: true,
}

// validateModelConfig rejects flag combinations that the solaria-3 model
// cannot honour. Any other model (after normalizeModel) is accepted as-is.
//
// For solaria-3 it returns an error when code switching was explicitly turned
// on, when the single requested language is not in solaria3Languages, or when
// more than one language was requested. No language at all is allowed.
func validateModelConfig(model string, langs []types.Language, codeSwitchSet, codeSwitching bool) error {
	if normalizeModel(model) != "solaria-3" {
		return nil
	}

	// Only an explicit "true" is a conflict; an explicit "false" is harmless.
	if codeSwitchSet && codeSwitching {
		return fmt.Errorf("solaria-3 does not support code switching (use solaria-1 instead)")
	}

	if len(langs) == 0 {
		return nil
	}

	if len(langs) == 1 {
		if only := langs[0]; !solaria3Languages[only] {
			return fmt.Errorf("solaria-3 does not support language %q (use en, fr, de, es, or it)", only)
		}
		return nil
	}

	// Two or more languages: list them back to the user in the error.
	names := make([]string, 0, len(langs))
	for _, l := range langs {
		names = append(names, string(l))
	}
	return fmt.Errorf("solaria-3 accepts only one language, got %d (%s); use solaria-1 for multi-language", len(langs), strings.Join(names, ", "))
}

// normalizeModel canonicalises a user-supplied model name: it lower-cases the
// value, strips surrounding whitespace, and turns every remaining space into
// a hyphen (so "Solaria 3" becomes "solaria-3").
func normalizeModel(model string) string {
	lowered := strings.ToLower(model)
	trimmed := strings.TrimSpace(lowered)
	return strings.ReplaceAll(trimmed, " ", "-")
}

// validateTranscribeArgs is the positional-argument validator for the
// transcribe command. Exactly one argument is accepted.
//
// When the count is wrong and --language was given, it checks whether the
// extra arguments look like language codes the user separated with spaces
// instead of commas, and if so returns a targeted hint. Otherwise it returns
// a generic arg-count error.
func validateTranscribeArgs(cmd *cobra.Command, args []string) error {
	if len(args) == 1 {
		return nil
	}

	// A lookup failure is deliberately ignored: it leaves langFlag empty,
	// which simply disables the space-separated-language hints below.
	langFlag, _ := cmd.Flags().GetString("language")
	langFlag = strings.TrimSpace(langFlag)

	// The hints only make sense with surplus arguments. Guarding on
	// len(args) > 1 also keeps args[1:] from panicking when no positional
	// argument was supplied at all.
	if langFlag != "" && len(args) > 1 {
		// gladia transcribe --language en fr meeting.wav
		if len(args) == 2 && isKnownLanguageCode(args[0]) && !isKnownLanguageCode(args[1]) {
			return spaceSeparatedLanguageError(joinLanguageCodes(langFlag, args[0]))
		}

		// gladia transcribe meeting.wav --language en fr
		var extraLangs []string
		for _, arg := range args[1:] {
			if isKnownLanguageCode(arg) {
				extraLangs = append(extraLangs, arg)
			}
		}
		if len(extraLangs) > 0 {
			return spaceSeparatedLanguageError(joinLanguageCodes(append([]string{langFlag}, extraLangs...)...))
		}
	}

	return fmt.Errorf("accepts 1 arg(s), received %d", len(args))
}

// validateLanguageFlag catches a --language value whose codes were separated
// by spaces (e.g. "en fr") rather than commas. Empty values, values that
// already contain a comma, and values without a space are passed through
// untouched; an error is returned only when every space-separated field is a
// known language code.
func validateLanguageFlag(s string) error {
	trimmed := strings.TrimSpace(s)
	switch {
	case trimmed == "",
		strings.Contains(trimmed, ","),
		!strings.Contains(trimmed, " "):
		return nil
	}

	fields := strings.Fields(trimmed)
	if len(fields) < 2 || !allKnownLanguageCodes(fields) {
		return nil
	}
	return spaceSeparatedLanguageError(fields)
}

// spaceSeparatedLanguageError builds the error shown when language codes were
// separated by spaces. The message includes a corrected example made from the
// given codes, each trimmed and lower-cased, with blank entries dropped and
// the rest joined by commas.
func spaceSeparatedLanguageError(codes []string) error {
	var suggestion strings.Builder
	for _, raw := range codes {
		cleaned := strings.ToLower(strings.TrimSpace(raw))
		if cleaned == "" {
			continue
		}
		if suggestion.Len() > 0 {
			suggestion.WriteByte(',')
		}
		suggestion.WriteString(cleaned)
	}
	return fmt.Errorf("--language expects comma-separated codes (e.g. --language %s), not spaces", suggestion.String())
}

// joinLanguageCodes returns the given codes trimmed and lower-cased, in their
// original order, with blank entries removed. The result is never nil.
func joinLanguageCodes(codes ...string) []string {
	cleaned := make([]string, 0, len(codes))
	for i := range codes {
		if c := strings.ToLower(strings.TrimSpace(codes[i])); c != "" {
			cleaned = append(cleaned, c)
		}
	}
	return cleaned
}

// allKnownLanguageCodes reports whether codes is non-empty and every entry
// passes isKnownLanguageCode.
func allKnownLanguageCodes(codes []string) bool {
	if len(codes) == 0 {
		return false
	}
	for i := range codes {
		if !isKnownLanguageCode(codes[i]) {
			return false
		}
	}
	return true
}

// isKnownLanguageCode reports whether code, after trimming surrounding
// whitespace, is non-empty and accepted by types.ParseLanguage.
func isKnownLanguageCode(code string) bool {
	trimmed := strings.TrimSpace(code)
	if trimmed == "" {
		return false
	}
	if _, err := types.ParseLanguage(trimmed); err != nil {
		return false
	}
	return true
}

func isHTTPURL(s string) bool {
lower := strings.ToLower(s)
return strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://")
Expand Down
Loading
Loading