From 4a4d4cecc33f01a19d6e0e1b91d369ef53556103 Mon Sep 17 00:00:00 2001 From: Min Huang <70873102+min0625@users.noreply.github.com> Date: Wed, 1 Jul 2026 06:59:02 +0000 Subject: [PATCH] feat: canonical BCP-47 casing in verbose logs; always request OpenAI stream usage Verbose diagnostics now render language tags in conventional casing (zh-tw -> zh-TW, zh-hant -> zh-Hant) via a new canonicalLangTag helper, instead of the lowercase-normalized internal form. The OpenAI client now always sets stream_options.include_usage, since every documented compatible endpoint (OpenAI, Ollama, LM Studio) either supports it or ignores unknown fields; this drops the defaultEndpoint special-casing and its dead branch for custom base URLs. Also excludes _test.go files from the goconst linter and drops the resulting test-only string constants in main_test.go for readability. --- .golangci.yaml | 6 ++ .serena/memories/core.md | 2 + cmd/mint/main.go | 36 +++++++++- cmd/mint/main_test.go | 87 +++++++++++++++---------- docs/manual-testing.md | 36 +++++++++- internal/provider/openai/openai.go | 42 ++++++------ internal/provider/openai/openai_test.go | 39 +++++++++-- 7 files changed, 179 insertions(+), 69 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index 8f7c328..48d94da 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -114,3 +114,9 @@ linters: desc: Use github.com/go-resty/resty/v2 instead - pkg: github.com/aws/smithy-go/ptr$ desc: Use github.com/aws/aws-sdk-go-v2/aws instead + + exclusions: + rules: + - path: _test\.go + linters: + - goconst diff --git a/.serena/memories/core.md b/.serena/memories/core.md index 0a31a81..ee21c48 100644 --- a/.serena/memories/core.md +++ b/.serena/memories/core.md @@ -14,6 +14,8 @@ navigation and code-level detail that AGENTS.md does not spell out. ``` cmd/mint/main.go # entry; cobra root via newRootCmd() factory; viper wiring; target-lang resolution internal/llm/llm.go # Completer interface: Complete(ctx, system, user string, w io.Writer) (Usage, error); Usage = token counts +internal/llm/writer.go # TrailingNewlineWriter: wraps io.Writer, guarantees exactly one trailing '\n'; providers stream tokens through it + call Done() +internal/httpx/httpx.go # New() *http.Client — shared tuned transport (proxy-from-env, HTTP/2, conn pool); every provider uses it instead of http.DefaultClient internal/provider/config.go # Config struct; Config.ValidateConfig(); provider-name constants internal/provider/provider.go # NewCompleter(cfg) factory — dispatches on cfg.Provider internal/provider/googlegenai/ # Google Gemini HTTP client (implements Completer) diff --git a/cmd/mint/main.go b/cmd/mint/main.go index e10c0e4..4f9cdd0 100644 --- a/cmd/mint/main.go +++ b/cmd/mint/main.go @@ -131,7 +131,7 @@ func newRootCmd() *cobra.Command { // than treated as already-target text. sourceLang := resolveSourceLang(sourceLangFlag) if sourceLang != "" { - logv("source language: %s", sourceLang) + logv("source language: %s", canonicalLangTag(sourceLang)) } var totalUsage llm.Usage @@ -180,7 +180,7 @@ func newRootCmd() *cobra.Command { return fmt.Errorf("language detection failed: %w", err) } - logv("detected input language: %q", inputLang) + logv("detected input language: %q", canonicalLangTag(inputLang)) // Language-neutral content (numbers, symbols): output unchanged, // no rewrite call needed. @@ -198,7 +198,7 @@ func newRootCmd() *cobra.Command { actualTargetLang = determineActualTargetLang(inputLang, targetLangs) } - logv("target language: %s", actualTargetLang) + logv("target language: %s", canonicalLangTag(actualTargetLang)) // Rewrite the input in the target language, correcting grammar and // spelling along the way. Anchoring on the target tag also pins the @@ -264,6 +264,36 @@ func resolveInput(args []string) (string, error) { // normLang lowercases and trims whitespace from a language tag. func normLang(s string) string { return strings.ToLower(strings.TrimSpace(s)) } +// canonicalLangTag reformats a normalized BCP-47 tag into its conventional +// subtag casing for display: a lowercase primary language subtag, an uppercase +// two-letter region subtag (e.g. "zh-tw" → "zh-TW"), and a title-case +// four-letter script subtag (e.g. "zh-hant" → "zh-Hant"). Tags are lowercased +// internally for case-insensitive matching (see normLang); this is purely a +// presentation helper for verbose diagnostics and does not affect matching. +func canonicalLangTag(tag string) string { + if tag == "" { + return tag + } + + parts := strings.Split(tag, "-") + for i, p := range parts { + switch { + case i == 0: + parts[i] = strings.ToLower(p) + case len(p) == 2: + // Region subtag (e.g. TW, US). + parts[i] = strings.ToUpper(p) + case len(p) == 4: + // Script subtag (e.g. Hant) — title case. + parts[i] = strings.ToUpper(p[:1]) + strings.ToLower(p[1:]) + default: + parts[i] = strings.ToLower(p) + } + } + + return strings.Join(parts, "-") +} + // resolveTargetLangs resolves target languages based on priority: // 1. Flag Lang Code (--target / -t) - single language only // 2. Config Lang Code (MINT_TARGET_LANG) - single or comma-separated languages diff --git a/cmd/mint/main_test.go b/cmd/mint/main_test.go index b49dd28..93ed197 100644 --- a/cmd/mint/main_test.go +++ b/cmd/mint/main_test.go @@ -32,14 +32,6 @@ func (m *mockCompleter) Complete(_ context.Context, _, _ string, w io.Writer) (l return llm.Usage{}, nil } -const ( - langZhTW = "zh-TW" - langZhTw = "zh-tw" - argTarget = "--target" - inputHello = "Hello" - inputhello = "hello" -) - func TestLangMatches(t *testing.T) { tests := []struct { a, b string @@ -47,9 +39,9 @@ func TestLangMatches(t *testing.T) { }{ {"en", "en", true}, {"en", "fr", false}, - {langZhTW, "zh-HK", true}, - {langZhTW, "zh", true}, - {"zh", langZhTW, true}, + {"zh-TW", "zh-HK", true}, + {"zh-TW", "zh", true}, + {"zh", "zh-TW", true}, {"en-US", "en-GB", true}, {"en", "en-US", true}, {"", "", true}, @@ -72,11 +64,11 @@ func TestDetermineActualTargetLang(t *testing.T) { {"empty target list defaults to en", "fr", nil, "en"}, {"single target returns it directly", "fr", []string{"en"}, "en"}, {"single target same as input (correction mode)", "en", []string{"en"}, "en"}, - {"input matches first returns second", "en", []string{"en", langZhTW}, langZhTW}, - {"input matches middle returns next", langZhTW, []string{"en", langZhTW, "ja"}, "ja"}, - {"input matches last wraps to first", "ja", []string{"en", langZhTW, "ja"}, "en"}, - {"input not in list returns first", "fr", []string{"en", langZhTW}, "en"}, - {"match by primary subtag zh-HK → zh-TW slot", "zh-HK", []string{"en", langZhTW, "ja"}, "ja"}, + {"input matches first returns second", "en", []string{"en", "zh-TW"}, "zh-TW"}, + {"input matches middle returns next", "zh-TW", []string{"en", "zh-TW", "ja"}, "ja"}, + {"input matches last wraps to first", "ja", []string{"en", "zh-TW", "ja"}, "en"}, + {"input not in list returns first", "fr", []string{"en", "zh-TW"}, "en"}, + {"match by primary subtag zh-HK → zh-TW slot", "zh-HK", []string{"en", "zh-TW", "ja"}, "ja"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -97,13 +89,13 @@ func TestResolveTargetLangs(t *testing.T) { }{ {"flag takes priority over config", "ja", "en,zh-TW", []string{"ja"}}, {"flag with comma uses first part only", "en,zh-TW", "", []string{"en"}}, - {"flag normalized to lowercase", "ZH-TW", "", []string{langZhTw}}, + {"flag normalized to lowercase", "ZH-TW", "", []string{"zh-tw"}}, {"flag trimmed of whitespace", " fr ", "", []string{"fr"}}, {"config single lang", "", "fr", []string{"fr"}}, - {"config multiple langs", "", "en,zh-TW,ja", []string{"en", langZhTw, "ja"}}, - {"config langs trimmed and lowercased", "", " EN , ZH-TW ", []string{"en", langZhTw}}, + {"config multiple langs", "", "en,zh-TW,ja", []string{"en", "zh-tw", "ja"}}, + {"config langs trimmed and lowercased", "", " EN , ZH-TW ", []string{"en", "zh-tw"}}, {"config trailing comma ignored", "", "en,", []string{"en"}}, - {"config double comma ignored", "", "en,,zh-TW", []string{"en", langZhTw}}, + {"config double comma ignored", "", "en,,zh-TW", []string{"en", "zh-tw"}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -142,7 +134,7 @@ func TestNormalizeDetectedLang(t *testing.T) { want string }{ {"plain tag", "en", "en"}, - {"trims whitespace", " zh-TW \n", langZhTw}, + {"trims whitespace", " zh-TW \n", "zh-tw"}, {"lowercases", "JA", "ja"}, {"strips quotes", `"fr"`, "fr"}, {"strips backticks", "`de`", "de"}, @@ -160,15 +152,40 @@ func TestNormalizeDetectedLang(t *testing.T) { } } +func TestCanonicalLangTag(t *testing.T) { + tests := []struct { + name string + tag string + want string + }{ + {"empty passes through", "", ""}, + {"language only", "en", "en"}, + {"language only ja", "ja", "ja"}, + {"region uppercased", "zh-tw", "zh-TW"}, + {"region uppercased zh-hk", "zh-hk", "zh-HK"}, + {"region uppercased zh-cn", "zh-cn", "zh-CN"}, + {"script title-cased", "zh-hant", "zh-Hant"}, + {"already canonical is idempotent", "zh-TW", "zh-TW"}, + {"mixed case normalized", "ZH-Tw", "zh-TW"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := canonicalLangTag(tt.tag); got != tt.want { + t.Errorf("canonicalLangTag(%q) = %q, want %q", tt.tag, got, tt.want) + } + }) + } +} + func TestResolveInput(t *testing.T) { t.Run("positional arg used directly", func(t *testing.T) { - got, err := resolveInput([]string{inputhello}) + got, err := resolveInput([]string{"hello"}) if err != nil { t.Fatalf("unexpected error: %v", err) } - if got != inputhello { - t.Errorf("got %q, want %q", got, inputhello) + if got != "hello" { + t.Errorf("got %q, want %q", got, "hello") } }) @@ -362,7 +379,7 @@ func TestNewRootCmdLangNeutral(t *testing.T) { flush := captureStdout(t) cmd := newRootCmd() - cmd.SetArgs([]string{argTarget, "en", "12345"}) + cmd.SetArgs([]string{"--target", "en", "12345"}) if err := cmd.ExecuteContext(context.Background()); err != nil { t.Fatalf("unexpected error: %v", err) @@ -391,7 +408,7 @@ func TestNewRootCmdTranslation(t *testing.T) { flush := captureStdout(t) cmd := newRootCmd() - cmd.SetArgs([]string{argTarget, "fr", "--verbose", inputHello}) + cmd.SetArgs([]string{"--target", "fr", "--verbose", "Hello"}) if err := cmd.ExecuteContext(context.Background()); err != nil { _ = flush() @@ -418,7 +435,7 @@ func TestNewRootCmdTranslationError(t *testing.T) { t.Setenv("MINT_MODEL_NAME", "test-model") cmd := newRootCmd() - cmd.SetArgs([]string{argTarget, "fr", inputHello}) + cmd.SetArgs([]string{"--target", "fr", "Hello"}) err := cmd.ExecuteContext(context.Background()) if err == nil { @@ -535,7 +552,7 @@ func TestNewRootCmdDetectLangError(t *testing.T) { t.Setenv("MINT_TARGET_LANG", "en,fr") cmd := newRootCmd() - cmd.SetArgs([]string{inputHello}) + cmd.SetArgs([]string{"Hello"}) err := cmd.ExecuteContext(context.Background()) if err == nil { @@ -552,7 +569,7 @@ func TestNewRootCmdProviderError(t *testing.T) { t.Setenv("MINT_API_KEY", "test") cmd := newRootCmd() - cmd.SetArgs([]string{inputHello}) + cmd.SetArgs([]string{"Hello"}) if err := cmd.ExecuteContext(context.Background()); err == nil { t.Error("expected error for invalid provider, got nil") @@ -577,7 +594,7 @@ func TestNewRootCmdNoInput(t *testing.T) { defer func() { os.Stdin = old; _ = r.Close() }() cmd := newRootCmd() - cmd.SetArgs([]string{argTarget, "en"}) + cmd.SetArgs([]string{"--target", "en"}) if err := cmd.ExecuteContext(context.Background()); err == nil { t.Error("expected error for no input, got nil") @@ -590,7 +607,7 @@ func TestRunSuccess(t *testing.T) { old := os.Args - os.Args = []string{"mint", argTarget, "en", "12345"} + os.Args = []string{"mint", "--target", "en", "12345"} defer func() { os.Args = old }() flush := captureStdout(t) @@ -606,7 +623,7 @@ func TestRunError(t *testing.T) { old := os.Args - os.Args = []string{"mint", argTarget, "en", inputhello} + os.Args = []string{"mint", "--target", "en", "hello"} defer func() { os.Args = old }() // Suppress stderr to keep test output clean. @@ -642,7 +659,7 @@ func TestResolveSourceLang(t *testing.T) { {"trimmed of whitespace", " ja ", "ja"}, {"comma uses first part only", "fr,en", "fr"}, {"comma with whitespace trimmed", " fr , en ", "fr"}, - {"bcp-47 variant preserved", "zh-TW", langZhTw}, + {"bcp-47 variant preserved", "zh-TW", "zh-tw"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -712,7 +729,7 @@ func TestBuildRewritePrompt(t *testing.T) { // Distinct tags sharing a primary subtag are a deliberate script // conversion (zh-CN → zh-TW) and must keep the source anchor. - scriptSys, _, _ := buildRewritePrompt("zh-cn", langZhTw, "汉字") + scriptSys, _, _ := buildRewritePrompt("zh-cn", "zh-tw", "汉字") if !strings.Contains(scriptSys, "written in zh-cn") { t.Errorf("expected source anchor for script conversion, got: %q", scriptSys) } @@ -952,7 +969,7 @@ func TestNewRootCmdSourceLang(t *testing.T) { flush := captureStdout(t) cmd := newRootCmd() - cmd.SetArgs([]string{"--source", "fr", argTarget, "en", "chat"}) + cmd.SetArgs([]string{"--source", "fr", "--target", "en", "chat"}) if err := cmd.ExecuteContext(context.Background()); err != nil { _ = flush() diff --git a/docs/manual-testing.md b/docs/manual-testing.md index 4c89f9d..491d92e 100644 --- a/docs/manual-testing.md +++ b/docs/manual-testing.md @@ -111,12 +111,12 @@ mint "這係一個蘋果" # 這是一個蘋果 mint "这是一个苹果" # 這是一個蘋果 ``` -`-v` shows `single target — skipping language detection` and `target language: zh-tw` — the +`-v` shows `single target — skipping language detection` and `target language: zh-TW` — the rewrite prompt handles standardization without needing to detect the input language first. > **Rotation note:** in a multi-language list (e.g. `zh-TW,en`), zh-HK input occupies > the zh-TW slot and rotates to `en`, not to `zh-TW`. In multi-target mode `-v` confirms: -> `detected input language: "zh-hk"` → `target language: en`. See section 9 below. +> `detected input language: "zh-HK"` → `target language: en`. See section 9 below. ## 7. Language-neutral pass-through @@ -160,7 +160,7 @@ mint "這是一顆蘋果。" # This is an apple. (zh-TW matched at index > **zh-HK edge case:** zh-HK input matches the zh-TW slot (same primary subtag `zh`) and > therefore rotates to `en`. Use `-v` to confirm: -> `detected input language: "zh-hk"` → `target language: en`. +> `detected input language: "zh-HK"` → `target language: en`. ## 10. Language rotation — three languages (wrap-around) @@ -230,6 +230,36 @@ MINT_PROVIDER=invalid mint -t zh-TW "apple" # Missing API key (no MINT_BASE_URL set) unset MINT_API_KEY mint -t zh-TW "apple" # Error: MINT_API_KEY is required for provider: + +# MINT_BASE_URL set: API key is optional (proxy handles auth) +export MINT_PROVIDER=openai +export MINT_BASE_URL=http://localhost:11434 +export MINT_MODEL_NAME=translategemma:4b +unset MINT_API_KEY +mint -t zh-TW "hello" # 你好 (no API key required) ``` Ctrl+C / SIGTERM cancels any in-flight request and exits with code 130. +This applies when the signal arrives while an HTTP request is in progress; +a signal sent before the request starts is handled by the OS default (exit 143 for SIGTERM). + +## 14. `MINT_VERBOSE` environment variable + +`MINT_VERBOSE=true` is equivalent to the `-v` / `--verbose` flag. + +```sh +MINT_VERBOSE=true mint -t zh-TW "apple" # 蘋果 (diagnostics on stderr) +``` + +Verbose stderr output (single-target mode): +``` +[mint] provider: google-genai +[mint] model: gemini-3.1-flash-lite +[mint] single target — skipping language detection +[mint] target language: zh-TW +[mint] tokens: 125 in / 8 out +``` + +The `model:` line appears only when `MINT_MODEL_NAME` is set (likewise `base_url:` for +`MINT_BASE_URL`); with provider defaults both are omitted, as in the abbreviated example at +the top of this file. diff --git a/internal/provider/openai/openai.go b/internal/provider/openai/openai.go index dac5aaa..a250f54 100644 --- a/internal/provider/openai/openai.go +++ b/internal/provider/openai/openai.go @@ -29,11 +29,10 @@ const ( // Client is an OpenAI API client. type Client struct { - apiKey string - baseURL string - modelName string - defaultEndpoint bool - httpClient *http.Client + apiKey string + baseURL string + modelName string + httpClient *http.Client } // New creates a new OpenAI client. @@ -42,20 +41,15 @@ func New(apiKey, baseURL, modelName string) *Client { modelName = defaultModelName } - // A custom base URL targets a local or proxy server (Ollama, LM Studio, - // etc.); track that so we only send OpenAI-only request fields to the - // official endpoint. - defaultEndpoint := baseURL == "" if baseURL == "" { baseURL = defaultBaseURL } return &Client{ - apiKey: apiKey, - baseURL: baseURL, - modelName: modelName, - defaultEndpoint: defaultEndpoint, - httpClient: httpx.New(), + apiKey: apiKey, + baseURL: baseURL, + modelName: modelName, + httpClient: httpx.New(), } } @@ -96,6 +90,13 @@ type streamResponse struct { // Complete calls the OpenAI API with streaming and writes tokens to w as they arrive. // system is sent as a system-role message followed by user as a user-role message. +// +// stream_options.include_usage is always requested so token counts are +// available in verbose mode. It is an OpenAI extension, but every endpoint we +// document (OpenAI, Ollama, LM Studio) either supports it or ignores unknown +// fields. A strict OpenAI-compatible server that rejects it would surface a +// plain API error; graceful degradation can be added if such a case is ever +// reported. func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer) (llm.Usage, error) { body := requestBody{ Model: c.modelName, @@ -103,15 +104,9 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer) {Role: "system", Content: system}, {Role: "user", Content: user}, }, - Temperature: temperature, - Stream: true, - } - - // stream_options.include_usage is an OpenAI extension. Only request it on - // the official endpoint; local/proxy servers reached via a custom base URL - // may reject unknown fields, so omit it there. - if c.defaultEndpoint { - body.StreamOptions = &streamOptions{IncludeUsage: true} + Temperature: temperature, + Stream: true, + StreamOptions: &streamOptions{IncludeUsage: true}, } jsonBody, err := json.Marshal(body) @@ -131,6 +126,7 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer) if err != nil { return llm.Usage{}, fmt.Errorf("call API: %w", err) } + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { diff --git a/internal/provider/openai/openai_test.go b/internal/provider/openai/openai_test.go index b7d2e40..4aa69b9 100644 --- a/internal/provider/openai/openai_test.go +++ b/internal/provider/openai/openai_test.go @@ -138,9 +138,9 @@ func TestCompleteRoleSeparation(t *testing.T) { _, _ = openai.New("k", srv.URL, "").Complete(t.Context(), "my system instruction", "my user text", &sb) } -// A custom base URL targets a local/proxy server (Ollama, LM Studio) that may -// reject unknown fields, so the OpenAI-only stream_options must be omitted. -func TestCompleteOmitsStreamOptionsForCustomBaseURL(t *testing.T) { +// stream_options.include_usage is always requested (including for custom base +// URLs) so that token counts are available in verbose mode. +func TestCompleteRequestsStreamOptions(t *testing.T) { var gotBody []byte srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -161,8 +161,37 @@ func TestCompleteOmitsStreamOptionsForCustomBaseURL(t *testing.T) { t.Fatalf("unmarshal request body: %v", err) } - if _, ok := req["stream_options"]; ok { - t.Errorf("stream_options must be omitted for a custom base URL, body: %s", gotBody) + if _, ok := req["stream_options"]; !ok { + t.Errorf("stream_options must be present by default, body: %s", gotBody) + } +} + +// A 400 response is surfaced verbatim as an error, in a single request — the +// client sends the request as-is and does not attempt any recovery. +func TestCompleteSurfaces400(t *testing.T) { + var requests int + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + requests++ + + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":{"message":"model not found"}}`)) + })) + defer srv.Close() + + var sb strings.Builder + + _, err := openai.New("k", srv.URL, "").Complete(t.Context(), "sys", "usr", &sb) + if err == nil { + t.Fatal("expected error, got nil") + } + + if !strings.Contains(err.Error(), "model not found") { + t.Errorf("error %q should surface the 400 body", err.Error()) + } + + if requests != 1 { + t.Errorf("expected exactly one request, got %d", requests) } }