From 5fc6f2fb435348e2ecd132f61f7dec68eb626445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Monnom?= Date: Tue, 19 May 2026 15:11:51 -0700 Subject: [PATCH 1/2] console: disable agent audio I/O in text mode --- cmd/lk/console_tui.go | 32 ++++++++++++++++++++++++++++++++ pkg/console/pipeline.go | 21 +++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/cmd/lk/console_tui.go b/cmd/lk/console_tui.go index 67009912..668d8530 100644 --- a/cmd/lk/console_tui.go +++ b/cmd/lk/console_tui.go @@ -151,6 +151,7 @@ func (m consoleModel) Init() tea.Cmd { } if m.textMode { cmds = append(cmds, textinput.Blink) + m.applyTextMode(true) } return tea.Batch(cmds...) } @@ -216,6 +217,7 @@ func (m consoleModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.showShortcuts = false m.partialTranscript = "" m.textInput.Focus() + m.applyTextMode(true) return m, textinput.Blink case "?": m.showShortcuts = !m.showShortcuts @@ -259,6 +261,7 @@ func (m consoleModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.audioError = "" m.inputDev = "Default Input" m.outputDev = "Default Output" + m.applyTextMode(false) } return m, nil @@ -289,6 +292,7 @@ func (m *consoleModel) switchToAudio() tea.Cmd { m.showShortcuts = false m.textInput.Blur() m.audioError = "" + m.applyTextMode(false) return nil } // Lazy init audio in a goroutine @@ -297,6 +301,34 @@ func (m *consoleModel) switchToAudio() tea.Cmd { } } +// applyTextMode pauses the local audio pipeline and asks the agent to +// disable/enable audio I/O so STT/TTS aren't running in text mode. +func (m *consoleModel) applyTextMode(text bool) { + if m.pipeline.HasAudio() { + m.pipeline.SetPaused(text) + } + + m.reqCounter++ + reqID := fmt.Sprintf("console-io-%d", m.reqCounter) + audioOn := !text + transcriptionOn := !text + req := &agent.SessionRequest{ + RequestId: reqID, + Request: &agent.SessionRequest_UpdateIo{ + UpdateIo: &agent.SessionRequest_UpdateIO{ + Input: &agent.SessionRequest_UpdateIO_Input{ + AudioEnabled: &audioOn, + }, + Output: &agent.SessionRequest_UpdateIO_Output{ + AudioEnabled: &audioOn, + TranscriptionEnabled: &transcriptionOn, + }, + }, + }, + } + go m.pipeline.SendRequest(req) +} + func (m *consoleModel) beginShutdown() tea.Cmd { m.shuttingDown = true m.textMode = false diff --git a/pkg/console/pipeline.go b/pkg/console/pipeline.go index 8b6b399f..89a7c6db 100644 --- a/pkg/console/pipeline.go +++ b/pkg/console/pipeline.go @@ -72,6 +72,7 @@ type AudioPipeline struct { mu sync.Mutex fftBands [NumFFTBands]float64 muted bool + paused bool // true when audio I/O is paused (e.g. text mode); mic frames are not sent to the agent level float64 // capture level in dB playing bool // true when outputting real audio (not silence) @@ -267,6 +268,19 @@ func (p *AudioPipeline) Muted() bool { return p.muted } +// SetPaused stops mic frames from being sent to the agent and drops any +// queued playback. The hardware streams keep running so the pipeline can +// resume instantly. Used to keep audio off the wire in text mode. +func (p *AudioPipeline) SetPaused(paused bool) { + p.mu.Lock() + p.paused = paused + p.mu.Unlock() + + if paused && p.playbackRing != nil { + p.playbackRing.Reset() + } +} + func (p *AudioPipeline) Level() float64 { p.mu.Lock() defer p.mu.Unlock() @@ -389,6 +403,13 @@ func (p *AudioPipeline) speakerLoop(ctx context.Context) { } } + p.mu.Lock() + paused := p.paused + p.mu.Unlock() + if paused { + continue + } + p.computeMetrics(captureBuf) _ = p.writeMessage(&agent.AgentSessionMessage{ From f3f1ea1e06dbddc841684e2d185b1a6e9998f5d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Monnom?= Date: Tue, 19 May 2026 16:21:00 -0700 Subject: [PATCH 2/2] chore(deps): bump livekit/protocol for UpdateIO --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 28027440..e60119ba 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/google/go-containerregistry v0.20.6 github.com/google/go-querystring v1.2.0 github.com/joho/godotenv v1.5.1 - github.com/livekit/protocol v1.45.9-0.20260508203311-a249893d6a5d + github.com/livekit/protocol v1.45.9-0.20260519220930-d0b708548748 github.com/livekit/server-sdk-go/v2 v2.16.2 github.com/mattn/go-isatty v0.0.21 github.com/moby/patternmatcher v0.6.1 diff --git a/go.sum b/go.sum index 18dd9cfa..dcbc44d2 100644 --- a/go.sum +++ b/go.sum @@ -366,8 +366,8 @@ github.com/livekit/mageutil v0.0.0-20250511045019-0f1ff63f7731 h1:9x+U2HGLrSw5AT github.com/livekit/mageutil v0.0.0-20250511045019-0f1ff63f7731/go.mod h1:Rs3MhFwutWhGwmY1VQsygw28z5bWcnEYmS1OG9OxjOQ= github.com/livekit/mediatransportutil v0.0.0-20260309115634-0e2e24b36ee8 h1:coWig9fKxdb/nwOaIoGUUAogso12GblAJh/9SA9hcxk= github.com/livekit/mediatransportutil v0.0.0-20260309115634-0e2e24b36ee8/go.mod h1:RCd46PT+6sEztld6XpkCrG1xskb0u3SqxIjy4G897Ss= -github.com/livekit/protocol v1.45.9-0.20260508203311-a249893d6a5d h1:mE0/AjgGnvsF3q0ipiNGe1HZ3CUKUG7Y+zqey/2JrBE= -github.com/livekit/protocol v1.45.9-0.20260508203311-a249893d6a5d/go.mod h1:KEPIJ/ZdMFQ9tmmfv/uT9TjQEuEcZupCZBabuRGEC1k= +github.com/livekit/protocol v1.45.9-0.20260519220930-d0b708548748 h1:QXPwHAEWREHebK+NW+zdgz4x/Dw+I4U5WxVeEbZolwo= +github.com/livekit/protocol v1.45.9-0.20260519220930-d0b708548748/go.mod h1:KEPIJ/ZdMFQ9tmmfv/uT9TjQEuEcZupCZBabuRGEC1k= github.com/livekit/psrpc v0.7.1 h1:ms37az0QTD3UXIWuUC5D/SkmKOlRMVRsI261eBWu/Vw= github.com/livekit/psrpc v0.7.1/go.mod h1:bZ4iHFQptTkbPnB0LasvRNu/OBYXEu1NA6O5BMFo9kk= github.com/livekit/server-sdk-go/v2 v2.16.2 h1:eQe24cka3X+5zUivezyL72nwtAJTWFXgibeiyJ/Jm+Y=