Sendspin · kahrendt · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/docs/integration-guide.md b/docs/integration-guide.md
@@ -69,6 +69,7 @@ player_config.audio_formats = {
 player_config.audio_buffer_capacity = 1000000;   // Ring buffer size in bytes (default: 1000000)
 player_config.fixed_delay_us = 0;                // Fixed delay offset in microseconds
 player_config.initial_static_delay_ms = 0;       // Initial user-adjustable delay
+player_config.extra_startup_silence_ms = 50;     // Extra startup silence for decode headroom (default: 50)
 
 auto& player = client.add_player(std::move(player_config));
 ```
@@ -720,6 +721,7 @@ Configuration passed to `client.add_player()`.
 | `audio_buffer_capacity` | `size_t` | `1000000` | Internal ring buffer size in bytes. Larger buffers absorb more jitter at the cost of memory. |
 | `fixed_delay_us` | `int32_t` | `0` | Fixed platform-level delay offset in microseconds (e.g., a known I2S pipeline delay). Applied on top of the user-adjustable static delay. |
 | `initial_static_delay_ms` | `uint16_t` | `0` | Initial value for the user-adjustable static delay in milliseconds. Overridden by the persisted value if a `SendspinPersistenceProvider` is set. |
+| `extra_startup_silence_ms` | `uint16_t` | `50` | Extra silence inserted at stream start, after the first playback notification and before the first decoded chunk reaches the sink. It follows the initial-sync priming silence (replacing any priming silence that has not been sent yet) and gives the decode pipeline more slack to stay ahead of the sink, preventing the initial-playback stutter caused by the decoder briefly falling behind. Larger values trade a longer startup delay for more underflow protection; set to `0` to disable. |
 | `psram_stack` | `bool` | `false` | Allocate sync/decode task stack in PSRAM (ESP-IDF only) |
 | `priority` | `unsigned` | `6` | FreeRTOS priority for the sync/decode task (ESP-IDF only). The default value, `6`, is one above the default `httpd_priority` (`5`). If you customize priorities, keep this above `httpd_priority` so the HTTP server task cannot starve the decoder during the initial burst of encoded audio that fills the buffer at stream start. |
 | `decode_buffer_location` | `MemoryLocation` | `PREFER_EXTERNAL` | Memory placement preference for the decode transfer buffer. `PREFER_EXTERNAL` tries SPIRAM first and falls back to internal RAM; `PREFER_INTERNAL` does the reverse. ESP-IDF only; ignored on host. |

diff --git a/docs/internals.md b/docs/internals.md
@@ -319,9 +319,9 @@ INITIAL_SYNC ──→ LOAD_CHUNK ──→ SYNCHRONIZE_AUDIO ──→ TRANSFER
      └──→ LOAD_CHUNK (once first playback progress callback confirms frames were consumed)
 ```
 
-**INITIAL_SYNC**: Fills the audio pipeline with silence to prime DMA buffers. Sleeps briefly after sending to let the audio stack start consuming.
+**INITIAL_SYNC**: Fills the audio pipeline with silence to prime DMA buffers. Sleeps briefly after sending to let the audio stack start consuming. Once the first playback-progress callback confirms frames were consumed, `extra_startup_silence_ms` of additional silence (see `PlayerRoleConfig`) is queued in place of any unsent priming silence, and INITIAL_SYNC drains it before advancing to LOAD_CHUNK. This extra lead gives the decode pipeline slack to stay ahead of the sink at stream start, preventing the initial-playback stutter caused by the decoder briefly falling behind.
 
-**LOAD_CHUNK**: Reads the next encoded chunk from the ring buffer. Waits for time sync if not yet available. Decodes audio via FLAC/Opus/PCM decoder.
+**LOAD_CHUNK**: Reads the next encoded chunk from the ring buffer. Waits for time sync if not yet available. Decodes audio via FLAC/Opus/PCM decoder. On a ring-buffer underflow (no chunk ready) **while still aligning** (startup or post-seek), it feeds silence to the sink so the DAC does not run dry while the decode pipeline catches up; SYNCHRONIZE_AUDIO then re-aligns the next chunk against the playtime that silence advanced. In steady state it does **not** fill — an empty buffer there means the stream is winding down, and stuffing silence would pile up in the sink and delay a rapid restart (a genuine underrun instead surfaces as an error in SYNCHRONIZE_AUDIO).
 
 **SYNCHRONIZE_AUDIO**: Computes the sync error:
 

diff --git a/include/sendspin/config.h b/include/sendspin/config.h
@@ -120,6 +120,16 @@ struct PlayerRoleConfig {
     size_t audio_buffer_capacity{DEFAULT_AUDIO_BUFFER_CAPACITY};
     int32_t fixed_delay_us{0};
     uint16_t initial_static_delay_ms{0};
+
+    /// @brief Default extra silence (ms) inserted at stream start for decode-pipeline headroom
+    static constexpr uint16_t DEFAULT_EXTRA_STARTUP_SILENCE_MS = 50U;
+
+    /// @brief Extra silence (ms) inserted at stream start, after the first playback notification
+    /// and before the first decoded chunk, on top of the initial-sync priming silence. Gives the
+    /// decode pipeline slack to stay ahead of the sink, preventing the initial-playback stutter.
+    /// Larger values trade longer startup latency for more underflow protection; 0 disables.
+    uint16_t extra_startup_silence_ms{DEFAULT_EXTRA_STARTUP_SILENCE_MS};
+
     bool psram_stack{false};  ///< Allocate sync task stack in PSRAM (ESP-IDF only)
 
     /// @brief Default FreeRTOS priority for the sync/decode task (ESP-IDF only).

diff --git a/src/sync_task.cpp b/src/sync_task.cpp
@@ -48,6 +48,11 @@ static constexpr uint32_t WAIT_FOR_TIME_SYNC_MS = 15U;
 /// @brief Timeout (ms) for receiving the next encoded audio chunk from the ring buffer
 static constexpr uint32_t ENCODED_CHUNK_RECEIVE_TIMEOUT_MS = 15U;
 
+/// @brief Silence (ms) queued per encoded-chunk underflow to keep the DAC fed between chunks. A bit
+/// above ENCODED_CHUNK_RECEIVE_TIMEOUT_MS so it spans one more load wait, though not a strict
+/// bound: the fill bails as soon as a chunk lands and is paced by sink backpressure.
+static constexpr uint32_t UNDERFLOW_SILENCE_KEEPALIVE_MS = 20U;
+
 /// @brief Timeout (ms) for on_audio_write pushes; bounds how long the sync task blocks on the
 /// sink before returning to its inner loop to re-check flags and drift.
 static constexpr uint32_t AUDIO_WRITE_TIMEOUT_MS = 20U;
@@ -68,6 +73,15 @@ static constexpr size_t SILENCE_SCRATCH_BYTES = 1024;
 /// flood. .bss costs no heap and no flash; it is reserved and zeroed once at startup.
 static uint8_t silence_scratch[SILENCE_SCRATCH_BYTES] = {};
 
+/// @brief Returns the byte count for `duration_ms` of silence in `stream_info`'s format, rounded
+/// down to whole frames so per-write chunks and track_sent_audio() accounting stay on frame
+/// boundaries (the raw ms->bytes result need not be frame-aligned).
+static size_t frame_aligned_silence_bytes(const AudioStreamInfo& stream_info,
+                                          uint32_t duration_ms) {
+    return stream_info.frames_to_bytes(
+        stream_info.bytes_to_frames(stream_info.ms_to_bytes(duration_ms)));
+}
+
 static const char* const TAG = "sendspin.sync_task";
 
 // ============================================================================
@@ -167,18 +181,19 @@ void SyncTask::notify_audio_played(uint32_t frames, int64_t timestamp) {
 
 SyncTaskState SyncTask::handle_initial_sync(SyncContext& sync_context) {
     if (!sync_context.initial_decode) {
-        // Priming done (the audio stack has started consuming) - drop any leftover priming silence
-        // so it is not injected before the first real chunk.
-        sync_context.silence_remaining = 0;
+        // Priming done. process_playback_progress() queued the extra startup silence on the first
+        // playback notification; drain it before the first real chunk so the decode pipeline has
+        // slack to stay ahead of the sink.
+        this->send_pending_silence(sync_context);
+        if (sync_context.silence_remaining > 0) {
+            return SyncTaskState::INITIAL_SYNC;
+        }
         return SyncTaskState::LOAD_CHUNK;
     }
 
     if (sync_context.silence_remaining == 0) {
-        // Keep the silence run frame-aligned so per-write chunks (and the playtime accounting in
-        // track_sent_audio) land on whole frames.
-        sync_context.silence_remaining = sync_context.current_stream_info.frames_to_bytes(
-            sync_context.current_stream_info.bytes_to_frames(
-                sync_context.current_stream_info.ms_to_bytes(INITIAL_SYNC_ZEROS_DURATION_MS)));
+        sync_context.silence_remaining = frame_aligned_silence_bytes(
+            sync_context.current_stream_info, INITIAL_SYNC_ZEROS_DURATION_MS);
     }
     this->send_pending_silence(sync_context);
 
@@ -194,6 +209,13 @@ SyncTaskState SyncTask::handle_load_chunk(SyncContext& sync_context) {
         return SyncTaskState::LOAD_CHUNK;
     }
     if (!this->load_next_chunk(sync_context)) {
+        // Bridge underflows with silence only while aligning (startup/post-seek). In steady state
+        // an empty buffer means the stream is winding down; stuffing silence would pile up in the
+        // sink and delay a rapid restart (and a real underrun is better surfaced as an error than
+        // masked).
+        if (sync_context.aligning) {
+            this->fill_underflow_silence(sync_context);
+        }
         return SyncTaskState::LOAD_CHUNK;
     }
     DecodeResult decode_result = this->decode_chunk(sync_context);
@@ -360,6 +382,29 @@ void SyncTask::send_pending_silence(SyncContext& sync_context) {
     }
 }
 
+void SyncTask::fill_underflow_silence(SyncContext& sync_context) {
+    // Bridge a startup/post-seek underflow: feed the sink silence until the next chunk arrives
+    // instead of spinning and draining the DAC. Feeding silence advances
+    // new_audio_client_playtime, so handle_synchronize_audio() re-aligns the next chunk against it
+    // once one arrives. Nothing is sent when player_impl_->listener is null.
+    if (this->player_impl_->listener == nullptr) {
+        return;
+    }
+    if (sync_context.silence_remaining == 0) {
+        sync_context.silence_remaining = frame_aligned_silence_bytes(
+            sync_context.current_stream_info, UNDERFLOW_SILENCE_KEEPALIVE_MS);
+    }
+    // Drain block by block (send_pending_silence() blocks on sink backpressure), re-checking
+    // between writes so a landed chunk or lifecycle command ends the fill early. NOTE(review): an
+    // early exit leaves silence_remaining set; transfer_audio() flushes pending silence - confirm.
+    while (
+        (sync_context.silence_remaining > 0) &&
+        (this->encoded_ring_buffer_->chunks_waiting() == 0) &&
+        !(this->event_flags_.get() & (COMMAND_STOP | COMMAND_STREAM_END | COMMAND_STREAM_CLEAR))) {
+        this->send_pending_silence(sync_context);
+    }
+}
+
 bool SyncTask::transfer_audio(SyncContext& sync_context) {
     // Pending silence (priming or hard-sync gap fill) goes out before the decoded chunk
     this->send_pending_silence(sync_context);
@@ -703,7 +748,13 @@ void SyncTask::process_playback_progress(SyncContext& sync_context) {
         uint32_t frames_played = playback_progress.frames_played;
 
         if (sync_context.initial_decode && frames_played) {
+            // First audio reached the sink. Queue the extra startup silence (replacing any unsent
+            // priming silence) for decode-pipeline slack before the sink drains;
+            // handle_initial_sync() drains it before the first real chunk.
             sync_context.initial_decode = false;
+            sync_context.silence_remaining =
+                frame_aligned_silence_bytes(sync_context.current_stream_info,
+                                            this->player_impl_->config.extra_startup_silence_ms);
         }
 
         if (frames_played > sync_context.buffered_frames) {

diff --git a/src/sync_task.h b/src/sync_task.h
@@ -205,6 +205,12 @@ class SyncTask {
     /// the sink and updates the playtime estimate. No-op when no silence is pending.
     void send_pending_silence(SyncContext& sync_context);
 
+    /// @brief Bridges an encoded-chunk underflow while aligning (startup/post-seek) by feeding the
+    /// sink silence (up to UNDERFLOW_SILENCE_KEEPALIVE_MS) instead of letting the DAC run dry,
+    /// stopping after the current silence write once a chunk lands or a lifecycle command fires.
+    /// Only called while aligning; see handle_load_chunk().
+    void fill_underflow_silence(SyncContext& sync_context);
+
     /// @brief Transfers pending silence (if any) then the decoded chunk to the sink
     /// Returns true when all data has been sent, false if more transfers are needed.
     bool transfer_audio(SyncContext& sync_context);