diff --git a/crates/opentake-media/src/encode/mix.rs b/crates/opentake-media/src/encode/mix.rs
new file mode 100644
index 0000000..d06bc13
--- /dev/null
+++ b/crates/opentake-media/src/encode/mix.rs
@@ -0,0 +1,247 @@
+//! Pure linear audio mixdown for the export pipeline.
+//!
+//! The export orchestrator decodes each audio clip's source window to mono f32
+//! PCM (via [`crate::decode::extract_pcm`]), then this module lays every clip
+//! into one shared timeline buffer at its frame-derived sample offset, applies a
+//! per-sample gain (the clip's `volume_at` envelope, projected to the mix rate),
+//! sums overlapping clips, and hard-limits the result to `[-1.0, 1.0]`.
+//!
+//! Everything here is a pure function over plain `f32` slices — no ffmpeg, no
+//! domain types — so the linear-mix math is unit-tested offline. The encoder
+//! ([`crate::encode::VideoEncoder`]) muxes the produced buffer as a second
+//! ffmpeg input; the orchestrator (`src-tauri/src/export.rs`) supplies the clip
+//! placements.
+//!
+//! Scope of this first cut: a **linear** mixdown skeleton (sum + clamp). No
+//! resampling curve, no pan/stereo field, no dynamics — those are follow-ups.
+//! All clips are decoded at the mix sample rate up front, so mixing is a plain
+//! sample-aligned add.
+
+/// The canonical mixdown sample rate, in Hz: the rate clip PCM is expected at
+/// (see [`ClipAudio::samples`]) and the rate the export mixes and muxes at.
+pub const MIX_SAMPLE_RATE: u32 = 48_000;
+
+/// A single clip's input to the mixdown: mono f32 PCM, the timeline sample
+/// index its first sample lands on, and an optional per-sample gain envelope.
+///
+/// An empty `gains` means unity gain on every sample; otherwise it must hold
+/// one gain per entry of `samples` (e.g. a `volume_at` fade sampled at the mix
+/// rate). [`mix_clips`] rejects any other length with an error.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ClipAudio {
+    /// Timeline sample index at which this clip's first sample is placed.
+    pub start_sample: usize,
+    /// The clip's mono f32 PCM, expected at [`MIX_SAMPLE_RATE`].
+    pub samples: Vec<f32>,
+    /// Linear gain per sample: empty for unity, else `samples.len()` entries.
+    pub gains: Vec<f32>,
+}
+
+impl ClipAudio {
+    /// Build a clip whose every sample is scaled by the same constant `gain`.
+    pub fn with_static_gain(start_sample: usize, samples: Vec<f32>, gain: f32) -> Self {
+        // Unity (within `f32::EPSILON`) is stored as "no envelope" at all.
+        let is_unity = (gain - 1.0).abs() < f32::EPSILON;
+        let gains = if is_unity {
+            Vec::new()
+        } else {
+            vec![gain; samples.len()]
+        };
+        Self {
+            start_sample,
+            samples,
+            gains,
+        }
+    }
+
+    /// One past the last timeline sample index this clip covers (exclusive).
+    fn end_sample(&self) -> usize {
+        self.samples.len() + self.start_sample
+    }
+}
+
+/// Sum every clip into a single mono f32 timeline buffer.
+///
+/// The buffer spans sample 0 through the furthest `end_sample` of any clip, so
+/// no silence is emitted past the last clip. Where clips overlap their gained
+/// samples add up, and the finished sum is hard-limited to `[-1.0, 1.0]`. No
+/// clips at all produces an empty buffer (the caller then muxes no audio).
+///
+/// Returns `Err` when a clip carries a non-empty `gains` whose length differs
+/// from its `samples` — a bug in the caller's per-sample envelope.
+pub fn mix_clips(clips: &[ClipAudio]) -> Result<Vec<f32>, String> {
+    // Validate every envelope before mixing anything.
+    let bad = clips
+        .iter()
+        .enumerate()
+        .find(|(_, c)| !c.gains.is_empty() && c.gains.len() != c.samples.len());
+    if let Some((i, c)) = bad {
+        let (g_len, s_len) = (c.gains.len(), c.samples.len());
+        return Err(format!("clip {i}: gains len {g_len} != samples len {s_len}"));
+    }
+
+    let total = clips.iter().map(ClipAudio::end_sample).max().unwrap_or(0);
+    let mut out = vec![0.0f32; total];
+
+    for c in clips {
+        let window = &mut out[c.start_sample..][..c.samples.len()];
+        for (k, (slot, &s)) in window.iter_mut().zip(&c.samples).enumerate() {
+            let g = c.gains.get(k).copied().unwrap_or(1.0);
+            *slot += s * g;
+        }
+    }
+
+    // Hard-limit the summed signal to the legal sample range.
+    out.iter_mut().for_each(|v| *v = v.clamp(-1.0, 1.0));
+    Ok(out)
+}
+
+/// Encode a mono f32 buffer as raw (headerless) signed 16-bit little-endian PCM:
+/// two bytes per sample, in input order — the `s16le` stream the mux pass hands
+/// to ffmpeg. Each sample is clamped to `[-1.0, 1.0]`, scaled by 32767 and
+/// rounded to the nearest integer, so full scale maps symmetrically to ±32767
+/// and `-32768` is never produced.
+pub fn mono_f32_to_s16le(samples: &[f32]) -> Vec<u8> {
+    let mut bytes = Vec::with_capacity(2 * samples.len());
+    let quantize = |s: f32| (s.clamp(-1.0, 1.0) * 32767.0).round() as i16;
+    bytes.extend(samples.iter().flat_map(|&s| quantize(s).to_le_bytes()));
+    bytes
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn empty_input_yields_empty_buffer() {
+        assert_eq!(mix_clips(&[]).unwrap(), Vec::<f32>::new());
+    }
+
+    #[test]
+    fn single_clip_unity_gain_passes_through() {
+        let c = ClipAudio {
+            start_sample: 0,
+            samples: vec![0.1, -0.2, 0.3],
+            gains: Vec::new(),
+        };
+        assert_eq!(mix_clips(&[c]).unwrap(), vec![0.1, -0.2, 0.3]);
+    }
+
+    #[test]
+    fn clip_offset_lays_after_leading_silence() {
+        let c = ClipAudio {
+            start_sample: 2,
+            samples: vec![0.5, 0.5],
+            gains: Vec::new(),
+        };
+        // two leading zeros, then the clip
+        assert_eq!(mix_clips(&[c]).unwrap(), vec![0.0, 0.0, 0.5, 0.5]);
+    }
+
+    #[test]
+    fn overlapping_clips_sum() {
+        let a = ClipAudio {
+            start_sample: 0,
+            samples: vec![0.2, 0.2, 0.2],
+            gains: Vec::new(),
+        };
+        let b = ClipAudio {
+            start_sample: 1,
+            samples: vec![0.3, 0.3],
+            gains: Vec::new(),
+        };
+        // index1: 0.2+0.3=0.5 ; index2: 0.2+0.3=0.5
+        assert_eq!(mix_clips(&[a, b]).unwrap(), vec![0.2, 0.5, 0.5]);
+    }
+
+    #[test]
+    fn summed_overshoot_is_hard_limited() {
+        let a = ClipAudio {
+            start_sample: 0,
+            samples: vec![0.8],
+            gains: Vec::new(),
+        };
+        let b = ClipAudio {
+            start_sample: 0,
+            samples: vec![0.8],
+            gains: Vec::new(),
+        };
+        // 1.6 -> clamped to 1.0
+        assert_eq!(mix_clips(&[a, b]).unwrap(), vec![1.0]);
+        // and the negative rail
+        let c = ClipAudio {
+            start_sample: 0,
+            samples: vec![-0.9, -0.9],
+            gains: Vec::new(),
+        };
+        let d = ClipAudio {
+            start_sample: 0,
+            samples: vec![-0.9, -0.9],
+            gains: Vec::new(),
+        };
+        assert_eq!(mix_clips(&[c, d]).unwrap(), vec![-1.0, -1.0]);
+    }
+
+    #[test]
+    fn per_sample_gain_is_applied() {
+        let c = ClipAudio {
+            start_sample: 0,
+            samples: vec![1.0, 1.0, 1.0],
+            gains: vec![0.0, 0.5, 1.0],
+        };
+        assert_eq!(mix_clips(&[c]).unwrap(), vec![0.0, 0.5, 1.0]);
+    }
+
+    #[test]
+    fn static_gain_helper_skips_envelope_at_unity() {
+        let c = ClipAudio::with_static_gain(0, vec![0.4, 0.4], 1.0);
+        assert!(c.gains.is_empty(), "unity gain stores no envelope");
+        let c2 = ClipAudio::with_static_gain(0, vec![0.4, 0.4], 0.5);
+        assert_eq!(c2.gains, vec![0.5, 0.5]);
+        assert_eq!(mix_clips(&[c2]).unwrap(), vec![0.2, 0.2]);
+    }
+
+    #[test]
+    fn mismatched_gain_length_errors() {
+        let c = ClipAudio {
+            start_sample: 0,
+            samples: vec![0.1, 0.2],
+            gains: vec![1.0], // wrong length
+        };
+        let err = mix_clips(&[c]).unwrap_err();
+        assert!(err.contains("gains len"), "got: {err}");
+    }
+
+    #[test]
+    fn output_length_is_furthest_clip_end() {
+        let a = ClipAudio {
+            start_sample: 0,
+            samples: vec![0.1],
+            gains: Vec::new(),
+        };
+        let b = ClipAudio {
+            start_sample: 10,
+            samples: vec![0.1, 0.1],
+            gains: Vec::new(),
+        };
+        // furthest end = 10 + 2 = 12
+        assert_eq!(mix_clips(&[a, b]).unwrap().len(), 12);
+    }
+
+    #[test]
+    fn s16le_encodes_unit_floats() {
+        // 0.0 -> 0 ; 1.0 -> 32767 ; -1.0 -> -32767
+        let bytes = mono_f32_to_s16le(&[0.0, 1.0, -1.0]);
+        assert_eq!(bytes.len(), 6);
+        assert_eq!(i16::from_le_bytes([bytes[0], bytes[1]]), 0);
+        assert_eq!(i16::from_le_bytes([bytes[2], bytes[3]]), 32767);
+        assert_eq!(i16::from_le_bytes([bytes[4], bytes[5]]), -32767);
+    }
+
+    #[test]
+    fn s16le_clamps_out_of_range() {
+        let bytes = mono_f32_to_s16le(&[2.0, -2.0]);
+        assert_eq!(i16::from_le_bytes([bytes[0], bytes[1]]), 32767);
+        assert_eq!(i16::from_le_bytes([bytes[2], bytes[3]]), -32767);
+    }
+}
diff --git a/crates/opentake-media/src/encode/mod.rs b/crates/opentake-media/src/encode/mod.rs
index 062bf02..c4f5547 100644
--- a/crates/opentake-media/src/encode/mod.rs
+++ b/crates/opentake-media/src/encode/mod.rs
@@ -7,12 +7,14 @@
 //! frames (SPEC §2.4 / §8.2). The arg builder ([`encode_args`]) is pure and
 //! unit-tested; the encode itself requires ffmpeg.
 
+pub mod mix;
 pub mod preset;
 
+pub use mix::{mix_clips, mono_f32_to_s16le, ClipAudio, MIX_SAMPLE_RATE};
 pub use preset::{even_dimension, ExportPreset, ExportResolution, VideoCodec};
 
 use std::io::Write;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
 use crate::decode::pcm::PcmBuffer;
 use crate::error::{MediaError, Result};
@@ -49,16 +51,60 @@ fn encode_args(out: &Path, w: u32, h: u32, fps: i32, preset: &ExportPreset) -> V
     args
 }
 
+/// Build the ffmpeg arg list for the second mux pass: stream-copy the encoded,
+/// audio-less video at `video_in`, encode the raw mono `s16le` PCM at `pcm_in`
+/// (declared at `sample_rate`) with `acodec`, and write the result to `out`.
+/// Pure, so the CLI contract is unit-testable without ffmpeg.
+fn mux_args(
+    video_in: &Path,
+    pcm_in: &Path,
+    out: &Path,
+    sample_rate: u32,
+    acodec: &str,
+) -> Vec<String> {
+    vec![
+        "-y".into(),
+        // Input 0: the encoded video (audio-less).
+        "-i".into(),
+        video_in.to_string_lossy().into_owned(),
+        // Input 1: raw mono s16le PCM (the mixed audio).
+        "-f".into(),
+        "s16le".into(),
+        "-ar".into(),
+        sample_rate.to_string(),
+        "-ac".into(),
+        "1".into(),
+        "-i".into(),
+        pcm_in.to_string_lossy().into_owned(),
+        "-c:v".into(),
+        "copy".into(),
+        "-c:a".into(),
+        acodec.into(),
+        // `-shortest` alone would cut the video at the end of a shorter mix, so
+        // pad the audio with silence first: the output then ends with the video.
+        "-af".into(),
+        "apad".into(),
+        "-shortest".into(),
+        out.to_string_lossy().into_owned(),
+    ]
+}
+
 /// A streaming RGBA → video encoder. Push frames in order, then `finish`.
 ///
-/// Audio muxing for a pre-rendered mix is intentionally limited here: the export
-/// pipeline composites/mixes audio in `opentake-render`; a follow-up wires the
-/// mixed PCM as a second ffmpeg input. For now [`push_audio`] records the PCM so
-/// the render layer can supply it, and the video-only path is fully functional.
+/// When [`push_audio`] has supplied a mixed PCM buffer, `finish` runs a second
+/// ffmpeg pass that muxes the audio into the encoded container (`-c:v copy` +
+/// `-c:a aac`/`pcm_s16le`). Without audio the video-only first pass *is* the
+/// final file. The mux-args builder ([`mux_args`]) is pure and unit-tested; the
+/// mux itself requires ffmpeg.
 pub struct VideoEncoder {
     child: ffmpeg_sidecar::child::FfmpegChild,
     stdin: Option<std::process::ChildStdin>,
     expected_frame_bytes: usize,
+    /// Final output path (the video first pass writes here; the mux pass, when
+    /// audio is present, rewrites it from a temp video + the PCM).
+    out_path: PathBuf,
+    /// ffmpeg `-c:a` token for the mux pass (from the preset).
+    acodec: &'static str,
     pending_audio: Option<PcmBuffer>,
 }
 
@@ -74,6 +120,8 @@ impl VideoEncoder {
             child,
             stdin,
             expected_frame_bytes: w as usize * h as usize * 4,
+            out_path: out.to_path_buf(),
+            acodec: preset.acodec_arg(),
             pending_audio: None,
         })
     }
@@ -98,24 +146,93 @@ impl VideoEncoder {
         Ok(())
     }
 
-    /// Record the mixed audio PCM to mux. (Muxing is completed by the render
-    /// export pipeline; see the type docs.)
+    /// Record the mixed-down mono audio buffer to mux on `finish`. The buffer's
+    /// `spec.sample_rate` is the rate ffmpeg is told to read the muxed PCM at
+    /// (the orchestrator decodes/mixes at [`MIX_SAMPLE_RATE`]). An empty buffer
+    /// is ignored — `finish` then keeps the video-only output.
     pub fn push_audio(&mut self, pcm: PcmBuffer) {
-        self.pending_audio = Some(pcm);
+        if pcm.samples_f32.is_empty() {
+            self.pending_audio = None;
+        } else {
+            self.pending_audio = Some(pcm);
+        }
     }
 
-    /// Finish encoding: close stdin and wait for ffmpeg to flush the container.
+    /// Finish encoding: close stdin, wait for the video pass, then — when a
+    /// mixed audio buffer was supplied — run a second ffmpeg pass to mux it in.
+    ///
+    /// The video first pass writes `out_path` directly. To mux, the encoded
+    /// video is moved aside to a sibling temp file, the mixed PCM is written to
+    /// another temp file, and ffmpeg copies the video stream while encoding the
+    /// audio back into `out_path`. Both temp files are removed afterward (best
+    /// effort). Without audio this is exactly the old video-only `finish`.
     pub fn finish(mut self) -> Result<()> {
-        // Drop stdin to signal EOF to ffmpeg.
+        // Drop stdin to signal EOF to ffmpeg, then wait for the video pass.
         self.stdin.take();
         let status = self.child.wait().map_err(MediaError::Io)?;
         if !status.success() {
             return Err(MediaError::Encode(format!("ffmpeg exited {status}")));
         }
-        Ok(())
+
+        let Some(pcm) = self.pending_audio.take() else {
+            return Ok(()); // video-only: the first pass is the final file.
+        };
+
+        self.mux_audio(&pcm)
+    }
+
+    /// Second ffmpeg pass: mux `pcm` (mono f32, written out as raw s16le) into
+    /// the already-encoded video at `self.out_path`, replacing it in place.
+    fn mux_audio(&self, pcm: &PcmBuffer) -> Result<()> {
+        let out = &self.out_path;
+        // Both temp files are siblings of the output (`<out>.video.tmp` and
+        // `<out>.pcm.tmp`), so the renames below stay inside one directory.
+        let video_tmp = sibling_temp(out, "video");
+        let pcm_tmp = sibling_temp(out, "pcm");
+
+        // Move the encoded video aside so ffmpeg can rewrite `out` from it.
+        std::fs::rename(out, &video_tmp).map_err(MediaError::Io)?;
+
+        // Fallible steps live in a closure so an early `?` still reaches cleanup.
+        let result = (|| {
+            let bytes = mix::mono_f32_to_s16le(&pcm.samples_f32);
+            std::fs::write(&pcm_tmp, &bytes).map_err(MediaError::Io)?;
+
+            let args = mux_args(&video_tmp, &pcm_tmp, out, pcm.spec.sample_rate, self.acodec);
+            let mut child = crate::ff::ffmpeg()
+                .args(args)
+                .spawn()
+                .map_err(|e| MediaError::Encode(format!("mux spawn: {e}")))?;
+            let status = child.wait().map_err(MediaError::Io)?;
+            if !status.success() {
+                return Err(MediaError::Encode(format!("ffmpeg mux exited {status}")));
+            }
+            Ok(())
+        })();
+
+        // Best-effort cleanup. If the mux failed, restore the video-only file so
+        // the caller still has a valid (audio-less) export rather than nothing.
+        let _ = std::fs::remove_file(&pcm_tmp);
+        if result.is_err() {
+            let _ = std::fs::rename(&video_tmp, out);
+        } else {
+            let _ = std::fs::remove_file(&video_tmp);
+        }
+        result
     }
 }
 
+/// Derive the sibling temp path `<out>.<tag>.tmp` in `out`'s own directory, so
+/// the rename in `mux_audio` never has to leave that directory.
+fn sibling_temp(out: &Path, tag: &str) -> PathBuf {
+    let mut tagged = match out.file_name() {
+        Some(base) => base.to_os_string(),
+        None => Default::default(),
+    };
+    tagged.push(format!(".{tag}.tmp"));
+    out.with_file_name(tagged)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -151,4 +268,44 @@ mod tests {
         // ProRes path does not add BT.709 color tags here.
         assert!(!args.windows(2).any(|w| w == ["-colorspace", "bt709"]));
     }
+
+    #[test]
+    fn mux_args_copy_video_and_encode_audio() {
+        let args = mux_args(
+            Path::new("/v.mp4"),
+            Path::new("/a.pcm"),
+            Path::new("/out.mp4"),
+            48_000,
+            "aac",
+        );
+        // video input first, then the raw s16le PCM input declared with rate/ch.
+        assert!(args.windows(2).any(|w| w == ["-i", "/v.mp4"]));
+        assert!(args.windows(2).any(|w| w == ["-f", "s16le"]));
+        assert!(args.windows(2).any(|w| w == ["-ar", "48000"]));
+        assert!(args.windows(2).any(|w| w == ["-ac", "1"]));
+        assert!(args.windows(2).any(|w| w == ["-i", "/a.pcm"]));
+        // copy the video stream, encode audio with the preset codec.
+        assert!(args.windows(2).any(|w| w == ["-c:v", "copy"]));
+        assert!(args.windows(2).any(|w| w == ["-c:a", "aac"]));
+        assert!(args.iter().any(|a| a == "-shortest"));
+        assert_eq!(args.last().unwrap(), "/out.mp4");
+    }
+
+    #[test]
+    fn mux_args_threads_prores_lpcm_codec() {
+        let args = mux_args(
+            Path::new("/v.mov"),
+            Path::new("/a.pcm"),
+            Path::new("/out.mov"),
+            48_000,
+            "pcm_s16le",
+        );
+        assert!(args.windows(2).any(|w| w == ["-c:a", "pcm_s16le"]));
+    }
+
+    #[test]
+    fn sibling_temp_keeps_directory_and_tags_name() {
+        let t = sibling_temp(Path::new("/tmp/clip/out.mp4"), "video");
+        assert_eq!(t, PathBuf::from("/tmp/clip/out.mp4.video.tmp"));
+    }
 }
diff --git a/src-tauri/src/export.rs b/src-tauri/src/export.rs
index 1031ffb..bd8f8b9 100644
--- a/src-tauri/src/export.rs
+++ b/src-tauri/src/export.rs
@@ -7,9 +7,13 @@
 //! (`opentake_media::VideoEncoder`) to produce a real `.mp4` on disk.
 //!
 //! Scope of this first cut (SPEC §2.4 / §8.2):
-//! - **Pure video** (no audio mix), **H.264 / .mp4** only. The encoder already
-//!   supports H.265 / ProRes presets and an audio side-channel; those land in a
-//!   follow-up so this slice stays a clean, verifiable spine.
+//! - **H.264 / .mp4** only. The encoder already supports H.265 / ProRes presets;
+//!   those land in a follow-up so this slice stays a clean, verifiable spine.
+//! - **Linear audio mixdown**: every audio-bearing clip's source window is
+//!   decoded to mono f32 at the mix rate, placed at its frame-derived sample
+//!   offset, scaled by its `volume_at` envelope, summed, hard-limited, and mux'd
+//!   in by the encoder (`-c:v copy` + AAC). A timeline with no audio still
+//!   produces the same video-only file as before.
 //! - Export renders at the **full** export resolution
 //!   ([`opentake_render::export_render_size`]), not the preview cap.
 //! - No progress callback / cancellation yet (the orchestrator runs to
@@ -29,10 +33,11 @@ use serde::{Deserialize, Serialize};
 use tauri::State;
 
 use opentake_core::AppCore;
-use opentake_domain::{ClipType, MediaSource, TextStyle};
+use opentake_domain::{Clip, ClipType, MediaSource, TextStyle};
+use opentake_media::encode::{mix, ClipAudio, MIX_SAMPLE_RATE};
 use opentake_media::{
-    decode_frame_at, ExportPreset, ExportResolution as EncodeResolution, FrameRequest, RgbaFrame,
-    VideoCodec, VideoEncoder,
+    decode_frame_at, extract_pcm, ExportPreset, ExportResolution as EncodeResolution, FrameRequest,
+    PcmBuffer, PcmFormat, PcmSpec, RgbaFrame, VideoCodec, VideoEncoder,
 };
 use opentake_render::gpu::texture::upload_rgba;
 use opentake_render::{
@@ -314,6 +319,124 @@ fn project_media(
     (sizes, media)
 }
 
+/// PCM spec every audio source window is decoded into — mono f32 at
+/// [`MIX_SAMPLE_RATE`] — so the mixdown is a plain sample-aligned add with no
+/// per-clip resampling. The mixed buffer is tagged with this same spec.
+const AUDIO_DECODE_SPEC: PcmSpec = PcmSpec {
+    sample_rate: MIX_SAMPLE_RATE,
+    channels: 1,
+    format: PcmFormat::F32,
+};
+
+/// Project one audio clip into a [`ClipAudio`] for the mixdown: decode its
+/// visible source window, cap it to the clip's timeline duration, place it at
+/// its frame-derived sample offset, and build the per-sample `volume_at` gain.
+///
+/// Returns `Ok(None)` when the clip contributes no audio (no media entry, no
+/// audio track, zero-length window, or an empty decode). Decode failures other
+/// than "no audio track" propagate as `Err`.
+fn project_clip_audio(
+    clip: &Clip,
+    media: &HashMap<String, MediaInfo>,
+    timeline_fps: i32,
+) -> Result<Option<ClipAudio>, String> {
+    if clip.duration_frames <= 0 || timeline_fps <= 0 {
+        return Ok(None);
+    }
+    let Some(info) = media.get(&clip.media_ref) else {
+        return Ok(None);
+    };
+
+    // Source window in seconds, measured at the *source* fps; the timeline fps
+    // stands in when the source rate is unknown (common for audio-only assets).
+    let src_fps = if info.fps > 0.0 {
+        info.fps
+    } else {
+        timeline_fps as f64
+    };
+    let lo = clip.trim_start_frame.max(0) as f64 / src_fps;
+    let consumed = clip.source_frames_consumed().max(0);
+    if consumed == 0 {
+        return Ok(None);
+    }
+    let hi = lo + consumed as f64 / src_fps;
+
+    let mut pcm = match extract_pcm(&info.path, &AUDIO_DECODE_SPEC, Some((lo, hi))) {
+        Ok(p) => p,
+        // No audio track just means silence from this clip, not a failed export.
+        Err(opentake_media::MediaError::NoTrack(_, _)) => return Ok(None),
+        Err(e) => return Err(format!("audio decode failed for {}: {e}", clip.media_ref)),
+    };
+
+    // Placement in mix samples. The decode is capped to the clip's own timeline
+    // duration so an over-long window can't bleed past the clip's end.
+    let start_secs = clip.start_frame.max(0) as f64 / timeline_fps as f64;
+    let start_sample = (start_secs * MIX_SAMPLE_RATE as f64).round() as usize;
+    let samples_per_frame = MIX_SAMPLE_RATE as f64 / timeline_fps as f64;
+    let max_len = (clip.duration_frames as f64 * samples_per_frame).round() as usize;
+    pcm.samples_f32.truncate(max_len);
+    if pcm.samples_f32.is_empty() {
+        return Ok(None);
+    }
+
+    // Per-sample gain: `volume_at` is looked up once per timeline frame and held
+    // for that frame's samples (assumes it depends only on the frame passed in).
+    let mut gains = Vec::with_capacity(pcm.samples_f32.len());
+    let mut held = (clip.start_frame, clip.volume_at(clip.start_frame) as f32);
+    for k in 0..pcm.samples_f32.len() {
+        let tl_frame = clip.start_frame + (k as f64 / samples_per_frame).floor() as i32;
+        if tl_frame != held.0 {
+            held = (tl_frame, clip.volume_at(tl_frame) as f32);
+        }
+        gains.push(held.1);
+    }
+    let all_unity = !gains.iter().any(|g| (g - 1.0).abs() > f32::EPSILON);
+
+    Ok(Some(ClipAudio {
+        start_sample,
+        samples: pcm.samples_f32,
+        gains: if all_unity { Vec::new() } else { gains },
+    }))
+}
+
+/// Decode and mix all audible clips of `timeline` into a single mono buffer.
+///
+/// Muted tracks are skipped. On the rest, every audio or video clip (a video
+/// clip may carry sound) goes through [`project_clip_audio`], and whatever they
+/// yield is linearly mixed. `Ok(None)` means nothing contributed audio, so the
+/// caller keeps the video-only output; decode or mix failures surface as `Err`
+/// for the front-end.
+fn mix_timeline_audio(
+    timeline: &opentake_domain::Timeline,
+    media: &HashMap<String, MediaInfo>,
+) -> Result<Option<PcmBuffer>, String> {
+    // Only audio and video clips carry sound; text/image/lottie don't.
+    let carries_sound = |clip: &&Clip| {
+        clip.media_type == ClipType::Audio || clip.media_type == ClipType::Video
+    };
+    let audible = timeline
+        .tracks
+        .iter()
+        .filter(|track| !track.muted)
+        .flat_map(|track| track.clips.iter())
+        .filter(carries_sound);
+
+    let clips_audio = audible
+        .filter_map(|clip| project_clip_audio(clip, media, timeline.fps).transpose())
+        .collect::<Result<Vec<ClipAudio>, String>>()?;
+    if clips_audio.is_empty() {
+        return Ok(None);
+    }
+
+    let mixed = mix::mix_clips(&clips_audio).map_err(|e| format!("audio mix failed: {e}"))?;
+    let buffer = PcmBuffer {
+        spec: AUDIO_DECODE_SPEC,
+        samples_f32: mixed,
+    };
+    // A zero-length mix counts as "no audio" as well.
+    Ok(Some(buffer).filter(|b| !b.samples_f32.is_empty()))
+}
+
 /// `export_video`: render the whole timeline to a video file on disk.
 ///
 /// Composites every frame at the full export resolution and encodes them to
@@ -402,6 +525,12 @@ pub fn run_export(
             .map_err(|e| format!("encode frame {f} failed: {e}"))?;
     }
 
+    // Decode + linearly mix every audio-bearing clip, then hand the mixed PCM to
+    // the encoder so `finish` muxes it into the container. No audio → video-only.
+    if let Some(pcm) = mix_timeline_audio(timeline, &media)? {
+        encoder.push_audio(pcm);
+    }
+
     encoder
         .finish()
         .map_err(|e| format!("encoder finish failed: {e}"))?;
@@ -507,4 +636,66 @@ mod tests {
         .expect("parse");
         assert_eq!(req.quality, ExportQuality::P720);
     }
+
+    use opentake_domain::{Timeline, Track};
+
+    #[test]
+    fn project_clip_audio_skips_clip_with_no_media_entry() {
+        // No matching manifest entry → no audio contribution, no decode attempt.
+        let clip = Clip::new("c1", "missing-asset", 0, 30);
+        let media: HashMap<String, MediaInfo> = HashMap::new();
+        let got = project_clip_audio(&clip, &media, 30).expect("ok");
+        assert!(got.is_none());
+    }
+
+    #[test]
+    fn project_clip_audio_skips_zero_duration() {
+        let clip = Clip::new("c1", "asset-1", 0, 0);
+        let mut media: HashMap<String, MediaInfo> = HashMap::new();
+        media.insert(
+            "asset-1".into(),
+            MediaInfo {
+                path: PathBuf::from("/nonexistent.wav"),
+                fps: 0.0,
+            },
+        );
+        // duration 0 short-circuits before any decode is attempted.
+        assert!(project_clip_audio(&clip, &media, 30).expect("ok").is_none());
+    }
+
+    #[test]
+    fn mix_timeline_audio_none_when_only_text_clips() {
+        // A text clip carries no sound; with no audio/video clips there's nothing
+        // to decode, so the result is None without touching the media map.
+        let mut tl = Timeline::new();
+        let mut track = Track::new("t1", ClipType::Text);
+        let mut clip = Clip::new("c1", "asset-1", 0, 30);
+        clip.media_type = ClipType::Text;
+        track.clips.push(clip);
+        tl.tracks.push(track);
+        let media: HashMap<String, MediaInfo> = HashMap::new();
+        assert!(mix_timeline_audio(&tl, &media).expect("ok").is_none());
+    }
+
+    #[test]
+    fn mix_timeline_audio_skips_muted_tracks() {
+        // A muted audio track is excluded; with no other audio the result is None
+        // and the (missing-path) asset is never decoded.
+        let mut tl = Timeline::new();
+        let mut track = Track::new("t1", ClipType::Audio);
+        track.muted = true;
+        let mut clip = Clip::new("c1", "asset-1", 0, 30);
+        clip.media_type = ClipType::Audio;
+        track.clips.push(clip);
+        tl.tracks.push(track);
+        let mut media: HashMap<String, MediaInfo> = HashMap::new();
+        media.insert(
+            "asset-1".into(),
+            MediaInfo {
+                path: PathBuf::from("/nonexistent.wav"),
+                fps: 0.0,
+            },
+        );
+        assert!(mix_timeline_audio(&tl, &media).expect("ok").is_none());
+    }
 }
diff --git a/src-tauri/tests/export_integration.rs b/src-tauri/tests/export_integration.rs
index b071a74..217f80b 100644
--- a/src-tauri/tests/export_integration.rs
+++ b/src-tauri/tests/export_integration.rs
@@ -54,6 +54,58 @@ fn make_video(path: &Path, w: u32, h: u32, fps: u32, frames: u32) -> bool {
         .unwrap_or(false)
 }
 
+/// Generate an N-frame `testsrc` video *with* a 440 Hz sine audio track at
+/// `path` (H.264 video, AAC audio). Returns false when ffmpeg cannot be run or
+/// exits unsuccessfully (→ the calling test skips).
+fn make_video_with_audio(path: &Path, w: u32, h: u32, fps: u32, frames: u32) -> bool {
+    let dur = frames as f64 / fps as f64;
+    let picture = format!("testsrc=duration={dur}:size={w}x{h}:rate={fps}");
+    let tone = format!("sine=frequency=440:duration={dur}");
+    let args = [
+        "-v",
+        "error",
+        "-f",
+        "lavfi",
+        "-i",
+        picture.as_str(),
+        "-f",
+        "lavfi",
+        "-i",
+        tone.as_str(),
+        "-c:v",
+        "libx264",
+        "-pix_fmt",
+        "yuv420p",
+        "-c:a",
+        "aac",
+        "-shortest",
+        "-y",
+    ];
+    let status = Command::new("ffmpeg").args(args).arg(path).status();
+    matches!(status, Ok(exit) if exit.success())
+}
+
+/// Ask ffprobe for the first audio stream's `codec_type`; true only when it
+/// reports `audio`. A failure to launch ffprobe counts as "no audio stream".
+fn has_audio_stream(path: &Path) -> bool {
+    const PROBE_ARGS: [&str; 8] = [
+        "-v",
+        "error",
+        "-select_streams",
+        "a:0",
+        "-show_entries",
+        "stream=codec_type",
+        "-of",
+        "default=noprint_wrappers=1:nokey=1",
+    ];
+    Command::new("ffprobe")
+        .args(PROBE_ARGS)
+        .arg(path)
+        .output()
+        .map(|probe| String::from_utf8_lossy(&probe.stdout).trim() == "audio")
+        .unwrap_or(false)
+}
+
 /// ffprobe a single stream field as a trimmed string.
 fn probe_field(path: &Path, entry: &str) -> Option<String> {
     let out = Command::new("ffprobe")
@@ -111,6 +163,18 @@ fn build_timeline(frames: i32, src_w: i32, src_h: i32, src_fps: f64) -> Timeline
 
 /// Build a manifest with one external video asset pointing at `media_path`.
 fn build_manifest(media_path: &Path, src_w: i32, src_h: i32, src_fps: f64) -> MediaManifest {
+    build_manifest_with_audio(media_path, src_w, src_h, src_fps, false)
+}
+
+/// Like [`build_manifest`] but lets the test declare whether the asset carries
+/// an audio track (so the export's audio mixdown path is exercised).
+fn build_manifest_with_audio(
+    media_path: &Path,
+    src_w: i32,
+    src_h: i32,
+    src_fps: f64,
+    has_audio: bool,
+) -> MediaManifest {
     let mut manifest = MediaManifest::new();
     manifest.entries.push(MediaManifestEntry {
         id: "asset-1".into(),
@@ -124,7 +188,7 @@ fn build_manifest(media_path: &Path, src_w: i32, src_h: i32, src_fps: f64) -> Me
         source_width: Some(src_w),
         source_height: Some(src_h),
         source_fps: Some(src_fps),
-        has_audio: Some(false),
+        has_audio: Some(has_audio),
         folder_id: None,
         cached_remote_url: None,
         cached_remote_url_expires_at: None,
@@ -204,4 +268,91 @@ fn export_full_timeline_produces_playable_mp4() {
         nframes, frames as u64,
         "encoded frame count matches timeline"
     );
+
+    // The video-only source has no audio track → export stays video-only.
+    assert!(
+        !has_audio_stream(&out),
+        "video-only timeline must not gain an audio stream"
+    );
+}
+
+#[test]
+fn export_with_audio_clip_mux_aac_stream() {
+    if !ffmpeg_ready() {
+        eprintln!("skip: ffmpeg/ffprobe not available");
+        return;
+    }
+
+    let dir = tempfile::tempdir().unwrap();
+    let src = dir.path().join("src_audio.mp4");
+    let out = dir.path().join("out_audio.mp4");
+
+    // Source: 320x240 @ 10fps, 10 frames (1.0s), WITH a 440 Hz sine track.
+    let (sw, sh, sfps, frames) = (320u32, 240u32, 10u32, 10u32);
+    if !make_video_with_audio(&src, sw, sh, sfps, frames) {
+        eprintln!("skip: could not generate audio fixture media");
+        return;
+    }
+    // Sanity: the fixture really has audio (else the assertion below is vacuous).
+    if !has_audio_stream(&src) {
+        eprintln!("skip: fixture lacks an audio stream");
+        return;
+    }
+
+    let timeline = build_timeline(frames as i32, sw as i32, sh as i32, sfps as f64);
+    let manifest = build_manifest_with_audio(&src, sw as i32, sh as i32, sfps as f64, true);
+
+    let req = ExportRequest {
+        out_path: out.to_string_lossy().into_owned(),
+        codec: Default::default(), // H.264 → AAC audio
+        quality: ExportQuality::P720,
+    };
+
+    let summary = match run_export(&timeline, &manifest, &None, &req) {
+        Ok(s) => s,
+        Err(e) => {
+            if e.contains("no GPU device") {
+                eprintln!("skip: no GPU adapter available ({e})");
+                return;
+            }
+            panic!("export failed: {e}");
+        }
+    };
+
+    assert!(out.exists(), "output file should exist");
+    assert_eq!(summary.frame_count, frames as i32);
+
+    // Video stream is still H.264 at the reported size.
+    let vcodec = probe_field(&out, "stream=codec_name").unwrap();
+    assert_eq!(vcodec, "h264", "video codec should be H.264");
+
+    // The mixdown muxed an audio stream into the container.
+    assert!(
+        has_audio_stream(&out),
+        "audio-bearing timeline must produce an audio stream"
+    );
+
+    // The muxed audio codec is AAC (H.264 preset's `-c:a aac`).
+    let acodec = Command::new("ffprobe")
+        .args([
+            "-v",
+            "error",
+            "-select_streams",
+            "a:0",
+            "-show_entries",
+            "stream=codec_name",
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",
+        ])
+        .arg(&out)
+        .output()
+        .ok()
+        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string());
+    assert_eq!(acodec.as_deref(), Some("aac"), "muxed audio should be AAC");
+
+    // The temp mux artifacts are cleaned up (no `.tmp` siblings left behind).
+    let leftover_video = dir.path().join("out_audio.mp4.video.tmp");
+    let leftover_pcm = dir.path().join("out_audio.mp4.pcm.tmp");
+    assert!(!leftover_video.exists(), "video temp should be removed");
+    assert!(!leftover_pcm.exists(), "pcm temp should be removed");
 }