Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ quick-xml = { version = "0.39", features = [] }
image = { version = "0.25", default-features = false, features = ["png", "jpeg", "tiff"] }
imageproc = { version = "0.26", features = [] }

# Audio processing
hound = { version = "3.5", features = [] }

# Python interop
pyo3 = { version = "0.28", features = [] }
pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
Expand Down
5 changes: 4 additions & 1 deletion crates/nvisy-codec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,16 @@ uuid = { workspace = true, features = [] }
hipstr = { workspace = true, features = [] }

# Derive macros and error handling
derive_more = { workspace = true, features = ["as_ref", "deref", "deref_mut", "display", "from"] }
derive_more = { workspace = true, features = ["as_ref", "deref", "deref_mut", "display", "from", "into_iterator"] }
thiserror = { workspace = true, features = [] }

# Image processing
image = { workspace = true, features = [] }
imageproc = { workspace = true, features = [] }

# Audio processing
hound = { workspace = true, features = [] }

# PDF processing (feature-gated)
lopdf = { workspace = true, optional = true, features = [] }
pdfium-render = { workspace = true, optional = true, features = [] }
Expand Down
2 changes: 1 addition & 1 deletion crates/nvisy-codec/src/document/located.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use nvisy_core::content::ContentSource;
/// [`Redactions`] — the source is metadata about how the location
/// was produced, not part of its identity.
///
/// [`Redactions`]: crate::transform::Redactions
/// [`Redactions`]: crate::handler::Redactions
#[derive(Debug, Clone, PartialEq)]
pub struct Located<L> {
/// The handler-level source that produced this location.
Expand Down
18 changes: 8 additions & 10 deletions crates/nvisy-codec/src/document/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,19 @@ pub use self::located::Located;
pub use self::span::Span;
pub use self::stream::LocationStream;
use crate::handler::{
AudioData, AudioHandler, BoxedAudioHandler, BoxedImageHandler, BoxedRichHandler,
BoxedTabularHandler, BoxedTextHandler, CsvLoader, CsvParams, Handler, ImageData, ImageHandler,
JpegLoader, JpegParams, JsonLoader, JsonParams, Loader, MarkdownLoader, MarkdownParams,
Mp3Loader, Mp3Params, PngLoader, PngParams, TabularHandler, TextData, TextHandler, TiffLoader,
TiffParams, TxtLoader, TxtParams, WavLoader, WavParams, XlsxLoader, XlsxParams,
AudioData, AudioHandler, AudioRedaction, BoxedAudioHandler, BoxedImageHandler,
BoxedRichHandler, BoxedTabularHandler, BoxedTextHandler, CsvLoader, CsvParams, Handler,
ImageData, ImageHandler, ImageRedaction, JpegLoader, JpegParams, JsonLoader, JsonParams,
Loader, MarkdownLoader, MarkdownParams, Mp3Loader, Mp3Params, PngLoader, PngParams, Redactions,
TabularHandler, TabularRedaction, TextData, TextHandler, TextRedaction, TiffLoader, TiffParams,
TxtLoader, TxtParams, WavLoader, WavParams, XlsxLoader, XlsxParams,
};
#[cfg(feature = "docx")]
use crate::handler::{DocxLoader, DocxParams};
#[cfg(feature = "html")]
use crate::handler::{HtmlLoader, HtmlParams};
#[cfg(feature = "pdf")]
use crate::handler::{PdfLoader, PdfParams};
use crate::transform::{
AudioRedaction, ImageRedaction, Redactions, TabularRedaction, TextRedaction,
};

/// A fully type-erased document that can hold any supported format.
///
Expand Down Expand Up @@ -170,7 +168,7 @@ impl ContentHandle {
redactions: Redactions<TextLocation, TextRedaction>,
) -> Result<(), Error> {
match self {
Self::Text(h) => h.redact(redactions).await,
Self::Text(h) => TextHandler::redact(h, redactions).await,
Self::Rich(h) => TextHandler::redact(h, redactions).await,
Self::Tabular(_) | Self::Image(_) | Self::Audio(_) => Ok(()),
}
Expand All @@ -193,7 +191,7 @@ impl ContentHandle {
redactions: Redactions<ImageLocation, ImageRedaction>,
) -> Result<(), Error> {
match self {
Self::Image(h) => h.redact(redactions).await,
Self::Image(h) => ImageHandler::redact(h, redactions).await,
Self::Rich(h) => ImageHandler::redact(h, redactions).await,
Self::Text(_) | Self::Tabular(_) | Self::Audio(_) => Ok(()),
}
Expand Down
184 changes: 184 additions & 0 deletions crates/nvisy-codec/src/handler/audio/apply.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
//! Helper for applying a single [`AudioRedaction`] to a typed sample
//! buffer in place.

use nvisy_ontology::primitive::TimeSpan;

use crate::handler::{AudioOutput, AudioRedaction};

const TARGET: &str = "nvisy_codec::handler::audio";

/// Redact one time span of an interleaved sample buffer, mutating it in place.
///
/// # Arguments
///
/// * `samples`: flat buffer of `S` with the channels interleaved.
/// * `time_span`: the `[start_us, end_us)` range to redact. It is passed
///   separately because, under the `(location, redaction)` shape, the time
///   span lives on the [`AudioLocation`] and not on the redaction.
/// * `redaction`: selects what happens to the range through its
///   [`AudioOutput`].
/// * `sample_rate`: sample rate in Hz.
/// * `channels`: number of channels (1 for mono, 2 for stereo).
///
/// # Behavior
///
/// The span is converted to sample indices and clamped to the buffer
/// length; a range that is empty after clamping leaves the buffer untouched.
///
/// * [`AudioOutput::Silence`] overwrites the range with `S::default()`.
/// * [`AudioOutput::Remove`] deletes the range, shortening the buffer.
/// * [`AudioOutput::Replace`] is not implemented yet: a warning is logged
///   and the buffer is left as it was.
///
/// # Ordering
///
/// When several redactions are applied, ordering them is the caller's job:
/// [`AudioOutput::Remove`] shrinks the buffer, so later time spans have to be
/// applied first to keep the indices of earlier ones valid. See
/// [`AudioHandler::redact`].
///
/// [`AudioLocation`]: nvisy_ontology::entity::AudioLocation
/// [`AudioHandler::redact`]: crate::handler::AudioHandler::redact
pub(crate) fn apply_audio_redaction<S>(
    samples: &mut Vec<S>,
    time_span: TimeSpan,
    redaction: &AudioRedaction,
    sample_rate: u32,
    channels: u16,
) where
    S: Default + Clone,
{
    let len = samples.len();
    let (first, past_last) =
        samples_for_time_span(time_span.start_us, time_span.end_us, sample_rate, channels);

    // Clamp both ends to the buffer so slicing and draining cannot panic.
    let range = first.min(len)..past_last.min(len);
    if range.is_empty() {
        return;
    }

    match &redaction.output {
        AudioOutput::Silence => samples[range].fill_with(S::default),
        AudioOutput::Remove => {
            samples.drain(range);
        }
        AudioOutput::Replace { .. } => {
            tracing::warn!(
                target: TARGET,
                start_us = time_span.start_us,
                end_us = time_span.end_us,
                "AudioOutput::Replace is not yet implemented, skipping",
            );
        }
    }
}

/// Convert a `[start_us, end_us)` time span to a `[start_sample,
/// end_sample)` index range into a channel-interleaved sample buffer.
///
/// Rounds half-up at the frame boundary, then multiplies by `channels`
/// so the returned indices land on frame boundaries (no stereo channel
/// swap on [`AudioOutput::Remove`]).
fn samples_for_time_span(
start_us: i64,
end_us: i64,
sample_rate: u32,
channels: u16,
) -> (usize, usize) {
let start_frame = us_to_frame(start_us, sample_rate);
let end_frame = us_to_frame(end_us, sample_rate);
(
start_frame.saturating_mul(channels as usize),
end_frame.saturating_mul(channels as usize),
)
}

/// Convert a timestamp in microseconds to a frame index at `sample_rate` Hz.
///
/// Computes `us * sample_rate / 1_000_000`, rounded half-up to the nearest
/// whole frame. Zero and negative timestamps map to frame 0.
///
/// The arithmetic runs in `u128`, so the intermediate product cannot
/// overflow. The quotient can still be larger than `usize::MAX` (on 32-bit
/// targets, or with extreme inputs on 64-bit); in that case the result
/// saturates to `usize::MAX`. A plain `as usize` cast would wrap such a value
/// to an unrelated and possibly small index, which would make a far-future
/// timestamp point at the start of the buffer.
fn us_to_frame(us: i64, sample_rate: u32) -> usize {
    const MICROS_PER_SECOND: u128 = 1_000_000;

    if us <= 0 {
        return 0;
    }
    // Adding half a second's worth of microseconds before dividing rounds
    // half-up instead of truncating.
    let scaled =
        u128::from(us.unsigned_abs()) * u128::from(sample_rate) + MICROS_PER_SECOND / 2;
    usize::try_from(scaled / MICROS_PER_SECOND).unwrap_or(usize::MAX)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Sample rate used by every test: at 1 kHz one frame lasts 1 000 µs, so
    /// a span given in thousands of microseconds reads directly as frames.
    const SAMPLE_RATE: u32 = 1000;

    /// Build the buffer `1..=len`, apply `output` over `[start_us, end_us)`
    /// with the given channel count, and return the resulting buffer.
    fn redact_ramp(
        len: i16,
        (start_us, end_us): (i64, i64),
        output: AudioOutput,
        channels: u16,
    ) -> Vec<i16> {
        let mut ramp: Vec<i16> = (1..=len).collect();
        apply_audio_redaction(
            &mut ramp,
            TimeSpan { start_us, end_us },
            &AudioRedaction::new(output),
            SAMPLE_RATE,
            channels,
        );
        ramp
    }

    #[test]
    fn silence_zeroes_range_mono() {
        let out = redact_ramp(10, (3_000, 6_000), AudioOutput::Silence, 1);
        assert_eq!(out, vec![1, 2, 3, 0, 0, 0, 7, 8, 9, 10]);
    }

    #[test]
    fn remove_shrinks_range_mono() {
        let out = redact_ramp(10, (3_000, 6_000), AudioOutput::Remove, 1);
        assert_eq!(out, vec![1, 2, 3, 7, 8, 9, 10]);
    }

    #[test]
    fn stereo_silence_aligns_to_frames() {
        let out = redact_ramp(20, (3_000, 6_000), AudioOutput::Silence, 2);
        assert_eq!(
            out,
            vec![
                1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, 18, 19, 20
            ],
        );
    }

    #[test]
    fn stereo_remove_drops_frames_not_samples() {
        let out = redact_ramp(20, (3_000, 6_000), AudioOutput::Remove, 2);
        assert_eq!(out.len(), 14);
        assert_eq!(
            out,
            vec![1, 2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, 19, 20]
        );
    }

    #[test]
    fn out_of_bounds_clipped() {
        // The span ends far past the five available samples.
        let out = redact_ramp(5, (0, 999_999_000), AudioOutput::Silence, 1);
        assert_eq!(out, vec![0, 0, 0, 0, 0]);
    }

    #[test]
    fn replace_is_warned_and_skipped() {
        let out = redact_ramp(5, (0, 3_000), AudioOutput::Replace { data: vec![] }, 1);
        assert_eq!(out, vec![1, 2, 3, 4, 5]);
    }
}
10 changes: 5 additions & 5 deletions crates/nvisy-codec/src/handler/audio/audio_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ use nvisy_ontology::entity::AudioLocation;

use super::{AudioData, Mp3Handler, WavHandler};
use crate::document::LocationStream;
use crate::handler::{AudioHandler, Handler};
use crate::transform::{AudioRedaction, Redactions};
use crate::handler::{AudioHandler, AudioRedaction, Handler};

/// A type-erased audio handler backed by a boxed trait object.
///
/// Newtype over `Box<dyn AudioHandler>`: the concrete handler type is hidden
/// behind dynamic dispatch, so any audio handler can be held under this one
/// type.
pub struct BoxedAudioHandler(Box<dyn AudioHandler>);
Expand Down Expand Up @@ -66,10 +65,11 @@ impl AudioHandler for BoxedAudioHandler {
self.0.read(location).await
}

async fn redact(
async fn redact_at(
&mut self,
redactions: Redactions<AudioLocation, AudioRedaction>,
location: &AudioLocation,
redaction: AudioRedaction,
) -> Result<(), Error> {
self.0.redact(redactions).await
self.0.redact_at(location, redaction).await
}
}
Loading