Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions crates/nvisy-codec/src/document/located.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,3 @@ impl<L> Located<L> {
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `map` converts the location value while the mapped result
    /// still compares equal to the original source.
    #[test]
    fn map_transforms_inner() {
        let origin = ContentSource::new();
        let stringified = Located::new(origin, 7u32).map(|value| value.to_string());

        assert_eq!(stringified.location, "7");
        assert_eq!(stringified.source, origin);
    }

    /// Checks that `into_location` hands back just the wrapped location.
    #[test]
    fn into_location_discards_source() {
        let located = Located::new(ContentSource::new(), 42u32);
        let location = located.into_location();

        assert_eq!(location, 42);
    }
}
89 changes: 67 additions & 22 deletions crates/nvisy-codec/src/document/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,40 @@ use nvisy_core::content::{Content, ContentData, ContentSource};
use nvisy_core::media::{
AudioFormat, DocumentType, ImageFormat, SpreadsheetFormat, TextFormat, WordFormat,
};
use nvisy_ontology::entity::{AudioLocation, ImageLocation, TextLocation};
use nvisy_ontology::entity::{AudioLocation, ImageLocation, TabularLocation, TextLocation};

pub use self::located::Located;
pub use self::span::Span;
pub use self::stream::LocationStream;
use crate::handler::{
AudioData, AudioHandler, BoxedAudioHandler, BoxedImageHandler, BoxedRichHandler,
BoxedTabularHandler, BoxedTextHandler, CsvLoader, CsvParams, Handler, ImageData, ImageHandler,
JpegLoader, JpegParams, JsonLoader, JsonParams, Loader, MarkdownLoader, MarkdownParams,
Mp3Loader, Mp3Params, PngLoader, PngParams, TabularHandler, TextData, TextHandler, TiffLoader,
TiffParams, TxtLoader, TxtParams, WavLoader, WavParams, XlsxLoader, XlsxParams,
};
#[cfg(feature = "docx")]
use crate::handler::{DocxLoader, DocxParams};
#[cfg(feature = "html")]
use crate::handler::{HtmlLoader, HtmlParams};
#[cfg(feature = "pdf")]
use crate::handler::{PdfLoader, PdfParams};
use crate::handler::{
AudioData, AudioHandler, BoxedAudioHandler, BoxedImageHandler, BoxedRichHandler,
BoxedTextHandler, CsvLoader, CsvParams, Handler, ImageData, ImageHandler, JpegLoader,
JpegParams, JsonLoader, JsonParams, Loader, MarkdownLoader, MarkdownParams, Mp3Loader,
Mp3Params, PngLoader, PngParams, TextData, TextHandler, TiffLoader, TiffParams, TxtLoader,
TxtParams, WavLoader, WavParams, XlsxLoader, XlsxParams,
use crate::transform::{
AudioRedaction, ImageRedaction, Redactions, TabularRedaction, TextRedaction,
};
use crate::transform::{AudioRedaction, ImageRedaction, Redactions, TextRedaction};

/// A fully type-erased document that can hold any supported format.
///
/// Groups documents into four modality families:
/// - **Text**: plain text, CSV, JSON, HTML, XLSX
/// Groups documents into five modality families:
/// - **Text**: plain text, JSON, HTML
/// - **Tabular**: CSV, XLSX (cell-coordinate addressed)
/// - **Image**: PNG, JPEG, TIFF
/// - **Audio**: WAV, MP3
/// - **Rich**: PDF, DOCX (multi-modal documents with text + images)
#[derive(From, IsVariant, TryInto)]
pub enum ContentHandle {
Text(BoxedTextHandler),
Tabular(BoxedTabularHandler),
Image(BoxedImageHandler),
Audio(BoxedAudioHandler),
Rich(BoxedRichHandler),
Expand All @@ -60,6 +64,7 @@ impl ContentHandle {
pub fn document_type(&self) -> DocumentType {
match self {
Self::Text(h) => h.document_type(),
Self::Tabular(h) => h.document_type(),
Self::Image(h) => h.document_type(),
Self::Audio(h) => h.document_type(),
Self::Rich(h) => h.document_type(),
Expand All @@ -70,6 +75,7 @@ impl ContentHandle {
pub fn source(&self) -> ContentSource {
match self {
Self::Text(h) => h.source(),
Self::Tabular(h) => h.source(),
Self::Image(h) => h.source(),
Self::Audio(h) => h.source(),
Self::Rich(h) => h.source(),
Expand All @@ -80,6 +86,7 @@ impl ContentHandle {
pub fn encode(&self) -> Result<ContentData, Error> {
match self {
Self::Text(h) => h.encode(),
Self::Tabular(h) => h.encode(),
Self::Image(h) => h.encode(),
Self::Audio(h) => h.encode(),
Self::Rich(h) => h.encode(),
Expand All @@ -91,7 +98,15 @@ impl ContentHandle {
match self {
Self::Text(h) => h.locations(),
Self::Rich(h) => TextHandler::locations(h),
Self::Image(_) | Self::Audio(_) => LocationStream::empty(),
Self::Tabular(_) | Self::Image(_) | Self::Audio(_) => LocationStream::empty(),
}
}

/// Stream tabular (cell) locations from spreadsheet documents.
///
/// Delegates to the wrapped handler's `locations()` for the `Tabular`
/// variant; every other variant yields `LocationStream::empty()`.
pub fn tabular_locations(&self) -> LocationStream<'_, TabularLocation> {
    match self {
        Self::Tabular(h) => h.locations(),
        // Variants are spelled out (no `_` arm) so that adding a new one
        // fails to compile here until its behaviour is decided.
        Self::Text(_) | Self::Image(_) | Self::Audio(_) | Self::Rich(_) => {
            LocationStream::empty()
        }
    }
}

Expand All @@ -100,7 +115,7 @@ impl ContentHandle {
match self {
Self::Image(h) => h.locations(),
Self::Rich(h) => ImageHandler::locations(h),
Self::Text(_) | Self::Audio(_) => LocationStream::empty(),
Self::Text(_) | Self::Tabular(_) | Self::Audio(_) => LocationStream::empty(),
}
}

Expand All @@ -120,7 +135,15 @@ impl ContentHandle {
match self {
Self::Text(h) => h.read(location).await,
Self::Rich(h) => TextHandler::read(h, location).await,
Self::Image(_) | Self::Audio(_) => None,
Self::Tabular(_) | Self::Image(_) | Self::Audio(_) => None,
}
}

/// Read the cell value at the given tabular location.
///
/// Forwards to the wrapped handler's `read` for the `Tabular` variant and
/// returns whatever it produces. Every other variant returns `None`.
pub async fn read_tabular(&self, location: &TabularLocation) -> Option<TextData> {
    match self {
        Self::Tabular(h) => h.read(location).await,
        // Variants are spelled out (no `_` arm) so that adding a new one
        // fails to compile here until its behaviour is decided.
        Self::Text(_) | Self::Image(_) | Self::Audio(_) | Self::Rich(_) => None,
    }
}

Expand All @@ -129,7 +152,7 @@ impl ContentHandle {
match self {
Self::Image(h) => h.read(location).await,
Self::Rich(h) => ImageHandler::read(h, location).await,
Self::Text(_) | Self::Audio(_) => None,
Self::Text(_) | Self::Tabular(_) | Self::Audio(_) => None,
}
}

Expand All @@ -149,7 +172,18 @@ impl ContentHandle {
match self {
Self::Text(h) => h.redact(redactions).await,
Self::Rich(h) => TextHandler::redact(h, redactions).await,
Self::Image(_) | Self::Audio(_) => Ok(()),
Self::Tabular(_) | Self::Image(_) | Self::Audio(_) => Ok(()),
}
}

/// Apply a batch of tabular redactions to the document.
///
/// Forwards the batch to the wrapped handler's `redact` for the `Tabular`
/// variant. Every other variant ignores the batch and reports success.
///
/// # Errors
///
/// Returns whatever error the tabular handler's `redact` call yields.
pub async fn apply_tabular_redactions(
    &mut self,
    redactions: Redactions<TabularLocation, TabularRedaction>,
) -> Result<(), Error> {
    match self {
        Self::Tabular(h) => h.redact(redactions).await,
        // Variants are spelled out (no `_` arm) so that adding a new one
        // fails to compile here until its behaviour is decided.
        Self::Text(_) | Self::Image(_) | Self::Audio(_) | Self::Rich(_) => Ok(()),
    }
}

Expand All @@ -161,7 +195,7 @@ impl ContentHandle {
match self {
Self::Image(h) => h.redact(redactions).await,
Self::Rich(h) => ImageHandler::redact(h, redactions).await,
Self::Text(_) | Self::Audio(_) => Ok(()),
Self::Text(_) | Self::Tabular(_) | Self::Audio(_) => Ok(()),
}
}

Expand All @@ -172,7 +206,7 @@ impl ContentHandle {
) -> Result<(), Error> {
match self {
Self::Audio(h) => h.redact(redactions).await,
Self::Text(_) | Self::Image(_) | Self::Rich(_) => Ok(()),
_ => Ok(()),
}
}

Expand All @@ -188,9 +222,8 @@ impl ContentHandle {
let data = content.data();

match doc_type {
DocumentType::Text(_) | DocumentType::Html | DocumentType::Spreadsheet(_) => {
Self::decode_text(doc_type, data).await
}
DocumentType::Text(_) | DocumentType::Html => Self::decode_text(doc_type, data).await,
DocumentType::Spreadsheet(_) => Self::decode_tabular(doc_type, data).await,
DocumentType::Image(_) => Self::decode_image(doc_type, data).await,
DocumentType::Audio(_) => Self::decode_audio(doc_type, data).await,
DocumentType::Pdf | DocumentType::Word(_) | DocumentType::Presentation(_) => {
Expand Down Expand Up @@ -218,6 +251,18 @@ impl ContentHandle {
.decode(content, &HtmlParams::default())
.await?
.into(),
_ => {
return Err(Error::validation(
format!("no text loader for: {doc_type}"),
"ContentHandle::decode_text",
));
}
};
Ok(Self::from(handler))
}

async fn decode_tabular(doc_type: DocumentType, content: &ContentData) -> Result<Self, Error> {
let handler: BoxedTabularHandler = match doc_type {
DocumentType::Spreadsheet(SpreadsheetFormat::Csv) => CsvLoader
.decode(content, &CsvParams::default())
.await?
Expand All @@ -228,8 +273,8 @@ impl ContentHandle {
}
_ => {
return Err(Error::validation(
format!("no text loader for: {doc_type}"),
"ContentHandle::decode_text",
format!("no tabular loader for: {doc_type}"),
"ContentHandle::decode_tabular",
));
}
};
Expand Down
23 changes: 0 additions & 23 deletions crates/nvisy-codec/src/document/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,26 +55,3 @@ impl<L, D> Span<L, D> {
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that building a span from a `Located` carries over its source
    /// and location and attaches the supplied data.
    #[test]
    fn from_located_preserves_source_and_location() {
        let origin = ContentSource::new();
        let built = Span::from_located(Located::new(origin, 7u32), "data");

        assert_eq!(built.source, origin);
        assert_eq!(built.location, 7);
        assert_eq!(built.data, "data");
    }

    /// Checks that `map` replaces the data while the location is unchanged.
    #[test]
    fn map_transforms_data() {
        let measured = Span::new(ContentSource::new(), 1u32, "hello").map(|text| text.len());

        assert_eq!(measured.location, 1);
        assert_eq!(measured.data, 5);
    }
}
33 changes: 0 additions & 33 deletions crates/nvisy-codec/src/handler/audio/mp3_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,36 +32,3 @@ impl_audio_handler!(
"mp3-handler",
"mp3.encode"
);

#[cfg(test)]
mod tests {
    use bytes::Bytes;
    use futures::StreamExt;
    use nvisy_core::Error;

    use super::*;
    use crate::handler::{AudioHandler, Handler};

    /// Checks that the location stream of an MP3 handler has exactly one item.
    #[tokio::test]
    async fn locations_yields_single_location() {
        let handler = Mp3Handler::new(Bytes::from_static(b"ID3-mp3-data"));
        let found = handler.locations().collect::<Vec<_>>().await;

        assert_eq!(found.len(), 1);
    }

    /// Checks that reading the first location returns the bytes the handler
    /// was constructed with.
    #[tokio::test]
    async fn read_returns_full_audio() {
        let handler = Mp3Handler::new(Bytes::from_static(b"ID3-mp3-data"));
        let found = handler.locations().collect::<Vec<_>>().await;
        let first = &found[0].location;

        let payload = handler.read(first).await.unwrap();
        assert_eq!(payload.as_bytes().as_ref(), b"ID3-mp3-data");
    }

    /// Checks that `encode` yields the bytes the handler currently holds.
    #[test]
    fn encode_returns_current_bytes() -> Result<(), Error> {
        let handler = Mp3Handler::new(Bytes::from_static(b"audio-data"));
        let output = handler.encode()?;

        assert_eq!(output.as_bytes(), b"audio-data");
        Ok(())
    }
}
33 changes: 0 additions & 33 deletions crates/nvisy-codec/src/handler/audio/wav_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,36 +32,3 @@ impl_audio_handler!(
"wav-handler",
"wav.encode"
);

#[cfg(test)]
mod tests {
    use bytes::Bytes;
    use futures::StreamExt;
    use nvisy_core::Error;

    use super::*;
    use crate::handler::{AudioHandler, Handler};

    /// Checks that the location stream of a WAV handler has exactly one item.
    #[tokio::test]
    async fn locations_yields_single_location() {
        let handler = WavHandler::new(Bytes::from_static(b"RIFF-wav-data"));
        let found = handler.locations().collect::<Vec<_>>().await;

        assert_eq!(found.len(), 1);
    }

    /// Checks that reading the first location returns the bytes the handler
    /// was constructed with.
    #[tokio::test]
    async fn read_returns_full_audio() {
        let handler = WavHandler::new(Bytes::from_static(b"RIFF-wav-data"));
        let found = handler.locations().collect::<Vec<_>>().await;
        let first = &found[0].location;

        let payload = handler.read(first).await.unwrap();
        assert_eq!(payload.as_bytes().as_ref(), b"RIFF-wav-data");
    }

    /// Checks that `encode` yields the bytes the handler currently holds.
    #[test]
    fn encode_returns_current_bytes() -> Result<(), Error> {
        let handler = WavHandler::new(Bytes::from_static(b"audio-data"));
        let output = handler.encode()?;

        assert_eq!(output.as_bytes(), b"audio-data");
        Ok(())
    }
}
3 changes: 1 addition & 2 deletions crates/nvisy-codec/src/handler/image/image_handler_macro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ macro_rules! impl_image_handler {
impl crate::handler::ImageHandler for $handler {
fn locations(
&self,
) -> crate::document::LocationStream<'_, nvisy_ontology::entity::ImageLocation>
{
) -> crate::document::LocationStream<'_, nvisy_ontology::entity::ImageLocation> {
use ::std::iter;

let (w, h) = (self.image.width(), self.image.height());
Expand Down
2 changes: 2 additions & 0 deletions crates/nvisy-codec/src/handler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@ use nvisy_core::media::DocumentType;
mod audio;
mod image;
mod rich;
mod tabular;
mod text;

use nvisy_core::content::ContentSource;

pub use self::audio::*;
pub use self::image::*;
pub use self::rich::*;
pub use self::tabular::*;
pub use self::text::*;

/// Base trait implemented by all format handlers.
Expand Down
Loading