From eb20501860185b2b56ba6bb70f0a5ee9c9e6d5d0 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 18 Feb 2026 13:33:00 -0700 Subject: [PATCH 1/8] feat: add hashing --- crates/core/src/hash.rs | 330 ++++++++++++++++++++++++++++++++++++++++ crates/core/src/lib.rs | 6 + 2 files changed, 336 insertions(+) create mode 100644 crates/core/src/hash.rs diff --git a/crates/core/src/hash.rs b/crates/core/src/hash.rs new file mode 100644 index 000000000..bfdfc77eb --- /dev/null +++ b/crates/core/src/hash.rs @@ -0,0 +1,330 @@ +//! Sortable spatio-temporal hashing. +//! +//! Computes a sortable hash from a datetime, latitude, and longitude by +//! quantizing each dimension and interleaving their bits into a [Z-order +//! curve](https://en.wikipedia.org/wiki/Z-order_curve) (Morton code). Points +//! that are nearby in space and time tend to have numerically close hash values, +//! making the hashes useful for range queries and spatial indexing. +//! +//! Precision is controlled by providing spatial and temporal extents that define +//! the minimum cell size in each dimension. The number of bits per dimension is +//! derived from these extents automatically. +//! +//! # Example +//! +//! ``` +//! use chrono::{TimeDelta, TimeZone, Utc}; +//! use stac::hash::Hasher; +//! +//! let hasher = Hasher::new( +//! 1.0, // 1 degree spatial cells +//! TimeDelta::days(1), // 1 day temporal cells +//! Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap() +//! ..Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap(), +//! ) +//! .unwrap(); +//! +//! let hash = hasher.hash( +//! Utc.with_ymd_and_hms(2023, 6, 15, 12, 0, 0).unwrap(), +//! 40.0, +//! -105.0, +//! ); +//! ``` + +use chrono::{DateTime, TimeDelta, Utc}; +use std::ops::Range; + +const MAX_BITS_PER_DIMENSION: u8 = 21; + +/// A spatio-temporal hasher that produces sortable 64-bit hashes. +/// +/// The hasher quantizes latitude, longitude, and time into discrete bins, then +/// interleaves their bits to form a Z-order curve index. The number of bits per +/// dimension is determined by the spatial and temporal extents provided at +/// construction time. +#[derive(Debug, Clone)] +pub struct Hasher { + time_start_ms: i64, + time_total_ms: f64, + bits: u8, +} + +impl Hasher { + /// Creates a new hasher. + /// + /// # Arguments + /// + /// * `spatial_extent` — Minimum spatial cell size in degrees. Both latitude + /// and longitude are quantized at this resolution. + /// * `temporal_extent` — Minimum temporal cell size. + /// * `time_range` — The full time range that hashes must cover. Datetimes + /// outside this range are clamped to the boundaries. + /// + /// # Errors + /// + /// Returns an error if the spatial extent is not positive and finite, the + /// temporal extent is not positive, the time range is empty, or the + /// required bits per dimension exceeds the maximum (21). + pub fn new( + spatial_extent: f64, + temporal_extent: TimeDelta, + time_range: Range>, + ) -> Result { + if spatial_extent <= 0.0 || !spatial_extent.is_finite() { + return Err(Error::InvalidSpatialExtent); + } + if temporal_extent <= TimeDelta::zero() { + return Err(Error::InvalidTemporalExtent); + } + if time_range.start >= time_range.end { + return Err(Error::InvalidTimeRange); + } + + let lat_bits = bits_needed(180.0 / spatial_extent); + let lon_bits = bits_needed(360.0 / spatial_extent); + + let total_ms = (time_range.end - time_range.start).num_milliseconds() as f64; + let extent_ms = temporal_extent.num_milliseconds() as f64; + let time_bits = bits_needed(total_ms / extent_ms); + + let bits = lat_bits.max(lon_bits).max(time_bits); + if bits > MAX_BITS_PER_DIMENSION { + return Err(Error::TooManyBits { bits }); + } + + Ok(Self { + time_start_ms: time_range.start.timestamp_millis(), + time_total_ms: total_ms, + bits, + }) + } + + /// Returns the number of bits used per dimension. + pub fn bits_per_dimension(&self) -> u8 { + self.bits + } + + /// Returns the total number of bits in the hash. + pub fn total_bits(&self) -> u8 { + self.bits * 3 + } + + /// Computes a sortable hash for the given spatio-temporal point. + /// + /// Datetimes outside the configured time range are clamped. Latitudes are + /// clamped to \[-90, 90\] and longitudes to \[-180, 180\]. + pub fn hash(&self, datetime: DateTime, lat: f64, lon: f64) -> u64 { + let lat_norm = ((lat + 90.0) / 180.0).clamp(0.0, 1.0); + let lon_norm = ((lon + 180.0) / 360.0).clamp(0.0, 1.0); + + let time_offset_ms = (datetime.timestamp_millis() - self.time_start_ms).max(0) as f64; + let time_norm = (time_offset_ms / self.time_total_ms).clamp(0.0, 1.0); + + let max_val = ((1u64 << self.bits) - 1) as f64; + let time_q = (time_norm * max_val) as u64; + let lat_q = (lat_norm * max_val) as u64; + let lon_q = (lon_norm * max_val) as u64; + + interleave3(time_q, lat_q, lon_q, self.bits) + } +} + +/// Error enum for hash-related errors. +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +pub enum Error { + /// The required bits per dimension exceeds the maximum. + #[error( + "required bits per dimension ({bits}) exceeds maximum ({MAX_BITS_PER_DIMENSION}), \ + use coarser extents" + )] + TooManyBits { + /// The number of bits that were required. + bits: u8, + }, + + /// The spatial extent must be positive and finite. + #[error("spatial extent must be positive")] + InvalidSpatialExtent, + + /// The temporal extent must be positive. + #[error("temporal extent must be positive")] + InvalidTemporalExtent, + + /// The time range must be non-empty (start < end). + #[error("time range must be non-empty")] + InvalidTimeRange, +} + +fn bits_needed(count: f64) -> u8 { + if count <= 1.0 { + return 0; + } + let bits = count.ceil().log2().ceil() as u8; + bits.max(1) +} + +fn interleave3(a: u64, b: u64, c: u64, bits: u8) -> u64 { + let mut result = 0u64; + for i in 0..bits { + let src = i as u64; + let dst = (i as u64) * 3; + result |= ((c >> src) & 1) << dst; + result |= ((b >> src) & 1) << (dst + 1); + result |= ((a >> src) & 1) << (dst + 2); + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::TimeZone; + + fn test_time_range() -> Range> { + Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap() + ..Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap() + } + + fn test_hasher() -> Hasher { + Hasher::new(1.0, TimeDelta::days(1), test_time_range()).unwrap() + } + + #[test] + fn deterministic() { + let hasher = test_hasher(); + let dt = Utc.with_ymd_and_hms(2023, 6, 15, 12, 0, 0).unwrap(); + let a = hasher.hash(dt, 40.0, -105.0); + let b = hasher.hash(dt, 40.0, -105.0); + assert_eq!(a, b); + } + + #[test] + fn nearby_points_have_close_hashes() { + let hasher = test_hasher(); + let dt = Utc.with_ymd_and_hms(2023, 6, 15, 12, 0, 0).unwrap(); + + let h1 = hasher.hash(dt, 40.0, -105.0); + let h2 = hasher.hash(dt, 40.1, -105.1); + let h_far = hasher.hash(dt, -40.0, 105.0); + + let diff_near = (h1 as i64 - h2 as i64).unsigned_abs(); + let diff_far = (h1 as i64 - h_far as i64).unsigned_abs(); + assert!(diff_near < diff_far); + } + + #[test] + fn origin_and_extremes() { + let hasher = test_hasher(); + let dt_start = Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap(); + let dt_end = Utc.with_ymd_and_hms(2024, 12, 31, 23, 59, 59).unwrap(); + + let h_min = hasher.hash(dt_start, -90.0, -180.0); + let h_max = hasher.hash(dt_end, 90.0, 180.0); + + assert_eq!(h_min, 0); + assert!(h_max > 0); + } + + #[test] + fn clamping() { + let hasher = test_hasher(); + let dt = Utc.with_ymd_and_hms(2023, 6, 15, 12, 0, 0).unwrap(); + let dt_before = Utc.with_ymd_and_hms(2019, 1, 1, 0, 0, 0).unwrap(); + let dt_after = Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(); + + let _ = hasher.hash(dt, -91.0, -181.0); + let _ = hasher.hash(dt, 91.0, 181.0); + let _ = hasher.hash(dt_before, 0.0, 0.0); + let _ = hasher.hash(dt_after, 0.0, 0.0); + } + + #[test] + fn time_ordering() { + let hasher = test_hasher(); + let dt1 = Utc.with_ymd_and_hms(2021, 1, 1, 0, 0, 0).unwrap(); + let dt2 = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap(); + + let h1 = hasher.hash(dt1, 0.0, 0.0); + let h2 = hasher.hash(dt2, 0.0, 0.0); + assert!(h1 < h2); + } + + #[test] + fn spatial_ordering_lat() { + let hasher = test_hasher(); + let dt = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap(); + + let h1 = hasher.hash(dt, -45.0, 0.0); + let h2 = hasher.hash(dt, 45.0, 0.0); + assert!(h1 < h2); + } + + #[test] + fn spatial_ordering_lon() { + let hasher = test_hasher(); + let dt = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap(); + + let h1 = hasher.hash(dt, 0.0, -90.0); + let h2 = hasher.hash(dt, 0.0, 90.0); + assert!(h1 < h2); + } + + #[test] + fn invalid_spatial_extent() { + assert!(Hasher::new(0.0, TimeDelta::days(1), test_time_range()).is_err()); + assert!(Hasher::new(-1.0, TimeDelta::days(1), test_time_range()).is_err()); + assert!(Hasher::new(f64::NAN, TimeDelta::days(1), test_time_range()).is_err()); + assert!(Hasher::new(f64::INFINITY, TimeDelta::days(1), test_time_range()).is_err()); + } + + #[test] + fn invalid_temporal_extent() { + assert!(Hasher::new(1.0, TimeDelta::zero(), test_time_range()).is_err()); + assert!(Hasher::new(1.0, TimeDelta::days(-1), test_time_range()).is_err()); + } + + #[test] + fn invalid_time_range() { + let dt = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap(); + assert!(Hasher::new(1.0, TimeDelta::days(1), dt..dt).is_err()); + } + + #[test] + fn fine_precision() { + let hasher = Hasher::new(0.01, TimeDelta::hours(1), test_time_range()).unwrap(); + assert!(hasher.bits_per_dimension() > 10); + } + + #[test] + fn coarse_precision() { + let hasher = Hasher::new(90.0, TimeDelta::days(365), test_time_range()).unwrap(); + assert!(hasher.bits_per_dimension() <= 5); + } + + #[test] + fn bits_needed_cases() { + assert_eq!(bits_needed(0.5), 0); + assert_eq!(bits_needed(1.0), 0); + assert_eq!(bits_needed(2.0), 1); + assert_eq!(bits_needed(3.0), 2); + assert_eq!(bits_needed(4.0), 2); + assert_eq!(bits_needed(5.0), 3); + assert_eq!(bits_needed(256.0), 8); + assert_eq!(bits_needed(257.0), 9); + } + + #[test] + fn interleave_simple() { + assert_eq!(interleave3(1, 0, 1, 1), 0b101); + assert_eq!(interleave3(1, 1, 1, 1), 0b111); + } + + #[test] + fn too_many_bits() { + let range = Utc.with_ymd_and_hms(2000, 1, 1, 0, 0, 0).unwrap() + ..Utc.with_ymd_and_hms(2100, 1, 1, 0, 0, 0).unwrap(); + let result = Hasher::new(0.0001, TimeDelta::seconds(1), range); + assert!(result.is_err()); + } +} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index cf77ba435..75e688f18 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -65,6 +65,11 @@ //! - `geo`: add some geo-enabled methods, see [geo] //! - `geoarrow`: read and write [geoarrow](https://geoarrow.org/), see [geoarrow] //! - `geoparquet`: read and write [geoparquet](https://geoparquet.org/), see [geoparquet] +//! +//! # Hashing +//! +//! The [hash] module provides sortable spatio-temporal hashing using Z-order +//! curves (Morton codes). #![deny( elided_lifetimes_in_paths, @@ -115,6 +120,7 @@ pub mod geo; pub mod geoarrow; #[cfg(feature = "geoparquet")] pub mod geoparquet; +pub mod hash; pub mod href; pub mod item; mod item_asset; From 56b9efbc6921598e72db353ec96a678e85e978af Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 18 Feb 2026 13:35:57 -0700 Subject: [PATCH 2/8] feat: add Item::hash --- crates/core/src/item.rs | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/crates/core/src/item.rs b/crates/core/src/item.rs index eeedc2673..bb5dc1d76 100644 --- a/crates/core/src/item.rs +++ b/crates/core/src/item.rs @@ -617,6 +617,48 @@ impl Item { let result = self.into_flat_item(true)?.matches_cql2(expr)?; Ok(result) } + + /// Computes a sortable spatio-temporal hash for this item. + /// + /// The hash is computed from the item's datetime (or the midpoint of + /// `start_datetime` and `end_datetime`) and the center of its bounding box. + /// + /// Returns `None` if the item has no bbox, or if neither `datetime` nor + /// both `start_datetime` and `end_datetime` are set. + /// + /// # Examples + /// + /// ``` + /// use chrono::{TimeDelta, TimeZone, Utc}; + /// use stac::{Item, Bbox}; + /// use stac::hash::Hasher; + /// + /// let mut item = Item::new("an-id"); + /// item.properties.datetime = Some(Utc.with_ymd_and_hms(2023, 6, 15, 12, 0, 0).unwrap()); + /// item.bbox = Some(Bbox::new(-106.0, 39.0, -104.0, 41.0)); + /// + /// let hasher = Hasher::new( + /// 1.0, + /// TimeDelta::days(1), + /// Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap() + /// ..Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap(), + /// ) + /// .unwrap(); + /// + /// assert!(item.hash(&hasher).is_some()); + /// ``` + pub fn hash(&self, hasher: &crate::hash::Hasher) -> Option { + let bbox = self.bbox.as_ref()?; + let lat = (bbox.ymin() + bbox.ymax()) / 2.0; + let lon = (bbox.xmin() + bbox.xmax()) / 2.0; + let datetime = self.properties.datetime.or_else(|| { + let start = self.properties.start_datetime?; + let end = self.properties.end_datetime?; + let midpoint = start + (end - start) / 2; + Some(midpoint) + })?; + Some(hasher.hash(datetime, lat, lon)) + } } impl Assets for Item { From 1ee0a09a334b1ddbf7dc1c25ec899ff54ad05acb Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 18 Feb 2026 13:38:19 -0700 Subject: [PATCH 3/8] feat: create a hasher for the collection --- crates/core/src/collection.rs | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/crates/core/src/collection.rs b/crates/core/src/collection.rs index f1d0f144e..58fd68f28 100644 --- a/crates/core/src/collection.rs +++ b/crates/core/src/collection.rs @@ -314,6 +314,44 @@ impl Collection { self.update_extents(item); self.maybe_add_item_link(item) } + + /// Creates a [Hasher](crate::hash::Hasher) from this collection's temporal extent. + /// + /// The `spatial_extent` and `temporal_extent` parameters control the + /// resolution (minimum cell size) of the hasher. The time range is derived + /// from the first interval in this collection's temporal extent. + /// + /// Returns `None` if the collection's first temporal interval does not have + /// both a start and end datetime. + /// + /// # Examples + /// + /// ``` + /// use chrono::{TimeDelta, TimeZone, Utc}; + /// use stac::Collection; + /// + /// let mut collection = Collection::new("an-id", "a description"); + /// collection.extent.temporal.interval = + /// vec![[ + /// Some(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()), + /// Some(Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap()), + /// ]]; + /// + /// let hasher = collection.hasher(1.0, TimeDelta::days(1)).unwrap(); + /// assert!(hasher.is_some()); + /// ``` + pub fn hasher( + &self, + spatial_extent: f64, + temporal_extent: chrono::TimeDelta, + ) -> std::result::Result, crate::hash::Error> { + let interval = self.extent.temporal.interval.first(); + let (start, end) = match interval { + Some([Some(start), Some(end)]) => (*start, *end), + _ => return Ok(None), + }; + crate::hash::Hasher::new(spatial_extent, temporal_extent, start..end).map(Some) + } } impl Provider { From 9ff8f58f5e56c0262d979617d89741c5054cfe08 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 18 Feb 2026 13:59:32 -0700 Subject: [PATCH 4/8] feat: hash --- crates/cli/Cargo.toml | 1 + crates/cli/src/lib.rs | 179 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+) diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index f00acae7d..a8585ccf7 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -18,6 +18,7 @@ duckdb-bundled = ["stac-duckdb/bundled"] [dependencies] anyhow.workspace = true +chrono.workspace = true async-stream.workspace = true axum.workspace = true clap = { workspace = true, features = ["derive"] } diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 9ed77da11..55bc8aff2 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -313,6 +313,47 @@ pub enum Command { shell: clap_complete::Shell, }, + /// Sort items by spatio-temporal hash and prefix their ids. + /// + /// Creates a sortable Z-order curve hash from each item's datetime and + /// bounding box center, then prefixes item ids with the base64-encoded + /// hash and sorts items by hash value. + Hash { + /// The input file. + /// + /// To read from standard input, pass `-` or don't provide an argument at all. + infile: Option, + + /// The output file. + /// + /// To write to standard output, pass `-` or don't provide an argument at all. + outfile: Option, + + /// Minimum spatial cell size in degrees. + #[arg(long)] + spatial_extent: f64, + + /// Minimum temporal cell size, as an ISO 8601 duration. + /// + /// Examples: P1D (1 day), PT1H (1 hour), P30D (30 days) + #[arg(long)] + temporal_extent: String, + + /// Sort items by hash value. + #[arg(long, default_value_t = false)] + sort: bool, + + /// Time range for the hasher, as a '/'-separated RFC 3339 interval. + /// + /// Examples: 2020-01-01T00:00:00Z/2025-01-01T00:00:00Z + /// + /// If provided, the hasher is created directly from this range and + /// items are streamed instead of loaded into memory (unless --sort + /// is also provided). + #[arg(long)] + time_range: Option, + }, + /// Generate a STAC collection from one or more items Collection { /// The input file. @@ -636,6 +677,92 @@ impl Rustac { clap_complete::generate(shell, &mut command, "rustac", &mut std::io::stdout()); Ok(()) } + Command::Hash { + ref infile, + ref outfile, + spatial_extent, + ref temporal_extent, + sort, + ref time_range, + } => { + let temporal_extent = parse_iso8601_duration(temporal_extent)?; + if let Some(time_range) = time_range { + let (start, end) = stac::datetime::parse(time_range)?; + let start = start + .ok_or_else(|| anyhow!("time range start must not be open"))? + .to_utc(); + let end = end + .ok_or_else(|| anyhow!("time range end must not be open"))? + .to_utc(); + let hasher = + stac::hash::Hasher::new(spatial_extent, temporal_extent, start..end)?; + if sort { + let items = self.get_item_stream(infile.as_deref()).await?; + let mut hashed: Vec<(u64, Item)> = items + .map(|item| { + let mut item = item?; + let hash = item.hash(&hasher).unwrap_or(0); + item.id = format!("{hash:016x}-{}", item.id); + Ok((hash, item)) + }) + .collect::>>()?; + hashed.sort_by_key(|(hash, _)| *hash); + let items: Vec = hashed.into_iter().map(|(_, item)| item).collect(); + self.put_item_stream(outfile.as_deref(), items.into_iter().map(Ok)) + .await + } else { + let items = self.get_item_stream(infile.as_deref()).await?; + let items = items.map(move |item| { + let mut item = item?; + let hash = item.hash(&hasher).unwrap_or(0); + item.id = format!("{hash:016x}-{}", item.id); + Ok(item) + }); + self.put_item_stream(outfile.as_deref(), items).await + } + } else { + let value = self.get(infile.as_deref()).await?; + let mut items = match value { + stac::Value::ItemCollection(ic) => ic.items, + stac::Value::Item(item) => vec![item], + other => { + return Err(anyhow!( + "expected an item collection or item, got {}", + other.type_name() + )); + } + }; + let mut collection = Collection::from_id_and_items("hash", &items); + if let Some([Some(start), Some(end)]) = + collection.extent.temporal.interval.first_mut() + { + if *start == *end { + *end = *end + temporal_extent; + } + } + let hasher = collection + .hasher(spatial_extent, temporal_extent)? + .ok_or_else(|| { + anyhow!( + "collection temporal extent does not have both start and end dates" + ) + })?; + let mut hashed: Vec<(u64, Item)> = items + .drain(..) + .map(|mut item| { + let hash = item.hash(&hasher).unwrap_or(0); + item.id = format!("{hash:016x}-{}", item.id); + (hash, item) + }) + .collect(); + if sort { + hashed.sort_by_key(|(hash, _)| *hash); + } + let items: Vec = hashed.into_iter().map(|(_, item)| item).collect(); + self.put_item_stream(outfile.as_deref(), items.into_iter().map(Ok)) + .await + } + } Command::Collection { ref infile, ref outfile, @@ -1018,5 +1145,57 @@ async fn crawl(value: stac::Value, store: StacStore) -> impl TryStream Result { + let s = s + .strip_prefix('P') + .ok_or_else(|| anyhow!("ISO 8601 duration must start with 'P': {s}"))?; + let (date_part, time_part) = if let Some(idx) = s.find('T') { + (&s[..idx], Some(&s[idx + 1..])) + } else { + (s, None) + }; + let mut total_seconds: i64 = 0; + let mut num_buf = String::new(); + for ch in date_part.chars() { + if ch.is_ascii_digit() { + num_buf.push(ch); + } else { + let n: i64 = num_buf + .parse() + .map_err(|_| anyhow!("invalid number in duration: {s}"))?; + num_buf.clear(); + match ch { + 'Y' => total_seconds += n * 365 * 86400, + 'W' => total_seconds += n * 7 * 86400, + 'D' => total_seconds += n * 86400, + _ => return Err(anyhow!("unknown date duration unit '{ch}' in: P{s}")), + } + } + } + if let Some(time_part) = time_part { + for ch in time_part.chars() { + if ch.is_ascii_digit() { + num_buf.push(ch); + } else { + let n: i64 = num_buf + .parse() + .map_err(|_| anyhow!("invalid number in duration: {s}"))?; + num_buf.clear(); + match ch { + 'H' => total_seconds += n * 3600, + 'M' => total_seconds += n * 60, + 'S' => total_seconds += n, + _ => return Err(anyhow!("unknown time duration unit '{ch}' in: P{s}")), + } + } + } + } + if total_seconds == 0 { + return Err(anyhow!("duration must be positive: P{s}")); + } + chrono::TimeDelta::try_seconds(total_seconds) + .ok_or_else(|| anyhow!("duration out of range: P{s}")) +} + #[cfg(test)] use {assert_cmd as _, rstest as _, tempfile as _}; From 170085d7f27d5c24525660822678426257adb3f6 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 18 Feb 2026 14:22:40 -0700 Subject: [PATCH 5/8] fix: add auto mode --- crates/cli/src/lib.rs | 59 ++++++++++--------- crates/core/src/hash.rs | 126 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 26 deletions(-) diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 55bc8aff2..fd1c09f55 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -316,8 +316,11 @@ pub enum Command { /// Sort items by spatio-temporal hash and prefix their ids. /// /// Creates a sortable Z-order curve hash from each item's datetime and - /// bounding box center, then prefixes item ids with the base64-encoded - /// hash and sorts items by hash value. + /// bounding box center, then prefixes item ids with the hex-encoded + /// hash and optionally sorts items by hash value. + /// + /// If --spatial-extent and --temporal-extent are not provided, the hasher + /// automatically uses maximum precision (21 bits per dimension). Hash { /// The input file. /// @@ -330,14 +333,20 @@ pub enum Command { outfile: Option, /// Minimum spatial cell size in degrees. - #[arg(long)] - spatial_extent: f64, + /// + /// Must be provided together with --temporal-extent. If neither is + /// provided, maximum precision is used automatically. + #[arg(long, requires = "temporal_extent")] + spatial_extent: Option, /// Minimum temporal cell size, as an ISO 8601 duration. /// + /// Must be provided together with --spatial-extent. If neither is + /// provided, maximum precision is used automatically. + /// /// Examples: P1D (1 day), PT1H (1 hour), P30D (30 days) - #[arg(long)] - temporal_extent: String, + #[arg(long, requires = "spatial_extent")] + temporal_extent: Option, /// Sort items by hash value. #[arg(long, default_value_t = false)] @@ -685,7 +694,10 @@ impl Rustac { sort, ref time_range, } => { - let temporal_extent = parse_iso8601_duration(temporal_extent)?; + let extents = match (spatial_extent, temporal_extent) { + (Some(s), Some(t)) => Some((s, parse_iso8601_duration(t)?)), + _ => None, + }; if let Some(time_range) = time_range { let (start, end) = stac::datetime::parse(time_range)?; let start = start @@ -694,8 +706,11 @@ impl Rustac { let end = end .ok_or_else(|| anyhow!("time range end must not be open"))? .to_utc(); - let hasher = - stac::hash::Hasher::new(spatial_extent, temporal_extent, start..end)?; + let hasher = if let Some((spatial_extent, temporal_extent)) = extents { + stac::hash::Hasher::new(spatial_extent, temporal_extent, start..end)? + } else { + stac::hash::Hasher::from_time_range(start..end)? + }; if sort { let items = self.get_item_stream(infile.as_deref()).await?; let mut hashed: Vec<(u64, Item)> = items @@ -722,7 +737,7 @@ impl Rustac { } } else { let value = self.get(infile.as_deref()).await?; - let mut items = match value { + let items = match value { stac::Value::ItemCollection(ic) => ic.items, stac::Value::Item(item) => vec![item], other => { @@ -732,23 +747,15 @@ impl Rustac { )); } }; - let mut collection = Collection::from_id_and_items("hash", &items); - if let Some([Some(start), Some(end)]) = - collection.extent.temporal.interval.first_mut() - { - if *start == *end { - *end = *end + temporal_extent; - } - } - let hasher = collection - .hasher(spatial_extent, temporal_extent)? - .ok_or_else(|| { - anyhow!( - "collection temporal extent does not have both start and end dates" - ) - })?; + let hasher = if let Some((spatial_extent, temporal_extent)) = extents { + stac::hash::Hasher::from_items(&items, spatial_extent, temporal_extent)? + .ok_or_else(|| anyhow!("no items have datetimes"))? + } else { + stac::hash::Hasher::from_items_auto(&items)? + .ok_or_else(|| anyhow!("no items have datetimes"))? + }; let mut hashed: Vec<(u64, Item)> = items - .drain(..) + .into_iter() .map(|mut item| { let hash = item.hash(&hasher).unwrap_or(0); item.id = format!("{hash:016x}-{}", item.id); diff --git a/crates/core/src/hash.rs b/crates/core/src/hash.rs index bfdfc77eb..ec5118e84 100644 --- a/crates/core/src/hash.rs +++ b/crates/core/src/hash.rs @@ -99,6 +99,132 @@ impl Hasher { }) } + /// Creates a hasher by deriving the time range from a slice of items. + /// + /// Scans all items to find the earliest and latest datetimes, then uses + /// that as the time range. Each item's datetime is determined by its + /// `datetime` property, or by its `start_datetime` and `end_datetime` + /// properties. + /// + /// Returns `None` if no items have datetimes. If all items share the same + /// datetime, the time range is extended by `temporal_extent` to ensure a + /// non-empty range. + /// + /// # Examples + /// + /// ``` + /// use chrono::TimeDelta; + /// use stac::Item; + /// use stac::hash::Hasher; + /// + /// let item: Item = stac::read("examples/simple-item.json").unwrap(); + /// let hasher = Hasher::from_items(&[item], 1.0, TimeDelta::days(1)).unwrap(); + /// assert!(hasher.is_some()); + /// ``` + pub fn from_items( + items: &[crate::Item], + spatial_extent: f64, + temporal_extent: TimeDelta, + ) -> Result, Error> { + let mut min: Option> = None; + let mut max: Option> = None; + for item in items { + let dt = item.properties.datetime; + let start = item.properties.start_datetime.or(dt); + let end = item.properties.end_datetime.or(dt); + if let Some(start) = start { + min = Some(min.map_or(start, |m: DateTime| m.min(start))); + } + if let Some(end) = end { + max = Some(max.map_or(end, |m: DateTime| m.max(end))); + } + } + match (min, max) { + (Some(start), Some(mut end)) => { + if start == end { + end = end + temporal_extent; + } + Self::new(spatial_extent, temporal_extent, start..end).map(Some) + } + _ => Ok(None), + } + } + + /// Creates a hasher that uses the maximum precision for the given time range. + /// + /// The spatial and temporal extents are chosen so that each dimension uses + /// the full 21 bits available, yielding 63-bit hashes with the finest + /// possible resolution. + /// + /// # Examples + /// + /// ``` + /// use chrono::{TimeZone, Utc}; + /// use stac::hash::Hasher; + /// + /// let hasher = Hasher::from_time_range( + /// Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap() + /// ..Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap(), + /// ) + /// .unwrap(); + /// assert_eq!(hasher.bits_per_dimension(), 21); + /// ``` + pub fn from_time_range(time_range: Range>) -> Result { + if time_range.start >= time_range.end { + return Err(Error::InvalidTimeRange); + } + let bins = (1u64 << MAX_BITS_PER_DIMENSION) as f64; + let spatial_extent = 360.0 / bins; + let total_ms = (time_range.end - time_range.start).num_milliseconds() as f64; + let extent_ms = (total_ms / bins).ceil().max(1.0); + let temporal_extent = + TimeDelta::try_milliseconds(extent_ms as i64).unwrap_or(TimeDelta::milliseconds(1)); + Self::new(spatial_extent, temporal_extent, time_range) + } + + /// Creates a maximum-precision hasher by deriving the time range from items. + /// + /// Combines the behavior of [`Hasher::from_items`] and + /// [`Hasher::from_time_range`]: scans items to find the time range, then + /// creates a hasher that uses the full 21 bits per dimension. + /// + /// Returns `None` if no items have datetimes. + /// + /// # Examples + /// + /// ``` + /// use stac::Item; + /// use stac::hash::Hasher; + /// + /// let item: Item = stac::read("examples/simple-item.json").unwrap(); + /// let hasher = Hasher::from_items_auto(&[item]).unwrap(); + /// assert!(hasher.is_some()); + /// ``` + pub fn from_items_auto(items: &[crate::Item]) -> Result, Error> { + let mut min: Option> = None; + let mut max: Option> = None; + for item in items { + let dt = item.properties.datetime; + let start = item.properties.start_datetime.or(dt); + let end = item.properties.end_datetime.or(dt); + if let Some(start) = start { + min = Some(min.map_or(start, |m: DateTime| m.min(start))); + } + if let Some(end) = end { + max = Some(max.map_or(end, |m: DateTime| m.max(end))); + } + } + match (min, max) { + (Some(start), Some(mut end)) => { + if start == end { + end = end + TimeDelta::milliseconds(1); + } + Self::from_time_range(start..end).map(Some) + } + _ => Ok(None), + } + } + /// Returns the number of bits used per dimension. pub fn bits_per_dimension(&self) -> u8 { self.bits From 6c1c9d7e0d10d84d503e87a9e81181b30442e896 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 18 Feb 2026 14:25:33 -0700 Subject: [PATCH 6/8] fix: docs --- crates/cli/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index fd1c09f55..a2f5fd8ad 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -313,7 +313,7 @@ pub enum Command { shell: clap_complete::Shell, }, - /// Sort items by spatio-temporal hash and prefix their ids. + /// Compute each item's spatio-temporal hash and prefix it's id. /// /// Creates a sortable Z-order curve hash from each item's datetime and /// bounding box center, then prefixes item ids with the hex-encoded From fe8e79de18e90ef96f729f9d0c65e2ed1941b5be Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Thu, 19 Feb 2026 05:54:53 -0700 Subject: [PATCH 7/8] fix: rename stuff --- crates/cli/src/lib.rs | 52 +++++++------ crates/core/src/collection.rs | 12 +-- crates/core/src/hash.rs | 142 +++++++++++++++++----------------- 3 files changed, 105 insertions(+), 101 deletions(-) diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index a2f5fd8ad..6c796d85d 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -319,7 +319,7 @@ pub enum Command { /// bounding box center, then prefixes item ids with the hex-encoded /// hash and optionally sorts items by hash value. /// - /// If --spatial-extent and --temporal-extent are not provided, the hasher + /// If --spatial-precision and --temporal-precision are not provided, the hasher /// automatically uses maximum precision (21 bits per dimension). Hash { /// The input file. @@ -334,33 +334,33 @@ pub enum Command { /// Minimum spatial cell size in degrees. /// - /// Must be provided together with --temporal-extent. If neither is + /// Must be provided together with --temporal-precision. If neither is /// provided, maximum precision is used automatically. - #[arg(long, requires = "temporal_extent")] - spatial_extent: Option, + #[arg(long, requires = "temporal_precision")] + spatial_precision: Option, /// Minimum temporal cell size, as an ISO 8601 duration. /// - /// Must be provided together with --spatial-extent. If neither is + /// Must be provided together with --spatial-precision. If neither is /// provided, maximum precision is used automatically. /// /// Examples: P1D (1 day), PT1H (1 hour), P30D (30 days) - #[arg(long, requires = "spatial_extent")] - temporal_extent: Option, + #[arg(long, requires = "spatial_precision")] + temporal_precision: Option, /// Sort items by hash value. #[arg(long, default_value_t = false)] sort: bool, - /// Time range for the hasher, as a '/'-separated RFC 3339 interval. + /// Temporal extent for the hasher, as a '/'-separated RFC 3339 interval. /// /// Examples: 2020-01-01T00:00:00Z/2025-01-01T00:00:00Z /// - /// If provided, the hasher is created directly from this range and + /// If provided, the hasher is created directly from this extent and /// items are streamed instead of loaded into memory (unless --sort /// is also provided). #[arg(long)] - time_range: Option, + temporal_extent: Option, }, /// Generate a STAC collection from one or more items @@ -689,27 +689,27 @@ impl Rustac { Command::Hash { ref infile, ref outfile, - spatial_extent, - ref temporal_extent, + spatial_precision, + ref temporal_precision, sort, - ref time_range, + ref temporal_extent, } => { - let extents = match (spatial_extent, temporal_extent) { + let precisions = match (spatial_precision, temporal_precision) { (Some(s), Some(t)) => Some((s, parse_iso8601_duration(t)?)), _ => None, }; - if let Some(time_range) = time_range { - let (start, end) = stac::datetime::parse(time_range)?; + if let Some(temporal_extent) = temporal_extent { + let (start, end) = stac::datetime::parse(temporal_extent)?; let start = start - .ok_or_else(|| anyhow!("time range start must not be open"))? + .ok_or_else(|| anyhow!("temporal extent start must not be open"))? .to_utc(); let end = end - .ok_or_else(|| anyhow!("time range end must not be open"))? + .ok_or_else(|| anyhow!("temporal extent end must not be open"))? .to_utc(); - let hasher = if let Some((spatial_extent, temporal_extent)) = extents { - stac::hash::Hasher::new(spatial_extent, temporal_extent, start..end)? + let hasher = if let Some((spatial_precision, temporal_precision)) = precisions { + stac::hash::Hasher::new(spatial_precision, temporal_precision, start..end)? } else { - stac::hash::Hasher::from_time_range(start..end)? + stac::hash::Hasher::from_temporal_extent(start..end)? }; if sort { let items = self.get_item_stream(infile.as_deref()).await?; @@ -747,9 +747,13 @@ impl Rustac { )); } }; - let hasher = if let Some((spatial_extent, temporal_extent)) = extents { - stac::hash::Hasher::from_items(&items, spatial_extent, temporal_extent)? - .ok_or_else(|| anyhow!("no items have datetimes"))? + let hasher = if let Some((spatial_precision, temporal_precision)) = precisions { + stac::hash::Hasher::from_items( + &items, + spatial_precision, + temporal_precision, + )? + .ok_or_else(|| anyhow!("no items have datetimes"))? } else { stac::hash::Hasher::from_items_auto(&items)? .ok_or_else(|| anyhow!("no items have datetimes"))? diff --git a/crates/core/src/collection.rs b/crates/core/src/collection.rs index 58fd68f28..a8147c64d 100644 --- a/crates/core/src/collection.rs +++ b/crates/core/src/collection.rs @@ -317,9 +317,9 @@ impl Collection { /// Creates a [Hasher](crate::hash::Hasher) from this collection's temporal extent. /// - /// The `spatial_extent` and `temporal_extent` parameters control the - /// resolution (minimum cell size) of the hasher. The time range is derived - /// from the first interval in this collection's temporal extent. + /// The `spatial_precision` and `temporal_precision` parameters control the + /// resolution (minimum cell size) of the hasher. The temporal extent is + /// derived from the first interval in this collection's temporal extent. /// /// Returns `None` if the collection's first temporal interval does not have /// both a start and end datetime. @@ -342,15 +342,15 @@ impl Collection { /// ``` pub fn hasher( &self, - spatial_extent: f64, - temporal_extent: chrono::TimeDelta, + spatial_precision: f64, + temporal_precision: chrono::TimeDelta, ) -> std::result::Result, crate::hash::Error> { let interval = self.extent.temporal.interval.first(); let (start, end) = match interval { Some([Some(start), Some(end)]) => (*start, *end), _ => return Ok(None), }; - crate::hash::Hasher::new(spatial_extent, temporal_extent, start..end).map(Some) + crate::hash::Hasher::new(spatial_precision, temporal_precision, start..end).map(Some) } } diff --git a/crates/core/src/hash.rs b/crates/core/src/hash.rs index ec5118e84..678bd1a43 100644 --- a/crates/core/src/hash.rs +++ b/crates/core/src/hash.rs @@ -6,9 +6,9 @@ //! that are nearby in space and time tend to have numerically close hash values, //! making the hashes useful for range queries and spatial indexing. //! -//! Precision is controlled by providing spatial and temporal extents that define +//! Precision is controlled by providing spatial and temporal precisions that define //! the minimum cell size in each dimension. The number of bits per dimension is -//! derived from these extents automatically. +//! derived from these precisions automatically. //! //! # Example //! @@ -40,7 +40,7 @@ const MAX_BITS_PER_DIMENSION: u8 = 21; /// /// The hasher quantizes latitude, longitude, and time into discrete bins, then /// interleaves their bits to form a Z-order curve index. The number of bits per -/// dimension is determined by the spatial and temporal extents provided at +/// dimension is determined by the spatial and temporal precisions provided at /// construction time. #[derive(Debug, Clone)] pub struct Hasher { @@ -54,38 +54,38 @@ impl Hasher { /// /// # Arguments /// - /// * `spatial_extent` — Minimum spatial cell size in degrees. Both latitude + /// * `spatial_precision` — Minimum spatial cell size in degrees. Both latitude /// and longitude are quantized at this resolution. - /// * `temporal_extent` — Minimum temporal cell size. - /// * `time_range` — The full time range that hashes must cover. Datetimes - /// outside this range are clamped to the boundaries. + /// * `temporal_precision` — Minimum temporal cell size. + /// * `temporal_extent` — The full temporal extent that hashes must cover. Datetimes + /// outside this extent are clamped to the boundaries. /// /// # Errors /// - /// Returns an error if the spatial extent is not positive and finite, the - /// temporal extent is not positive, the time range is empty, or the + /// Returns an error if the spatial precision is not positive and finite, the + /// temporal precision is not positive, the temporal extent is empty, or the /// required bits per dimension exceeds the maximum (21). pub fn new( - spatial_extent: f64, - temporal_extent: TimeDelta, - time_range: Range>, + spatial_precision: f64, + temporal_precision: TimeDelta, + temporal_extent: Range>, ) -> Result { - if spatial_extent <= 0.0 || !spatial_extent.is_finite() { - return Err(Error::InvalidSpatialExtent); + if spatial_precision <= 0.0 || !spatial_precision.is_finite() { + return Err(Error::InvalidSpatialPrecision); } - if temporal_extent <= TimeDelta::zero() { - return Err(Error::InvalidTemporalExtent); + if temporal_precision <= TimeDelta::zero() { + return Err(Error::InvalidTemporalPrecision); } - if time_range.start >= time_range.end { - return Err(Error::InvalidTimeRange); + if temporal_extent.start >= temporal_extent.end { + return Err(Error::InvalidTemporalExtent); } - let lat_bits = bits_needed(180.0 / spatial_extent); - let lon_bits = bits_needed(360.0 / spatial_extent); + let lat_bits = bits_needed(180.0 / spatial_precision); + let lon_bits = bits_needed(360.0 / spatial_precision); - let total_ms = (time_range.end - time_range.start).num_milliseconds() as f64; - let extent_ms = temporal_extent.num_milliseconds() as f64; - let time_bits = bits_needed(total_ms / extent_ms); + let total_ms = (temporal_extent.end - temporal_extent.start).num_milliseconds() as f64; + let precision_ms = temporal_precision.num_milliseconds() as f64; + let time_bits = bits_needed(total_ms / precision_ms); let bits = lat_bits.max(lon_bits).max(time_bits); if bits > MAX_BITS_PER_DIMENSION { @@ -93,22 +93,22 @@ impl Hasher { } Ok(Self { - time_start_ms: time_range.start.timestamp_millis(), + time_start_ms: temporal_extent.start.timestamp_millis(), time_total_ms: total_ms, bits, }) } - /// Creates a hasher by deriving the time range from a slice of items. + /// Creates a hasher by deriving the temporal extent from a slice of items. /// /// Scans all items to find the earliest and latest datetimes, then uses - /// that as the time range. Each item's datetime is determined by its + /// that as the temporal extent. Each item's datetime is determined by its /// `datetime` property, or by its `start_datetime` and `end_datetime` /// properties. /// /// Returns `None` if no items have datetimes. If all items share the same - /// datetime, the time range is extended by `temporal_extent` to ensure a - /// non-empty range. + /// datetime, the temporal extent is extended by `temporal_precision` to + /// ensure a non-empty range. /// /// # Examples /// @@ -123,8 +123,8 @@ impl Hasher { /// ``` pub fn from_items( items: &[crate::Item], - spatial_extent: f64, - temporal_extent: TimeDelta, + spatial_precision: f64, + temporal_precision: TimeDelta, ) -> Result, Error> { let mut min: Option> = None; let mut max: Option> = None; @@ -142,17 +142,17 @@ impl Hasher { match (min, max) { (Some(start), Some(mut end)) => { if start == end { - end = end + temporal_extent; + end = end + temporal_precision; } - Self::new(spatial_extent, temporal_extent, start..end).map(Some) + Self::new(spatial_precision, temporal_precision, start..end).map(Some) } _ => Ok(None), } } - /// Creates a hasher that uses the maximum precision for the given time range. + /// Creates a hasher that uses the maximum precision for the given temporal extent. /// - /// The spatial and temporal extents are chosen so that each dimension uses + /// The spatial and temporal precisions are chosen so that each dimension uses /// the full 21 bits available, yielding 63-bit hashes with the finest /// possible resolution. /// @@ -162,30 +162,30 @@ impl Hasher { /// use chrono::{TimeZone, Utc}; /// use stac::hash::Hasher; /// - /// let hasher = Hasher::from_time_range( + /// let hasher = Hasher::from_temporal_extent( /// Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap() /// ..Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap(), /// ) /// .unwrap(); /// assert_eq!(hasher.bits_per_dimension(), 21); /// ``` - pub fn from_time_range(time_range: Range>) -> Result { - if time_range.start >= time_range.end { - return Err(Error::InvalidTimeRange); + pub fn from_temporal_extent(temporal_extent: Range>) -> Result { + if temporal_extent.start >= temporal_extent.end { + return Err(Error::InvalidTemporalExtent); } let bins = (1u64 << MAX_BITS_PER_DIMENSION) as f64; - let spatial_extent = 360.0 / bins; - let total_ms = (time_range.end - time_range.start).num_milliseconds() as f64; - let extent_ms = (total_ms / bins).ceil().max(1.0); - let temporal_extent = - TimeDelta::try_milliseconds(extent_ms as i64).unwrap_or(TimeDelta::milliseconds(1)); - Self::new(spatial_extent, temporal_extent, time_range) + let spatial_precision = 360.0 / bins; + let total_ms = (temporal_extent.end - temporal_extent.start).num_milliseconds() as f64; + let precision_ms = (total_ms / bins).ceil().max(1.0); + let temporal_precision = + TimeDelta::try_milliseconds(precision_ms as i64).unwrap_or(TimeDelta::milliseconds(1)); + Self::new(spatial_precision, temporal_precision, temporal_extent) } - /// Creates a maximum-precision hasher by deriving the time range from items. + /// Creates a maximum-precision hasher by deriving the temporal extent from items. /// /// Combines the behavior of [`Hasher::from_items`] and - /// [`Hasher::from_time_range`]: scans items to find the time range, then + /// [`Hasher::from_temporal_extent`]: scans items to find the temporal extent, then /// creates a hasher that uses the full 21 bits per dimension. /// /// Returns `None` if no items have datetimes. @@ -219,7 +219,7 @@ impl Hasher { if start == end { end = end + TimeDelta::milliseconds(1); } - Self::from_time_range(start..end).map(Some) + Self::from_temporal_extent(start..end).map(Some) } _ => Ok(None), } @@ -237,7 +237,7 @@ impl Hasher { /// Computes a sortable hash for the given spatio-temporal point. /// - /// Datetimes outside the configured time range are clamped. Latitudes are + /// Datetimes outside the configured temporal extent are clamped. Latitudes are /// clamped to \[-90, 90\] and longitudes to \[-180, 180\]. pub fn hash(&self, datetime: DateTime, lat: f64, lon: f64) -> u64 { let lat_norm = ((lat + 90.0) / 180.0).clamp(0.0, 1.0); @@ -262,24 +262,24 @@ pub enum Error { /// The required bits per dimension exceeds the maximum. #[error( "required bits per dimension ({bits}) exceeds maximum ({MAX_BITS_PER_DIMENSION}), \ - use coarser extents" + use coarser precisions" )] TooManyBits { /// The number of bits that were required. bits: u8, }, - /// The spatial extent must be positive and finite. - #[error("spatial extent must be positive")] - InvalidSpatialExtent, + /// The spatial precision must be positive and finite. + #[error("spatial precision must be positive")] + InvalidSpatialPrecision, - /// The temporal extent must be positive. - #[error("temporal extent must be positive")] - InvalidTemporalExtent, + /// The temporal precision must be positive. + #[error("temporal precision must be positive")] + InvalidTemporalPrecision, - /// The time range must be non-empty (start < end). - #[error("time range must be non-empty")] - InvalidTimeRange, + /// The temporal extent must be non-empty (start < end). + #[error("temporal extent must be non-empty")] + InvalidTemporalExtent, } fn bits_needed(count: f64) -> u8 { @@ -307,13 +307,13 @@ mod tests { use super::*; use chrono::TimeZone; - fn test_time_range() -> Range> { + fn test_temporal_extent() -> Range> { Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap() ..Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap() } fn test_hasher() -> Hasher { - Hasher::new(1.0, TimeDelta::days(1), test_time_range()).unwrap() + Hasher::new(1.0, TimeDelta::days(1), test_temporal_extent()).unwrap() } #[test] @@ -397,34 +397,34 @@ mod tests { } #[test] - fn invalid_spatial_extent() { - assert!(Hasher::new(0.0, TimeDelta::days(1), test_time_range()).is_err()); - assert!(Hasher::new(-1.0, TimeDelta::days(1), test_time_range()).is_err()); - assert!(Hasher::new(f64::NAN, TimeDelta::days(1), test_time_range()).is_err()); - assert!(Hasher::new(f64::INFINITY, TimeDelta::days(1), test_time_range()).is_err()); + fn invalid_spatial_precision() { + assert!(Hasher::new(0.0, TimeDelta::days(1), test_temporal_extent()).is_err()); + assert!(Hasher::new(-1.0, TimeDelta::days(1), test_temporal_extent()).is_err()); + assert!(Hasher::new(f64::NAN, TimeDelta::days(1), test_temporal_extent()).is_err()); + assert!(Hasher::new(f64::INFINITY, TimeDelta::days(1), test_temporal_extent()).is_err()); } #[test] - fn invalid_temporal_extent() { - assert!(Hasher::new(1.0, TimeDelta::zero(), test_time_range()).is_err()); - assert!(Hasher::new(1.0, TimeDelta::days(-1), test_time_range()).is_err()); + fn invalid_temporal_precision() { + assert!(Hasher::new(1.0, TimeDelta::zero(), test_temporal_extent()).is_err()); + assert!(Hasher::new(1.0, TimeDelta::days(-1), test_temporal_extent()).is_err()); } #[test] - fn invalid_time_range() { + fn invalid_temporal_extent() { let dt = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap(); assert!(Hasher::new(1.0, TimeDelta::days(1), dt..dt).is_err()); } #[test] fn fine_precision() { - let hasher = Hasher::new(0.01, TimeDelta::hours(1), test_time_range()).unwrap(); + let hasher = Hasher::new(0.01, TimeDelta::hours(1), test_temporal_extent()).unwrap(); assert!(hasher.bits_per_dimension() > 10); } #[test] fn coarse_precision() { - let hasher = Hasher::new(90.0, TimeDelta::days(365), test_time_range()).unwrap(); + let hasher = Hasher::new(90.0, TimeDelta::days(365), test_temporal_extent()).unwrap(); assert!(hasher.bits_per_dimension() <= 5); } From 4580740746bac80d89120d835e67bb2e712ed919 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Thu, 5 Mar 2026 08:41:17 -0700 Subject: [PATCH 8/8] feat: make record_batch_to_json_rows public --- crates/core/src/geoarrow/json.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core/src/geoarrow/json.rs b/crates/core/src/geoarrow/json.rs index f72948c19..32cdc4cba 100644 --- a/crates/core/src/geoarrow/json.rs +++ b/crates/core/src/geoarrow/json.rs @@ -517,7 +517,8 @@ pub fn from_record_batch_reader( Ok(rows) } -pub(crate) fn record_batch_to_json_rows( +/// Converts a record batch to JSON values. +pub fn record_batch_to_json_rows( record_batch: RecordBatch, ) -> Result>, Error> { let mut rows: Vec>> =