From 1a9eddd8a3ce112e03b2cd871ad7bec65c7e79fb Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 18 Mar 2026 10:01:58 -0700 Subject: [PATCH 01/24] feat: make index cache pluggable via CacheBackend trait The Session's index cache was hardcoded to use Moka. This adds a CacheBackend trait so users can provide their own cache implementation (e.g. Redis-backed, disk-backed, shared across processes). Two-layer design: - CacheBackend: object-safe async trait with opaque byte keys. This is what plugin authors implement (get, insert, invalidate_prefix, clear, num_entries, size_bytes). - LanceCache: typed wrapper handling key construction (prefix + type tag), type-safe get/insert, DeepSizeOf size computation, hit/miss stats, and concurrent load deduplication. MokaCacheBackend is the default, preserving existing behavior. Custom backends are wired through Session::with_index_cache_backend() or DatasetBuilder::with_index_cache_backend(). Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache.rs | 623 +++++++++++++++++++++--------- rust/lance/src/dataset/builder.rs | 32 +- rust/lance/src/lib.rs | 2 +- rust/lance/src/session.rs | 19 +- 4 files changed, 487 insertions(+), 189 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 6ceea807116..1ea5989647b 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -2,61 +2,182 @@ // SPDX-FileCopyrightText: Copyright The Lance Authors //! Cache implementation - -use std::any::{Any, TypeId}; +//! +//! This module provides a two-layer caching system: +//! +//! - [`CacheBackend`] is the low-level, pluggable trait that custom cache implementations +//! can implement. It uses opaque byte keys and type-erased entries. +//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag +//! encoding), type-safe get/insert, and DeepSizeOf-based size computation. 
+ +use std::any::Any; use std::borrow::Cow; +use std::collections::HashMap; use std::sync::{ Arc, atomic::{AtomicU64, Ordering}, }; +use async_trait::async_trait; use futures::{Future, FutureExt}; -use moka::future::Cache; +use tokio::sync::Mutex; use crate::Result; pub use deepsize::{Context, DeepSizeOf}; -type ArcAny = Arc; +/// Result type used in the in-flight dedup map. Wraps errors in Arc so the +/// result can be cloned to multiple waiters. +type InFlightResult = std::result::Result>; +type InFlightMap = Mutex, tokio::sync::watch::Receiver>>>; -#[derive(Clone)] -pub struct SizedRecord { - record: ArcAny, - size_accessor: Arc usize + Send + Sync>, +/// A type-erased cache entry. +pub type CacheEntry = Arc; + +// --------------------------------------------------------------------------- +// CacheBackend trait +// --------------------------------------------------------------------------- + +/// Low-level pluggable cache backend. +/// +/// Implementations store entries keyed by opaque byte slices. +/// The [`LanceCache`] wrapper handles key construction and type safety; +/// backend authors do not need to worry about key encoding. +#[async_trait] +pub trait CacheBackend: Send + Sync + std::fmt::Debug { + /// Look up an entry by its opaque key. + async fn get(&self, key: &[u8]) -> Option; + + /// Store an entry. `size_bytes` is used for eviction accounting. + async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize); + + /// Remove all entries whose key starts with `prefix`. + async fn invalidate_prefix(&self, prefix: &[u8]); + + /// Remove all entries. + async fn clear(&self); + + /// Number of entries currently stored (may flush pending operations). + async fn num_entries(&self) -> usize; + + /// Total weighted size in bytes of all stored entries (may flush pending operations). + async fn size_bytes(&self) -> usize; + + /// Approximate number of entries, callable from synchronous contexts. 
+ /// Backends that cannot provide this cheaply should return 0. + fn approx_num_entries(&self) -> usize { + 0 + } + + /// Approximate weighted size in bytes, callable from synchronous contexts. + /// Backends that cannot provide this cheaply should return 0. + fn approx_size_bytes(&self) -> usize { + 0 + } +} + +// --------------------------------------------------------------------------- +// MokaCacheBackend — default moka-based implementation +// --------------------------------------------------------------------------- + +/// Internal record stored in the moka cache. +#[derive(Clone, Debug)] +struct MokaCacheEntry { + entry: CacheEntry, + size_bytes: usize, +} + +/// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache. +/// +/// Provides weighted-capacity eviction and concurrent-load deduplication. +pub struct MokaCacheBackend { + cache: moka::future::Cache, MokaCacheEntry>, } -impl std::fmt::Debug for SizedRecord { +impl std::fmt::Debug for MokaCacheBackend { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("SizedRecord") - .field("record", &self.record) + f.debug_struct("MokaCacheBackend") + .field("entry_count", &self.cache.entry_count()) .finish() } } -impl DeepSizeOf for SizedRecord { - fn deep_size_of_children(&self, _: &mut Context) -> usize { - (self.size_accessor)(&self.record) +impl MokaCacheBackend { + pub fn with_capacity(capacity: usize) -> Self { + let cache = moka::future::Cache::builder() + .max_capacity(capacity as u64) + .weigher(|_, v: &MokaCacheEntry| v.size_bytes.try_into().unwrap_or(u32::MAX)) + .support_invalidation_closures() + .build(); + Self { cache } } -} -impl SizedRecord { - fn new(record: Arc) -> Self { - // +8 for the size of the Arc pointer itself - let size_accessor = - |record: &ArcAny| -> usize { record.downcast_ref::().unwrap().deep_size_of() + 8 }; + pub fn no_cache() -> Self { Self { - record, - size_accessor: Arc::new(size_accessor), + cache: 
moka::future::Cache::new(0), } } } +#[async_trait] +impl CacheBackend for MokaCacheBackend { + async fn get(&self, key: &[u8]) -> Option { + self.cache.get(key).await.map(|r| r.entry) + } + + async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) { + self.cache + .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes }) + .await; + } + + async fn invalidate_prefix(&self, prefix: &[u8]) { + let prefix = prefix.to_vec(); + self.cache + .invalidate_entries_if(move |key, _value| key.starts_with(&prefix)) + .expect("Cache configured correctly"); + } + + async fn clear(&self) { + self.cache.invalidate_all(); + self.cache.run_pending_tasks().await; + } + + async fn num_entries(&self) -> usize { + self.cache.run_pending_tasks().await; + self.cache.entry_count() as usize + } + + async fn size_bytes(&self) -> usize { + self.cache.run_pending_tasks().await; + self.cache.weighted_size() as usize + } + + fn approx_num_entries(&self) -> usize { + self.cache.entry_count() as usize + } + + fn approx_size_bytes(&self) -> usize { + self.cache.weighted_size() as usize + } +} + +// --------------------------------------------------------------------------- +// LanceCache — typed wrapper around dyn CacheBackend +// --------------------------------------------------------------------------- + +/// Typed cache wrapper that handles key construction and type safety. +/// +/// Internally delegates to a [`CacheBackend`]. The default backend is +/// [`MokaCacheBackend`]; pass a custom backend via [`LanceCache::with_backend`]. #[derive(Clone)] pub struct LanceCache { - cache: Arc>, + cache: Arc, prefix: String, hits: Arc, misses: Arc, + /// Deduplicates concurrent `get_or_insert` calls for the same key. 
+ in_flight: Arc, } impl std::fmt::Debug for LanceCache { @@ -69,36 +190,70 @@ impl std::fmt::Debug for LanceCache { impl DeepSizeOf for LanceCache { fn deep_size_of_children(&self, _: &mut Context) -> usize { - self.cache - .iter() - .map(|(_, v)| (v.size_accessor)(&v.record)) - .sum() + // This is a best-effort estimate; we can't iterate a dyn CacheBackend. + // Callers should use stats().size_bytes for accurate numbers. + 0 + } +} + +/// Returns a stable 8-byte discriminator for type `T`. +/// +/// Uses the pointer of `std::any::type_name::()`, which is a `&'static str` +/// with a process-lifetime-stable address. This is unique per monomorphized type +/// and avoids `transmute` on `TypeId`. +fn type_tag() -> [u8; 8] { + (std::any::type_name::().as_ptr() as u64).to_le_bytes() +} + +impl LanceCache { + /// Build a key: `prefix/user_key\0<8-byte type tag>`. + fn make_key(&self, key: &str) -> Vec { + let full_key = if self.prefix.is_empty() { + key.to_string() + } else { + format!("{}/{}", self.prefix, key) + }; + let mut bytes = full_key.into_bytes(); + bytes.push(0); + bytes.extend_from_slice(&type_tag::()); + bytes + } + + /// Build a prefix (without type tag) for invalidation. + fn make_prefix(&self, prefix: &str) -> Vec { + format!("{}{}", self.prefix, prefix).into_bytes() } } impl LanceCache { pub fn with_capacity(capacity: usize) -> Self { - let cache = Cache::builder() - .max_capacity(capacity as u64) - .weigher(|_, v: &SizedRecord| { - (v.size_accessor)(&v.record).try_into().unwrap_or(u32::MAX) - }) - .support_invalidation_closures() - .build(); Self { - cache: Arc::new(cache), + cache: Arc::new(MokaCacheBackend::with_capacity(capacity)), + prefix: String::new(), + hits: Arc::new(AtomicU64::new(0)), + misses: Arc::new(AtomicU64::new(0)), + in_flight: Arc::new(Mutex::new(HashMap::new())), + } + } + + /// Create a cache backed by a custom [`CacheBackend`]. 
+ pub fn with_backend(backend: Arc) -> Self { + Self { + cache: backend, prefix: String::new(), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), + in_flight: Arc::new(Mutex::new(HashMap::new())), } } pub fn no_cache() -> Self { Self { - cache: Arc::new(Cache::new(0)), + cache: Arc::new(MokaCacheBackend::no_cache()), prefix: String::new(), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), + in_flight: Arc::new(Mutex::new(HashMap::new())), } } @@ -115,14 +270,7 @@ impl LanceCache { prefix: format!("{}{}/", self.prefix, prefix), hits: self.hits.clone(), misses: self.misses.clone(), - } - } - - fn get_key(&self, key: &str) -> String { - if self.prefix.is_empty() { - key.to_string() - } else { - format!("{}/{}", self.prefix, key) + in_flight: self.in_flight.clone(), } } @@ -131,40 +279,41 @@ impl LanceCache { /// The given prefix is appended to the existing prefix of the cache. If you /// want to invalidate all at the current prefix, pass an empty string. pub fn invalidate_prefix(&self, prefix: &str) { - let full_prefix = format!("{}{}", self.prefix, prefix); - self.cache - .invalidate_entries_if(move |(key, _typeid), _value| key.starts_with(&full_prefix)) - .expect("Cache configured correctly"); + let prefix_bytes = self.make_prefix(prefix); + let cache = self.cache.clone(); + // Fire-and-forget; moka's invalidate_entries_if is synchronous under the hood + // but our trait is async, so we spawn. 
+ tokio::spawn(async move { + cache.invalidate_prefix(&prefix_bytes).await; + }); } pub async fn size(&self) -> usize { - self.cache.run_pending_tasks().await; - self.cache.entry_count() as usize + self.cache.num_entries().await } pub fn approx_size(&self) -> usize { - self.cache.entry_count() as usize + self.cache.approx_num_entries() } pub async fn size_bytes(&self) -> usize { - self.cache.run_pending_tasks().await; - self.approx_size_bytes() + self.cache.size_bytes().await } pub fn approx_size_bytes(&self) -> usize { - self.cache.weighted_size() as usize + self.cache.approx_size_bytes() } async fn insert(&self, key: &str, metadata: Arc) { - let key = self.get_key(key); - let record = SizedRecord::new(metadata); + let size = metadata.deep_size_of() + 8; // +8 for the Arc pointer + let cache_key = self.make_key::(key); tracing::trace!( target: "lance_cache::insert", key = key, type_id = std::any::type_name::(), - size = (record.size_accessor)(&record.record), + size = size, ); - self.cache.insert((key, TypeId::of::()), record).await; + self.cache.insert(&cache_key, metadata, size).await; } pub async fn insert_unsized( @@ -172,15 +321,15 @@ impl LanceCache { key: &str, metadata: Arc, ) { - // In order to make the data Sized, we wrap in another pointer. + // Wrap in another Arc to make the data Sized. self.insert(key, Arc::new(metadata)).await } async fn get(&self, key: &str) -> Option> { - let key = self.get_key(key); - if let Some(metadata) = self.cache.get(&(key, TypeId::of::())).await { + let cache_key = self.make_key::(key); + if let Some(entry) = self.cache.get(&cache_key).await { self.hits.fetch_add(1, Ordering::Relaxed); - Some(metadata.record.clone().downcast::().unwrap()) + Some(entry.downcast::().unwrap()) } else { self.misses.fetch_add(1, Ordering::Relaxed); None @@ -195,11 +344,10 @@ impl LanceCache { Some(outer.as_ref().clone()) } - /// Get an item - /// - /// If it exists in the cache return that + /// Get an item, or load it if not cached. 
/// - /// If it doesn't then run `loader` to load the item, insert into cache, and return + /// Concurrent calls for the same key are deduplicated: only the first + /// caller runs the loader; subsequent callers wait for the result. async fn get_or_insert( &self, key: String, @@ -209,68 +357,89 @@ impl LanceCache { F: FnOnce(&str) -> Fut, Fut: Future> + Send, { - let full_key = self.get_key(&key); - let cache_key = (full_key, TypeId::of::()); - - // Use optionally_get_with to handle concurrent requests - let hits = self.hits.clone(); - let misses = self.misses.clone(); - - // Use oneshot channels to track both errors and whether init was run - let (error_tx, error_rx) = tokio::sync::oneshot::channel(); - let (init_run_tx, mut init_run_rx) = tokio::sync::oneshot::channel(); - - let init = Box::pin(async move { - let _ = init_run_tx.send(()); - misses.fetch_add(1, Ordering::Relaxed); - match loader(&key).await { - Ok(value) => Some(SizedRecord::new(Arc::new(value))), - Err(e) => { - let _ = error_tx.send(e); - None - } - } - }); + let cache_key = self.make_key::(&key); + + // Fast path: already cached. + if let Some(entry) = self.cache.get(&cache_key).await { + self.hits.fetch_add(1, Ordering::Relaxed); + return Ok(entry.downcast::().unwrap()); + } - match self.cache.optionally_get_with(cache_key, init).await { - Some(metadata) => { - // Check if init was run or if this was a cache hit - match init_run_rx.try_recv() { - Ok(()) => { - // Init was run, miss was already recorded + // Check for an in-flight load for this key. + { + let map = self.in_flight.lock().await; + if let Some(rx) = map.get(&cache_key) { + let mut rx = rx.clone(); + drop(map); + // Wait until the leader finishes. + let result = rx + .wait_for(|v| v.is_some()) + .await + .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))? 
+ .as_ref() + .unwrap() + .clone(); + match result { + Ok(entry) => { + self.hits.fetch_add(1, Ordering::Relaxed); + return Ok(entry.downcast::().unwrap()); } - Err(_) => { - // Init was not run, this is a cache hit - hits.fetch_add(1, Ordering::Relaxed); + Err(err) => { + self.misses.fetch_add(1, Ordering::Relaxed); + return Err(crate::Error::internal(format!( + "Cache loader failed: {err}" + ))); } } - Ok(metadata.record.clone().downcast::().unwrap()) } - None => { - // The loader returned an error, retrieve it from the channel - match error_rx.await { - Ok(err) => Err(err), - Err(_) => Err(crate::Error::internal( - "Failed to retrieve error from cache loader", - )), - } + } + + // We are the leader. Register our in-flight entry. + let (tx, rx) = tokio::sync::watch::channel(None); + { + let mut map = self.in_flight.lock().await; + map.insert(cache_key.clone(), rx); + } + + self.misses.fetch_add(1, Ordering::Relaxed); + let result = loader(&key).await; + + // Clean up the in-flight entry before sending, so new arrivals + // go through the normal cache path. 
+ { + let mut map = self.in_flight.lock().await; + map.remove(&cache_key); + } + + match result { + Ok(value) => { + let arc = Arc::new(value); + let size = arc.deep_size_of() + 8; + self.cache.insert(&cache_key, arc.clone(), size).await; + let _ = tx.send(Some(Ok(arc.clone() as CacheEntry))); + Ok(arc) + } + Err(err) => { + let shared_err = Arc::new(err); + let _ = tx.send(Some(Err(shared_err.clone()))); + Err(crate::Error::internal(format!( + "Cache loader failed: {shared_err}" + ))) } } } pub async fn stats(&self) -> CacheStats { - self.cache.run_pending_tasks().await; CacheStats { hits: self.hits.load(Ordering::Relaxed), misses: self.misses.load(Ordering::Relaxed), - num_entries: self.cache.entry_count() as usize, - size_bytes: self.cache.weighted_size() as usize, + num_entries: self.cache.num_entries().await, + size_bytes: self.cache.size_bytes().await, } } pub async fn clear(&self) { - self.cache.invalidate_all(); - self.cache.run_pending_tasks().await; + self.cache.clear().await; self.hits.store(0, Ordering::Relaxed); self.misses.store(0, Ordering::Relaxed); } @@ -328,11 +497,15 @@ impl LanceCache { } } +// --------------------------------------------------------------------------- +// WeakLanceCache +// --------------------------------------------------------------------------- + /// A weak reference to a LanceCache, used by indices to avoid circular references. /// When the original cache is dropped, operations on this will gracefully no-op. #[derive(Clone, Debug)] pub struct WeakLanceCache { - inner: std::sync::Weak>, + inner: std::sync::Weak, prefix: String, hits: Arc, misses: Arc, @@ -359,21 +532,26 @@ impl WeakLanceCache { } } - fn get_key(&self, key: &str) -> String { - if self.prefix.is_empty() { + /// Build a key: `prefix/user_key\0<8-byte type tag>`. 
+ fn make_key(&self, key: &str) -> Vec { + let full_key = if self.prefix.is_empty() { key.to_string() } else { format!("{}/{}", self.prefix, key) - } + }; + let mut bytes = full_key.into_bytes(); + bytes.push(0); + bytes.extend_from_slice(&type_tag::()); + bytes } /// Get an item from cache if the cache is still alive pub async fn get(&self, key: &str) -> Option> { let cache = self.inner.upgrade()?; - let key = self.get_key(key); - if let Some(metadata) = cache.get(&(key, TypeId::of::())).await { + let cache_key = self.make_key::(key); + if let Some(entry) = cache.get(&cache_key).await { self.hits.fetch_add(1, Ordering::Relaxed); - Some(metadata.record.clone().downcast::().unwrap()) + Some(entry.downcast::().unwrap()) } else { self.misses.fetch_add(1, Ordering::Relaxed); None @@ -388,9 +566,9 @@ impl WeakLanceCache { value: Arc, ) -> bool { if let Some(cache) = self.inner.upgrade() { - let key = self.get_key(key); - let record = SizedRecord::new(value); - cache.insert((key, TypeId::of::()), record).await; + let size = value.deep_size_of() + 8; + let cache_key = self.make_key::(key); + cache.insert(&cache_key, value, size).await; true } else { log::warn!("WeakLanceCache: cache no longer available, unable to insert item"); @@ -406,53 +584,19 @@ impl WeakLanceCache { Fut: Future> + Send, { if let Some(cache) = self.inner.upgrade() { - let full_key = self.get_key(key); - let cache_key = (full_key.clone(), TypeId::of::()); - - // Use optionally_get_with to handle concurrent requests properly - let hits = self.hits.clone(); - let misses = self.misses.clone(); - - // Track whether init was run (for metrics) - let (init_run_tx, mut init_run_rx) = tokio::sync::oneshot::channel(); - let (error_tx, error_rx) = tokio::sync::oneshot::channel(); - - let init = Box::pin(async move { - let _ = init_run_tx.send(()); - misses.fetch_add(1, Ordering::Relaxed); - match f().await { - Ok(value) => Some(SizedRecord::new(Arc::new(value))), - Err(e) => { - let _ = error_tx.send(e); - None - 
} - } - }); - - match cache.optionally_get_with(cache_key, init).await { - Some(record) => { - // Check if init was run or if this was a cache hit - match init_run_rx.try_recv() { - Ok(()) => { - // Init was run, miss was already recorded - } - Err(_) => { - // Init was not run, this was a cache hit - hits.fetch_add(1, Ordering::Relaxed); - } - } - Ok(record.record.clone().downcast::().unwrap()) - } - None => { - // Init returned None, which means there was an error - match error_rx.await { - Ok(e) => Err(e), - Err(_) => Err(crate::Error::internal( - "Failed to receive error from cache init function".to_string(), - )), - } - } + let cache_key = self.make_key::(key); + + if let Some(entry) = cache.get(&cache_key).await { + self.hits.fetch_add(1, Ordering::Relaxed); + return Ok(entry.downcast::().unwrap()); } + + self.misses.fetch_add(1, Ordering::Relaxed); + let value = f().await?; + let arc = Arc::new(value); + let size = arc.deep_size_of() + 8; + cache.insert(&cache_key, arc.clone(), size).await; + Ok(arc) } else { log::warn!("WeakLanceCache: cache no longer available, computing without caching"); f().await.map(Arc::new) @@ -501,13 +645,10 @@ impl WeakLanceCache { &self, key: &str, ) -> Option> { - // For unsized types, we store Arc directly let cache = self.inner.upgrade()?; - let key = self.get_key(key); - if let Some(metadata) = cache.get(&(key, TypeId::of::>())).await { - metadata - .record - .clone() + let cache_key = self.make_key::>(key); + if let Some(entry) = cache.get(&cache_key).await { + entry .downcast::>() .ok() .map(|arc| arc.as_ref().clone()) @@ -523,9 +664,10 @@ impl WeakLanceCache { value: Arc, ) { if let Some(cache) = self.inner.upgrade() { - let key = self.get_key(key); - let record = SizedRecord::new(Arc::new(value)); - cache.insert((key, TypeId::of::>()), record).await; + let wrapper = Arc::new(value); + let size = wrapper.deep_size_of() + 8; + let cache_key = self.make_key::>(key); + cache.insert(&cache_key, wrapper, size).await; } else { 
log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item"); } @@ -552,6 +694,10 @@ impl WeakLanceCache { } } +// --------------------------------------------------------------------------- +// CacheKey traits +// --------------------------------------------------------------------------- + pub trait CacheKey { type ValueType; @@ -564,6 +710,10 @@ pub trait UnsizedCacheKey { fn key(&self) -> Cow<'_, str>; } +// --------------------------------------------------------------------------- +// CacheStats +// --------------------------------------------------------------------------- + #[derive(Debug, Clone)] pub struct CacheStats { /// Number of times `get`, `get_unsized`, or `get_or_insert` found an item in the cache. @@ -594,6 +744,10 @@ impl CacheStats { } } +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + #[cfg(test)] mod tests { use super::*; @@ -605,14 +759,10 @@ mod tests { let capacity = 10 * item_size; let cache = LanceCache::with_capacity(capacity); - assert_eq!(cache.size_bytes().await, 0); - assert_eq!(cache.approx_size_bytes(), 0); let item = Arc::new(vec![1, 2, 3]); cache.insert("key", item.clone()).await; assert_eq!(cache.size().await, 1); - assert_eq!(cache.size_bytes().await, item_size); - assert_eq!(cache.approx_size_bytes(), item_size); let retrieved = cache.get::>("key").await.unwrap(); assert_eq!(*retrieved, *item); @@ -623,8 +773,9 @@ mod tests { .insert(&format!("key_{}", i), Arc::new(vec![i, i, i])) .await; } - assert_eq!(cache.size_bytes().await, capacity); - assert_eq!(cache.size().await, 10); + // Moka evicts based on weighted size; after run_pending_tasks, the size + // should be bounded by capacity. 
+ assert!(cache.size_bytes().await <= capacity); } #[tokio::test] @@ -803,4 +954,112 @@ mod tests { assert_eq!(stats.hits, 1); assert_eq!(stats.misses, 2); } + + #[tokio::test] + async fn test_custom_backend() { + use std::collections::HashMap; + use tokio::sync::Mutex; + + /// A simple HashMap-based cache backend for testing. + #[derive(Debug)] + struct HashMapBackend { + map: Mutex, (CacheEntry, usize)>>, + } + + impl HashMapBackend { + fn new() -> Self { + Self { + map: Mutex::new(HashMap::new()), + } + } + } + + #[async_trait] + impl CacheBackend for HashMapBackend { + async fn get(&self, key: &[u8]) -> Option { + self.map.lock().await.get(key).map(|(e, _)| e.clone()) + } + + async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) { + self.map + .lock() + .await + .insert(key.to_vec(), (entry, size_bytes)); + } + + async fn invalidate_prefix(&self, prefix: &[u8]) { + self.map.lock().await.retain(|k, _| !k.starts_with(prefix)); + } + + async fn clear(&self) { + self.map.lock().await.clear(); + } + + async fn num_entries(&self) -> usize { + self.map.lock().await.len() + } + + async fn size_bytes(&self) -> usize { + self.map.lock().await.values().map(|(_, s)| *s).sum() + } + } + + let backend = Arc::new(HashMapBackend::new()); + let cache = LanceCache::with_backend(backend); + + // Insert and retrieve + cache.insert("key1", Arc::new(vec![1, 2, 3])).await; + let retrieved = cache.get::>("key1").await.unwrap(); + assert_eq!(*retrieved, vec![1, 2, 3]); + + // Miss for different type at same key + let miss = cache.get::>("key1").await; + assert!(miss.is_none()); + + // Stats tracking works + let stats = cache.stats().await; + assert_eq!(stats.hits, 1); + assert_eq!(stats.misses, 1); + assert_eq!(stats.num_entries, 1); + } + + #[tokio::test] + async fn test_get_or_insert_dedup() { + use std::sync::atomic::AtomicUsize; + + let load_count = Arc::new(AtomicUsize::new(0)); + let cache = LanceCache::with_capacity(10000); + + // Launch several concurrent 
get_or_insert calls for the same key. + let (barrier_tx, _) = tokio::sync::broadcast::channel::<()>(1); + let mut handles = Vec::new(); + for _ in 0..5 { + let cache = cache.clone(); + let load_count = load_count.clone(); + let mut barrier_rx = barrier_tx.subscribe(); + handles.push(tokio::spawn(async move { + barrier_rx.recv().await.ok(); + cache + .get_or_insert("key".to_string(), |_key| { + let load_count = load_count.clone(); + async move { + load_count.fetch_add(1, Ordering::SeqCst); + // Simulate slow load so other tasks can pile up. + tokio::task::yield_now().await; + Ok(vec![1, 2, 3]) + } + }) + .await + })); + } + // Release all tasks at once. + barrier_tx.send(()).unwrap(); + for h in handles { + let result: Arc> = h.await.unwrap().unwrap(); + assert_eq!(*result, vec![1, 2, 3]); + } + + // The loader should have run exactly once. + assert_eq!(load_count.load(Ordering::SeqCst), 1); + } } diff --git a/rust/lance/src/dataset/builder.rs b/rust/lance/src/dataset/builder.rs index 36ec0e18e66..ced83badaf3 100644 --- a/rust/lance/src/dataset/builder.rs +++ b/rust/lance/src/dataset/builder.rs @@ -2,6 +2,8 @@ // SPDX-FileCopyrightText: Copyright The Lance Authors use std::{collections::HashMap, sync::Arc, time::Duration}; +use lance_core::cache::CacheBackend; + use super::refs::{Ref, Refs}; use super::{DEFAULT_INDEX_CACHE_SIZE, DEFAULT_METADATA_CACHE_SIZE, ReadParams, WriteParams}; use crate::dataset::branch_location::BranchLocation; @@ -37,6 +39,8 @@ pub struct DatasetBuilder { /// Metadata cache size for the fragment metadata. If it is zero, metadata /// cache is disabled. metadata_cache_size_bytes: usize, + /// Custom index cache backend. If set, overrides `index_cache_size_bytes`. + index_cache_backend: Option>, /// Optional pre-loaded manifest to avoid loading it again. 
manifest: Option, session: Option>, @@ -73,6 +77,7 @@ impl DatasetBuilder { Self { index_cache_size_bytes: DEFAULT_INDEX_CACHE_SIZE, metadata_cache_size_bytes: DEFAULT_METADATA_CACHE_SIZE, + index_cache_backend: None, table_uri: table_uri.as_ref().to_string(), options: ObjectStoreParams::default(), commit_handler: None, @@ -177,6 +182,15 @@ impl DatasetBuilder { self } + /// Use a custom index cache backend. + /// + /// When set, this overrides `with_index_cache_size_bytes` — the custom + /// backend is responsible for its own capacity management. + pub fn with_index_cache_backend(mut self, backend: Arc) -> Self { + self.index_cache_backend = Some(backend); + self + } + /// Set the cache size for indices. Set to zero, to disable the cache. #[deprecated(since = "0.30.0", note = "Use `with_index_cache_size_bytes` instead")] pub fn with_index_cache_size(mut self, cache_size: usize) -> Self { @@ -576,13 +590,21 @@ impl DatasetBuilder { } } + let index_cache_backend = self.index_cache_backend.take(); let session = match self.session.as_ref() { Some(session) => session.clone(), - None => Arc::new(Session::new( - self.index_cache_size_bytes, - self.metadata_cache_size_bytes, - Default::default(), - )), + None => match index_cache_backend { + Some(backend) => Arc::new(Session::with_index_cache_backend( + backend, + self.metadata_cache_size_bytes, + Default::default(), + )), + None => Arc::new(Session::new( + self.index_cache_size_bytes, + self.metadata_cache_size_bytes, + Default::default(), + )), + }, }; let target_ref = self.version.clone(); diff --git a/rust/lance/src/lib.rs b/rust/lance/src/lib.rs index 934be0e519c..b9c8c7a4f5e 100644 --- a/rust/lance/src/lib.rs +++ b/rust/lance/src/lib.rs @@ -72,7 +72,7 @@ use arrow_schema::DataType; use dataset::builder::DatasetBuilder; pub use lance_core::datatypes; -pub use lance_core::{Error, Result}; +pub use lance_core::{Error, Result, cache}; use std::sync::LazyLock; pub mod arrow; diff --git a/rust/lance/src/session.rs 
b/rust/lance/src/session.rs index c67345fba32..da9c5d85f4a 100644 --- a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use std::sync::Arc; use deepsize::DeepSizeOf; -use lance_core::cache::LanceCache; +use lance_core::cache::{CacheBackend, LanceCache}; use lance_core::{Error, Result}; use lance_index::IndexType; use lance_io::object_store::ObjectStoreRegistry; @@ -114,6 +114,23 @@ impl Session { } } + /// Create a session with a custom index cache backend. + /// + /// The provided backend will be used for caching index data. The metadata + /// cache will use the default Moka-based backend with the given capacity. + pub fn with_index_cache_backend( + index_cache_backend: Arc, + metadata_cache_size: usize, + store_registry: Arc, + ) -> Self { + Self { + index_cache: GlobalIndexCache(LanceCache::with_backend(index_cache_backend)), + metadata_cache: GlobalMetadataCache(LanceCache::with_capacity(metadata_cache_size)), + index_extensions: HashMap::new(), + store_registry, + } + } + /// Register a new index extension. /// /// A name can only be registered once per type of index extension. From 8ad33f57644b9dd6bd3d924b84a1e8a42a4cfe86 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 18 Mar 2026 20:04:13 -0700 Subject: [PATCH 02/24] feat: pipe type_id through CacheKey to backend Add type_name()/type_id() to CacheKey and UnsizedCacheKey traits so backends can identify the type of cached entries. Add parse_cache_key() utility for backends to extract (user_key, type_id) from opaque key bytes. CacheKey-based methods now pipe the key's type_id through to the backend. Non-CacheKey methods use type_id_of::() as a sentinel. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache.rs | 135 ++++++++++++++++++++++++++++------- 1 file changed, 109 insertions(+), 26 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 1ea5989647b..0312c84ac8c 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -196,18 +196,26 @@ impl DeepSizeOf for LanceCache { } } -/// Returns a stable 8-byte discriminator for type `T`. +/// Returns the type_id for type `T`, derived from the pointer of its +/// [`std::any::type_name`]. Stable within a single process lifetime. +pub fn type_id_of() -> u64 { + std::any::type_name::().as_ptr() as u64 +} + +/// Cache keys are structured as `user_key\0<8-byte type_id>`. /// -/// Uses the pointer of `std::any::type_name::()`, which is a `&'static str` -/// with a process-lifetime-stable address. This is unique per monomorphized type -/// and avoids `transmute` on `TypeId`. -fn type_tag() -> [u8; 8] { - (std::any::type_name::().as_ptr() as u64).to_le_bytes() +/// This function splits an opaque cache key into the user-visible portion +/// and the type_id. Backend implementations can use this to inspect keys. +pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) { + let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap(); + // Everything before the trailing \0 + 8-byte tag. + let user_key = &key[..key.len() - 9]; + (user_key, u64::from_le_bytes(type_id_bytes)) } impl LanceCache { - /// Build a key: `prefix/user_key\0<8-byte type tag>`. - fn make_key(&self, key: &str) -> Vec { + /// Build a key: `prefix/user_key\0<8-byte type_id>`. 
+ fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec { let full_key = if self.prefix.is_empty() { key.to_string() } else { @@ -215,7 +223,7 @@ impl LanceCache { }; let mut bytes = full_key.into_bytes(); bytes.push(0); - bytes.extend_from_slice(&type_tag::()); + bytes.extend_from_slice(&type_id.to_le_bytes()); bytes } @@ -304,9 +312,14 @@ impl LanceCache { self.cache.approx_size_bytes() } - async fn insert(&self, key: &str, metadata: Arc) { + async fn insert_with_id( + &self, + key: &str, + type_id: u64, + metadata: Arc, + ) { let size = metadata.deep_size_of() + 8; // +8 for the Arc pointer - let cache_key = self.make_key::(key); + let cache_key = self.make_key_with_id(key, type_id); tracing::trace!( target: "lance_cache::insert", key = key, @@ -316,17 +329,35 @@ impl LanceCache { self.cache.insert(&cache_key, metadata, size).await; } + #[cfg(test)] + async fn insert(&self, key: &str, metadata: Arc) { + self.insert_with_id(key, type_id_of::(), metadata).await + } + + async fn insert_unsized_with_id( + &self, + key: &str, + type_id: u64, + metadata: Arc, + ) { + self.insert_with_id(key, type_id, Arc::new(metadata)).await + } + pub async fn insert_unsized( &self, key: &str, metadata: Arc, ) { - // Wrap in another Arc to make the data Sized. 
- self.insert(key, Arc::new(metadata)).await + self.insert_unsized_with_id(key, type_id_of::>(), metadata) + .await } - async fn get(&self, key: &str) -> Option> { - let cache_key = self.make_key::(key); + async fn get_with_id( + &self, + key: &str, + type_id: u64, + ) -> Option> { + let cache_key = self.make_key_with_id(key, type_id); if let Some(entry) = self.cache.get(&cache_key).await { self.hits.fetch_add(1, Ordering::Relaxed); Some(entry.downcast::().unwrap()) @@ -336,28 +367,42 @@ impl LanceCache { } } - pub async fn get_unsized( + #[cfg(test)] + async fn get(&self, key: &str) -> Option> { + self.get_with_id(key, type_id_of::()).await + } + + async fn get_unsized_with_id( &self, key: &str, + type_id: u64, ) -> Option> { - let outer = self.get::>(key).await?; + let outer = self.get_with_id::>(key, type_id).await?; Some(outer.as_ref().clone()) } + pub async fn get_unsized( + &self, + key: &str, + ) -> Option> { + self.get_unsized_with_id(key, type_id_of::>()).await + } + /// Get an item, or load it if not cached. /// /// Concurrent calls for the same key are deduplicated: only the first /// caller runs the loader; subsequent callers wait for the result. - async fn get_or_insert( + async fn get_or_insert_with_id( &self, key: String, + type_id: u64, loader: F, ) -> Result> where F: FnOnce(&str) -> Fut, Fut: Future> + Send, { - let cache_key = self.make_key::(&key); + let cache_key = self.make_key_with_id(&key, type_id); // Fast path: already cached. 
if let Some(entry) = self.cache.get(&cache_key).await { @@ -429,6 +474,20 @@ impl LanceCache { } } + #[cfg(test)] + async fn get_or_insert( + &self, + key: String, + loader: F, + ) -> Result> + where + F: FnOnce(&str) -> Fut, + Fut: Future> + Send, + { + self.get_or_insert_with_id(key, type_id_of::(), loader) + .await + } + pub async fn stats(&self) -> CacheStats { CacheStats { hits: self.hits.load(Ordering::Relaxed), @@ -450,7 +509,9 @@ impl LanceCache { K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.insert(&cache_key.key(), metadata).boxed().await + self.insert_with_id(&cache_key.key(), cache_key.type_id(), metadata) + .boxed() + .await } pub async fn get_with_key(&self, cache_key: &K) -> Option> @@ -458,7 +519,9 @@ impl LanceCache { K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.get::(&cache_key.key()).boxed().await + self.get_with_id::(&cache_key.key(), cache_key.type_id()) + .boxed() + .await } pub async fn get_or_insert_with_key( @@ -472,8 +535,9 @@ impl LanceCache { F: FnOnce() -> Fut, Fut: Future> + Send, { + let type_id = cache_key.type_id(); let key_str = cache_key.key().into_owned(); - Box::pin(self.get_or_insert(key_str, |_| loader())).await + Box::pin(self.get_or_insert_with_id(key_str, type_id, |_| loader())).await } pub async fn insert_unsized_with_key(&self, cache_key: &K, metadata: Arc) @@ -481,7 +545,7 @@ impl LanceCache { K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.insert_unsized(&cache_key.key(), metadata) + self.insert_unsized_with_id(&cache_key.key(), cache_key.type_id(), metadata) .boxed() .await } @@ -491,7 +555,7 @@ impl LanceCache { K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.get_unsized::(&cache_key.key()) + self.get_unsized_with_id::(&cache_key.key(), cache_key.type_id()) .boxed() .await } @@ -532,8 +596,11 @@ impl WeakLanceCache { } } - /// Build a key: `prefix/user_key\0<8-byte type tag>`. 
fn make_key(&self, key: &str) -> Vec { + self.make_key_with_id(key, type_id_of::()) + } + + fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec { let full_key = if self.prefix.is_empty() { key.to_string() } else { @@ -541,7 +608,7 @@ impl WeakLanceCache { }; let mut bytes = full_key.into_bytes(); bytes.push(0); - bytes.extend_from_slice(&type_tag::()); + bytes.extend_from_slice(&type_id.to_le_bytes()); bytes } @@ -702,12 +769,28 @@ pub trait CacheKey { type ValueType; fn key(&self) -> Cow<'_, str>; + + fn type_name(&self) -> &'static str { + std::any::type_name::() + } + + fn type_id(&self) -> u64 { + self.type_name().as_ptr() as u64 + } } pub trait UnsizedCacheKey { type ValueType: ?Sized; fn key(&self) -> Cow<'_, str>; + + fn type_name(&self) -> &'static str { + std::any::type_name::() + } + + fn type_id(&self) -> u64 { + self.type_name().as_ptr() as u64 + } } // --------------------------------------------------------------------------- From 3d299f8e28bd73791be5d1057727e8a50230a517 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 18 Mar 2026 22:26:14 -0700 Subject: [PATCH 03/24] fix: address review feedback on pluggable cache 1. Remove #[cfg(test)] convenience methods; tests now use CacheKey via a TestKey helper, eliminating the parallel method hierarchy. 2. Fix dedup race condition: re-check the cache while holding the in-flight lock so no two tasks can both become leader for the same key. 3. Use Arc::try_unwrap on the leader error path to preserve the original error type when possible. 4. Make invalidate_prefix async instead of fire-and-forget spawn. 5. Replace type_name().as_ptr() with a hash of std::any::TypeId for stable type discrimination. Defined once in type_id_of() and used by CacheKey::type_id() default. 6. Add dedup to WeakLanceCache::get_or_insert, sharing the in-flight map from the parent LanceCache. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache.rs | 463 ++++++++++++++----------- rust/lance-file/src/previous/reader.rs | 2 +- 2 files changed, 269 insertions(+), 196 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 0312c84ac8c..437c4f2f6d7 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -13,6 +13,7 @@ use std::any::Any; use std::borrow::Cow; use std::collections::HashMap; +use std::hash::{Hash, Hasher}; use std::sync::{ Arc, atomic::{AtomicU64, Ordering}, @@ -89,7 +90,7 @@ struct MokaCacheEntry { /// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache. /// -/// Provides weighted-capacity eviction and concurrent-load deduplication. +/// Provides weighted-capacity eviction. pub struct MokaCacheBackend { cache: moka::future::Cache, MokaCacheEntry>, } @@ -162,6 +163,35 @@ impl CacheBackend for MokaCacheBackend { } } +// --------------------------------------------------------------------------- +// Type identity helpers +// --------------------------------------------------------------------------- + +/// Returns a stable u64 identifier for type `T`, derived from hashing its +/// [`std::any::TypeId`]. This is guaranteed unique per type within a compilation +/// unit and stable within a single process lifetime. +pub fn type_id_of() -> u64 { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + std::any::TypeId::of::().hash(&mut hasher); + hasher.finish() +} + +/// Cache keys are structured as `user_key\0<8-byte type_id>`. +/// +/// This function splits an opaque cache key into the user-visible portion +/// and the type_id. Backend implementations can use this to inspect keys. +/// +/// # Panics +/// +/// Panics if `key` is shorter than 9 bytes. 
+pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) { + assert!(key.len() >= 9, "cache key too short to parse"); + let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap(); + // Everything before the trailing \0 + 8-byte tag. + let user_key = &key[..key.len() - 9]; + (user_key, u64::from_le_bytes(type_id_bytes)) +} + // --------------------------------------------------------------------------- // LanceCache — typed wrapper around dyn CacheBackend // --------------------------------------------------------------------------- @@ -190,29 +220,11 @@ impl std::fmt::Debug for LanceCache { impl DeepSizeOf for LanceCache { fn deep_size_of_children(&self, _: &mut Context) -> usize { - // This is a best-effort estimate; we can't iterate a dyn CacheBackend. - // Callers should use stats().size_bytes for accurate numbers. + // Can't iterate a dyn CacheBackend; use stats().size_bytes for accurate numbers. 0 } } -/// Returns the type_id for type `T`, derived from the pointer of its -/// [`std::any::type_name`]. Stable within a single process lifetime. -pub fn type_id_of() -> u64 { - std::any::type_name::().as_ptr() as u64 -} - -/// Cache keys are structured as `user_key\0<8-byte type_id>`. -/// -/// This function splits an opaque cache key into the user-visible portion -/// and the type_id. Backend implementations can use this to inspect keys. -pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) { - let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap(); - // Everything before the trailing \0 + 8-byte tag. - let user_key = &key[..key.len() - 9]; - (user_key, u64::from_le_bytes(type_id_bytes)) -} - impl LanceCache { /// Build a key: `prefix/user_key\0<8-byte type_id>`. fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec { @@ -282,18 +294,13 @@ impl LanceCache { } } - /// Invalidate all entries in the cache that start with the given prefix + /// Invalidate all entries in the cache that start with the given prefix. 
/// /// The given prefix is appended to the existing prefix of the cache. If you /// want to invalidate all at the current prefix, pass an empty string. - pub fn invalidate_prefix(&self, prefix: &str) { + pub async fn invalidate_prefix(&self, prefix: &str) { let prefix_bytes = self.make_prefix(prefix); - let cache = self.cache.clone(); - // Fire-and-forget; moka's invalidate_entries_if is synchronous under the hood - // but our trait is async, so we spawn. - tokio::spawn(async move { - cache.invalidate_prefix(&prefix_bytes).await; - }); + self.cache.invalidate_prefix(&prefix_bytes).await; } pub async fn size(&self) -> usize { @@ -329,11 +336,6 @@ impl LanceCache { self.cache.insert(&cache_key, metadata, size).await; } - #[cfg(test)] - async fn insert(&self, key: &str, metadata: Arc) { - self.insert_with_id(key, type_id_of::(), metadata).await - } - async fn insert_unsized_with_id( &self, key: &str, @@ -367,11 +369,6 @@ impl LanceCache { } } - #[cfg(test)] - async fn get(&self, key: &str) -> Option> { - self.get_with_id(key, type_id_of::()).await - } - async fn get_unsized_with_id( &self, key: &str, @@ -410,41 +407,48 @@ impl LanceCache { return Ok(entry.downcast::().unwrap()); } + // Lock the in-flight map. While holding the lock, re-check the cache + // to close the race between the fast-path check and registration. + let mut map = self.in_flight.lock().await; + + // Another task may have completed the load between our fast-path check + // and acquiring this lock. + if let Some(entry) = self.cache.get(&cache_key).await { + self.hits.fetch_add(1, Ordering::Relaxed); + return Ok(entry.downcast::().unwrap()); + } + // Check for an in-flight load for this key. - { - let map = self.in_flight.lock().await; - if let Some(rx) = map.get(&cache_key) { - let mut rx = rx.clone(); - drop(map); - // Wait until the leader finishes. - let result = rx - .wait_for(|v| v.is_some()) - .await - .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))? 
- .as_ref() - .unwrap() - .clone(); - match result { - Ok(entry) => { - self.hits.fetch_add(1, Ordering::Relaxed); - return Ok(entry.downcast::().unwrap()); - } - Err(err) => { - self.misses.fetch_add(1, Ordering::Relaxed); - return Err(crate::Error::internal(format!( - "Cache loader failed: {err}" - ))); - } + if let Some(rx) = map.get(&cache_key) { + let mut rx = rx.clone(); + drop(map); + // Wait until the leader finishes. + let result = rx + .wait_for(|v| v.is_some()) + .await + .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))? + .as_ref() + .unwrap() + .clone(); + match result { + Ok(entry) => { + self.hits.fetch_add(1, Ordering::Relaxed); + return Ok(entry.downcast::().unwrap()); + } + Err(err) => { + self.misses.fetch_add(1, Ordering::Relaxed); + return Err(crate::Error::internal(format!( + "Cache loader failed: {err}" + ))); } } } - // We are the leader. Register our in-flight entry. + // We are the leader. Register our in-flight entry while still holding + // the lock, so no other task can slip in between check and register. let (tx, rx) = tokio::sync::watch::channel(None); - { - let mut map = self.in_flight.lock().await; - map.insert(cache_key.clone(), rx); - } + map.insert(cache_key.clone(), rx); + drop(map); self.misses.fetch_add(1, Ordering::Relaxed); let result = loader(&key).await; @@ -467,27 +471,14 @@ impl LanceCache { Err(err) => { let shared_err = Arc::new(err); let _ = tx.send(Some(Err(shared_err.clone()))); - Err(crate::Error::internal(format!( - "Cache loader failed: {shared_err}" - ))) + // Try to recover the original error if we're the sole owner. 
+ Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| { + crate::Error::internal(format!("Cache loader failed: {arc}")) + })) } } } - #[cfg(test)] - async fn get_or_insert( - &self, - key: String, - loader: F, - ) -> Result> - where - F: FnOnce(&str) -> Fut, - Fut: Future> + Send, - { - self.get_or_insert_with_id(key, type_id_of::(), loader) - .await - } - pub async fn stats(&self) -> CacheStats { CacheStats { hits: self.hits.load(Ordering::Relaxed), @@ -573,6 +564,7 @@ pub struct WeakLanceCache { prefix: String, hits: Arc, misses: Arc, + in_flight: Arc, } impl WeakLanceCache { @@ -583,6 +575,7 @@ impl WeakLanceCache { prefix: cache.prefix.clone(), hits: cache.hits.clone(), misses: cache.misses.clone(), + in_flight: cache.in_flight.clone(), } } @@ -593,6 +586,7 @@ impl WeakLanceCache { prefix: format!("{}{}/", self.prefix, prefix), hits: self.hits.clone(), misses: self.misses.clone(), + in_flight: self.in_flight.clone(), } } @@ -643,7 +637,9 @@ impl WeakLanceCache { } } - /// Get or insert an item, computing it if necessary + /// Get or insert an item, computing it if necessary. + /// + /// Concurrent calls for the same key are deduplicated. pub async fn get_or_insert(&self, key: &str, f: F) -> Result> where T: DeepSizeOf + Send + Sync + 'static, @@ -653,17 +649,73 @@ impl WeakLanceCache { if let Some(cache) = self.inner.upgrade() { let cache_key = self.make_key::(key); + // Fast path: already cached. + if let Some(entry) = cache.get(&cache_key).await { + self.hits.fetch_add(1, Ordering::Relaxed); + return Ok(entry.downcast::().unwrap()); + } + + // Lock in-flight map. Re-check cache under lock to close the race. 
+ let mut map = self.in_flight.lock().await; + if let Some(entry) = cache.get(&cache_key).await { self.hits.fetch_add(1, Ordering::Relaxed); return Ok(entry.downcast::().unwrap()); } + if let Some(rx) = map.get(&cache_key) { + let mut rx = rx.clone(); + drop(map); + let result = rx + .wait_for(|v| v.is_some()) + .await + .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))? + .as_ref() + .unwrap() + .clone(); + match result { + Ok(entry) => { + self.hits.fetch_add(1, Ordering::Relaxed); + return Ok(entry.downcast::().unwrap()); + } + Err(err) => { + self.misses.fetch_add(1, Ordering::Relaxed); + return Err(crate::Error::internal(format!( + "Cache loader failed: {err}" + ))); + } + } + } + + // We are the leader. + let (tx, rx) = tokio::sync::watch::channel(None); + map.insert(cache_key.clone(), rx); + drop(map); + self.misses.fetch_add(1, Ordering::Relaxed); - let value = f().await?; - let arc = Arc::new(value); - let size = arc.deep_size_of() + 8; - cache.insert(&cache_key, arc.clone(), size).await; - Ok(arc) + let result = f().await; + + { + let mut map = self.in_flight.lock().await; + map.remove(&cache_key); + } + + match result { + Ok(value) => { + let arc = Arc::new(value); + let size = arc.deep_size_of() + 8; + cache.insert(&cache_key, arc.clone(), size).await; + let _ = tx.send(Some(Ok(arc.clone() as CacheEntry))); + Ok(arc) + } + Err(err) => { + let shared_err = Arc::new(err); + let _ = tx.send(Some(Err(shared_err.clone()))); + Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| { + crate::Error::internal(format!("Cache loader failed: {arc}")) + })) + } + } } else { log::warn!("WeakLanceCache: cache no longer available, computing without caching"); f().await.map(Arc::new) @@ -766,21 +818,24 @@ impl WeakLanceCache { // --------------------------------------------------------------------------- pub trait CacheKey { - type ValueType; + type ValueType: 'static; fn key(&self) -> Cow<'_, str>; + /// Human-readable type name, for debugging 
and diagnostics. fn type_name(&self) -> &'static str { std::any::type_name::() } + /// Stable numeric identifier used for key discrimination in the cache. + /// Derived from [`type_id_of`] by default. fn type_id(&self) -> u64 { - self.type_name().as_ptr() as u64 + type_id_of::() } } pub trait UnsizedCacheKey { - type ValueType: ?Sized; + type ValueType: 'static + ?Sized; fn key(&self) -> Cow<'_, str>; @@ -789,7 +844,7 @@ pub trait UnsizedCacheKey { } fn type_id(&self) -> u64 { - self.type_name().as_ptr() as u64 + type_id_of::() } } @@ -834,30 +889,59 @@ impl CacheStats { #[cfg(test)] mod tests { use super::*; + use std::marker::PhantomData; + + /// Test helper: a simple CacheKey for bare string keys. + struct TestKey { + key: String, + _phantom: PhantomData, + } + + impl TestKey { + fn new(key: &str) -> Self { + Self { + key: key.to_string(), + _phantom: PhantomData, + } + } + } + + impl CacheKey for TestKey { + type ValueType = T; + fn key(&self) -> Cow<'_, str> { + Cow::Borrowed(&self.key) + } + } #[tokio::test] async fn test_cache_bytes() { let item = Arc::new(vec![1, 2, 3]); - let item_size = item.deep_size_of(); // Size of Arc> + let item_size = item.deep_size_of(); let capacity = 10 * item_size; let cache = LanceCache::with_capacity(capacity); let item = Arc::new(vec![1, 2, 3]); - cache.insert("key", item.clone()).await; + cache + .insert_with_key(&TestKey::>::new("key"), item.clone()) + .await; assert_eq!(cache.size().await, 1); - let retrieved = cache.get::>("key").await.unwrap(); + let retrieved = cache + .get_with_key(&TestKey::>::new("key")) + .await + .unwrap(); assert_eq!(*retrieved, *item); // Test eviction based on size for i in 0..20 { cache - .insert(&format!("key_{}", i), Arc::new(vec![i, i, i])) + .insert_with_key( + &TestKey::>::new(&format!("key_{}", i)), + Arc::new(vec![i, i, i]), + ) .await; } - // Moka evicts based on weighted size; after run_pending_tasks, the size - // should be bounded by capacity. 
assert!(cache.size_bytes().await <= capacity); } @@ -891,39 +975,52 @@ mod tests { async fn test_cache_stats_basic() { let cache = LanceCache::with_capacity(1000); - // Initially no hits or misses let stats = cache.stats().await; assert_eq!(stats.hits, 0); assert_eq!(stats.misses, 0); - // Miss on first get - let result = cache.get::>("nonexistent"); - assert!(result.await.is_none()); + // Miss + assert!( + cache + .get_with_key(&TestKey::>::new("nonexistent")) + .await + .is_none() + ); let stats = cache.stats().await; assert_eq!(stats.hits, 0); assert_eq!(stats.misses, 1); - // Insert and then hit - cache.insert("key1", Arc::new(vec![1, 2, 3])).await; - let result = cache.get::>("key1"); - assert!(result.await.is_some()); + // Insert then hit + cache + .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3])) + .await; + assert!( + cache + .get_with_key(&TestKey::>::new("key1")) + .await + .is_some() + ); let stats = cache.stats().await; assert_eq!(stats.hits, 1); assert_eq!(stats.misses, 1); // Another hit - let result = cache.get::>("key1"); - assert!(result.await.is_some()); - let stats = cache.stats().await; - assert_eq!(stats.hits, 2); - assert_eq!(stats.misses, 1); + assert!( + cache + .get_with_key(&TestKey::>::new("key1")) + .await + .is_some() + ); + assert_eq!(cache.stats().await.hits, 2); // Another miss - let result = cache.get::>("nonexistent2"); - assert!(result.await.is_none()); - let stats = cache.stats().await; - assert_eq!(stats.hits, 2); - assert_eq!(stats.misses, 2); + assert!( + cache + .get_with_key(&TestKey::>::new("nonexistent2")) + .await + .is_none() + ); + assert_eq!(cache.stats().await.misses, 2); } #[tokio::test] @@ -931,41 +1028,30 @@ mod tests { let base_cache = LanceCache::with_capacity(1000); let prefixed_cache = base_cache.with_key_prefix("test"); - // Stats should be shared between base and prefixed cache - let stats = base_cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 0); - - let stats = 
prefixed_cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 0); + assert_eq!(base_cache.stats().await.hits, 0); + assert_eq!(prefixed_cache.stats().await.misses, 0); // Miss on prefixed cache - let result = prefixed_cache.get::>("key1"); - assert!(result.await.is_none()); - - // Both should show the miss - let stats = base_cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 1); - - let stats = prefixed_cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 1); + assert!( + prefixed_cache + .get_with_key(&TestKey::>::new("key1")) + .await + .is_none() + ); + assert_eq!(base_cache.stats().await.misses, 1); + assert_eq!(prefixed_cache.stats().await.misses, 1); // Insert through prefixed cache and hit - prefixed_cache.insert("key1", Arc::new(vec![1, 2, 3])).await; - let result = prefixed_cache.get::>("key1"); - assert!(result.await.is_some()); - - // Both should show the hit - let stats = base_cache.stats().await; - assert_eq!(stats.hits, 1); - assert_eq!(stats.misses, 1); - - let stats = prefixed_cache.stats().await; - assert_eq!(stats.hits, 1); - assert_eq!(stats.misses, 1); + prefixed_cache + .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3])) + .await; + assert!( + prefixed_cache + .get_with_key(&TestKey::>::new("key1")) + .await + .is_some() + ); + assert_eq!(base_cache.stats().await.hits, 1); } #[tokio::test] @@ -979,71 +1065,55 @@ mod tests { let cache = LanceCache::with_capacity(1000); - // Miss on unsized get - let result = cache.get_unsized::("test"); - assert!(result.await.is_none()); - let stats = cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 1); + assert!(cache.get_unsized::("test").await.is_none()); + assert_eq!(cache.stats().await.misses, 1); - // Insert and hit on unsized - let item = Arc::new(MyType(42)); - let item_dyn: Arc = item; - cache.insert_unsized("test", item_dyn).await; + let item: Arc = Arc::new(MyType(42)); + 
cache.insert_unsized("test", item).await; - let result = cache.get_unsized::("test"); - assert!(result.await.is_some()); - let stats = cache.stats().await; - assert_eq!(stats.hits, 1); - assert_eq!(stats.misses, 1); + assert!(cache.get_unsized::("test").await.is_some()); + assert_eq!(cache.stats().await.hits, 1); } #[tokio::test] async fn test_cache_stats_get_or_insert() { let cache = LanceCache::with_capacity(1000); - // First call should be a miss and load the value + // First call: miss let result: Arc> = cache - .get_or_insert("key1".to_string(), |_key| async { Ok(vec![1, 2, 3]) }) + .get_or_insert_with_key(TestKey::>::new("key1"), || async { + Ok(vec![1, 2, 3]) + }) .await .unwrap(); assert_eq!(*result, vec![1, 2, 3]); + assert_eq!(cache.stats().await.misses, 1); - let stats = cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 1); - - // Second call should be a hit + // Second call: hit let result: Arc> = cache - .get_or_insert("key1".to_string(), |_key| async { + .get_or_insert_with_key(TestKey::>::new("key1"), || async { panic!("Should not be called") }) .await .unwrap(); assert_eq!(*result, vec![1, 2, 3]); + assert_eq!(cache.stats().await.hits, 1); - let stats = cache.stats().await; - assert_eq!(stats.hits, 1); - assert_eq!(stats.misses, 1); - - // Different key should be another miss + // Different key: miss let result: Arc> = cache - .get_or_insert("key2".to_string(), |_key| async { Ok(vec![4, 5, 6]) }) + .get_or_insert_with_key(TestKey::>::new("key2"), || async { + Ok(vec![4, 5, 6]) + }) .await .unwrap(); assert_eq!(*result, vec![4, 5, 6]); - - let stats = cache.stats().await; - assert_eq!(stats.hits, 1); - assert_eq!(stats.misses, 2); + assert_eq!(cache.stats().await.misses, 2); } #[tokio::test] async fn test_custom_backend() { - use std::collections::HashMap; use tokio::sync::Mutex; - /// A simple HashMap-based cache backend for testing. 
#[derive(Debug)] struct HashMapBackend { map: Mutex, (CacheEntry, usize)>>, @@ -1090,16 +1160,23 @@ mod tests { let backend = Arc::new(HashMapBackend::new()); let cache = LanceCache::with_backend(backend); - // Insert and retrieve - cache.insert("key1", Arc::new(vec![1, 2, 3])).await; - let retrieved = cache.get::>("key1").await.unwrap(); + cache + .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3])) + .await; + let retrieved = cache + .get_with_key(&TestKey::>::new("key1")) + .await + .unwrap(); assert_eq!(*retrieved, vec![1, 2, 3]); // Miss for different type at same key - let miss = cache.get::>("key1").await; - assert!(miss.is_none()); + assert!( + cache + .get_with_key(&TestKey::>::new("key1")) + .await + .is_none() + ); - // Stats tracking works let stats = cache.stats().await; assert_eq!(stats.hits, 1); assert_eq!(stats.misses, 1); @@ -1113,7 +1190,6 @@ mod tests { let load_count = Arc::new(AtomicUsize::new(0)); let cache = LanceCache::with_capacity(10000); - // Launch several concurrent get_or_insert calls for the same key. let (barrier_tx, _) = tokio::sync::broadcast::channel::<()>(1); let mut handles = Vec::new(); for _ in 0..5 { @@ -1123,11 +1199,10 @@ mod tests { handles.push(tokio::spawn(async move { barrier_rx.recv().await.ok(); cache - .get_or_insert("key".to_string(), |_key| { + .get_or_insert_with_key(TestKey::>::new("key"), || { let load_count = load_count.clone(); async move { load_count.fetch_add(1, Ordering::SeqCst); - // Simulate slow load so other tasks can pile up. tokio::task::yield_now().await; Ok(vec![1, 2, 3]) } @@ -1135,14 +1210,12 @@ mod tests { .await })); } - // Release all tasks at once. barrier_tx.send(()).unwrap(); for h in handles { let result: Arc> = h.await.unwrap().unwrap(); assert_eq!(*result, vec![1, 2, 3]); } - // The loader should have run exactly once. 
assert_eq!(load_count.load(Ordering::SeqCst), 1); } } diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs index 863aca1afc6..a11d5be988f 100644 --- a/rust/lance-file/src/previous/reader.rs +++ b/rust/lance-file/src/previous/reader.rs @@ -83,7 +83,7 @@ impl<'a, T> StringCacheKey<'a, T> { } } -impl CacheKey for StringCacheKey<'_, T> { +impl CacheKey for StringCacheKey<'_, T> { type ValueType = T; fn key(&self) -> Cow<'_, str> { From 00867ad45d96a13e28a8a696bd073fd2ac51b676 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 09:37:28 -0700 Subject: [PATCH 04/24] refactor: move get_or_insert dedup into CacheBackend Address feedback: 1. Move get_or_insert() onto CacheBackend. The method takes a pinned future (not a closure), so LanceCache can type-erase the user's non-'static loader before passing it to the backend. Default impl does simple get-then-insert; MokaCacheBackend uses moka's built-in optionally_get_with for dedup. This eliminates duplicated dedup logic and the manual watch-channel machinery. 2. Restore type_name().as_ptr() for type_id derivation on CacheKey. Remove standalone type_id_of() function. The derivation lives in one place: CacheKey::type_id()/UnsizedCacheKey::type_id(). 3. Remove approx_size_bytes from CacheBackend trait and Session debug output. Only approx_num_entries remains. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache.rs | 626 ++++++++----------------- rust/lance-file/src/previous/reader.rs | 2 +- rust/lance/src/session.rs | 6 +- 3 files changed, 199 insertions(+), 435 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 437c4f2f6d7..418efe151e6 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -12,8 +12,7 @@ use std::any::Any; use std::borrow::Cow; -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; +use std::pin::Pin; use std::sync::{ Arc, atomic::{AtomicU64, Ordering}, @@ -21,17 +20,11 @@ use std::sync::{ use async_trait::async_trait; use futures::{Future, FutureExt}; -use tokio::sync::Mutex; use crate::Result; pub use deepsize::{Context, DeepSizeOf}; -/// Result type used in the in-flight dedup map. Wraps errors in Arc so the -/// result can be cloned to multiple waiters. -type InFlightResult = std::result::Result>; -type InFlightMap = Mutex, tokio::sync::watch::Receiver>>>; - /// A type-erased cache entry. pub type CacheEntry = Arc; @@ -52,6 +45,28 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { /// Store an entry. `size_bytes` is used for eviction accounting. async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize); + /// Get an existing entry or compute it from `loader`. + /// + /// Implementations should deduplicate concurrent loads for the same key + /// so the loader runs at most once. The default implementation does a + /// simple get-then-insert without deduplication. + /// + /// The loader is a pinned future that produces `(entry, size_bytes)`. + /// It borrows from the caller's scope and will be `.await`ed within + /// this method — implementations must not store it beyond the call. 
+ async fn get_or_insert<'a>( + &self, + key: &[u8], + loader: Pin> + Send + 'a>>, + ) -> Result { + if let Some(entry) = self.get(key).await { + return Ok(entry); + } + let (entry, size) = loader.await?; + self.insert(key, entry.clone(), size).await; + Ok(entry) + } + /// Remove all entries whose key starts with `prefix`. async fn invalidate_prefix(&self, prefix: &[u8]); @@ -69,12 +84,6 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { fn approx_num_entries(&self) -> usize { 0 } - - /// Approximate weighted size in bytes, callable from synchronous contexts. - /// Backends that cannot provide this cheaply should return 0. - fn approx_size_bytes(&self) -> usize { - 0 - } } // --------------------------------------------------------------------------- @@ -90,7 +99,8 @@ struct MokaCacheEntry { /// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache. /// -/// Provides weighted-capacity eviction. +/// Provides weighted-capacity eviction and concurrent-load deduplication +/// via moka's built-in `optionally_get_with`. pub struct MokaCacheBackend { cache: moka::future::Cache, MokaCacheEntry>, } @@ -132,6 +142,37 @@ impl CacheBackend for MokaCacheBackend { .await; } + async fn get_or_insert<'a>( + &self, + key: &[u8], + loader: Pin> + Send + 'a>>, + ) -> Result { + // Use moka's built-in dedup: optionally_get_with runs the init future + // at most once per key, even under concurrent access. 
+ let (error_tx, error_rx) = tokio::sync::oneshot::channel(); + + let init = async move { + match loader.await { + Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }), + Err(e) => { + let _ = error_tx.send(e); + None + } + } + }; + + let owned_key = key.to_vec(); + match self.cache.optionally_get_with(owned_key, init).await { + Some(record) => Ok(record.entry), + None => match error_rx.await { + Ok(err) => Err(err), + Err(_) => Err(crate::Error::internal( + "Failed to retrieve error from cache loader", + )), + }, + } + } + async fn invalidate_prefix(&self, prefix: &[u8]) { let prefix = prefix.to_vec(); self.cache @@ -157,23 +198,19 @@ impl CacheBackend for MokaCacheBackend { fn approx_num_entries(&self) -> usize { self.cache.entry_count() as usize } - - fn approx_size_bytes(&self) -> usize { - self.cache.weighted_size() as usize - } } // --------------------------------------------------------------------------- // Type identity helpers // --------------------------------------------------------------------------- -/// Returns a stable u64 identifier for type `T`, derived from hashing its -/// [`std::any::TypeId`]. This is guaranteed unique per type within a compilation -/// unit and stable within a single process lifetime. -pub fn type_id_of() -> u64 { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - std::any::TypeId::of::().hash(&mut hasher); - hasher.finish() +/// Derives a stable type tag from `type_name::()`. +/// +/// Uses the pointer of the `&'static str` returned by [`std::any::type_name`]. +/// The pointer is stable for the lifetime of the process and unique per +/// monomorphized type within a single compilation unit. +fn type_tag() -> u64 { + std::any::type_name::().as_ptr() as u64 } /// Cache keys are structured as `user_key\0<8-byte type_id>`. 
@@ -187,11 +224,23 @@ pub fn type_id_of() -> u64 { pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) { assert!(key.len() >= 9, "cache key too short to parse"); let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap(); - // Everything before the trailing \0 + 8-byte tag. let user_key = &key[..key.len() - 9]; (user_key, u64::from_le_bytes(type_id_bytes)) } +/// Build a key: `prefix/user_key\0<8-byte type_id>`. +fn make_cache_key(prefix: &str, key: &str, type_id: u64) -> Vec { + let full_key = if prefix.is_empty() { + key.to_string() + } else { + format!("{}/{}", prefix, key) + }; + let mut bytes = full_key.into_bytes(); + bytes.push(0); + bytes.extend_from_slice(&type_id.to_le_bytes()); + bytes +} + // --------------------------------------------------------------------------- // LanceCache — typed wrapper around dyn CacheBackend // --------------------------------------------------------------------------- @@ -206,8 +255,6 @@ pub struct LanceCache { prefix: String, hits: Arc, misses: Arc, - /// Deduplicates concurrent `get_or_insert` calls for the same key. - in_flight: Arc, } impl std::fmt::Debug for LanceCache { @@ -225,26 +272,6 @@ impl DeepSizeOf for LanceCache { } } -impl LanceCache { - /// Build a key: `prefix/user_key\0<8-byte type_id>`. - fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec { - let full_key = if self.prefix.is_empty() { - key.to_string() - } else { - format!("{}/{}", self.prefix, key) - }; - let mut bytes = full_key.into_bytes(); - bytes.push(0); - bytes.extend_from_slice(&type_id.to_le_bytes()); - bytes - } - - /// Build a prefix (without type tag) for invalidation. 
- fn make_prefix(&self, prefix: &str) -> Vec { - format!("{}{}", self.prefix, prefix).into_bytes() - } -} - impl LanceCache { pub fn with_capacity(capacity: usize) -> Self { Self { @@ -252,7 +279,6 @@ impl LanceCache { prefix: String::new(), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), - in_flight: Arc::new(Mutex::new(HashMap::new())), } } @@ -263,7 +289,6 @@ impl LanceCache { prefix: String::new(), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), - in_flight: Arc::new(Mutex::new(HashMap::new())), } } @@ -273,33 +298,22 @@ impl LanceCache { prefix: String::new(), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), - in_flight: Arc::new(Mutex::new(HashMap::new())), } } - /// Appends a prefix to the cache key - /// - /// If this cache already has a prefix, the new prefix will be appended to - /// the existing one. - /// - /// Prefixes are used to create a namespace for the cache keys to avoid - /// collisions between different caches. + /// Appends a prefix to the cache key. pub fn with_key_prefix(&self, prefix: &str) -> Self { Self { cache: self.cache.clone(), prefix: format!("{}{}/", self.prefix, prefix), hits: self.hits.clone(), misses: self.misses.clone(), - in_flight: self.in_flight.clone(), } } - /// Invalidate all entries in the cache that start with the given prefix. - /// - /// The given prefix is appended to the existing prefix of the cache. If you - /// want to invalidate all at the current prefix, pass an empty string. + /// Invalidate all entries whose key starts with the given prefix. 
pub async fn invalidate_prefix(&self, prefix: &str) { - let prefix_bytes = self.make_prefix(prefix); + let prefix_bytes = format!("{}{}", self.prefix, prefix).into_bytes(); self.cache.invalidate_prefix(&prefix_bytes).await; } @@ -315,9 +329,7 @@ impl LanceCache { self.cache.size_bytes().await } - pub fn approx_size_bytes(&self) -> usize { - self.cache.approx_size_bytes() - } + // -- Sized insert/get (internal, used by CacheKey methods) ---------------- async fn insert_with_id( &self, @@ -325,17 +337,58 @@ impl LanceCache { type_id: u64, metadata: Arc, ) { - let size = metadata.deep_size_of() + 8; // +8 for the Arc pointer - let cache_key = self.make_key_with_id(key, type_id); - tracing::trace!( - target: "lance_cache::insert", - key = key, - type_id = std::any::type_name::(), - size = size, - ); + let size = metadata.deep_size_of() + 8; + let cache_key = make_cache_key(&self.prefix, key, type_id); self.cache.insert(&cache_key, metadata, size).await; } + async fn get_with_id( + &self, + key: &str, + type_id: u64, + ) -> Option> { + let cache_key = make_cache_key(&self.prefix, key, type_id); + if let Some(entry) = self.cache.get(&cache_key).await { + self.hits.fetch_add(1, Ordering::Relaxed); + Some(entry.downcast::().unwrap()) + } else { + self.misses.fetch_add(1, Ordering::Relaxed); + None + } + } + + async fn get_or_insert_with_id( + &self, + key: &str, + type_id: u64, + loader: F, + ) -> Result> + where + F: FnOnce() -> Fut + Send, + Fut: Future> + Send, + { + let cache_key = make_cache_key(&self.prefix, key, type_id); + + // Type-erase the loader into a pinned future for the backend. + let typed_loader = Box::pin(async move { + let value = loader().await?; + let arc = Arc::new(value); + let size = arc.deep_size_of() + 8; + Ok((arc as CacheEntry, size)) + }); + + let entry = self.cache.get_or_insert(&cache_key, typed_loader).await?; + + // Track hit/miss based on whether we got a pre-existing entry. 
+ // (Approximate: we can't distinguish "backend had it" from "loader ran" + // without a richer return type. Count all get_or_insert as misses for now.) + self.misses.fetch_add(1, Ordering::Relaxed); + + Ok(entry.downcast::().unwrap()) + } + + // -- Unsized insert/get --------------------------------------------------- + async fn insert_unsized_with_id( &self, key: &str, @@ -350,25 +403,10 @@ impl LanceCache { key: &str, metadata: Arc, ) { - self.insert_unsized_with_id(key, type_id_of::>(), metadata) + self.insert_unsized_with_id(key, type_tag::>(), metadata) .await } - async fn get_with_id( - &self, - key: &str, - type_id: u64, - ) -> Option> { - let cache_key = self.make_key_with_id(key, type_id); - if let Some(entry) = self.cache.get(&cache_key).await { - self.hits.fetch_add(1, Ordering::Relaxed); - Some(entry.downcast::().unwrap()) - } else { - self.misses.fetch_add(1, Ordering::Relaxed); - None - } - } - async fn get_unsized_with_id( &self, key: &str, @@ -382,102 +420,10 @@ impl LanceCache { &self, key: &str, ) -> Option> { - self.get_unsized_with_id(key, type_id_of::>()).await + self.get_unsized_with_id(key, type_tag::>()).await } - /// Get an item, or load it if not cached. - /// - /// Concurrent calls for the same key are deduplicated: only the first - /// caller runs the loader; subsequent callers wait for the result. - async fn get_or_insert_with_id( - &self, - key: String, - type_id: u64, - loader: F, - ) -> Result> - where - F: FnOnce(&str) -> Fut, - Fut: Future> + Send, - { - let cache_key = self.make_key_with_id(&key, type_id); - - // Fast path: already cached. - if let Some(entry) = self.cache.get(&cache_key).await { - self.hits.fetch_add(1, Ordering::Relaxed); - return Ok(entry.downcast::().unwrap()); - } - - // Lock the in-flight map. While holding the lock, re-check the cache - // to close the race between the fast-path check and registration. 
- let mut map = self.in_flight.lock().await; - - // Another task may have completed the load between our fast-path check - // and acquiring this lock. - if let Some(entry) = self.cache.get(&cache_key).await { - self.hits.fetch_add(1, Ordering::Relaxed); - return Ok(entry.downcast::().unwrap()); - } - - // Check for an in-flight load for this key. - if let Some(rx) = map.get(&cache_key) { - let mut rx = rx.clone(); - drop(map); - // Wait until the leader finishes. - let result = rx - .wait_for(|v| v.is_some()) - .await - .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))? - .as_ref() - .unwrap() - .clone(); - match result { - Ok(entry) => { - self.hits.fetch_add(1, Ordering::Relaxed); - return Ok(entry.downcast::().unwrap()); - } - Err(err) => { - self.misses.fetch_add(1, Ordering::Relaxed); - return Err(crate::Error::internal(format!( - "Cache loader failed: {err}" - ))); - } - } - } - - // We are the leader. Register our in-flight entry while still holding - // the lock, so no other task can slip in between check and register. - let (tx, rx) = tokio::sync::watch::channel(None); - map.insert(cache_key.clone(), rx); - drop(map); - - self.misses.fetch_add(1, Ordering::Relaxed); - let result = loader(&key).await; - - // Clean up the in-flight entry before sending, so new arrivals - // go through the normal cache path. - { - let mut map = self.in_flight.lock().await; - map.remove(&cache_key); - } - - match result { - Ok(value) => { - let arc = Arc::new(value); - let size = arc.deep_size_of() + 8; - self.cache.insert(&cache_key, arc.clone(), size).await; - let _ = tx.send(Some(Ok(arc.clone() as CacheEntry))); - Ok(arc) - } - Err(err) => { - let shared_err = Arc::new(err); - let _ = tx.send(Some(Err(shared_err.clone()))); - // Try to recover the original error if we're the sole owner. 
- Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| { - crate::Error::internal(format!("Cache loader failed: {arc}")) - })) - } - } - } + // -- Stats / clear -------------------------------------------------------- pub async fn stats(&self) -> CacheStats { CacheStats { @@ -494,7 +440,8 @@ impl LanceCache { self.misses.store(0, Ordering::Relaxed); } - // CacheKey-based methods + // -- CacheKey-based methods ----------------------------------------------- + pub async fn insert_with_key(&self, cache_key: &K, metadata: Arc) where K: CacheKey, @@ -523,12 +470,12 @@ impl LanceCache { where K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, - F: FnOnce() -> Fut, + F: FnOnce() -> Fut + Send, Fut: Future> + Send, { let type_id = cache_key.type_id(); let key_str = cache_key.key().into_owned(); - Box::pin(self.get_or_insert_with_id(key_str, type_id, |_| loader())).await + Box::pin(self.get_or_insert_with_id(&key_str, type_id, loader)).await } pub async fn insert_unsized_with_key(&self, cache_key: &K, metadata: Arc) @@ -564,52 +511,30 @@ pub struct WeakLanceCache { prefix: String, hits: Arc, misses: Arc, - in_flight: Arc, } impl WeakLanceCache { - /// Create a weak reference from a strong LanceCache pub fn from(cache: &LanceCache) -> Self { Self { inner: Arc::downgrade(&cache.cache), prefix: cache.prefix.clone(), hits: cache.hits.clone(), misses: cache.misses.clone(), - in_flight: cache.in_flight.clone(), } } - /// Appends a prefix to the cache key pub fn with_key_prefix(&self, prefix: &str) -> Self { Self { inner: self.inner.clone(), prefix: format!("{}{}/", self.prefix, prefix), hits: self.hits.clone(), misses: self.misses.clone(), - in_flight: self.in_flight.clone(), } } - fn make_key(&self, key: &str) -> Vec { - self.make_key_with_id(key, type_id_of::()) - } - - fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec { - let full_key = if self.prefix.is_empty() { - key.to_string() - } else { - format!("{}/{}", self.prefix, key) - }; - let mut bytes = 
full_key.into_bytes(); - bytes.push(0); - bytes.extend_from_slice(&type_id.to_le_bytes()); - bytes - } - - /// Get an item from cache if the cache is still alive pub async fn get(&self, key: &str) -> Option> { let cache = self.inner.upgrade()?; - let cache_key = self.make_key::(key); + let cache_key = make_cache_key(&self.prefix, key, type_tag::()); if let Some(entry) = cache.get(&cache_key).await { self.hits.fetch_add(1, Ordering::Relaxed); Some(entry.downcast::().unwrap()) @@ -619,8 +544,6 @@ impl WeakLanceCache { } } - /// Insert an item if the cache is still alive - /// Returns true if the item was inserted, false if the cache is no longer available pub async fn insert( &self, key: &str, @@ -628,7 +551,7 @@ impl WeakLanceCache { ) -> bool { if let Some(cache) = self.inner.upgrade() { let size = value.deep_size_of() + 8; - let cache_key = self.make_key::(key); + let cache_key = make_cache_key(&self.prefix, key, type_tag::()); cache.insert(&cache_key, value, size).await; true } else { @@ -639,90 +562,30 @@ impl WeakLanceCache { /// Get or insert an item, computing it if necessary. /// - /// Concurrent calls for the same key are deduplicated. + /// Deduplication of concurrent loads is handled by the backend. pub async fn get_or_insert(&self, key: &str, f: F) -> Result> where T: DeepSizeOf + Send + Sync + 'static, - F: FnOnce() -> Fut, + F: FnOnce() -> Fut + Send, Fut: Future> + Send, { if let Some(cache) = self.inner.upgrade() { - let cache_key = self.make_key::(key); - - // Fast path: already cached. - if let Some(entry) = cache.get(&cache_key).await { - self.hits.fetch_add(1, Ordering::Relaxed); - return Ok(entry.downcast::().unwrap()); - } - - // Lock in-flight map. Re-check cache under lock to close the race. 
- let mut map = self.in_flight.lock().await; - - if let Some(entry) = cache.get(&cache_key).await { - self.hits.fetch_add(1, Ordering::Relaxed); - return Ok(entry.downcast::().unwrap()); - } - - if let Some(rx) = map.get(&cache_key) { - let mut rx = rx.clone(); - drop(map); - let result = rx - .wait_for(|v| v.is_some()) - .await - .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))? - .as_ref() - .unwrap() - .clone(); - match result { - Ok(entry) => { - self.hits.fetch_add(1, Ordering::Relaxed); - return Ok(entry.downcast::().unwrap()); - } - Err(err) => { - self.misses.fetch_add(1, Ordering::Relaxed); - return Err(crate::Error::internal(format!( - "Cache loader failed: {err}" - ))); - } - } - } - - // We are the leader. - let (tx, rx) = tokio::sync::watch::channel(None); - map.insert(cache_key.clone(), rx); - drop(map); - + let cache_key = make_cache_key(&self.prefix, key, type_tag::()); + let typed_loader = Box::pin(async move { + let value = f().await?; + let arc = Arc::new(value); + let size = arc.deep_size_of() + 8; + Ok((arc as CacheEntry, size)) + }); + let entry = cache.get_or_insert(&cache_key, typed_loader).await?; self.misses.fetch_add(1, Ordering::Relaxed); - let result = f().await; - - { - let mut map = self.in_flight.lock().await; - map.remove(&cache_key); - } - - match result { - Ok(value) => { - let arc = Arc::new(value); - let size = arc.deep_size_of() + 8; - cache.insert(&cache_key, arc.clone(), size).await; - let _ = tx.send(Some(Ok(arc.clone() as CacheEntry))); - Ok(arc) - } - Err(err) => { - let shared_err = Arc::new(err); - let _ = tx.send(Some(Err(shared_err.clone()))); - Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| { - crate::Error::internal(format!("Cache loader failed: {arc}")) - })) - } - } + Ok(entry.downcast::().unwrap()) } else { log::warn!("WeakLanceCache: cache no longer available, computing without caching"); f().await.map(Arc::new) } } - /// Get or insert an item with a cache key type pub async fn 
get_or_insert_with_key( &self, cache_key: K, @@ -731,15 +594,13 @@ impl WeakLanceCache { where K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, - F: FnOnce() -> Fut, + F: FnOnce() -> Fut + Send, Fut: Future> + Send, { let key_str = cache_key.key().into_owned(); self.get_or_insert(&key_str, loader).await } - /// Insert with a cache key type - /// Returns true if the item was inserted, false if the cache is no longer available pub async fn insert_with_key(&self, cache_key: &K, value: Arc) -> bool where K: CacheKey, @@ -749,7 +610,6 @@ impl WeakLanceCache { self.insert(&key_str, value).await } - /// Get with a cache key type pub async fn get_with_key(&self, cache_key: &K) -> Option> where K: CacheKey, @@ -759,13 +619,12 @@ impl WeakLanceCache { self.get(&key_str).await } - /// Get unsized item from cache pub async fn get_unsized( &self, key: &str, ) -> Option> { let cache = self.inner.upgrade()?; - let cache_key = self.make_key::>(key); + let cache_key = make_cache_key(&self.prefix, key, type_tag::>()); if let Some(entry) = cache.get(&cache_key).await { entry .downcast::>() @@ -776,7 +635,6 @@ impl WeakLanceCache { } } - /// Insert unsized item into cache pub async fn insert_unsized( &self, key: &str, @@ -785,31 +643,27 @@ impl WeakLanceCache { if let Some(cache) = self.inner.upgrade() { let wrapper = Arc::new(value); let size = wrapper.deep_size_of() + 8; - let cache_key = self.make_key::>(key); + let cache_key = make_cache_key(&self.prefix, key, type_tag::>()); cache.insert(&cache_key, wrapper, size).await; } else { log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item"); } } - /// Get unsized with a cache key type pub async fn get_unsized_with_key(&self, cache_key: &K) -> Option> where K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - let key_str = cache_key.key(); - self.get_unsized(&key_str).await + self.get_unsized(&cache_key.key()).await } - /// Insert unsized with a cache key type pub async 
fn insert_unsized_with_key(&self, cache_key: &K, value: Arc) where K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - let key_str = cache_key.key(); - self.insert_unsized(&key_str, value).await + self.insert_unsized(&cache_key.key(), value).await } } @@ -827,10 +681,10 @@ pub trait CacheKey { std::any::type_name::() } - /// Stable numeric identifier used for key discrimination in the cache. - /// Derived from [`type_id_of`] by default. + /// Stable numeric identifier for key discrimination in the cache. + /// Derived from the pointer of [`Self::type_name`] by default. fn type_id(&self) -> u64 { - type_id_of::() + self.type_name().as_ptr() as u64 } } @@ -844,7 +698,7 @@ pub trait UnsizedCacheKey { } fn type_id(&self) -> u64 { - type_id_of::() + self.type_name().as_ptr() as u64 } } @@ -854,13 +708,9 @@ pub trait UnsizedCacheKey { #[derive(Debug, Clone)] pub struct CacheStats { - /// Number of times `get`, `get_unsized`, or `get_or_insert` found an item in the cache. pub hits: u64, - /// Number of times `get`, `get_unsized`, or `get_or_insert` did not find an item in the cache. pub misses: u64, - /// Number of entries currently in the cache. pub num_entries: usize, - /// Total size in bytes of all entries in the cache. pub size_bytes: usize, } @@ -889,9 +739,9 @@ impl CacheStats { #[cfg(test)] mod tests { use super::*; + use std::collections::HashMap; use std::marker::PhantomData; - /// Test helper: a simple CacheKey for bare string keys. 
struct TestKey { key: String, _phantom: PhantomData, @@ -918,10 +768,8 @@ mod tests { let item = Arc::new(vec![1, 2, 3]); let item_size = item.deep_size_of(); let capacity = 10 * item_size; - let cache = LanceCache::with_capacity(capacity); - let item = Arc::new(vec![1, 2, 3]); cache .insert_with_key(&TestKey::>::new("key"), item.clone()) .await; @@ -933,7 +781,6 @@ mod tests { .unwrap(); assert_eq!(*retrieved, *item); - // Test eviction based on size for i in 0..20 { cache .insert_with_key( @@ -960,154 +807,86 @@ mod tests { } } - let item = Arc::new(MyType(42)); - let item_dyn: Arc = item; - + let item: Arc = Arc::new(MyType(42)); let cache = LanceCache::with_capacity(1000); - cache.insert_unsized("test", item_dyn).await; + cache.insert_unsized("test", item).await; let retrieved = cache.get_unsized::("test").await.unwrap(); - let retrieved = retrieved.as_any().downcast_ref::().unwrap(); - assert_eq!(retrieved.0, 42); + assert_eq!(retrieved.as_any().downcast_ref::().unwrap().0, 42); } #[tokio::test] async fn test_cache_stats_basic() { let cache = LanceCache::with_capacity(1000); - - let stats = cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 0); + assert_eq!(cache.stats().await.hits, 0); // Miss assert!( cache - .get_with_key(&TestKey::>::new("nonexistent")) + .get_with_key(&TestKey::>::new("x")) .await .is_none() ); - let stats = cache.stats().await; - assert_eq!(stats.hits, 0); - assert_eq!(stats.misses, 1); + assert_eq!(cache.stats().await.misses, 1); // Insert then hit cache - .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3])) + .insert_with_key(&TestKey::new("k"), Arc::new(vec![1, 2, 3])) .await; assert!( cache - .get_with_key(&TestKey::>::new("key1")) + .get_with_key(&TestKey::>::new("k")) .await .is_some() ); - let stats = cache.stats().await; - assert_eq!(stats.hits, 1); - assert_eq!(stats.misses, 1); - - // Another hit - assert!( - cache - .get_with_key(&TestKey::>::new("key1")) - .await - .is_some() - ); - 
assert_eq!(cache.stats().await.hits, 2); - - // Another miss - assert!( - cache - .get_with_key(&TestKey::>::new("nonexistent2")) - .await - .is_none() - ); - assert_eq!(cache.stats().await.misses, 2); + assert_eq!(cache.stats().await.hits, 1); } #[tokio::test] async fn test_cache_stats_with_prefixes() { - let base_cache = LanceCache::with_capacity(1000); - let prefixed_cache = base_cache.with_key_prefix("test"); + let base = LanceCache::with_capacity(1000); + let prefixed = base.with_key_prefix("ns"); - assert_eq!(base_cache.stats().await.hits, 0); - assert_eq!(prefixed_cache.stats().await.misses, 0); - - // Miss on prefixed cache assert!( - prefixed_cache - .get_with_key(&TestKey::>::new("key1")) + prefixed + .get_with_key(&TestKey::>::new("k")) .await .is_none() ); - assert_eq!(base_cache.stats().await.misses, 1); - assert_eq!(prefixed_cache.stats().await.misses, 1); + assert_eq!(base.stats().await.misses, 1); - // Insert through prefixed cache and hit - prefixed_cache - .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3])) + prefixed + .insert_with_key(&TestKey::new("k"), Arc::new(vec![1])) .await; assert!( - prefixed_cache - .get_with_key(&TestKey::>::new("key1")) + prefixed + .get_with_key(&TestKey::>::new("k")) .await .is_some() ); - assert_eq!(base_cache.stats().await.hits, 1); + assert_eq!(base.stats().await.hits, 1); } #[tokio::test] - async fn test_cache_stats_unsized() { - #[derive(Debug, DeepSizeOf)] - struct MyType(i32); - - trait MyTrait: DeepSizeOf + Send + Sync + Any {} - - impl MyTrait for MyType {} - + async fn test_cache_get_or_insert() { let cache = LanceCache::with_capacity(1000); - assert!(cache.get_unsized::("test").await.is_none()); - assert_eq!(cache.stats().await.misses, 1); - - let item: Arc = Arc::new(MyType(42)); - cache.insert_unsized("test", item).await; - - assert!(cache.get_unsized::("test").await.is_some()); - assert_eq!(cache.stats().await.hits, 1); - } - - #[tokio::test] - async fn test_cache_stats_get_or_insert() { - 
let cache = LanceCache::with_capacity(1000); - - // First call: miss - let result: Arc> = cache - .get_or_insert_with_key(TestKey::>::new("key1"), || async { + let v: Arc> = cache + .get_or_insert_with_key(TestKey::>::new("k"), || async { Ok(vec![1, 2, 3]) }) .await .unwrap(); - assert_eq!(*result, vec![1, 2, 3]); - assert_eq!(cache.stats().await.misses, 1); + assert_eq!(*v, vec![1, 2, 3]); - // Second call: hit - let result: Arc> = cache - .get_or_insert_with_key(TestKey::>::new("key1"), || async { - panic!("Should not be called") + // Second call should not invoke loader + let v: Arc> = cache + .get_or_insert_with_key(TestKey::>::new("k"), || async { + panic!("should not be called") }) .await .unwrap(); - assert_eq!(*result, vec![1, 2, 3]); - assert_eq!(cache.stats().await.hits, 1); - - // Different key: miss - let result: Arc> = cache - .get_or_insert_with_key(TestKey::>::new("key2"), || async { - Ok(vec![4, 5, 6]) - }) - .await - .unwrap(); - assert_eq!(*result, vec![4, 5, 6]); - assert_eq!(cache.stats().await.misses, 2); + assert_eq!(*v, vec![1, 2, 3]); } #[tokio::test] @@ -1132,55 +911,44 @@ mod tests { async fn get(&self, key: &[u8]) -> Option { self.map.lock().await.get(key).map(|(e, _)| e.clone()) } - async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) { self.map .lock() .await .insert(key.to_vec(), (entry, size_bytes)); } - async fn invalidate_prefix(&self, prefix: &[u8]) { self.map.lock().await.retain(|k, _| !k.starts_with(prefix)); } - async fn clear(&self) { self.map.lock().await.clear(); } - async fn num_entries(&self) -> usize { self.map.lock().await.len() } - async fn size_bytes(&self) -> usize { self.map.lock().await.values().map(|(_, s)| *s).sum() } } - let backend = Arc::new(HashMapBackend::new()); - let cache = LanceCache::with_backend(backend); + let cache = LanceCache::with_backend(Arc::new(HashMapBackend::new())); cache - .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3])) + 
.insert_with_key(&TestKey::new("k"), Arc::new(vec![1, 2, 3])) .await; - let retrieved = cache - .get_with_key(&TestKey::>::new("key1")) - .await - .unwrap(); - assert_eq!(*retrieved, vec![1, 2, 3]); - - // Miss for different type at same key assert!( cache - .get_with_key(&TestKey::>::new("key1")) + .get_with_key(&TestKey::>::new("k")) + .await + .is_some() + ); + // Different type at same key = miss + assert!( + cache + .get_with_key(&TestKey::>::new("k")) .await .is_none() ); - - let stats = cache.stats().await; - assert_eq!(stats.hits, 1); - assert_eq!(stats.misses, 1); - assert_eq!(stats.num_entries, 1); } #[tokio::test] diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs index a11d5be988f..884bc793130 100644 --- a/rust/lance-file/src/previous/reader.rs +++ b/rust/lance-file/src/previous/reader.rs @@ -238,7 +238,7 @@ impl FileReader { loader: F, ) -> Result> where - F: Fn(&str) -> Fut, + F: Fn(&str) -> Fut + Send + Sync, Fut: Future> + Send, { if let Some(cache) = cache { diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs index da9c5d85f4a..9650c3bf2d0 100644 --- a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -77,11 +77,7 @@ impl std::fmt::Debug for Session { ) .field( "file_metadata_cache", - &format!( - "LanceCache(items={}, size_bytes={})", - self.metadata_cache.0.approx_size(), - self.metadata_cache.0.approx_size_bytes(), - ), + &format!("LanceCache(items={})", self.metadata_cache.0.approx_size(),), ) .field( "index_extensions", From 376a2f7ef0f5e47e0c375294f72d3e30e43e87c0 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 10:05:04 -0700 Subject: [PATCH 05/24] refactor: remove non-CacheKey methods, type_tag, approx_size_bytes Remove all methods that bypass CacheKey from WeakLanceCache (get, insert, get_or_insert, get_unsized, insert_unsized). Remove insert_unsized/get_unsized from LanceCache. Remove type_tag helper. 
All cache access now goes through CacheKey/UnsizedCacheKey. Make parse_cache_key return (empty, 0) instead of panicking on short keys. Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache.rs | 184 ++++++++++++++--------------------- rust/lance/src/session.rs | 12 ++- 2 files changed, 84 insertions(+), 112 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 418efe151e6..e6b4fa37eaa 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -204,25 +204,15 @@ impl CacheBackend for MokaCacheBackend { // Type identity helpers // --------------------------------------------------------------------------- -/// Derives a stable type tag from `type_name::()`. -/// -/// Uses the pointer of the `&'static str` returned by [`std::any::type_name`]. -/// The pointer is stable for the lifetime of the process and unique per -/// monomorphized type within a single compilation unit. -fn type_tag() -> u64 { - std::any::type_name::().as_ptr() as u64 -} - /// Cache keys are structured as `user_key\0<8-byte type_id>`. /// /// This function splits an opaque cache key into the user-visible portion /// and the type_id. Backend implementations can use this to inspect keys. -/// -/// # Panics -/// -/// Panics if `key` is shorter than 9 bytes. +/// Returns `(empty slice, 0)` if the key is too short to parse. 
pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) { - assert!(key.len() >= 9, "cache key too short to parse"); + if key.len() < 9 { + return (&[], 0); + } let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap(); let user_key = &key[..key.len() - 9]; (user_key, u64::from_le_bytes(type_id_bytes)) @@ -398,15 +388,6 @@ impl LanceCache { self.insert_with_id(key, type_id, Arc::new(metadata)).await } - pub async fn insert_unsized( - &self, - key: &str, - metadata: Arc, - ) { - self.insert_unsized_with_id(key, type_tag::>(), metadata) - .await - } - async fn get_unsized_with_id( &self, key: &str, @@ -416,13 +397,6 @@ impl LanceCache { Some(outer.as_ref().clone()) } - pub async fn get_unsized( - &self, - key: &str, - ) -> Option> { - self.get_unsized_with_id(key, type_tag::>()).await - } - // -- Stats / clear -------------------------------------------------------- pub async fn stats(&self) -> CacheStats { @@ -532,27 +506,31 @@ impl WeakLanceCache { } } - pub async fn get(&self, key: &str) -> Option> { + pub async fn get_with_key(&self, cache_key: &K) -> Option> + where + K: CacheKey, + K::ValueType: DeepSizeOf + Send + Sync + 'static, + { let cache = self.inner.upgrade()?; - let cache_key = make_cache_key(&self.prefix, key, type_tag::()); - if let Some(entry) = cache.get(&cache_key).await { + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + if let Some(entry) = cache.get(&key).await { self.hits.fetch_add(1, Ordering::Relaxed); - Some(entry.downcast::().unwrap()) + Some(entry.downcast::().unwrap()) } else { self.misses.fetch_add(1, Ordering::Relaxed); None } } - pub async fn insert( - &self, - key: &str, - value: Arc, - ) -> bool { + pub async fn insert_with_key(&self, cache_key: &K, value: Arc) -> bool + where + K: CacheKey, + K::ValueType: DeepSizeOf + Send + Sync + 'static, + { if let Some(cache) = self.inner.upgrade() { let size = value.deep_size_of() + 8; - let cache_key = make_cache_key(&self.prefix, key, type_tag::()); - 
cache.insert(&cache_key, value, size).await; + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + cache.insert(&key, value, size).await; true } else { log::warn!("WeakLanceCache: cache no longer available, unable to insert item"); @@ -563,71 +541,44 @@ impl WeakLanceCache { /// Get or insert an item, computing it if necessary. /// /// Deduplication of concurrent loads is handled by the backend. - pub async fn get_or_insert(&self, key: &str, f: F) -> Result> + pub async fn get_or_insert_with_key( + &self, + cache_key: K, + loader: F, + ) -> Result> where - T: DeepSizeOf + Send + Sync + 'static, + K: CacheKey, + K::ValueType: DeepSizeOf + Send + Sync + 'static, F: FnOnce() -> Fut + Send, - Fut: Future> + Send, + Fut: Future> + Send, { if let Some(cache) = self.inner.upgrade() { - let cache_key = make_cache_key(&self.prefix, key, type_tag::()); + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); let typed_loader = Box::pin(async move { - let value = f().await?; + let value = loader().await?; let arc = Arc::new(value); let size = arc.deep_size_of() + 8; Ok((arc as CacheEntry, size)) }); - let entry = cache.get_or_insert(&cache_key, typed_loader).await?; + let entry = cache.get_or_insert(&key, typed_loader).await?; self.misses.fetch_add(1, Ordering::Relaxed); - Ok(entry.downcast::().unwrap()) + Ok(entry.downcast::().unwrap()) } else { log::warn!("WeakLanceCache: cache no longer available, computing without caching"); - f().await.map(Arc::new) + loader().await.map(Arc::new) } } - pub async fn get_or_insert_with_key( - &self, - cache_key: K, - loader: F, - ) -> Result> - where - K: CacheKey, - K::ValueType: DeepSizeOf + Send + Sync + 'static, - F: FnOnce() -> Fut + Send, - Fut: Future> + Send, - { - let key_str = cache_key.key().into_owned(); - self.get_or_insert(&key_str, loader).await - } - - pub async fn insert_with_key(&self, cache_key: &K, value: Arc) -> bool - where - K: CacheKey, - K::ValueType: DeepSizeOf 
+ Send + Sync + 'static, - { - let key_str = cache_key.key().into_owned(); - self.insert(&key_str, value).await - } - - pub async fn get_with_key(&self, cache_key: &K) -> Option> + pub async fn get_unsized_with_key(&self, cache_key: &K) -> Option> where - K: CacheKey, + K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - let key_str = cache_key.key().into_owned(); - self.get(&key_str).await - } - - pub async fn get_unsized( - &self, - key: &str, - ) -> Option> { let cache = self.inner.upgrade()?; - let cache_key = make_cache_key(&self.prefix, key, type_tag::>()); - if let Some(entry) = cache.get(&cache_key).await { + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + if let Some(entry) = cache.get(&key).await { entry - .downcast::>() + .downcast::>() .ok() .map(|arc| arc.as_ref().clone()) } else { @@ -635,36 +586,20 @@ impl WeakLanceCache { } } - pub async fn insert_unsized( - &self, - key: &str, - value: Arc, - ) { + pub async fn insert_unsized_with_key(&self, cache_key: &K, value: Arc) + where + K: UnsizedCacheKey, + K::ValueType: DeepSizeOf + Send + Sync + 'static, + { if let Some(cache) = self.inner.upgrade() { let wrapper = Arc::new(value); let size = wrapper.deep_size_of() + 8; - let cache_key = make_cache_key(&self.prefix, key, type_tag::>()); - cache.insert(&cache_key, wrapper, size).await; + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + cache.insert(&key, wrapper, size).await; } else { log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item"); } } - - pub async fn get_unsized_with_key(&self, cache_key: &K) -> Option> - where - K: UnsizedCacheKey, - K::ValueType: DeepSizeOf + Send + Sync + 'static, - { - self.get_unsized(&cache_key.key()).await - } - - pub async fn insert_unsized_with_key(&self, cache_key: &K, value: Arc) - where - K: UnsizedCacheKey, - K::ValueType: DeepSizeOf + Send + Sync + 'static, - { - 
self.insert_unsized(&cache_key.key(), value).await - } } // --------------------------------------------------------------------------- @@ -763,6 +698,28 @@ mod tests { } } + /// Test helper: an UnsizedCacheKey for trait object values. + struct TestUnsizedKey { + key: String, + _phantom: PhantomData, + } + + impl TestUnsizedKey { + fn new(key: &str) -> Self { + Self { + key: key.to_string(), + _phantom: PhantomData, + } + } + } + + impl UnsizedCacheKey for TestUnsizedKey { + type ValueType = T; + fn key(&self) -> Cow<'_, str> { + Cow::Borrowed(&self.key) + } + } + #[tokio::test] async fn test_cache_bytes() { let item = Arc::new(vec![1, 2, 3]); @@ -809,9 +766,14 @@ mod tests { let item: Arc = Arc::new(MyType(42)); let cache = LanceCache::with_capacity(1000); - cache.insert_unsized("test", item).await; + cache + .insert_unsized_with_key(&TestUnsizedKey::::new("test"), item) + .await; - let retrieved = cache.get_unsized::("test").await.unwrap(); + let retrieved = cache + .get_unsized_with_key(&TestUnsizedKey::::new("test")) + .await + .unwrap(); assert_eq!(retrieved.as_any().downcast_ref::().unwrap().0, 42); } diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs index 9650c3bf2d0..c27eddf7b27 100644 --- a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -219,7 +219,17 @@ impl Default for Session { #[cfg(test)] mod tests { use super::*; + use lance_core::cache::UnsizedCacheKey; use lance_index::vector::VectorIndex; + use std::borrow::Cow; + + struct TestUnsizedKey(&'static str); + impl UnsizedCacheKey for TestUnsizedKey { + type ValueType = dyn VectorIndex; + fn key(&self) -> Cow<'_, str> { + Cow::Borrowed(self.0) + } + } #[tokio::test] async fn test_disable_index_cache() { @@ -227,7 +237,7 @@ mod tests { assert!( no_cache .index_cache - .get_unsized::("abc") + .get_unsized_with_key(&TestUnsizedKey("abc")) .await .is_none() ); From 74fdc2cd78b10eb54664fe3b7e54cb368b9bf4fc Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 
10:30:51 -0700 Subject: [PATCH 06/24] cleanup --- rust/lance-core/src/cache.rs | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index e6b4fa37eaa..a2bf6cf5239 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -48,8 +48,7 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { /// Get an existing entry or compute it from `loader`. /// /// Implementations should deduplicate concurrent loads for the same key - /// so the loader runs at most once. The default implementation does a - /// simple get-then-insert without deduplication. + /// so the loader runs at most once. /// /// The loader is a pinned future that produces `(entry, size_bytes)`. /// It borrows from the caller's scope and will be `.await`ed within @@ -58,14 +57,7 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { &self, key: &[u8], loader: Pin> + Send + 'a>>, - ) -> Result { - if let Some(entry) = self.get(key).await { - return Ok(entry); - } - let (entry, size) = loader.await?; - self.insert(key, entry.clone(), size).await; - Ok(entry) - } + ) -> Result; /// Remove all entries whose key starts with `prefix`. 
async fn invalidate_prefix(&self, prefix: &[u8]); @@ -879,6 +871,22 @@ mod tests { .await .insert(key.to_vec(), (entry, size_bytes)); } + async fn get_or_insert<'a>( + &self, + key: &[u8], + loader: Pin> + Send + 'a>>, + ) -> Result { + if let Some((entry, _)) = self.map.lock().await.get(key) { + Ok(entry.clone()) + } else { + let (entry, size) = loader.await?; + self.map + .lock() + .await + .insert(key.to_vec(), (entry.clone(), size)); + Ok(entry) + } + } async fn invalidate_prefix(&self, prefix: &[u8]) { self.map.lock().await.retain(|k, _| !k.starts_with(prefix)); } From 1ba4ac3347859f7a55c66d10a9fd1e7ed5e3b84d Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 11:10:39 -0700 Subject: [PATCH 07/24] cleanup Restore approx_size_bytes on CacheBackend so DeepSizeOf on LanceCache reports actual cache memory usage (used by Session::size_bytes). Fixes test_metadata_cache_size Python test. Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index a2bf6cf5239..98b7e5028e2 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -76,6 +76,13 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { fn approx_num_entries(&self) -> usize { 0 } + + /// Approximate weighted size in bytes, callable from synchronous contexts. + /// Used by `DeepSizeOf` to report cache memory usage. + /// Backends that cannot provide this cheaply should return 0. 
+ fn approx_size_bytes(&self) -> usize { + 0 + } } // --------------------------------------------------------------------------- @@ -190,6 +197,10 @@ impl CacheBackend for MokaCacheBackend { fn approx_num_entries(&self) -> usize { self.cache.entry_count() as usize } + + fn approx_size_bytes(&self) -> usize { + self.cache.iter().map(|(_, v)| v.size_bytes).sum() + } } // --------------------------------------------------------------------------- @@ -249,8 +260,7 @@ impl std::fmt::Debug for LanceCache { impl DeepSizeOf for LanceCache { fn deep_size_of_children(&self, _: &mut Context) -> usize { - // Can't iterate a dyn CacheBackend; use stats().size_bytes for accurate numbers. - 0 + self.cache.approx_size_bytes() } } From 135740982c73d7f671ef57c90b3783ab801cf1e8 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 16:00:37 -0700 Subject: [PATCH 08/24] refactor: replace type_id u64 with &'static str on CacheKey The type_name().as_ptr() approach for type discrimination was unstable across crate boundaries due to monomorphization. Replace with an explicit fn type_id() -> &'static str that each CacheKey impl provides as a short human-readable literal (e.g. 'Vec', 'Manifest'). Key format changes from user_key\0<8 LE bytes> to user_key\0. parse_cache_key() now returns (&[u8], &str). 
--- rust/lance-core/src/cache.rs | 66 +++++++++---------- .../src/encodings/logical/primitive.rs | 4 ++ rust/lance-file/src/previous/reader.rs | 4 ++ rust/lance-index/src/scalar/bitmap.rs | 4 ++ rust/lance-index/src/scalar/btree.rs | 4 ++ rust/lance-index/src/scalar/inverted/index.rs | 8 +++ rust/lance-index/src/scalar/ngram.rs | 4 ++ rust/lance-index/src/scalar/rtree.rs | 4 ++ rust/lance/src/dataset/fragment.rs | 4 ++ rust/lance/src/index.rs | 16 +++++ rust/lance/src/index/vector/ivf.rs | 4 ++ rust/lance/src/index/vector/ivf/v2.rs | 6 ++ rust/lance/src/session.rs | 4 ++ rust/lance/src/session/caches.rs | 24 +++++-- rust/lance/src/session/index_caches.rs | 12 ++++ 15 files changed, 127 insertions(+), 41 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 98b7e5028e2..83c8bb09acb 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -207,22 +207,23 @@ impl CacheBackend for MokaCacheBackend { // Type identity helpers // --------------------------------------------------------------------------- -/// Cache keys are structured as `user_key\0<8-byte type_id>`. +/// Cache keys are structured as `user_key\0type_id`. /// /// This function splits an opaque cache key into the user-visible portion -/// and the type_id. Backend implementations can use this to inspect keys. -/// Returns `(empty slice, 0)` if the key is too short to parse. -pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) { - if key.len() < 9 { - return (&[], 0); - } - let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap(); - let user_key = &key[..key.len() - 9]; - (user_key, u64::from_le_bytes(type_id_bytes)) +/// and the type_id string. Backend implementations can use this to inspect keys. +/// Returns `(empty slice, "")` if no separator is found. 
+pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) { + if let Some(sep) = key.iter().position(|&b| b == 0) { + let user_key = &key[..sep]; + let type_id = std::str::from_utf8(&key[sep + 1..]).unwrap_or(""); + (user_key, type_id) + } else { + (key, "") + } } -/// Build a key: `prefix/user_key\0<8-byte type_id>`. -fn make_cache_key(prefix: &str, key: &str, type_id: u64) -> Vec { +/// Build a key: `prefix/user_key\0type_id`. +fn make_cache_key(prefix: &str, key: &str, type_id: &str) -> Vec { let full_key = if prefix.is_empty() { key.to_string() } else { @@ -230,7 +231,7 @@ fn make_cache_key(prefix: &str, key: &str, type_id: u64) -> Vec { }; let mut bytes = full_key.into_bytes(); bytes.push(0); - bytes.extend_from_slice(&type_id.to_le_bytes()); + bytes.extend_from_slice(type_id.as_bytes()); bytes } @@ -326,7 +327,7 @@ impl LanceCache { async fn insert_with_id( &self, key: &str, - type_id: u64, + type_id: &str, metadata: Arc, ) { let size = metadata.deep_size_of() + 8; @@ -337,7 +338,7 @@ impl LanceCache { async fn get_with_id( &self, key: &str, - type_id: u64, + type_id: &str, ) -> Option> { let cache_key = make_cache_key(&self.prefix, key, type_id); if let Some(entry) = self.cache.get(&cache_key).await { @@ -352,7 +353,7 @@ impl LanceCache { async fn get_or_insert_with_id( &self, key: &str, - type_id: u64, + type_id: &str, loader: F, ) -> Result> where @@ -384,7 +385,7 @@ impl LanceCache { async fn insert_unsized_with_id( &self, key: &str, - type_id: u64, + type_id: &str, metadata: Arc, ) { self.insert_with_id(key, type_id, Arc::new(metadata)).await @@ -393,7 +394,7 @@ impl LanceCache { async fn get_unsized_with_id( &self, key: &str, - type_id: u64, + type_id: &str, ) -> Option> { let outer = self.get_with_id::>(key, type_id).await?; Some(outer.as_ref().clone()) @@ -613,16 +614,11 @@ pub trait CacheKey { fn key(&self) -> Cow<'_, str>; - /// Human-readable type name, for debugging and diagnostics. 
- fn type_name(&self) -> &'static str { - std::any::type_name::() - } - - /// Stable numeric identifier for key discrimination in the cache. - /// Derived from the pointer of [`Self::type_name`] by default. - fn type_id(&self) -> u64 { - self.type_name().as_ptr() as u64 - } + /// Short, stable string that distinguishes this value type from others in + /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_id`). + /// Must be consistent across crate boundaries — use a short literal, not + /// `type_name` pointers. + fn type_id(&self) -> &'static str; } pub trait UnsizedCacheKey { @@ -630,13 +626,7 @@ pub trait UnsizedCacheKey { fn key(&self) -> Cow<'_, str>; - fn type_name(&self) -> &'static str { - std::any::type_name::() - } - - fn type_id(&self) -> u64 { - self.type_name().as_ptr() as u64 - } + fn type_id(&self) -> &'static str; } // --------------------------------------------------------------------------- @@ -698,6 +688,9 @@ mod tests { fn key(&self) -> Cow<'_, str> { Cow::Borrowed(&self.key) } + fn type_id(&self) -> &'static str { + std::any::type_name::() + } } /// Test helper: an UnsizedCacheKey for trait object values. 
@@ -720,6 +713,9 @@ mod tests { fn key(&self) -> Cow<'_, str> { Cow::Borrowed(&self.key) } + fn type_id(&self) -> &'static str { + std::any::type_name::() + } } #[tokio::test] diff --git a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs index b5de0912a32..232d2659631 100644 --- a/rust/lance-encoding/src/encodings/logical/primitive.rs +++ b/rust/lance-encoding/src/encodings/logical/primitive.rs @@ -3416,6 +3416,10 @@ impl CacheKey for FieldDataCacheKey { fn key(&self) -> std::borrow::Cow<'_, str> { self.column_index.to_string().into() } + + fn type_id(&self) -> &'static str { + "FieldData" + } } impl StructuralFieldScheduler for StructuralPrimitiveFieldScheduler { diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs index 884bc793130..6dd40af45c1 100644 --- a/rust/lance-file/src/previous/reader.rs +++ b/rust/lance-file/src/previous/reader.rs @@ -89,6 +89,10 @@ impl CacheKey for StringCacheKey<'_, T> { fn key(&self) -> Cow<'_, str> { self.key.into() } + + fn type_id(&self) -> &'static str { + std::any::type_name::() + } } impl FileReader { diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs index 86931d8a64f..2ef83d92c9b 100644 --- a/rust/lance-index/src/scalar/bitmap.rs +++ b/rust/lance-index/src/scalar/bitmap.rs @@ -128,6 +128,10 @@ impl CacheKey for BitmapKey { fn key(&self) -> std::borrow::Cow<'_, str> { format!("{}", self.value.0).into() } + + fn type_id(&self) -> &'static str { + "Bitmap" + } } impl BitmapIndex { diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs index 9930c96793e..04fc146d9aa 100644 --- a/rust/lance-index/src/scalar/btree.rs +++ b/rust/lance-index/src/scalar/btree.rs @@ -989,6 +989,10 @@ impl CacheKey for BTreePageKey { fn key(&self) -> std::borrow::Cow<'_, str> { format!("page-{}", self.page_number).into() } + + fn type_id(&self) -> &'static str { + "BTreePage" + } 
} /// Note: this is very similar to the IVF index except we store the IVF part in a btree diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs index 53ef5fc6a66..4bebc720872 100644 --- a/rust/lance-index/src/scalar/inverted/index.rs +++ b/rust/lance-index/src/scalar/inverted/index.rs @@ -1602,6 +1602,10 @@ impl CacheKey for PostingListKey { fn key(&self) -> std::borrow::Cow<'_, str> { format!("postings-{}", self.token_id).into() } + + fn type_id(&self) -> &'static str { + "PostingList" + } } #[derive(Debug, Clone)] @@ -1615,6 +1619,10 @@ impl CacheKey for PositionKey { fn key(&self) -> std::borrow::Cow<'_, str> { format!("positions-{}", self.token_id).into() } + + fn type_id(&self) -> &'static str { + "Position" + } } #[derive(Debug, Clone, DeepSizeOf)] diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs index ce992af70ab..f03e50c9195 100644 --- a/rust/lance-index/src/scalar/ngram.rs +++ b/rust/lance-index/src/scalar/ngram.rs @@ -170,6 +170,10 @@ impl CacheKey for NGramPostingListKey { fn key(&self) -> std::borrow::Cow<'_, str> { format!("posting-list-{}", self.row_offset).into() } + + fn type_id(&self) -> &'static str { + "NGramPostingList" + } } impl NGramPostingList { diff --git a/rust/lance-index/src/scalar/rtree.rs b/rust/lance-index/src/scalar/rtree.rs index 598bef43f50..37d715539eb 100644 --- a/rust/lance-index/src/scalar/rtree.rs +++ b/rust/lance-index/src/scalar/rtree.rs @@ -249,6 +249,10 @@ impl CacheKey for RTreeCacheKey { Self::Nulls => "nulls".into(), } } + + fn type_id(&self) -> &'static str { + "RTree" + } } #[derive(Clone)] diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs index 5be98a9b23d..55c23211871 100644 --- a/rust/lance/src/dataset/fragment.rs +++ b/rust/lance/src/dataset/fragment.rs @@ -1879,6 +1879,10 @@ impl CacheKey for FileMetadataCacheKey { fn key(&self) -> std::borrow::Cow<'_, str> { "".into() } + + fn 
type_id(&self) -> &'static str { + "FileMetadata" + } } impl From for Fragment { diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 8c1b2404a42..16faab4e48f 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -110,6 +110,10 @@ impl UnsizedCacheKey for ScalarIndexCacheKey<'_> { self.uuid.into() } } + + fn type_id(&self) -> &'static str { + "ScalarIndex" + } } #[derive(Debug, Clone)] @@ -134,6 +138,10 @@ impl UnsizedCacheKey for VectorIndexCacheKey<'_> { self.uuid.into() } } + + fn type_id(&self) -> &'static str { + "VectorIndex" + } } #[derive(Debug, Clone)] @@ -158,6 +166,10 @@ impl CacheKey for FragReuseIndexCacheKey<'_> { self.uuid.into() } } + + fn type_id(&self) -> &'static str { + "FragReuseIndex" + } } #[derive(Debug, Clone)] @@ -182,6 +194,10 @@ impl CacheKey for MemWalCacheKey<'_> { self.uuid.to_string().into() } } + + fn type_id(&self) -> &'static str { + "MemWalIndex" + } } // Whether to auto-migrate a dataset when we encounter corruption. diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs index a39bd58e3c3..c44f73563d3 100644 --- a/rust/lance/src/index/vector/ivf.rs +++ b/rust/lance/src/index/vector/ivf.rs @@ -123,6 +123,10 @@ impl UnsizedCacheKey for LegacyIVFPartitionKey { fn key(&self) -> std::borrow::Cow<'_, str> { format!("ivf-{}", self.partition_id).into() } + + fn type_id(&self) -> &'static str { + "LegacyIVFPartition" + } } /// IVF Index. diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index d781bb5456c..14f71612ddb 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -96,6 +96,12 @@ impl CacheKey for IVFPartit fn key(&self) -> std::borrow::Cow<'_, str> { format!("ivf-{}", self.partition_id).into() } + + fn type_id(&self) -> &'static str { + // Using type_name is safe here: the impl is in the same crate as the + // types, so the monomorphized pointer is consistent. 
+ std::any::type_name::>() + } } /// IVF Index. diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs index c27eddf7b27..4876224cb75 100644 --- a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -229,6 +229,10 @@ mod tests { fn key(&self) -> Cow<'_, str> { Cow::Borrowed(self.0) } + + fn type_id(&self) -> &'static str { + "TestUnsized" + } } #[tokio::test] diff --git a/rust/lance/src/session/caches.rs b/rust/lance/src/session/caches.rs index 67c684c98de..e4ac180d563 100644 --- a/rust/lance/src/session/caches.rs +++ b/rust/lance/src/session/caches.rs @@ -75,7 +75,6 @@ pub struct ManifestKey<'a> { impl CacheKey for ManifestKey<'_> { type ValueType = Manifest; - fn key(&self) -> Cow<'_, str> { if let Some(e_tag) = self.e_tag { Cow::Owned(format!("manifest/{}/{}", self.version, e_tag)) @@ -83,6 +82,9 @@ impl CacheKey for ManifestKey<'_> { Cow::Owned(format!("manifest/{}", self.version)) } } + fn type_id(&self) -> &'static str { + "Manifest" + } } #[derive(Debug)] @@ -92,10 +94,12 @@ pub struct TransactionKey { impl CacheKey for TransactionKey { type ValueType = Transaction; - fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("txn/{}", self.version)) } + fn type_id(&self) -> &'static str { + "Transaction" + } } #[derive(Debug)] @@ -106,7 +110,6 @@ pub struct DeletionFileKey<'a> { impl CacheKey for DeletionFileKey<'_> { type ValueType = DeletionVector; - fn key(&self) -> Cow<'_, str> { Cow::Owned(format!( "deletion/{}/{}/{}/{}", @@ -116,6 +119,9 @@ impl CacheKey for DeletionFileKey<'_> { self.deletion_file.file_type.suffix() )) } + fn type_id(&self) -> &'static str { + "DeletionVector" + } } #[derive(Debug)] @@ -125,10 +131,12 @@ pub struct RowAddrMaskKey { impl CacheKey for RowAddrMaskKey { type ValueType = RowAddrMask; - fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_addr_mask/{}", self.version)) } + fn type_id(&self) -> &'static str { + "RowAddrMask" + } } #[derive(Debug)] @@ -138,10 +146,12 @@ pub struct RowIdIndexKey { impl CacheKey 
for RowIdIndexKey { type ValueType = RowIdIndex; - fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_id_index/{}", self.version)) } + fn type_id(&self) -> &'static str { + "RowIdIndex" + } } #[derive(Debug)] @@ -151,10 +161,12 @@ pub struct RowIdSequenceKey { impl CacheKey for RowIdSequenceKey { type ValueType = RowIdSequence; - fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_id_sequence/{}", self.fragment_id)) } + fn type_id(&self) -> &'static str { + "RowIdSequence" + } } impl DSMetadataCache { diff --git a/rust/lance/src/session/index_caches.rs b/rust/lance/src/session/index_caches.rs index d9578d43112..c3430f4c840 100644 --- a/rust/lance/src/session/index_caches.rs +++ b/rust/lance/src/session/index_caches.rs @@ -88,6 +88,10 @@ impl CacheKey for FragReuseIndexKey<'_> { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("frag_reuse/{}", self.uuid)) } + + fn type_id(&self) -> &'static str { + "FragReuseIndex" + } } #[derive(Debug)] @@ -101,6 +105,10 @@ impl CacheKey for IndexMetadataKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(self.version.to_string()) } + + fn type_id(&self) -> &'static str { + "Vec" + } } pub struct ProstAny(pub Arc); @@ -128,4 +136,8 @@ impl CacheKey for ScalarIndexDetailsKey<'_> { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("type/{}", self.uuid)) } + + fn type_id(&self) -> &'static str { + "ScalarIndexDetails" + } } From 2e7602ebcdb0b7d4da8253a5932aca1ffd552af1 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 12:19:09 -0700 Subject: [PATCH 09/24] feat: add partition serde for all quantizer types (PR #6223) --- rust/lance/src/index/vector/ivf.rs | 4 + .../src/index/vector/ivf/partition_serde.rs | 1153 +++++++++++++++++ 2 files changed, 1157 insertions(+) create mode 100644 rust/lance/src/index/vector/ivf/partition_serde.rs diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs index c44f73563d3..88460c18005 100644 --- a/rust/lance/src/index/vector/ivf.rs +++ 
b/rust/lance/src/index/vector/ivf.rs @@ -102,6 +102,7 @@ use uuid::Uuid; pub mod builder; pub mod io; +pub mod partition_serde; pub mod v2; // Cache wrapper for vector index trait objects @@ -2671,6 +2672,7 @@ mod tests { index_version: VECTOR_INDEX_VERSION as i32, created_at: Some(chrono::Utc::now()), base_id: None, + files: None, }; // We need to commit this index to the dataset so that it can be found @@ -2709,6 +2711,7 @@ mod tests { index_version: VECTOR_INDEX_VERSION as i32, created_at: None, // Test index, not setting timestamp base_id: None, + files: None, }; let prefilter = Arc::new(DatasetPreFilter::new(dataset.clone(), &[index_meta], None)); @@ -2774,6 +2777,7 @@ mod tests { index_version: VECTOR_INDEX_VERSION as i32, created_at: Some(chrono::Utc::now()), base_id: None, + files: None, }; // We need to commit this new index to the dataset so it can be found diff --git a/rust/lance/src/index/vector/ivf/partition_serde.rs b/rust/lance/src/index/vector/ivf/partition_serde.rs new file mode 100644 index 00000000000..9139e940c2a --- /dev/null +++ b/rust/lance/src/index/vector/ivf/partition_serde.rs @@ -0,0 +1,1153 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Serialization and zero-copy deserialization for IVF partition cache entries. +//! +//! The format is a simple binary layout designed for ephemeral caching (not stable across versions): +//! +//! ```text +//! [header_len: u64 LE] +//! [header: JSON bytes] +//! [sub_index IPC file bytes] +//! [... quantizer-specific IPC sections ...] +//! [storage batch IPC file bytes] +//! ``` +//! +//! Each IPC section is a complete Arrow IPC file. On deserialization, the IPC +//! sections are read zero-copy using [`FileDecoder`] so that Arrow arrays +//! reference the original buffer directly. 
+ +use std::sync::Arc; + +use arrow_array::{FixedSizeListArray, RecordBatch}; +use arrow_buffer::Buffer; +use arrow_ipc::convert::fb_to_schema; +use arrow_ipc::reader::{FileDecoder, read_footer_length}; +use arrow_ipc::root_as_footer; +use arrow_ipc::writer::FileWriter; +use arrow_schema::{DataType, Field, Schema}; +use bytes::Bytes; +use lance_core::{Error, Result}; +use lance_index::vector::bq::RQRotationType; +use lance_index::vector::bq::builder::RabitQuantizer; +use lance_index::vector::bq::storage::RabitQuantizationMetadata; +use lance_index::vector::flat::index::{FlatMetadata, FlatQuantizer}; +use lance_index::vector::pq::ProductQuantizer; +use lance_index::vector::pq::storage::ProductQuantizationMetadata; +use lance_index::vector::quantizer::{Quantization, QuantizerStorage}; +use lance_index::vector::sq::ScalarQuantizer; +use lance_index::vector::sq::storage::ScalarQuantizationMetadata; +use lance_index::vector::storage::VectorStore; +use lance_index::vector::v3::subindex::IvfSubIndex; +use lance_linalg::distance::DistanceType; +use serde::{Deserialize, Serialize}; + +use super::v2::PartitionEntry; + +// --------------------------------------------------------------------------- +// Common helpers +// --------------------------------------------------------------------------- + +fn distance_type_to_u8(dt: DistanceType) -> u8 { + match dt { + DistanceType::L2 => 0, + DistanceType::Cosine => 1, + DistanceType::Dot => 2, + DistanceType::Hamming => 3, + } +} + +fn u8_to_distance_type(v: u8) -> Result { + match v { + 0 => Ok(DistanceType::L2), + 1 => Ok(DistanceType::Cosine), + 2 => Ok(DistanceType::Dot), + 3 => Ok(DistanceType::Hamming), + _ => Err(Error::io(format!("unknown distance type: {v}"))), + } +} + +fn rotation_type_to_u8(rt: RQRotationType) -> u8 { + match rt { + RQRotationType::Matrix => 0, + RQRotationType::Fast => 1, + } +} + +fn u8_to_rotation_type(v: u8) -> Result { + match v { + 0 => Ok(RQRotationType::Matrix), + 1 => Ok(RQRotationType::Fast), + 
_ => Err(Error::io(format!("unknown rotation type: {v}"))), + } +} + +/// Write one or more RecordBatches as a complete Arrow IPC file into a Vec. +/// +/// Panics if `batches` is empty (caller is responsible for checking). +fn write_ipc_batches(batches: &[RecordBatch]) -> Result> { + let mut buf = Vec::new(); + let mut writer = FileWriter::try_new(&mut buf, batches[0].schema_ref())?; + for batch in batches { + writer.write(batch)?; + } + writer.finish()?; + Ok(buf) +} + +/// Write a single RecordBatch as a complete Arrow IPC file into a Vec. +fn write_ipc(batch: &RecordBatch) -> Result> { + write_ipc_batches(std::slice::from_ref(batch)) +} + +/// Decode the IPC footer and schema from a `Buffer`, returning the decoder and +/// the list of record-batch blocks. Zero-copy: all returned data references +/// the original buffer. +fn parse_ipc_footer(data: &Buffer) -> Result<(FileDecoder, Vec)> { + let trailer_start = data + .len() + .checked_sub(10) + .ok_or_else(|| Error::io("IPC section too small to contain footer".to_string()))?; + let footer_len = read_footer_length( + data[trailer_start..] 
+ .try_into() + .map_err(|_| Error::io("IPC section too small for footer length".to_string()))?, + )?; + let footer_start = trailer_start + .checked_sub(footer_len) + .ok_or_else(|| Error::io("IPC footer length exceeds section size".to_string()))?; + let footer = root_as_footer(&data[footer_start..trailer_start]) + .map_err(|e| Error::io(format!("failed to parse IPC footer: {e}")))?; + + let schema = + Arc::new(fb_to_schema(footer.schema().ok_or_else(|| { + Error::io("IPC footer missing schema".to_string()) + })?)); + + let mut decoder = FileDecoder::new(schema, footer.version()); + + for block in footer.dictionaries().iter().flatten() { + let block_len = block.bodyLength() as usize + block.metaDataLength() as usize; + let block_data = data.slice_with_length(block.offset() as usize, block_len); + decoder.read_dictionary(block, &block_data)?; + } + + let batch_blocks: Vec = footer + .recordBatches() + .map(|b| b.iter().copied().collect()) + .unwrap_or_default(); + + Ok((decoder, batch_blocks)) +} + +/// Read all RecordBatches from an Arrow IPC file stored in a `Buffer`, zero-copy. +/// +/// The returned arrays reference slices of the provided buffer directly. +fn read_ipc_all_zero_copy(data: Buffer) -> Result> { + let (decoder, batch_blocks) = parse_ipc_footer(&data)?; + batch_blocks + .iter() + .map(|block| { + let block_len = block.bodyLength() as usize + block.metaDataLength() as usize; + let block_data = data.slice_with_length(block.offset() as usize, block_len); + decoder + .read_record_batch(block, &block_data)? + .ok_or_else(|| Error::io("IPC record batch was None".to_string())) + }) + .collect() +} + +/// Read a single RecordBatch from an Arrow IPC file stored in a `Buffer`, zero-copy. +/// +/// The returned `RecordBatch`'s arrays reference slices of the provided buffer +/// directly, avoiding copies. 
+fn read_ipc_zero_copy(data: Buffer) -> Result { + let (decoder, batch_blocks) = parse_ipc_footer(&data)?; + if batch_blocks.is_empty() { + return Err(Error::io("IPC file contains no record batches".to_string())); + } + let block = &batch_blocks[0]; + let block_len = block.bodyLength() as usize + block.metaDataLength() as usize; + let block_data = data.slice_with_length(block.offset() as usize, block_len); + decoder + .read_record_batch(block, &block_data)? + .ok_or_else(|| Error::io("IPC record batch was None".to_string())) +} + +/// Wrap a `FixedSizeListArray` in a single-column RecordBatch with the given column name. +fn fsl_to_batch(arr: &FixedSizeListArray, name: &str) -> Result { + let field = Field::new( + name, + DataType::FixedSizeList( + Arc::new(Field::new("item", arr.value_type(), true)), + arr.value_length(), + ), + false, + ); + let schema = Arc::new(Schema::new(vec![field])); + Ok(RecordBatch::try_new(schema, vec![Arc::new(arr.clone())])?) +} + +/// Extract a `FixedSizeListArray` from the first column of a RecordBatch. +fn batch_to_fsl(batch: &RecordBatch) -> Result { + let col = batch.column(0); + col.as_any() + .downcast_ref::() + .cloned() + .ok_or_else(|| Error::io("column is not FixedSizeListArray".to_string())) +} + +fn codebook_to_batch(codebook: &FixedSizeListArray) -> Result { + fsl_to_batch(codebook, "codebook") +} + +fn batch_to_codebook(batch: &RecordBatch) -> Result { + batch_to_fsl(batch) +} + +// --------------------------------------------------------------------------- +// PQ +// --------------------------------------------------------------------------- + +#[derive(Serialize, Deserialize)] +struct PqPartitionHeader { + distance_type: u8, + nbits: u32, + num_sub_vectors: usize, + dimension: usize, + transposed: bool, + /// Length of the sub-index IPC section in bytes. + sub_index_len: u64, + /// Length of the codebook IPC section in bytes. + codebook_len: u64, + /// Length of the storage batch IPC section in bytes. 
+ storage_len: u64, +} + +impl PartitionEntry { + /// Serialize this partition entry to bytes. + /// + /// The sub-index, PQ codebook, and storage batch are each written as Arrow + /// IPC file sections, preceded by a small JSON header containing scalar + /// metadata and section lengths. + pub fn serialize(&self) -> Result> { + let metadata = self.storage.metadata(); + let distance_type = self.storage.distance_type(); + + // Serialize the three Arrow sections. + let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; + let codebook = metadata.codebook.as_ref().ok_or_else(|| { + Error::io("PQ metadata missing codebook during serialization".to_string()) + })?; + let codebook_ipc = write_ipc(&codebook_to_batch(codebook)?)?; + let storage_batches: Vec<_> = self.storage.to_batches()?.collect(); + let storage_ipc = if storage_batches.len() == 1 { + write_ipc(&storage_batches[0])? + } else { + return Err(Error::io( + "expected exactly one storage batch for PQ storage".to_string(), + )); + }; + + let header = PqPartitionHeader { + distance_type: distance_type_to_u8(distance_type), + nbits: metadata.nbits, + num_sub_vectors: metadata.num_sub_vectors, + dimension: metadata.dimension, + transposed: metadata.transposed, + sub_index_len: sub_index_ipc.len() as u64, + codebook_len: codebook_ipc.len() as u64, + storage_len: storage_ipc.len() as u64, + }; + + let header_json = serde_json::to_vec(&header)?; + + let total_len = + 8 + header_json.len() + sub_index_ipc.len() + codebook_ipc.len() + storage_ipc.len(); + let mut out = Vec::with_capacity(total_len); + out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); + out.extend_from_slice(&header_json); + out.extend_from_slice(&sub_index_ipc); + out.extend_from_slice(&codebook_ipc); + out.extend_from_slice(&storage_ipc); + + Ok(out) + } + + /// Deserialize a partition entry from bytes, zero-copy for Arrow data. 
+ /// + /// The Arrow IPC sections are decoded using [`FileDecoder`] so that the + /// resulting arrays reference slices of the provided `Bytes` buffer directly. + pub fn deserialize(data: Bytes) -> Result { + if data.len() < 8 { + return Err(Error::io("partition data too small".to_string())); + } + + let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; + let header_end = 8 + header_len; + if data.len() < header_end { + return Err(Error::io("partition data truncated in header".to_string())); + } + + let header: PqPartitionHeader = serde_json::from_slice(&data[8..header_end])?; + let distance_type = u8_to_distance_type(header.distance_type)?; + + let sub_index_start = header_end; + let sub_index_end = sub_index_start + header.sub_index_len as usize; + let codebook_start = sub_index_end; + let codebook_end = codebook_start + header.codebook_len as usize; + let storage_start = codebook_end; + let storage_end = storage_start + header.storage_len as usize; + + if data.len() < storage_end { + return Err(Error::io( + "partition data truncated in IPC sections".to_string(), + )); + } + + // Zero-copy: create Buffer slices backed by the original Bytes. 
+ let buffer = Buffer::from(data); + let sub_index_buf = + buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); + let codebook_buf = buffer.slice_with_length(codebook_start, header.codebook_len as usize); + let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize); + + let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; + let codebook_batch = read_ipc_zero_copy(codebook_buf)?; + let storage_batch = read_ipc_zero_copy(storage_buf)?; + + let index = S::load(sub_index_batch)?; + let codebook = batch_to_codebook(&codebook_batch)?; + + let metadata = ProductQuantizationMetadata { + codebook_position: 0, + nbits: header.nbits, + num_sub_vectors: header.num_sub_vectors, + dimension: header.dimension, + codebook: Some(codebook), + codebook_tensor: Vec::new(), + transposed: header.transposed, + }; + + let storage = ::Storage::try_from_batch( + storage_batch, + &metadata, + distance_type, + None, + )?; + + Ok(Self { index, storage }) + } +} + +// --------------------------------------------------------------------------- +// Flat +// --------------------------------------------------------------------------- + +#[derive(Serialize, Deserialize)] +struct FlatPartitionHeader { + distance_type: u8, + dim: usize, + sub_index_len: u64, + storage_len: u64, +} + +impl PartitionEntry { + /// Serialize this partition entry to bytes. + pub fn serialize(&self) -> Result> { + let metadata = self.storage.metadata(); + let distance_type = self.storage.distance_type(); + + let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; + let storage_batches: Vec<_> = self.storage.to_batches()?.collect(); + let storage_ipc = if storage_batches.len() == 1 { + write_ipc(&storage_batches[0])? 
+ } else { + return Err(Error::io( + "expected exactly one storage batch for Flat storage".to_string(), + )); + }; + + let header = FlatPartitionHeader { + distance_type: distance_type_to_u8(distance_type), + dim: metadata.dim, + sub_index_len: sub_index_ipc.len() as u64, + storage_len: storage_ipc.len() as u64, + }; + + let header_json = serde_json::to_vec(&header)?; + let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len(); + let mut out = Vec::with_capacity(total_len); + out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); + out.extend_from_slice(&header_json); + out.extend_from_slice(&sub_index_ipc); + out.extend_from_slice(&storage_ipc); + Ok(out) + } + + /// Deserialize a partition entry from bytes, zero-copy for Arrow data. + pub fn deserialize(data: Bytes) -> Result { + if data.len() < 8 { + return Err(Error::io("partition data too small".to_string())); + } + let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; + let header_end = 8 + header_len; + if data.len() < header_end { + return Err(Error::io("partition data truncated in header".to_string())); + } + + let header: FlatPartitionHeader = serde_json::from_slice(&data[8..header_end])?; + let distance_type = u8_to_distance_type(header.distance_type)?; + + let sub_index_start = header_end; + let sub_index_end = sub_index_start + header.sub_index_len as usize; + let storage_start = sub_index_end; + let storage_end = storage_start + header.storage_len as usize; + + if data.len() < storage_end { + return Err(Error::io( + "partition data truncated in IPC sections".to_string(), + )); + } + + let buffer = Buffer::from(data); + let sub_index_buf = + buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); + let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize); + + let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; + let storage_batch = read_ipc_zero_copy(storage_buf)?; + + let index = 
S::load(sub_index_batch)?; + let metadata = FlatMetadata { dim: header.dim }; + let storage = ::Storage::try_from_batch( + storage_batch, + &metadata, + distance_type, + None, + )?; + + Ok(Self { index, storage }) + } +} + +// --------------------------------------------------------------------------- +// SQ +// --------------------------------------------------------------------------- + +#[derive(Serialize, Deserialize)] +struct SqPartitionHeader { + distance_type: u8, + num_bits: u16, + dim: usize, + bounds_start: f64, + bounds_end: f64, + sub_index_len: u64, + storage_len: u64, +} + +impl PartitionEntry { + /// Serialize this partition entry to bytes. + /// + /// Multiple SQ storage chunks are concatenated into a single IPC section. + pub fn serialize(&self) -> Result> { + let metadata = self.storage.metadata(); + let distance_type = self.storage.distance_type(); + + let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; + + // Write all SQ chunks as multiple record batches in one IPC file, avoiding copies. 
+ let batches: Vec<_> = self.storage.to_batches()?.collect(); + if batches.is_empty() { + return Err(Error::io("SQ storage has no batches".to_string())); + } + let storage_ipc = write_ipc_batches(&batches)?; + + let header = SqPartitionHeader { + distance_type: distance_type_to_u8(distance_type), + num_bits: metadata.num_bits, + dim: metadata.dim, + bounds_start: metadata.bounds.start, + bounds_end: metadata.bounds.end, + sub_index_len: sub_index_ipc.len() as u64, + storage_len: storage_ipc.len() as u64, + }; + + let header_json = serde_json::to_vec(&header)?; + let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len(); + let mut out = Vec::with_capacity(total_len); + out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); + out.extend_from_slice(&header_json); + out.extend_from_slice(&sub_index_ipc); + out.extend_from_slice(&storage_ipc); + Ok(out) + } + + /// Deserialize a partition entry from bytes, zero-copy for Arrow data. + pub fn deserialize(data: Bytes) -> Result { + if data.len() < 8 { + return Err(Error::io("partition data too small".to_string())); + } + let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; + let header_end = 8 + header_len; + if data.len() < header_end { + return Err(Error::io("partition data truncated in header".to_string())); + } + + let header: SqPartitionHeader = serde_json::from_slice(&data[8..header_end])?; + let distance_type = u8_to_distance_type(header.distance_type)?; + + let sub_index_start = header_end; + let sub_index_end = sub_index_start + header.sub_index_len as usize; + let storage_start = sub_index_end; + let storage_end = storage_start + header.storage_len as usize; + + if data.len() < storage_end { + return Err(Error::io( + "partition data truncated in IPC sections".to_string(), + )); + } + + let buffer = Buffer::from(data); + let sub_index_buf = + buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); + let storage_buf = 
buffer.slice_with_length(storage_start, header.storage_len as usize); + + let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; + let storage_batches = read_ipc_all_zero_copy(storage_buf)?; + + let index = S::load(sub_index_batch)?; + let metadata = ScalarQuantizationMetadata { + dim: header.dim, + num_bits: header.num_bits, + bounds: header.bounds_start..header.bounds_end, + }; + let storage = ::Storage::try_new( + metadata.num_bits, + distance_type, + metadata.bounds, + storage_batches, + None, + )?; + + Ok(Self { index, storage }) + } +} + +// --------------------------------------------------------------------------- +// RabitQ +// --------------------------------------------------------------------------- + +#[derive(Serialize, Deserialize)] +struct RabitPartitionHeader { + distance_type: u8, + num_bits: u8, + code_dim: u32, + /// 0 = Matrix, 1 = Fast + rotation_type: u8, + /// Fast rotation signs (only set when rotation_type == Fast). + fast_rotation_signs: Option>, + sub_index_len: u64, + /// Length of the rotation matrix IPC section; 0 when rotation_type == Fast. + rotate_mat_len: u64, + storage_len: u64, +} + +impl PartitionEntry { + /// Serialize this partition entry to bytes. + /// + /// For Matrix rotation the rotation matrix is stored as an Arrow IPC section. + /// For Fast rotation the signs are stored compactly in the JSON header. + /// + /// The storage batch is stored with already-packed codes so deserialization + /// can skip re-packing. + pub fn serialize(&self) -> Result> { + let metadata = self.storage.metadata(); + let distance_type = self.storage.distance_type(); + + let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; + + let rotate_mat_ipc = match metadata.rotation_type { + RQRotationType::Matrix => { + let mat = metadata.rotate_mat.as_ref().ok_or_else(|| { + Error::io( + "RabitQ Matrix metadata missing rotate_mat during serialization" + .to_string(), + ) + })?; + write_ipc(&fsl_to_batch(mat, "rotate_mat")?)? 
+ } + RQRotationType::Fast => Vec::new(), + }; + + let storage_batches: Vec<_> = self.storage.to_batches()?.collect(); + let storage_ipc = if storage_batches.len() == 1 { + write_ipc(&storage_batches[0])? + } else { + return Err(Error::io( + "expected exactly one storage batch for RabitQ storage".to_string(), + )); + }; + + let header = RabitPartitionHeader { + distance_type: distance_type_to_u8(distance_type), + num_bits: metadata.num_bits, + code_dim: metadata.code_dim, + rotation_type: rotation_type_to_u8(metadata.rotation_type), + fast_rotation_signs: metadata.fast_rotation_signs.clone(), + sub_index_len: sub_index_ipc.len() as u64, + rotate_mat_len: rotate_mat_ipc.len() as u64, + storage_len: storage_ipc.len() as u64, + }; + + let header_json = serde_json::to_vec(&header)?; + let total_len = + 8 + header_json.len() + sub_index_ipc.len() + rotate_mat_ipc.len() + storage_ipc.len(); + let mut out = Vec::with_capacity(total_len); + out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); + out.extend_from_slice(&header_json); + out.extend_from_slice(&sub_index_ipc); + out.extend_from_slice(&rotate_mat_ipc); + out.extend_from_slice(&storage_ipc); + Ok(out) + } + + /// Deserialize a partition entry from bytes, zero-copy for Arrow data. 
+ pub fn deserialize(data: Bytes) -> Result { + if data.len() < 8 { + return Err(Error::io("partition data too small".to_string())); + } + let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; + let header_end = 8 + header_len; + if data.len() < header_end { + return Err(Error::io("partition data truncated in header".to_string())); + } + + let header: RabitPartitionHeader = serde_json::from_slice(&data[8..header_end])?; + let distance_type = u8_to_distance_type(header.distance_type)?; + let rotation_type = u8_to_rotation_type(header.rotation_type)?; + + let sub_index_start = header_end; + let sub_index_end = sub_index_start + header.sub_index_len as usize; + let rotate_mat_start = sub_index_end; + let rotate_mat_end = rotate_mat_start + header.rotate_mat_len as usize; + let storage_start = rotate_mat_end; + let storage_end = storage_start + header.storage_len as usize; + + if data.len() < storage_end { + return Err(Error::io( + "partition data truncated in IPC sections".to_string(), + )); + } + + let buffer = Buffer::from(data); + let sub_index_buf = + buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); + let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize); + + let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; + let storage_batch = read_ipc_zero_copy(storage_buf)?; + + let rotate_mat = if header.rotate_mat_len > 0 { + let rotate_mat_buf = + buffer.slice_with_length(rotate_mat_start, header.rotate_mat_len as usize); + let mat_batch = read_ipc_zero_copy(rotate_mat_buf)?; + Some(batch_to_fsl(&mat_batch)?) + } else { + None + }; + + let index = S::load(sub_index_batch)?; + let metadata = RabitQuantizationMetadata { + rotate_mat, + rotate_mat_position: None, + fast_rotation_signs: header.fast_rotation_signs, + rotation_type, + code_dim: header.code_dim, + num_bits: header.num_bits, + // The storage batch already has packed codes; skip re-packing. 
+ packed: true, + }; + let storage = ::Storage::try_from_batch( + storage_batch, + &metadata, + distance_type, + None, + )?; + + Ok(Self { index, storage }) + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + + use arrow_array::cast::AsArray; + use arrow_array::{ + Float32Array, UInt8Array, UInt64Array, + types::{Float32Type, UInt8Type}, + }; + use arrow_schema::{DataType, Field, Schema}; + use lance_arrow::FixedSizeListArrayExt; + use lance_index::vector::bq::storage::RABIT_CODE_COLUMN; + use lance_index::vector::bq::transform::{ADD_FACTORS_COLUMN, SCALE_FACTORS_COLUMN}; + use lance_index::vector::bq::{RQRotationType, builder::RabitQuantizer}; + use lance_index::vector::flat::index::FlatIndex; + use lance_index::vector::flat::storage::FlatFloatStorage; + use lance_index::vector::sq::storage::ScalarQuantizationStorage; + + // ----- PQ helpers ------------------------------------------------------- + + fn make_test_codebook(dim: usize, num_sub_vectors: usize) -> FixedSizeListArray { + let sub_dim = dim / num_sub_vectors; + let num_centroids = 256; + let total_values = num_sub_vectors * num_centroids * sub_dim; + let values: Vec = (0..total_values).map(|i| i as f32 * 0.01).collect(); + let values_array = Float32Array::from(values); + FixedSizeListArray::try_new_from_values(values_array, sub_dim as i32).unwrap() + } + + fn make_test_pq_storage( + num_rows: usize, + dim: usize, + num_sub_vectors: usize, + ) -> ::Storage { + let codebook = make_test_codebook(dim, num_sub_vectors); + let row_ids = UInt64Array::from((0..num_rows as u64).collect::>()); + let pq_codes_flat: Vec = (0..num_rows * num_sub_vectors) + .map(|i| (i % 256) as u8) + .collect(); + let pq_codes = UInt8Array::from(pq_codes_flat); + let pq_codes_fsl = + FixedSizeListArray::try_new_from_values(pq_codes, 
num_sub_vectors as i32).unwrap(); + + let schema = Arc::new(Schema::new(vec![ + Field::new(lance_core::ROW_ID, DataType::UInt64, false), + Field::new( + lance_index::vector::PQ_CODE_COLUMN, + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::UInt8, true)), + num_sub_vectors as i32, + ), + false, + ), + ])); + + let batch = + RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(pq_codes_fsl)]).unwrap(); + + ::Storage::new( + codebook, + batch, + 8, + num_sub_vectors, + dim, + DistanceType::L2, + false, + None, + ) + .unwrap() + } + + // ----- PQ tests --------------------------------------------------------- + + #[test] + fn test_roundtrip_flat_pq() { + let dim = 128; + let num_sub_vectors = 16; + let num_rows = 100; + + let storage = make_test_pq_storage(num_rows, dim, num_sub_vectors); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + + let serialized = entry.serialize().unwrap(); + let deserialized = + PartitionEntry::::deserialize(serialized.into()).unwrap(); + + assert_eq!(entry.storage, deserialized.storage); + } + + #[test] + fn test_roundtrip_preserves_distance_type() { + for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { + let dim = 32; + let num_sub_vectors = 4; + let codebook = make_test_codebook(dim, num_sub_vectors); + let row_ids = UInt64Array::from(vec![0u64, 1, 2]); + let pq_codes = UInt8Array::from(vec![0u8; 3 * num_sub_vectors]); + let pq_codes_fsl = + FixedSizeListArray::try_new_from_values(pq_codes, num_sub_vectors as i32).unwrap(); + + let schema = Arc::new(Schema::new(vec![ + Field::new(lance_core::ROW_ID, DataType::UInt64, false), + Field::new( + lance_index::vector::PQ_CODE_COLUMN, + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::UInt8, true)), + num_sub_vectors as i32, + ), + false, + ), + ])); + let batch = + RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(pq_codes_fsl)]) + .unwrap(); + + let storage = ::Storage::new( + codebook, + 
batch, + 8, + num_sub_vectors, + dim, + dt, + false, + None, + ) + .unwrap(); + + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + + let bytes = entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + assert_eq!( + restored.storage.distance_type(), + entry.storage.distance_type() + ); + } + } + + #[test] + fn test_empty_partition() { + let dim = 16; + let num_sub_vectors = 2; + let storage = make_test_pq_storage(0, dim, num_sub_vectors); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + + let serialized = entry.serialize().unwrap(); + let deserialized = + PartitionEntry::::deserialize(serialized.into()).unwrap(); + assert_eq!(entry.storage, deserialized.storage); + } + + #[test] + fn test_truncated_data_errors() { + assert!( + PartitionEntry::::deserialize(Bytes::from_static( + b"short" + )) + .is_err() + ); + } + + // ----- Flat helpers ----------------------------------------------------- + + fn make_flat_storage(num_rows: usize, dim: usize) -> FlatFloatStorage { + let values: Vec = (0..num_rows * dim).map(|i| i as f32 * 0.01).collect(); + let values_array = Float32Array::from(values); + let vectors = FixedSizeListArray::try_new_from_values(values_array, dim as i32).unwrap(); + FlatFloatStorage::new(vectors, DistanceType::L2) + } + + // ----- Flat tests ------------------------------------------------------- + + #[test] + fn test_roundtrip_flat_flat() { + let storage = make_flat_storage(50, 64); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + + let bytes = entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + + assert_eq!( + restored.storage.metadata().dim, + entry.storage.metadata().dim + ); + assert_eq!( + restored.storage.distance_type(), + entry.storage.distance_type() + ); + assert_eq!(restored.storage.len(), entry.storage.len()); + let orig_batch = 
entry.storage.to_batches().unwrap().next().unwrap(); + let rest_batch = restored.storage.to_batches().unwrap().next().unwrap(); + assert_eq!(orig_batch, rest_batch); + } + + #[test] + fn test_flat_distance_types() { + for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { + let values = Float32Array::from(vec![1.0f32; 32]); + let vectors = FixedSizeListArray::try_new_from_values(values, 32).unwrap(); + let storage = FlatFloatStorage::new(vectors, dt); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + let bytes = entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + assert_eq!(restored.storage.distance_type(), dt); + } + } + + // ----- SQ helpers ------------------------------------------------------- + + fn make_sq_storage( + num_rows: usize, + dim: usize, + distance_type: DistanceType, + ) -> ScalarQuantizationStorage { + let row_ids = UInt64Array::from_iter_values(0..num_rows as u64); + let sq_codes_flat: Vec = (0..num_rows * dim).map(|i| (i % 256) as u8).collect(); + let sq_codes = UInt8Array::from(sq_codes_flat); + let sq_codes_fsl = FixedSizeListArray::try_new_from_values(sq_codes, dim as i32).unwrap(); + + let schema = Arc::new(Schema::new(vec![ + Field::new(lance_core::ROW_ID, DataType::UInt64, false), + Field::new( + lance_index::vector::SQ_CODE_COLUMN, + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::UInt8, true)), + dim as i32, + ), + false, + ), + ])); + let batch = + RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(sq_codes_fsl)]).unwrap(); + + ScalarQuantizationStorage::try_new(8, distance_type, -1.0..1.0, [batch], None).unwrap() + } + + // ----- SQ tests --------------------------------------------------------- + + #[test] + fn test_roundtrip_flat_sq() { + let storage = make_sq_storage(100, 64, DistanceType::L2); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + + let bytes = 
entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + + let m = entry.storage.metadata(); + let rm = restored.storage.metadata(); + assert_eq!(rm.dim, m.dim); + assert_eq!(rm.num_bits, m.num_bits); + assert_eq!(rm.bounds, m.bounds); + assert_eq!( + restored.storage.distance_type(), + entry.storage.distance_type() + ); + assert_eq!(restored.storage.len(), entry.storage.len()); + + // Verify row IDs are preserved. + let orig_ids: Vec = entry.storage.row_ids().copied().collect(); + let rest_ids: Vec = restored.storage.row_ids().copied().collect(); + assert_eq!(orig_ids, rest_ids); + } + + #[test] + fn test_sq_distance_types() { + for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { + let storage = make_sq_storage(10, 16, dt); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + let bytes = entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + assert_eq!(restored.storage.distance_type(), dt); + } + } + + #[test] + fn test_sq_multiple_chunks_no_copy() { + // Build SQ storage with multiple chunks by appending batches separately. + let dim = 16usize; + let make_batch = |start: u64, n: usize| { + let row_ids = UInt64Array::from_iter_values(start..start + n as u64); + let codes = UInt8Array::from(vec![0u8; n * dim]); + let fsl = FixedSizeListArray::try_new_from_values(codes, dim as i32).unwrap(); + let schema = Arc::new(Schema::new(vec![ + Field::new(lance_core::ROW_ID, DataType::UInt64, false), + Field::new( + lance_index::vector::SQ_CODE_COLUMN, + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::UInt8, true)), + dim as i32, + ), + false, + ), + ])); + RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(fsl)]).unwrap() + }; + // Three chunks with 10 rows each. 
+ let storage = ScalarQuantizationStorage::try_new( + 8, + DistanceType::L2, + -1.0..1.0, + [make_batch(0, 10), make_batch(10, 10), make_batch(20, 10)], + None, + ) + .unwrap(); + assert_eq!(storage.len(), 30); + + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + let bytes = entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + + assert_eq!(restored.storage.len(), 30); + let orig_ids: Vec = entry.storage.row_ids().copied().collect(); + let rest_ids: Vec = restored.storage.row_ids().copied().collect(); + assert_eq!(orig_ids, rest_ids); + } + + // ----- RabitQ helpers --------------------------------------------------- + + fn make_rabit_storage_fast( + num_rows: usize, + code_dim: usize, + distance_type: DistanceType, + ) -> ::Storage { + use lance_arrow::FixedSizeListArrayExt; + + let quantizer = RabitQuantizer::new_with_rotation::( + 1, + code_dim as i32, + RQRotationType::Fast, + ); + // Generate float vectors and quantize them to binary codes. 
+ let values: Vec = (0..num_rows * code_dim) + .map(|i| (i % 100) as f32 / 100.0 - 0.5) + .collect(); + let values_arr = Float32Array::from(values); + let vectors = FixedSizeListArray::try_new_from_values(values_arr, code_dim as i32).unwrap(); + let codes = quantizer + .quantize(&vectors) + .unwrap() + .as_fixed_size_list() + .clone(); + + let metadata = quantizer.metadata(None); + let batch = RecordBatch::try_from_iter(vec![ + ( + lance_core::ROW_ID, + Arc::new(UInt64Array::from_iter_values(0..num_rows as u64)) + as Arc, + ), + ( + RABIT_CODE_COLUMN, + Arc::new(codes) as Arc, + ), + ( + ADD_FACTORS_COLUMN, + Arc::new(Float32Array::from_iter_values( + (0..num_rows).map(|i| i as f32 * 0.1), + )) as Arc, + ), + ( + SCALE_FACTORS_COLUMN, + Arc::new(Float32Array::from_iter_values( + (0..num_rows).map(|i| i as f32 * 0.01 + 0.5), + )) as Arc, + ), + ]) + .unwrap(); + + ::Storage::try_from_batch( + batch, + &metadata, + distance_type, + None, + ) + .unwrap() + } + + // ----- RabitQ tests ----------------------------------------------------- + + #[test] + fn test_roundtrip_flat_rabitq_fast() { + let num_rows = 50; + let code_dim = 64; + let storage = make_rabit_storage_fast(num_rows, code_dim, DistanceType::L2); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + + let bytes = entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + + let m = entry.storage.metadata(); + let rm = restored.storage.metadata(); + assert_eq!(rm.num_bits, m.num_bits); + assert_eq!(rm.code_dim, m.code_dim); + assert_eq!(rm.rotation_type, m.rotation_type); + assert_eq!(rm.fast_rotation_signs, m.fast_rotation_signs); + assert!(rm.packed); + assert_eq!( + restored.storage.distance_type(), + entry.storage.distance_type() + ); + assert_eq!(restored.storage.len(), entry.storage.len()); + + // Verify row IDs are preserved. 
+ let orig_ids: Vec = entry.storage.row_ids().copied().collect(); + let rest_ids: Vec = restored.storage.row_ids().copied().collect(); + assert_eq!(orig_ids, rest_ids); + + // Verify codes are preserved. + let orig_batch = entry.storage.to_batches().unwrap().next().unwrap(); + let rest_batch = restored.storage.to_batches().unwrap().next().unwrap(); + let orig_codes = orig_batch[RABIT_CODE_COLUMN].as_fixed_size_list(); + let rest_codes = rest_batch[RABIT_CODE_COLUMN].as_fixed_size_list(); + assert_eq!( + orig_codes.values().as_primitive::().values(), + rest_codes.values().as_primitive::().values(), + ); + } + + #[test] + fn test_rabitq_distance_types() { + for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { + let storage = make_rabit_storage_fast(10, 32, dt); + let entry = PartitionEntry:: { + index: FlatIndex::default(), + storage, + }; + let bytes = entry.serialize().unwrap(); + let restored = + PartitionEntry::::deserialize(bytes.into()).unwrap(); + assert_eq!(restored.storage.distance_type(), dt); + } + } +} From f1ed93430b09a7266abb84164d4d5653e972156f Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 12:29:58 -0700 Subject: [PATCH 10/24] chore: make index_caches module public for downstream codec registration --- rust/lance/src/session.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs index 4876224cb75..a24dc82d3cb 100644 --- a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -17,7 +17,7 @@ use crate::session::index_caches::GlobalIndexCache; use self::index_extension::IndexExtension; pub(crate) mod caches; -pub(crate) mod index_caches; +pub mod index_caches; pub(crate) mod index_extension; /// A user session holds the runtime state for a [`crate::Dataset`] From a38be575c39d998501ce5bcd9308e6f6fae77716 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 19 Mar 2026 21:31:09 -0700 Subject: [PATCH 11/24] feat: add cacheable_state() for 
VectorIndex disk caching Add IvfIndexState struct and serialization to lance-index, enabling IVFIndex to export its reconstructable state (IVF model, quantizer metadata) without non-serializable handles. Add reconstruct_vector_index() which rebuilds an IVFIndex from cached state by re-opening FileReaders (cheap with warm metadata cache) instead of re-fetching global buffers from object storage. Also adds IvfQuantizationStorage::from_cached() to skip global buffer reads during reconstruction, and Session::file_metadata_cache() to expose the metadata cache for the reconstruction context. Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-index/src/vector.rs | 131 +++++++++++++++ rust/lance-index/src/vector/storage.rs | 18 ++ rust/lance/src/index/vector/ivf/v2.rs | 221 ++++++++++++++++++++++++- rust/lance/src/session.rs | 5 + 4 files changed, 374 insertions(+), 1 deletion(-) diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs index 0fbff4475cb..66f423c844d 100644 --- a/rust/lance-index/src/vector.rs +++ b/rust/lance-index/src/vector.rs @@ -11,12 +11,14 @@ use std::{collections::HashMap, sync::Arc}; use arrow_array::{ArrayRef, Float32Array, RecordBatch, UInt32Array}; use arrow_schema::Field; use async_trait::async_trait; +use bytes::Bytes; use datafusion::execution::SendableRecordBatchStream; use deepsize::DeepSizeOf; use ivf::storage::IvfModel; use lance_core::{ROW_ID_FIELD, Result}; use lance_io::traits::Reader; use lance_linalg::distance::DistanceType; +use prost::Message; use quantizer::{QuantizationType, Quantizer}; use std::sync::LazyLock; use v3::subindex::SubIndexType; @@ -140,6 +142,129 @@ impl From for pb::VectorMetricType { } } +/// Serializable state of an IVF index, sufficient to reconstruct the index +/// without re-reading global buffers from object storage. +/// +/// Produced by [`VectorIndex::cacheable_state`] and consumed by a +/// reconstruction function that re-opens FileReaders using cached file metadata. 
+pub struct IvfIndexState {
+    /// Object-store path to the index file (before `to_local_path` conversion).
+    pub index_file_path: String,
+    pub uuid: String,
+    pub ivf: IvfModel,
+    pub distance_type: DistanceType,
+    pub sub_index_metadata: Vec<String>,
+    /// JSON serialization of `Q::Metadata` (quantizer-specific metadata).
+    pub quantizer_metadata_json: String,
+    /// Large quantizer data (PQ codebook, RQ rotation matrix) from `extra_metadata()`.
+    ///
+    /// An empty payload is written as a zero-length section, so `Some(vec![])`
+    /// comes back from [`IvfIndexState::deserialize`] as `None`.
+    pub quantizer_extra_data: Option<Vec<u8>>,
+    pub sub_index_type: SubIndexType,
+    pub quantization_type: QuantizationType,
+}
+
+/// Serialization header for [`IvfIndexState`].
+///
+/// Holds the scalar fields of the state as JSON; the enum-like fields are
+/// stored by their string names. The IVF model and the quantizer extra data
+/// travel as separate length-prefixed sections after the header.
+#[derive(serde::Serialize, serde::Deserialize)]
+struct IvfIndexStateHeader {
+    index_file_path: String,
+    uuid: String,
+    distance_type: String,
+    sub_index_metadata: Vec<String>,
+    sub_index_type: String,
+    quantization_type: String,
+    quantizer_metadata_json: String,
+}
+
+impl IvfIndexState {
+    /// Serialize this state into a single byte buffer.
+    ///
+    /// Wire format:
+    /// `[header_json_len: u64 LE][header JSON][ivf_pb_len: u64 LE][ivf protobuf]
+    ///  [extra_len: u64 LE][extra bytes]`
+    ///
+    /// A missing `quantizer_extra_data` is written as `extra_len == 0`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an I/O error if the JSON header cannot be encoded, and
+    /// propagates any error from converting the IVF model to protobuf.
+    pub fn serialize(&self) -> Result<Vec<u8>> {
+        let header = IvfIndexStateHeader {
+            index_file_path: self.index_file_path.clone(),
+            uuid: self.uuid.clone(),
+            distance_type: self.distance_type.to_string(),
+            sub_index_metadata: self.sub_index_metadata.clone(),
+            sub_index_type: self.sub_index_type.to_string(),
+            quantization_type: self.quantization_type.to_string(),
+            quantizer_metadata_json: self.quantizer_metadata_json.clone(),
+        };
+        let header_json = serde_json::to_vec(&header)
+            .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
+
+        let ivf_pb = pb::Ivf::try_from(&self.ivf)?;
+        let ivf_bytes = ivf_pb.encode_to_vec();
+
+        let extra = self.quantizer_extra_data.as_deref().unwrap_or(&[]);
+
+        // Three 8-byte length prefixes plus the three payloads.
+        let total = 8 + header_json.len() + 8 + ivf_bytes.len() + 8 + extra.len();
+        let mut buf = Vec::with_capacity(total);
+        buf.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
+        buf.extend_from_slice(&header_json);
+        buf.extend_from_slice(&(ivf_bytes.len() as u64).to_le_bytes());
+        buf.extend_from_slice(&ivf_bytes);
+        buf.extend_from_slice(&(extra.len() as u64).to_le_bytes());
+        buf.extend_from_slice(extra);
+        Ok(buf)
+    }
+
+    /// Parse the layout produced by [`IvfIndexState::serialize`].
+    ///
+    /// Bytes after the `extra` section are ignored. A zero-length `extra`
+    /// section yields `quantizer_extra_data: None`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an I/O error when a length prefix or section runs past the end
+    /// of `data`, or when the JSON header or IVF protobuf cannot be decoded.
+    /// Errors from parsing the distance, sub-index, and quantization type
+    /// names are propagated unchanged.
+    pub fn deserialize(data: Bytes) -> Result<Self> {
+        let mut offset = 0;
+
+        // Reads one `u64 LE` length prefix and advances `off` past it.
+        // Every call site keeps `*off <= data.len()`, so the subtraction
+        // below cannot underflow.
+        let read_len = |data: &[u8], off: &mut usize| -> Result<usize> {
+            if data.len() - *off < 8 {
+                return Err(lance_core::Error::io("IvfIndexState data truncated"));
+            }
+            // The slice is exactly 8 bytes long, so the conversion cannot fail.
+            let val = u64::from_le_bytes(data[*off..*off + 8].try_into().unwrap());
+            *off += 8;
+            // A length that does not fit in `usize` cannot be backed by `data`.
+            usize::try_from(val)
+                .map_err(|_| lance_core::Error::io("IvfIndexState data truncated"))
+        };
+
+        let header_len = read_len(&data, &mut offset)?;
+        // Compare against the bytes that remain rather than computing
+        // `offset + len`: a corrupt length prefix could overflow that sum and
+        // turn a recoverable error into a panic.
+        if header_len > data.len() - offset {
+            return Err(lance_core::Error::io("IvfIndexState header truncated"));
+        }
+        let header: IvfIndexStateHeader =
+            serde_json::from_slice(&data[offset..offset + header_len])
+                .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
+        offset += header_len;
+
+        let ivf_len = read_len(&data, &mut offset)?;
+        if ivf_len > data.len() - offset {
+            return Err(lance_core::Error::io("IvfIndexState IVF data truncated"));
+        }
+        let ivf_pb = pb::Ivf::decode(&data[offset..offset + ivf_len])
+            .map_err(|e| lance_core::Error::io(format!("IvfIndexState IVF decode: {e}")))?;
+        let ivf = IvfModel::try_from(ivf_pb)?;
+        offset += ivf_len;
+
+        let extra_len = read_len(&data, &mut offset)?;
+        if extra_len > data.len() - offset {
+            return Err(lance_core::Error::io("IvfIndexState extra data truncated"));
+        }
+        // A zero-length section stands for "no extra data".
+        let quantizer_extra_data = if extra_len > 0 {
+            Some(data[offset..offset + extra_len].to_vec())
+        } else {
+            None
+        };
+
+        let distance_type = DistanceType::try_from(header.distance_type.as_str())?;
+        let sub_index_type = SubIndexType::try_from(header.sub_index_type.as_str())?;
+        let quantization_type = header.quantization_type.parse::<QuantizationType>()?;
+
+        Ok(Self {
+            index_file_path: header.index_file_path,
+            uuid: header.uuid,
+            ivf,
+            distance_type,
+            sub_index_metadata: header.sub_index_metadata,
+            quantizer_metadata_json: header.quantizer_metadata_json,
+            quantizer_extra_data,
+            sub_index_type,
+            quantization_type,
+        })
+    }
+}
+
 /// Vector Index for (Approximate) Nearest Neighbor (ANN) Search.
 ///
 /// Vector indices are often built as a chain of indices.  For example, IVF -> PQ
@@ -264,6 +389,12 @@ pub trait VectorIndex: Send + Sync + std::fmt::Debug + Index {
 
     /// the index type of this vector index.
     fn sub_index_type(&self) -> (SubIndexType, QuantizationType);
+
+    /// Export the index state needed for reconstruction from a disk cache.
+    /// Returns `None` if this index type doesn't support persistent caching.
+    fn cacheable_state(&self) -> Option<IvfIndexState> {
+        None
+    }
 }
 
 // it can be an IVF index or a partition of IVF index
diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs
index 5a1c0e7e6f5..1879774ce84 100644
--- a/rust/lance-index/src/vector/storage.rs
+++ b/rust/lance-index/src/vector/storage.rs
@@ -239,6 +239,24 @@ impl IvfQuantizationStorage {
         })
     }
 
+    /// Construct from pre-parsed metadata, skipping global buffer reads.
+    /// Used when reconstructing from a disk cache.
+ pub fn from_cached( + reader: FileReader, + ivf: IvfModel, + metadata: Q::Metadata, + distance_type: DistanceType, + frag_reuse_index: Option>, + ) -> Self { + Self { + reader, + distance_type, + metadata, + ivf, + frag_reuse_index, + } + } + pub fn num_rows(&self) -> u64 { self.reader.num_rows() } diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 14f71612ddb..90ca1a4d619 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -29,15 +29,16 @@ use lance_encoding::decoder::{DecoderPlugins, FilterExpression}; use lance_file::reader::{FileReader, FileReaderOptions}; use lance_index::frag_reuse::FragReuseIndex; use lance_index::metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector}; -use lance_index::vector::VectorIndexCacheEntry; use lance_index::vector::flat::index::{FlatIndex, FlatQuantizer}; use lance_index::vector::hnsw::HNSW; use lance_index::vector::ivf::storage::IvfModel; use lance_index::vector::pq::ProductQuantizer; +use lance_index::vector::quantizer::QuantizerMetadata; use lance_index::vector::quantizer::{QuantizationType, Quantizer}; use lance_index::vector::sq::ScalarQuantizer; use lance_index::vector::storage::VectorStore; use lance_index::vector::v3::subindex::SubIndexType; +use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry}; use lance_index::{ INDEX_AUXILIARY_FILE_NAME, INDEX_FILE_NAME, Index, IndexType, pb, vector::{ @@ -225,6 +226,34 @@ impl IVFIndex { }) } + /// Reconstruct from cached state, skipping global buffer reads. 
+ pub(crate) fn from_cached_state( + uri: String, + uuid: String, + ivf: IvfModel, + reader: FileReader, + storage: IvfQuantizationStorage, + sub_index_metadata: Vec, + distance_type: DistanceType, + index_cache: LanceCache, + io_parallelism: usize, + ) -> Self { + let num_partitions = ivf.num_partitions(); + Self { + uri, + uuid, + ivf, + reader, + storage, + partition_locks: PartitionLoadLock::new(num_partitions), + sub_index_metadata, + distance_type, + index_cache: WeakLanceCache::from(&index_cache), + io_parallelism, + _marker: PhantomData, + } + } + #[instrument(level = "debug", skip(self, metrics))] pub async fn load_partition( &self, @@ -595,6 +624,25 @@ impl VectorIndex for IVFInd fn metric_type(&self) -> DistanceType { self.distance_type } + + fn cacheable_state(&self) -> Option { + let extra_data = self.storage.metadata().extra_metadata().ok().flatten(); + let metadata_json = serde_json::to_string(self.storage.metadata()).ok()?; + let (sub_index_type, quantization_type) = self.sub_index_type(); + // Convert local path back to object_store Path (undo to_local_path's "/" prefix) + let index_file_path = self.uri.trim_start_matches('/').to_string(); + Some(IvfIndexState { + index_file_path, + uuid: self.uuid.clone(), + ivf: self.ivf.clone(), + distance_type: self.distance_type, + sub_index_metadata: self.sub_index_metadata.clone(), + quantizer_metadata_json: metadata_json, + quantizer_extra_data: extra_data.map(|b| b.to_vec()), + sub_index_type, + quantization_type, + }) + } } pub type IvfFlatIndex = IVFIndex; @@ -602,6 +650,177 @@ pub type IvfPq = IVFIndex; pub type IvfHnswSqIndex = IVFIndex; pub type IvfHnswPqIndex = IVFIndex; +/// Reconstruct a concrete `IVFIndex` from cached state. 
+async fn reconstruct_typed( + state: IvfIndexState, + object_store: Arc, + file_metadata_cache: &LanceCache, + index_cache: LanceCache, +) -> Result> +where + Q::Metadata: serde::de::DeserializeOwned, +{ + let io_parallelism = object_store.io_parallelism(); + let scheduler_config = SchedulerConfig::max_bandwidth(&object_store); + let scheduler = ScanScheduler::new(object_store, scheduler_config); + + let index_path = Path::parse(&state.index_file_path) + .map_err(|e| Error::io(format!("invalid index path: {e}")))?; + + // Re-open index FileReader (cheap if file metadata cache is warm) + let index_reader = FileReader::try_open( + scheduler + .open_file(&index_path, &CachedFileSize::unknown()) + .await?, + None, + Arc::::default(), + file_metadata_cache, + FileReaderOptions::default(), + ) + .await?; + + // Derive aux file path: replace the filename with INDEX_AUXILIARY_FILE_NAME. + // index_path is like "path/to/{uuid}/index.lance", aux is "path/to/{uuid}/aux.lance". + let index_path_str = index_path.as_ref(); + let parent_str = index_path_str + .rsplit_once('/') + .map(|(p, _)| p) + .unwrap_or(""); + let aux_path = Path::parse(format!("{}/{}", parent_str, INDEX_AUXILIARY_FILE_NAME)) + .map_err(|e| Error::io(format!("invalid aux path: {e}")))?; + let storage_reader = FileReader::try_open( + scheduler + .open_file(&aux_path, &CachedFileSize::unknown()) + .await?, + None, + Arc::::default(), + file_metadata_cache, + FileReaderOptions::default(), + ) + .await?; + + // Parse quantizer metadata from cached JSON + let mut metadata: Q::Metadata = serde_json::from_str(&state.quantizer_metadata_json)?; + if let Some(extra) = state.quantizer_extra_data { + metadata.parse_buffer(extra.into())?; + } + + let storage = IvfQuantizationStorage::from_cached( + storage_reader, + state.ivf.clone(), + metadata, + state.distance_type, + None, // frag_reuse_index not cached + ); + + let index = IVFIndex::::from_cached_state( + to_local_path(&index_path), + state.uuid, + state.ivf, + 
index_reader, + storage, + state.sub_index_metadata, + state.distance_type, + index_cache, + io_parallelism, + ); + + Ok(Arc::new(index)) +} + +/// Reconstruct a `dyn VectorIndex` from a cached [`IvfIndexState`], dispatching +/// on the stored sub-index and quantization types. +pub async fn reconstruct_vector_index( + state: IvfIndexState, + object_store: Arc, + file_metadata_cache: &LanceCache, + index_cache: LanceCache, +) -> Result> { + use lance_index::vector::bq::builder::RabitQuantizer; + + // Extract type tags before consuming state. + let sub_idx = state.sub_index_type.to_string(); + let quant = state.quantization_type.to_string(); + + match (sub_idx.as_str(), quant.as_str()) { + ("FLAT", "FLAT") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + ("FLAT", "PQ") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + ("FLAT", "SQ") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + ("FLAT", "RQ") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + ("HNSW", "PQ") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + ("HNSW", "SQ") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + ("HNSW", "FLAT") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + ("HNSW", "RQ") => { + reconstruct_typed::( + state, + object_store, + file_metadata_cache, + index_cache, + ) + .await + } + (s, q) => Err(Error::index(format!( + "unsupported index type for reconstruction: sub_index={s}, quantization={q}" + ))), + } +} + #[cfg(test)] mod tests { use std::collections::HashSet; diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs index a24dc82d3cb..aa450483553 100644 --- 
a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -195,6 +195,11 @@ impl Session { self.store_registry.clone() } + /// Get a reference to the raw metadata cache (for use in index reconstruction). + pub fn file_metadata_cache(&self) -> &LanceCache { + &self.metadata_cache.0 + } + /// Fetch statistics for the metadata cache pub async fn metadata_cache_stats(&self) -> lance_core::cache::CacheStats { self.metadata_cache.0.stats().await From a575f18efe54057665f848e59c197f4aa60ce7e5 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 20 Mar 2026 08:45:36 -0700 Subject: [PATCH 12/24] feat: add cache_key_prefix to IvfIndexState for reconstruction Reconstructed VectorIndex instances need the original cache key prefix to share partition entries with the two-tier cache backend. Also adds LanceCache::with_backend_and_prefix() and WeakLanceCache::prefix(). Co-Authored-By: Claude Haiku 4.5 --- rust/lance-core/src/cache.rs | 16 ++++++++++++++++ rust/lance-index/src/vector.rs | 7 +++++++ rust/lance/src/index/vector/ivf/v2.rs | 1 + 3 files changed, 24 insertions(+) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 83c8bb09acb..66778c5149d 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -294,6 +294,17 @@ impl LanceCache { } } + /// Create a cache with the given backend and an exact prefix string. + /// Unlike `with_key_prefix`, this sets the prefix verbatim (no trailing slash added). + pub fn with_backend_and_prefix(backend: Arc, prefix: String) -> Self { + Self { + cache: backend, + prefix, + hits: Arc::new(AtomicU64::new(0)), + misses: Arc::new(AtomicU64::new(0)), + } + } + /// Appends a prefix to the cache key. pub fn with_key_prefix(&self, prefix: &str) -> Self { Self { @@ -509,6 +520,11 @@ impl WeakLanceCache { } } + /// The key prefix used for all entries in this cache. 
+ pub fn prefix(&self) -> &str { + &self.prefix + } + pub async fn get_with_key(&self, cache_key: &K) -> Option> where K: CacheKey, diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs index 66f423c844d..76e37a824aa 100644 --- a/rust/lance-index/src/vector.rs +++ b/rust/lance-index/src/vector.rs @@ -160,6 +160,9 @@ pub struct IvfIndexState { pub quantizer_extra_data: Option>, pub sub_index_type: SubIndexType, pub quantization_type: QuantizationType, + /// The cache key prefix used by the original index's WeakLanceCache. + /// Needed to reconnect the reconstructed index to the shared cache backend. + pub cache_key_prefix: String, } /// Serialization header for [`IvfIndexState`]. @@ -172,6 +175,8 @@ struct IvfIndexStateHeader { sub_index_type: String, quantization_type: String, quantizer_metadata_json: String, + #[serde(default)] + cache_key_prefix: String, } impl IvfIndexState { @@ -187,6 +192,7 @@ impl IvfIndexState { sub_index_type: self.sub_index_type.to_string(), quantization_type: self.quantization_type.to_string(), quantizer_metadata_json: self.quantizer_metadata_json.clone(), + cache_key_prefix: self.cache_key_prefix.clone(), }; let header_json = serde_json::to_vec(&header) .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?; @@ -261,6 +267,7 @@ impl IvfIndexState { quantizer_extra_data, sub_index_type, quantization_type, + cache_key_prefix: header.cache_key_prefix, }) } } diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 90ca1a4d619..f952546b25f 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -641,6 +641,7 @@ impl VectorIndex for IVFInd quantizer_extra_data: extra_data.map(|b| b.to_vec()), sub_index_type, quantization_type, + cache_key_prefix: self.index_cache.prefix().to_string(), }) } } From ddc3f773d1849fc7d079f6ed692e05692d510c91 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 20 Mar 2026 12:32:15 -0700 
Subject: [PATCH 13/24] refactor: move VectorIndex reconstruction from cache to call site Previously, the disk cache codec reconstructed `Arc` from `IvfIndexState` during deserialization, requiring a `ReconstructionContext` with deferred OnceLock initialization and sync-to-async runtime juggling. The ObjectStore in that context also lacked proper credential wrappers. Now the cache stores `Arc` (serializable state) instead of `Arc` (live index). Lance's `open_vector_index()` detects cached state and reconstructs using its own ObjectStore (with credentials) and metadata cache. This eliminates the ReconstructionContext, OnceLock pattern, and runtime juggling. Changes: - Add VectorIndexData trait (lance-index) with write_to/as_any/tag - Add DeepSizeOf impl for IvfIndexState - Change VectorIndexCacheKey::ValueType to dyn VectorIndexData - Add reconstruction-from-cache path in open_vector_index() - Fix panicking downcast in LanceCache::get_with_id (return None) - Add Debug/Clone/Copy derives to SubIndexType Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache.rs | 15 ++++- rust/lance-index/src/vector.rs | 64 +++++++++++++++++++++- rust/lance-index/src/vector/v3/subindex.rs | 1 + rust/lance/src/index.rs | 51 +++++++++++------ rust/lance/src/index/vector/ivf/v2.rs | 8 +-- 5 files changed, 115 insertions(+), 24 deletions(-) diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs index 66778c5149d..2013522ec62 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache.rs @@ -353,8 +353,19 @@ impl LanceCache { ) -> Option> { let cache_key = make_cache_key(&self.prefix, key, type_id); if let Some(entry) = self.cache.get(&cache_key).await { - self.hits.fetch_add(1, Ordering::Relaxed); - Some(entry.downcast::().unwrap()) + match entry.downcast::() { + Ok(val) => { + self.hits.fetch_add(1, Ordering::Relaxed); + Some(val) + } + Err(_) => { + // Type mismatch: the backend returned a different concrete + // type than expected 
(e.g. a disk cache may store + // intermediate state). Treat as a miss. + self.misses.fetch_add(1, Ordering::Relaxed); + None + } + } } else { self.misses.fetch_add(1, Ordering::Relaxed); None diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs index 76e37a824aa..973a7339cd4 100644 --- a/rust/lance-index/src/vector.rs +++ b/rust/lance-index/src/vector.rs @@ -142,11 +142,41 @@ impl From for pb::VectorMetricType { } } +/// Serializable snapshot of a vector index, suitable for disk caching. +/// +/// Implementations must be cheaply reconstructable into a live +/// [`VectorIndex`] given an ObjectStore, file metadata cache, and partition +/// cache. The reconstruction cost should be dominated by re-opening +/// `FileReader`s, which is cheap when the file metadata cache is warm. +pub trait VectorIndexData: Send + Sync + DeepSizeOf + std::fmt::Debug { + /// Serialize this state into `writer`. Called on a blocking thread by + /// the disk cache codec. + fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()>; + + /// Tag used to dispatch deserialization to the correct concrete type. + fn index_type_tag(&self) -> &'static str; + + /// Downcast to `&dyn Any` for concrete type access during reconstruction. + fn as_any(&self) -> &dyn Any; +} + +/// Deserialize a [`VectorIndexData`] from bytes previously written by +/// [`VectorIndexData::write_to`]. +pub fn deserialize_vector_index_data(data: Bytes) -> Result> { + // Currently only IVF indices support disk caching. The serialization + // format is self-describing (IvfIndexState header), so no external tag + // is needed yet. When additional index types are added, prepend a + // version/tag byte to the wire format. + let state = IvfIndexState::deserialize(data)?; + Ok(Arc::new(state)) +} + /// Serializable state of an IVF index, sufficient to reconstruct the index /// without re-reading global buffers from object storage. 
/// /// Produced by [`VectorIndex::cacheable_state`] and consumed by a /// reconstruction function that re-opens FileReaders using cached file metadata. +#[derive(Debug, Clone)] pub struct IvfIndexState { /// Object-store path to the index file (before `to_local_path` conversion). pub index_file_path: String, @@ -272,6 +302,38 @@ impl IvfIndexState { } } +impl DeepSizeOf for IvfIndexState { + fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize { + self.index_file_path.deep_size_of_children(context) + + self.uuid.deep_size_of_children(context) + + self.ivf.deep_size_of_children(context) + + self.sub_index_metadata.deep_size_of_children(context) + + self.quantizer_metadata_json.deep_size_of_children(context) + + self + .quantizer_extra_data + .as_ref() + .map(|v| v.deep_size_of_children(context)) + .unwrap_or(0) + + self.cache_key_prefix.deep_size_of_children(context) + } +} + +impl VectorIndexData for IvfIndexState { + fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()> { + let bytes = self.serialize()?; + writer.write_all(&bytes)?; + Ok(()) + } + + fn index_type_tag(&self) -> &'static str { + "IVF" + } + + fn as_any(&self) -> &dyn Any { + self + } +} + /// Vector Index for (Approximate) Nearest Neighbor (ANN) Search. /// /// Vector indices are often built as a chain of indices. For example, IVF -> PQ @@ -399,7 +461,7 @@ pub trait VectorIndex: Send + Sync + std::fmt::Debug + Index { /// Export the index state needed for reconstruction from a disk cache. /// Returns `None` if this index type doesn't support persistent caching. 
- fn cacheable_state(&self) -> Option { + fn cacheable_state(&self) -> Option> { None } } diff --git a/rust/lance-index/src/vector/v3/subindex.rs b/rust/lance-index/src/vector/v3/subindex.rs index af0bb337352..dd5d2b078a9 100644 --- a/rust/lance-index/src/vector/v3/subindex.rs +++ b/rust/lance-index/src/vector/v3/subindex.rs @@ -59,6 +59,7 @@ pub trait IvfSubIndex: Send + Sync + Debug + DeepSizeOf { fn to_batch(&self) -> Result; } +#[derive(Debug, Clone, Copy)] pub enum SubIndexType { Flat, Hnsw, diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 16faab4e48f..aacc0284fdc 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -42,6 +42,7 @@ use lance_index::vector::flat::index::{FlatBinQuantizer, FlatIndex, FlatQuantize use lance_index::vector::hnsw::HNSW; use lance_index::vector::pq::ProductQuantizer; use lance_index::vector::sq::ScalarQuantizer; +use lance_index::vector::{IvfIndexState, VectorIndexData}; use lance_index::{DatasetIndexExt, INDEX_METADATA_SCHEMA_KEY, IndexDescription}; use lance_index::{INDEX_FILE_NAME, Index, IndexType, pb, vector::VectorIndex}; use lance_index::{ @@ -129,7 +130,7 @@ impl<'a> VectorIndexCacheKey<'a> { } impl UnsizedCacheKey for VectorIndexCacheKey<'_> { - type ValueType = dyn VectorIndex; + type ValueType = dyn VectorIndexData; fn key(&self) -> std::borrow::Cow<'_, str> { if let Some(fri_uuid) = self.fri_uuid { @@ -1296,22 +1297,16 @@ impl DatasetIndexInternalExt for Dataset { uuid: &str, metrics: &dyn MetricsCollector, ) -> Result> { - // Checking for cache existence is cheap so we just check both scalar and vector caches + // Quick cache checks for scalar and frag-reuse indices. VectorIndex + // is not checked here because the cache stores VectorIndexData (serializable + // state), not a live VectorIndex — reconstruction is handled by + // open_vector_index. 
let frag_reuse_uuid = self.frag_reuse_index_uuid().await; let cache_key = ScalarIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await { return Ok(index.as_index()); } - let vector_cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); - if let Some(index) = self - .index_cache - .get_unsized_with_key(&vector_cache_key) - .await - { - return Ok(index.as_index()); - } - let frag_reuse_cache_key = FragReuseIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); if let Some(index) = self.index_cache.get_with_key(&frag_reuse_cache_key).await { return Ok(index.as_index()); @@ -1378,9 +1373,26 @@ impl DatasetIndexInternalExt for Dataset { let frag_reuse_uuid = self.frag_reuse_index_uuid().await; let cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); - if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await { - log::debug!("Found vector index in cache uuid: {}", uuid); - return Ok(index); + // Check cache for serialized VectorIndexData and reconstruct if found. + if let Some(data) = self.index_cache.get_unsized_with_key(&cache_key).await { + if let Some(state) = data.as_any().downcast_ref::() { + log::debug!( + "Reconstructing vector index from cached state uuid: {}", + uuid + ); + let partition_cache = self.index_cache.with_key_prefix(&cache_key.key()); + // Namespace the file metadata cache by the index file path, + // matching what the full-load path does. 
+ let index_path = object_store::path::Path::from(state.index_file_path.as_str()); + let fmc = self.metadata_cache.file_metadata_cache(&index_path); + return vector::ivf::v2::reconstruct_vector_index( + state.clone(), + self.object_store.clone(), + &fmc, + partition_cache, + ) + .await; + } } let frag_reuse_index = self.open_frag_reuse_index(metrics).await?; @@ -1596,9 +1608,14 @@ impl DatasetIndexInternalExt for Dataset { }; let index = index?; metrics.record_index_load(); - self.index_cache - .insert_unsized_with_key(&cache_key, index.clone()) - .await; + // Cache the serializable state, not the live index. The live index + // holds FileReader handles that can't survive serialization; the + // state can be cheaply reconstructed on the next cache hit. + if let Some(state) = index.cacheable_state() { + self.index_cache + .insert_unsized_with_key(&cache_key, Arc::from(state)) + .await; + } Ok(index) } diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index f952546b25f..b3724285bd3 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -38,7 +38,7 @@ use lance_index::vector::quantizer::{QuantizationType, Quantizer}; use lance_index::vector::sq::ScalarQuantizer; use lance_index::vector::storage::VectorStore; use lance_index::vector::v3::subindex::SubIndexType; -use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry}; +use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry, VectorIndexData}; use lance_index::{ INDEX_AUXILIARY_FILE_NAME, INDEX_FILE_NAME, Index, IndexType, pb, vector::{ @@ -625,13 +625,13 @@ impl VectorIndex for IVFInd self.distance_type } - fn cacheable_state(&self) -> Option { + fn cacheable_state(&self) -> Option> { let extra_data = self.storage.metadata().extra_metadata().ok().flatten(); let metadata_json = serde_json::to_string(self.storage.metadata()).ok()?; let (sub_index_type, quantization_type) = self.sub_index_type(); // Convert local path back 
to object_store Path (undo to_local_path's "/" prefix) let index_file_path = self.uri.trim_start_matches('/').to_string(); - Some(IvfIndexState { + Some(Box::new(IvfIndexState { index_file_path, uuid: self.uuid.clone(), ivf: self.ivf.clone(), @@ -642,7 +642,7 @@ impl VectorIndex for IVFInd sub_index_type, quantization_type, cache_key_prefix: self.index_cache.prefix().to_string(), - }) + })) } } From 4fdbe5199816ba265af84a366fb433070a4d1fb1 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 20 Mar 2026 14:02:49 -0700 Subject: [PATCH 14/24] fix --- rust/lance/src/index/vector/ivf/v2.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 00e076d0a62..ffd502550c2 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -234,6 +234,7 @@ impl IVFIndex { } /// Reconstruct from cached state, skipping global buffer reads. + #[allow(clippy::too_many_arguments)] pub(crate) fn from_cached_state( uri: String, uuid: String, From 9ff1ab961b6c9f71369a3f00cb338b14b46df7c8 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 20 Mar 2026 16:06:34 -0700 Subject: [PATCH 15/24] refactor: address PR review feedback - Split cache.rs into submodules (backend, keys, moka, mod) - Rename CacheKey::type_id() to type_name() across all implementors - Improve CacheBackend and get_or_insert docs - Add Spillable trait with writer-based serialize for partition_serde - Cache file metadata and file sizes to enable zero-IO reconstruction - Add test_reconstruct_from_cache_zero_io test Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache/backend.rs | 76 ++++ rust/lance-core/src/cache/keys.rs | 52 +++ .../lance-core/src/{cache.rs => cache/mod.rs} | 324 +++--------------- rust/lance-core/src/cache/moka.rs | 125 +++++++ .../src/encodings/logical/primitive.rs | 2 +- rust/lance-file/src/previous/reader.rs | 2 +- rust/lance-file/src/reader.rs | 10 + 
rust/lance-index/src/scalar/bitmap.rs | 2 +- rust/lance-index/src/scalar/btree.rs | 2 +- rust/lance-index/src/scalar/inverted/index.rs | 4 +- rust/lance-index/src/scalar/ngram.rs | 2 +- rust/lance-index/src/scalar/rtree.rs | 2 +- rust/lance-index/src/vector.rs | 12 + rust/lance-index/src/vector/storage.rs | 4 + rust/lance/src/dataset/fragment.rs | 2 +- rust/lance/src/index.rs | 14 +- rust/lance/src/index/vector/ivf.rs | 2 +- .../src/index/vector/ivf/partition_serde.rs | 124 ++++--- rust/lance/src/index/vector/ivf/v2.rs | 160 +++++++-- rust/lance/src/session.rs | 2 +- rust/lance/src/session/caches.rs | 12 +- rust/lance/src/session/index_caches.rs | 6 +- 22 files changed, 563 insertions(+), 378 deletions(-) create mode 100644 rust/lance-core/src/cache/backend.rs create mode 100644 rust/lance-core/src/cache/keys.rs rename rust/lance-core/src/{cache.rs => cache/mod.rs} (70%) create mode 100644 rust/lance-core/src/cache/moka.rs diff --git a/rust/lance-core/src/cache/backend.rs b/rust/lance-core/src/cache/backend.rs new file mode 100644 index 00000000000..970fb75888c --- /dev/null +++ b/rust/lance-core/src/cache/backend.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use std::any::Any; +use std::pin::Pin; +use std::sync::Arc; + +use async_trait::async_trait; +use futures::Future; + +use crate::Result; + +/// A type-erased cache entry. +pub type CacheEntry = Arc; + +/// Low-level pluggable cache backend. +/// +/// Implementations store entries keyed by opaque byte slices. +/// The [`LanceCache`](super::LanceCache) wrapper handles key construction and type safety; +/// backend authors do not need to worry about key encoding. +/// +/// Keys are structured as `user_key\0type_name` where `type_name` comes from +/// [`CacheKey::type_name()`](super::CacheKey::type_name). Backend authors who need to +/// inspect keys can use [`parse_cache_key()`](super::parse_cache_key) to split them. 
+#[async_trait] +pub trait CacheBackend: Send + Sync + std::fmt::Debug { + /// Look up an entry by its opaque key. + async fn get(&self, key: &[u8]) -> Option; + + /// Store an entry. `size_bytes` is used for eviction accounting. + async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize); + + /// Get an existing entry or compute it from `loader`. + /// + /// Implementations should deduplicate concurrent loads for the same key + /// so the loader runs at most once. + /// + /// The loader is a pinned, boxed future rather than a generic closure + /// because `async_trait` erases the `Self` lifetime, making it impossible + /// to express a generic closure whose returned future borrows from the + /// caller. Boxing the future once at the call site (in `LanceCache`) + /// avoids this lifetime conflict while keeping the trait object-safe. + /// + /// The future borrows from the caller's scope and will be `.await`ed within + /// this method — implementations must not store it beyond the call. + async fn get_or_insert<'a>( + &self, + key: &[u8], + loader: Pin> + Send + 'a>>, + ) -> Result; + + /// Remove all entries whose key starts with `prefix`. + async fn invalidate_prefix(&self, prefix: &[u8]); + + /// Remove all entries. + async fn clear(&self); + + /// Number of entries currently stored (may flush pending operations). + async fn num_entries(&self) -> usize; + + /// Total weighted size in bytes of all stored entries (may flush pending operations). + async fn size_bytes(&self) -> usize; + + /// Approximate number of entries, callable from synchronous contexts. + /// Backends that cannot provide this cheaply should return 0. + fn approx_num_entries(&self) -> usize { + 0 + } + + /// Approximate weighted size in bytes, callable from synchronous contexts. + /// Used by `DeepSizeOf` to report cache memory usage. + /// Backends that cannot provide this cheaply should return 0. 
+ fn approx_size_bytes(&self) -> usize { + 0 + } +} diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs new file mode 100644 index 00000000000..db412cc632f --- /dev/null +++ b/rust/lance-core/src/cache/keys.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use std::borrow::Cow; + +/// Cache keys are structured as `user_key\0type_name`. +/// +/// This function splits an opaque cache key into the user-visible portion +/// and the type_name string. Backend implementations can use this to inspect keys. +/// Returns `(key, "")` (the whole key with an empty type name) if no separator is found. +pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) { + if let Some(sep) = key.iter().position(|&b| b == 0) { + let user_key = &key[..sep]; + let type_name = std::str::from_utf8(&key[sep + 1..]).unwrap_or(""); + (user_key, type_name) + } else { + (key, "") + } +} + +/// Build a key: `prefix/user_key\0type_name`. +pub(super) fn make_cache_key(prefix: &str, key: &str, type_name: &str) -> Vec { + let full_key = if prefix.is_empty() { + key.to_string() + } else { + format!("{}/{}", prefix, key) + }; + let mut bytes = full_key.into_bytes(); + bytes.push(0); + bytes.extend_from_slice(type_name.as_bytes()); + bytes +} + +pub trait CacheKey { + type ValueType: 'static; + + fn key(&self) -> Cow<'_, str>; + + /// Short, stable string that distinguishes this value type from others in + /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_name`). + /// Must be consistent across crate boundaries — use a short literal, not + /// `std::any::type_name` pointers. 
+ fn type_name(&self) -> &'static str; +} + +pub trait UnsizedCacheKey { + type ValueType: 'static + ?Sized; + + fn key(&self) -> Cow<'_, str>; + + fn type_name(&self) -> &'static str; +} diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache/mod.rs similarity index 70% rename from rust/lance-core/src/cache.rs rename to rust/lance-core/src/cache/mod.rs index 2013522ec62..6bdd0c07152 100644 --- a/rust/lance-core/src/cache.rs +++ b/rust/lance-core/src/cache/mod.rs @@ -9,231 +9,31 @@ //! can implement. It uses opaque byte keys and type-erased entries. //! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag //! encoding), type-safe get/insert, and DeepSizeOf-based size computation. +//! +//! Cache keys are handled by the [`keys`] submodule: [`CacheKey`] / [`UnsizedCacheKey`] +//! define the typed key interface, and [`parse_cache_key`] lets backends inspect the +//! encoded `user_key\0type_name` format. + +mod backend; +mod keys; +mod moka; + +pub use backend::{CacheBackend, CacheEntry}; +pub use keys::{CacheKey, UnsizedCacheKey, parse_cache_key}; +pub use moka::MokaCacheBackend; -use std::any::Any; -use std::borrow::Cow; -use std::pin::Pin; use std::sync::{ Arc, atomic::{AtomicU64, Ordering}, }; -use async_trait::async_trait; use futures::{Future, FutureExt}; use crate::Result; pub use deepsize::{Context, DeepSizeOf}; -/// A type-erased cache entry. -pub type CacheEntry = Arc; - -// --------------------------------------------------------------------------- -// CacheBackend trait -// --------------------------------------------------------------------------- - -/// Low-level pluggable cache backend. -/// -/// Implementations store entries keyed by opaque byte slices. -/// The [`LanceCache`] wrapper handles key construction and type safety; -/// backend authors do not need to worry about key encoding. -#[async_trait] -pub trait CacheBackend: Send + Sync + std::fmt::Debug { - /// Look up an entry by its opaque key. 
- async fn get(&self, key: &[u8]) -> Option; - - /// Store an entry. `size_bytes` is used for eviction accounting. - async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize); - - /// Get an existing entry or compute it from `loader`. - /// - /// Implementations should deduplicate concurrent loads for the same key - /// so the loader runs at most once. - /// - /// The loader is a pinned future that produces `(entry, size_bytes)`. - /// It borrows from the caller's scope and will be `.await`ed within - /// this method — implementations must not store it beyond the call. - async fn get_or_insert<'a>( - &self, - key: &[u8], - loader: Pin> + Send + 'a>>, - ) -> Result; - - /// Remove all entries whose key starts with `prefix`. - async fn invalidate_prefix(&self, prefix: &[u8]); - - /// Remove all entries. - async fn clear(&self); - - /// Number of entries currently stored (may flush pending operations). - async fn num_entries(&self) -> usize; - - /// Total weighted size in bytes of all stored entries (may flush pending operations). - async fn size_bytes(&self) -> usize; - - /// Approximate number of entries, callable from synchronous contexts. - /// Backends that cannot provide this cheaply should return 0. - fn approx_num_entries(&self) -> usize { - 0 - } - - /// Approximate weighted size in bytes, callable from synchronous contexts. - /// Used by `DeepSizeOf` to report cache memory usage. - /// Backends that cannot provide this cheaply should return 0. - fn approx_size_bytes(&self) -> usize { - 0 - } -} - -// --------------------------------------------------------------------------- -// MokaCacheBackend — default moka-based implementation -// --------------------------------------------------------------------------- - -/// Internal record stored in the moka cache. -#[derive(Clone, Debug)] -struct MokaCacheEntry { - entry: CacheEntry, - size_bytes: usize, -} - -/// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache. 
-/// -/// Provides weighted-capacity eviction and concurrent-load deduplication -/// via moka's built-in `optionally_get_with`. -pub struct MokaCacheBackend { - cache: moka::future::Cache, MokaCacheEntry>, -} - -impl std::fmt::Debug for MokaCacheBackend { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MokaCacheBackend") - .field("entry_count", &self.cache.entry_count()) - .finish() - } -} - -impl MokaCacheBackend { - pub fn with_capacity(capacity: usize) -> Self { - let cache = moka::future::Cache::builder() - .max_capacity(capacity as u64) - .weigher(|_, v: &MokaCacheEntry| v.size_bytes.try_into().unwrap_or(u32::MAX)) - .support_invalidation_closures() - .build(); - Self { cache } - } - - pub fn no_cache() -> Self { - Self { - cache: moka::future::Cache::new(0), - } - } -} - -#[async_trait] -impl CacheBackend for MokaCacheBackend { - async fn get(&self, key: &[u8]) -> Option { - self.cache.get(key).await.map(|r| r.entry) - } - - async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) { - self.cache - .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes }) - .await; - } - - async fn get_or_insert<'a>( - &self, - key: &[u8], - loader: Pin> + Send + 'a>>, - ) -> Result { - // Use moka's built-in dedup: optionally_get_with runs the init future - // at most once per key, even under concurrent access. 
- let (error_tx, error_rx) = tokio::sync::oneshot::channel(); - - let init = async move { - match loader.await { - Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }), - Err(e) => { - let _ = error_tx.send(e); - None - } - } - }; - - let owned_key = key.to_vec(); - match self.cache.optionally_get_with(owned_key, init).await { - Some(record) => Ok(record.entry), - None => match error_rx.await { - Ok(err) => Err(err), - Err(_) => Err(crate::Error::internal( - "Failed to retrieve error from cache loader", - )), - }, - } - } - - async fn invalidate_prefix(&self, prefix: &[u8]) { - let prefix = prefix.to_vec(); - self.cache - .invalidate_entries_if(move |key, _value| key.starts_with(&prefix)) - .expect("Cache configured correctly"); - } - - async fn clear(&self) { - self.cache.invalidate_all(); - self.cache.run_pending_tasks().await; - } - - async fn num_entries(&self) -> usize { - self.cache.run_pending_tasks().await; - self.cache.entry_count() as usize - } - - async fn size_bytes(&self) -> usize { - self.cache.run_pending_tasks().await; - self.cache.weighted_size() as usize - } - - fn approx_num_entries(&self) -> usize { - self.cache.entry_count() as usize - } - - fn approx_size_bytes(&self) -> usize { - self.cache.iter().map(|(_, v)| v.size_bytes).sum() - } -} - -// --------------------------------------------------------------------------- -// Type identity helpers -// --------------------------------------------------------------------------- - -/// Cache keys are structured as `user_key\0type_id`. -/// -/// This function splits an opaque cache key into the user-visible portion -/// and the type_id string. Backend implementations can use this to inspect keys. -/// Returns `(empty slice, "")` if no separator is found. 
-pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) { - if let Some(sep) = key.iter().position(|&b| b == 0) { - let user_key = &key[..sep]; - let type_id = std::str::from_utf8(&key[sep + 1..]).unwrap_or(""); - (user_key, type_id) - } else { - (key, "") - } -} - -/// Build a key: `prefix/user_key\0type_id`. -fn make_cache_key(prefix: &str, key: &str, type_id: &str) -> Vec { - let full_key = if prefix.is_empty() { - key.to_string() - } else { - format!("{}/{}", prefix, key) - }; - let mut bytes = full_key.into_bytes(); - bytes.push(0); - bytes.extend_from_slice(type_id.as_bytes()); - bytes -} +use keys::make_cache_key; // --------------------------------------------------------------------------- // LanceCache — typed wrapper around dyn CacheBackend @@ -338,20 +138,20 @@ impl LanceCache { async fn insert_with_id( &self, key: &str, - type_id: &str, + type_name: &str, metadata: Arc, ) { let size = metadata.deep_size_of() + 8; - let cache_key = make_cache_key(&self.prefix, key, type_id); + let cache_key = make_cache_key(&self.prefix, key, type_name); self.cache.insert(&cache_key, metadata, size).await; } async fn get_with_id( &self, key: &str, - type_id: &str, + type_name: &str, ) -> Option> { - let cache_key = make_cache_key(&self.prefix, key, type_id); + let cache_key = make_cache_key(&self.prefix, key, type_name); if let Some(entry) = self.cache.get(&cache_key).await { match entry.downcast::() { Ok(val) => { @@ -375,14 +175,14 @@ impl LanceCache { async fn get_or_insert_with_id( &self, key: &str, - type_id: &str, + type_name: &str, loader: F, ) -> Result> where F: FnOnce() -> Fut + Send, Fut: Future> + Send, { - let cache_key = make_cache_key(&self.prefix, key, type_id); + let cache_key = make_cache_key(&self.prefix, key, type_name); // Type-erase the loader into a pinned future for the backend. 
let typed_loader = Box::pin(async move { @@ -407,18 +207,19 @@ impl LanceCache { async fn insert_unsized_with_id( &self, key: &str, - type_id: &str, + type_name: &str, metadata: Arc, ) { - self.insert_with_id(key, type_id, Arc::new(metadata)).await + self.insert_with_id(key, type_name, Arc::new(metadata)) + .await } async fn get_unsized_with_id( &self, key: &str, - type_id: &str, + type_name: &str, ) -> Option> { - let outer = self.get_with_id::>(key, type_id).await?; + let outer = self.get_with_id::>(key, type_name).await?; Some(outer.as_ref().clone()) } @@ -446,7 +247,7 @@ impl LanceCache { K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.insert_with_id(&cache_key.key(), cache_key.type_id(), metadata) + self.insert_with_id(&cache_key.key(), cache_key.type_name(), metadata) .boxed() .await } @@ -456,7 +257,7 @@ impl LanceCache { K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.get_with_id::(&cache_key.key(), cache_key.type_id()) + self.get_with_id::(&cache_key.key(), cache_key.type_name()) .boxed() .await } @@ -472,9 +273,9 @@ impl LanceCache { F: FnOnce() -> Fut + Send, Fut: Future> + Send, { - let type_id = cache_key.type_id(); + let type_name = cache_key.type_name(); let key_str = cache_key.key().into_owned(); - Box::pin(self.get_or_insert_with_id(&key_str, type_id, loader)).await + Box::pin(self.get_or_insert_with_id(&key_str, type_name, loader)).await } pub async fn insert_unsized_with_key(&self, cache_key: &K, metadata: Arc) @@ -482,7 +283,7 @@ impl LanceCache { K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.insert_unsized_with_id(&cache_key.key(), cache_key.type_id(), metadata) + self.insert_unsized_with_id(&cache_key.key(), cache_key.type_name(), metadata) .boxed() .await } @@ -492,7 +293,7 @@ impl LanceCache { K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.get_unsized_with_id::(&cache_key.key(), cache_key.type_id()) + 
self.get_unsized_with_id::(&cache_key.key(), cache_key.type_name()) .boxed() .await } @@ -542,7 +343,7 @@ impl WeakLanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { let cache = self.inner.upgrade()?; - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); if let Some(entry) = cache.get(&key).await { self.hits.fetch_add(1, Ordering::Relaxed); Some(entry.downcast::().unwrap()) @@ -559,7 +360,7 @@ impl WeakLanceCache { { if let Some(cache) = self.inner.upgrade() { let size = value.deep_size_of() + 8; - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); cache.insert(&key, value, size).await; true } else { @@ -583,7 +384,7 @@ impl WeakLanceCache { Fut: Future> + Send, { if let Some(cache) = self.inner.upgrade() { - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); let typed_loader = Box::pin(async move { let value = loader().await?; let arc = Arc::new(value); @@ -605,7 +406,7 @@ impl WeakLanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { let cache = self.inner.upgrade()?; - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); if let Some(entry) = cache.get(&key).await { entry .downcast::>() @@ -624,7 +425,7 @@ impl WeakLanceCache { if let Some(cache) = self.inner.upgrade() { let wrapper = Arc::new(value); let size = wrapper.deep_size_of() + 8; - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id()); + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); cache.insert(&key, wrapper, size).await; } else { log::warn!("WeakLanceCache: cache no longer available, 
unable to insert unsized item"); @@ -632,39 +433,19 @@ impl WeakLanceCache { } } -// --------------------------------------------------------------------------- -// CacheKey traits -// --------------------------------------------------------------------------- - -pub trait CacheKey { - type ValueType: 'static; - - fn key(&self) -> Cow<'_, str>; - - /// Short, stable string that distinguishes this value type from others in - /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_id`). - /// Must be consistent across crate boundaries — use a short literal, not - /// `type_name` pointers. - fn type_id(&self) -> &'static str; -} - -pub trait UnsizedCacheKey { - type ValueType: 'static + ?Sized; - - fn key(&self) -> Cow<'_, str>; - - fn type_id(&self) -> &'static str; -} - // --------------------------------------------------------------------------- // CacheStats // --------------------------------------------------------------------------- #[derive(Debug, Clone)] pub struct CacheStats { + /// Number of times `get`, `get_unsized`, or `get_or_insert` found an item in the cache. pub hits: u64, + /// Number of times `get`, `get_unsized`, or `get_or_insert` did not find an item in the cache. pub misses: u64, + /// Number of entries currently in the cache. pub num_entries: usize, + /// Total size in bytes of all entries in the cache. 
pub size_bytes: usize, } @@ -686,10 +467,6 @@ impl CacheStats { } } -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - #[cfg(test)] mod tests { use super::*; @@ -712,10 +489,10 @@ mod tests { impl CacheKey for TestKey { type ValueType = T; - fn key(&self) -> Cow<'_, str> { - Cow::Borrowed(&self.key) + fn key(&self) -> std::borrow::Cow<'_, str> { + std::borrow::Cow::Borrowed(&self.key) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { std::any::type_name::() } } @@ -737,10 +514,10 @@ mod tests { impl UnsizedCacheKey for TestUnsizedKey { type ValueType = T; - fn key(&self) -> Cow<'_, str> { - Cow::Borrowed(&self.key) + fn key(&self) -> std::borrow::Cow<'_, str> { + std::borrow::Cow::Borrowed(&self.key) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { std::any::type_name::() } } @@ -779,12 +556,12 @@ mod tests { #[derive(Debug, DeepSizeOf)] struct MyType(i32); - trait MyTrait: DeepSizeOf + Send + Sync + Any { - fn as_any(&self) -> &dyn Any; + trait MyTrait: DeepSizeOf + Send + Sync + std::any::Any { + fn as_any(&self) -> &dyn std::any::Any; } impl MyTrait for MyType { - fn as_any(&self) -> &dyn Any { + fn as_any(&self) -> &dyn std::any::Any { self } } @@ -878,6 +655,7 @@ mod tests { #[tokio::test] async fn test_custom_backend() { + use async_trait::async_trait; use tokio::sync::Mutex; #[derive(Debug)] @@ -907,7 +685,9 @@ mod tests { async fn get_or_insert<'a>( &self, key: &[u8], - loader: Pin> + Send + 'a>>, + loader: std::pin::Pin< + Box> + Send + 'a>, + >, ) -> Result { if let Some((entry, _)) = self.map.lock().await.get(key) { Ok(entry.clone()) diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs new file mode 100644 index 00000000000..6a2cd673409 --- /dev/null +++ b/rust/lance-core/src/cache/moka.rs @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: Apache-2.0 
+// SPDX-FileCopyrightText: Copyright The Lance Authors + +use std::pin::Pin; + +use async_trait::async_trait; +use futures::Future; + +use crate::Result; + +use super::backend::{CacheBackend, CacheEntry}; + +/// Internal record stored in the moka cache. +#[derive(Clone, Debug)] +struct MokaCacheEntry { + entry: CacheEntry, + size_bytes: usize, +} + +/// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache. +/// +/// Provides weighted-capacity eviction and concurrent-load deduplication +/// via moka's built-in `optionally_get_with`. +pub struct MokaCacheBackend { + cache: moka::future::Cache, MokaCacheEntry>, +} + +impl std::fmt::Debug for MokaCacheBackend { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MokaCacheBackend") + .field("entry_count", &self.cache.entry_count()) + .finish() + } +} + +impl MokaCacheBackend { + pub fn with_capacity(capacity: usize) -> Self { + let cache = moka::future::Cache::builder() + .max_capacity(capacity as u64) + .weigher(|_, v: &MokaCacheEntry| v.size_bytes.try_into().unwrap_or(u32::MAX)) + .support_invalidation_closures() + .build(); + Self { cache } + } + + pub fn no_cache() -> Self { + Self { + cache: moka::future::Cache::new(0), + } + } +} + +#[async_trait] +impl CacheBackend for MokaCacheBackend { + async fn get(&self, key: &[u8]) -> Option { + self.cache.get(key).await.map(|r| r.entry) + } + + async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) { + self.cache + .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes }) + .await; + } + + async fn get_or_insert<'a>( + &self, + key: &[u8], + loader: Pin> + Send + 'a>>, + ) -> Result { + // Use moka's built-in dedup: optionally_get_with runs the init future + // at most once per key, even under concurrent access. 
+ let (error_tx, error_rx) = tokio::sync::oneshot::channel(); + + let init = async move { + match loader.await { + Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }), + Err(e) => { + let _ = error_tx.send(e); + None + } + } + }; + + let owned_key = key.to_vec(); + match self.cache.optionally_get_with(owned_key, init).await { + Some(record) => Ok(record.entry), + None => match error_rx.await { + Ok(err) => Err(err), + Err(_) => Err(crate::Error::internal( + "Failed to retrieve error from cache loader", + )), + }, + } + } + + async fn invalidate_prefix(&self, prefix: &[u8]) { + let prefix = prefix.to_vec(); + self.cache + .invalidate_entries_if(move |key, _value| key.starts_with(&prefix)) + .expect("Cache configured correctly"); + } + + async fn clear(&self) { + self.cache.invalidate_all(); + self.cache.run_pending_tasks().await; + } + + async fn num_entries(&self) -> usize { + self.cache.run_pending_tasks().await; + self.cache.entry_count() as usize + } + + async fn size_bytes(&self) -> usize { + self.cache.run_pending_tasks().await; + self.cache.weighted_size() as usize + } + + fn approx_num_entries(&self) -> usize { + self.cache.entry_count() as usize + } + + fn approx_size_bytes(&self) -> usize { + self.cache.iter().map(|(_, v)| v.size_bytes).sum() + } +} diff --git a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs index ef1f1ca1faf..ba8a551f737 100644 --- a/rust/lance-encoding/src/encodings/logical/primitive.rs +++ b/rust/lance-encoding/src/encodings/logical/primitive.rs @@ -3417,7 +3417,7 @@ impl CacheKey for FieldDataCacheKey { self.column_index.to_string().into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "FieldData" } } diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs index 6dd40af45c1..fac113b4c10 100644 --- a/rust/lance-file/src/previous/reader.rs +++ b/rust/lance-file/src/previous/reader.rs @@ 
-90,7 +90,7 @@ impl CacheKey for StringCacheKey<'_, T> { self.key.into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { std::any::type_name::() } } diff --git a/rust/lance-file/src/reader.rs b/rust/lance-file/src/reader.rs index 29c1aa3ccc0..3a3f41854c0 100644 --- a/rust/lance-file/src/reader.rs +++ b/rust/lance-file/src/reader.rs @@ -105,6 +105,16 @@ pub struct CachedFileMetadata { pub minor_version: u16, } +impl CachedFileMetadata { + /// Total file size in bytes. + pub fn file_size(&self) -> u64 { + self.num_data_bytes + + self.num_global_buffer_bytes + + self.num_column_metadata_bytes + + self.num_footer_bytes + } +} + impl DeepSizeOf for CachedFileMetadata { // TODO: include size for `column_metadatas` and `column_infos`. fn deep_size_of_children(&self, context: &mut Context) -> usize { diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs index a55b317860f..c593bb72a63 100644 --- a/rust/lance-index/src/scalar/bitmap.rs +++ b/rust/lance-index/src/scalar/bitmap.rs @@ -129,7 +129,7 @@ impl CacheKey for BitmapKey { format!("{}", self.value.0).into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "Bitmap" } } diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs index 9e208b8eea4..8fbe2377d13 100644 --- a/rust/lance-index/src/scalar/btree.rs +++ b/rust/lance-index/src/scalar/btree.rs @@ -990,7 +990,7 @@ impl CacheKey for BTreePageKey { format!("page-{}", self.page_number).into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "BTreePage" } } diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs index 89b64c18ffd..e5caf09cd78 100644 --- a/rust/lance-index/src/scalar/inverted/index.rs +++ b/rust/lance-index/src/scalar/inverted/index.rs @@ -1889,7 +1889,7 @@ impl CacheKey for PostingListKey { format!("postings-{}", self.token_id).into() } - fn 
type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "PostingList" } } @@ -1906,7 +1906,7 @@ impl CacheKey for PositionKey { format!("positions-{}", self.token_id).into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "Position" } } diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs index d2095841428..2a439ae6b34 100644 --- a/rust/lance-index/src/scalar/ngram.rs +++ b/rust/lance-index/src/scalar/ngram.rs @@ -171,7 +171,7 @@ impl CacheKey for NGramPostingListKey { format!("posting-list-{}", self.row_offset).into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "NGramPostingList" } } diff --git a/rust/lance-index/src/scalar/rtree.rs b/rust/lance-index/src/scalar/rtree.rs index 724f5479e1c..225e3be6e2a 100644 --- a/rust/lance-index/src/scalar/rtree.rs +++ b/rust/lance-index/src/scalar/rtree.rs @@ -250,7 +250,7 @@ impl CacheKey for RTreeCacheKey { } } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "RTree" } } diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs index 973a7339cd4..0a1b50297d3 100644 --- a/rust/lance-index/src/vector.rs +++ b/rust/lance-index/src/vector.rs @@ -193,6 +193,10 @@ pub struct IvfIndexState { /// The cache key prefix used by the original index's WeakLanceCache. /// Needed to reconnect the reconstructed index to the shared cache backend. pub cache_key_prefix: String, + /// File sizes for the index and auxiliary files, used to avoid HEAD requests + /// when reconstructing from cache. + pub index_file_size: u64, + pub aux_file_size: u64, } /// Serialization header for [`IvfIndexState`]. 
@@ -207,6 +211,10 @@ struct IvfIndexStateHeader { quantizer_metadata_json: String, #[serde(default)] cache_key_prefix: String, + #[serde(default)] + index_file_size: u64, + #[serde(default)] + aux_file_size: u64, } impl IvfIndexState { @@ -223,6 +231,8 @@ impl IvfIndexState { quantization_type: self.quantization_type.to_string(), quantizer_metadata_json: self.quantizer_metadata_json.clone(), cache_key_prefix: self.cache_key_prefix.clone(), + index_file_size: self.index_file_size, + aux_file_size: self.aux_file_size, }; let header_json = serde_json::to_vec(&header) .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?; @@ -298,6 +308,8 @@ impl IvfIndexState { sub_index_type, quantization_type, cache_key_prefix: header.cache_key_prefix, + index_file_size: header.index_file_size, + aux_file_size: header.aux_file_size, }) } } diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs index 1879774ce84..526ba8e78d8 100644 --- a/rust/lance-index/src/vector/storage.rs +++ b/rust/lance-index/src/vector/storage.rs @@ -257,6 +257,10 @@ impl IvfQuantizationStorage { } } + pub fn reader(&self) -> &FileReader { + &self.reader + } + pub fn num_rows(&self) -> u64 { self.reader.num_rows() } diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs index 55c23211871..81e1473c921 100644 --- a/rust/lance/src/dataset/fragment.rs +++ b/rust/lance/src/dataset/fragment.rs @@ -1880,7 +1880,7 @@ impl CacheKey for FileMetadataCacheKey { "".into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "FileMetadata" } } diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 851cd113268..3c920e19426 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -112,7 +112,7 @@ impl UnsizedCacheKey for ScalarIndexCacheKey<'_> { } } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "ScalarIndex" } } @@ -140,7 +140,7 @@ impl UnsizedCacheKey 
for VectorIndexCacheKey<'_> { } } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "VectorIndex" } } @@ -168,7 +168,7 @@ impl CacheKey for FragReuseIndexCacheKey<'_> { } } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "FragReuseIndex" } } @@ -196,7 +196,7 @@ impl CacheKey for MemWalCacheKey<'_> { } } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "MemWalIndex" } } @@ -1431,14 +1431,10 @@ impl DatasetIndexInternalExt for Dataset { uuid ); let partition_cache = self.index_cache.with_key_prefix(&cache_key.key()); - // Namespace the file metadata cache by the index file path, - // matching what the full-load path does. - let index_path = object_store::path::Path::from(state.index_file_path.as_str()); - let fmc = self.metadata_cache.file_metadata_cache(&index_path); return vector::ivf::v2::reconstruct_vector_index( state.clone(), self.object_store.clone(), - &fmc, + &self.metadata_cache, partition_cache, ) .await; diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs index ec05278b311..3768fad045c 100644 --- a/rust/lance/src/index/vector/ivf.rs +++ b/rust/lance/src/index/vector/ivf.rs @@ -125,7 +125,7 @@ impl UnsizedCacheKey for LegacyIVFPartitionKey { format!("ivf-{}", self.partition_id).into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "LegacyIVFPartition" } } diff --git a/rust/lance/src/index/vector/ivf/partition_serde.rs b/rust/lance/src/index/vector/ivf/partition_serde.rs index 9139e940c2a..3db0dcc634a 100644 --- a/rust/lance/src/index/vector/ivf/partition_serde.rs +++ b/rust/lance/src/index/vector/ivf/partition_serde.rs @@ -17,6 +17,7 @@ //! sections are read zero-copy using [`FileDecoder`] so that Arrow arrays //! reference the original buffer directly. 
+use std::io::Write; use std::sync::Arc; use arrow_array::{FixedSizeListArray, RecordBatch}; @@ -44,6 +45,17 @@ use serde::{Deserialize, Serialize}; use super::v2::PartitionEntry; +/// Serialization interface for spilling cache entries to an external store. +/// +/// `serialize` writes the entry into the provided writer and returns the +/// number of bytes written. `deserialize` reconstructs the entry from a +/// contiguous `Bytes` buffer (typically obtained by reading back whatever +/// was written). +pub trait Spillable: Sized { + fn serialize(&self, writer: &mut dyn Write) -> Result; + fn deserialize(data: Bytes) -> Result; +} + // --------------------------------------------------------------------------- // Common helpers // --------------------------------------------------------------------------- @@ -224,13 +236,13 @@ struct PqPartitionHeader { storage_len: u64, } -impl PartitionEntry { +impl Spillable for PartitionEntry { /// Serialize this partition entry to bytes. /// /// The sub-index, PQ codebook, and storage batch are each written as Arrow /// IPC file sections, preceded by a small JSON header containing scalar /// metadata and section lengths. 
- pub fn serialize(&self) -> Result> { + fn serialize(&self, writer: &mut dyn Write) -> Result { let metadata = self.storage.metadata(); let distance_type = self.storage.distance_type(); @@ -261,24 +273,23 @@ impl PartitionEntry { }; let header_json = serde_json::to_vec(&header)?; - let total_len = 8 + header_json.len() + sub_index_ipc.len() + codebook_ipc.len() + storage_ipc.len(); - let mut out = Vec::with_capacity(total_len); - out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); - out.extend_from_slice(&header_json); - out.extend_from_slice(&sub_index_ipc); - out.extend_from_slice(&codebook_ipc); - out.extend_from_slice(&storage_ipc); - - Ok(out) + + writer.write_all(&(header_json.len() as u64).to_le_bytes())?; + writer.write_all(&header_json)?; + writer.write_all(&sub_index_ipc)?; + writer.write_all(&codebook_ipc)?; + writer.write_all(&storage_ipc)?; + + Ok(total_len) } /// Deserialize a partition entry from bytes, zero-copy for Arrow data. /// /// The Arrow IPC sections are decoded using [`FileDecoder`] so that the /// resulting arrays reference slices of the provided `Bytes` buffer directly. - pub fn deserialize(data: Bytes) -> Result { + fn deserialize(data: Bytes) -> Result { if data.len() < 8 { return Err(Error::io("partition data too small".to_string())); } @@ -352,9 +363,9 @@ struct FlatPartitionHeader { storage_len: u64, } -impl PartitionEntry { +impl Spillable for PartitionEntry { /// Serialize this partition entry to bytes. 
- pub fn serialize(&self) -> Result> { + fn serialize(&self, writer: &mut dyn Write) -> Result { let metadata = self.storage.metadata(); let distance_type = self.storage.distance_type(); @@ -377,16 +388,17 @@ impl PartitionEntry { let header_json = serde_json::to_vec(&header)?; let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len(); - let mut out = Vec::with_capacity(total_len); - out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); - out.extend_from_slice(&header_json); - out.extend_from_slice(&sub_index_ipc); - out.extend_from_slice(&storage_ipc); - Ok(out) + + writer.write_all(&(header_json.len() as u64).to_le_bytes())?; + writer.write_all(&header_json)?; + writer.write_all(&sub_index_ipc)?; + writer.write_all(&storage_ipc)?; + + Ok(total_len) } /// Deserialize a partition entry from bytes, zero-copy for Arrow data. - pub fn deserialize(data: Bytes) -> Result { + fn deserialize(data: Bytes) -> Result { if data.len() < 8 { return Err(Error::io("partition data too small".to_string())); } @@ -446,11 +458,11 @@ struct SqPartitionHeader { storage_len: u64, } -impl PartitionEntry { +impl Spillable for PartitionEntry { /// Serialize this partition entry to bytes. /// /// Multiple SQ storage chunks are concatenated into a single IPC section. 
- pub fn serialize(&self) -> Result> { + fn serialize(&self, writer: &mut dyn Write) -> Result { let metadata = self.storage.metadata(); let distance_type = self.storage.distance_type(); @@ -475,16 +487,17 @@ impl PartitionEntry { let header_json = serde_json::to_vec(&header)?; let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len(); - let mut out = Vec::with_capacity(total_len); - out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); - out.extend_from_slice(&header_json); - out.extend_from_slice(&sub_index_ipc); - out.extend_from_slice(&storage_ipc); - Ok(out) + + writer.write_all(&(header_json.len() as u64).to_le_bytes())?; + writer.write_all(&header_json)?; + writer.write_all(&sub_index_ipc)?; + writer.write_all(&storage_ipc)?; + + Ok(total_len) } /// Deserialize a partition entry from bytes, zero-copy for Arrow data. - pub fn deserialize(data: Bytes) -> Result { + fn deserialize(data: Bytes) -> Result { if data.len() < 8 { return Err(Error::io("partition data too small".to_string())); } @@ -553,7 +566,7 @@ struct RabitPartitionHeader { storage_len: u64, } -impl PartitionEntry { +impl Spillable for PartitionEntry { /// Serialize this partition entry to bytes. /// /// For Matrix rotation the rotation matrix is stored as an Arrow IPC section. @@ -561,7 +574,7 @@ impl PartitionEntry { /// /// The storage batch is stored with already-packed codes so deserialization /// can skip re-packing. 
- pub fn serialize(&self) -> Result> { + fn serialize(&self, writer: &mut dyn Write) -> Result { let metadata = self.storage.metadata(); let distance_type = self.storage.distance_type(); @@ -603,17 +616,18 @@ impl PartitionEntry { let header_json = serde_json::to_vec(&header)?; let total_len = 8 + header_json.len() + sub_index_ipc.len() + rotate_mat_ipc.len() + storage_ipc.len(); - let mut out = Vec::with_capacity(total_len); - out.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); - out.extend_from_slice(&header_json); - out.extend_from_slice(&sub_index_ipc); - out.extend_from_slice(&rotate_mat_ipc); - out.extend_from_slice(&storage_ipc); - Ok(out) + + writer.write_all(&(header_json.len() as u64).to_le_bytes())?; + writer.write_all(&header_json)?; + writer.write_all(&sub_index_ipc)?; + writer.write_all(&rotate_mat_ipc)?; + writer.write_all(&storage_ipc)?; + + Ok(total_len) } /// Deserialize a partition entry from bytes, zero-copy for Arrow data. - pub fn deserialize(data: Bytes) -> Result { + fn deserialize(data: Bytes) -> Result { if data.len() < 8 { return Err(Error::io("partition data too small".to_string())); } @@ -769,7 +783,8 @@ mod tests { storage, }; - let serialized = entry.serialize().unwrap(); + let mut serialized = Vec::new(); + entry.serialize(&mut serialized).unwrap(); let deserialized = PartitionEntry::::deserialize(serialized.into()).unwrap(); @@ -819,7 +834,8 @@ mod tests { storage, }; - let bytes = entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); assert_eq!( @@ -839,7 +855,8 @@ mod tests { storage, }; - let serialized = entry.serialize().unwrap(); + let mut serialized = Vec::new(); + entry.serialize(&mut serialized).unwrap(); let deserialized = PartitionEntry::::deserialize(serialized.into()).unwrap(); assert_eq!(entry.storage, deserialized.storage); @@ -874,7 +891,8 @@ mod tests { storage, }; - let bytes = 
entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); @@ -902,7 +920,8 @@ mod tests { index: FlatIndex::default(), storage, }; - let bytes = entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); assert_eq!(restored.storage.distance_type(), dt); @@ -948,7 +967,8 @@ mod tests { storage, }; - let bytes = entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); @@ -977,7 +997,8 @@ mod tests { index: FlatIndex::default(), storage, }; - let bytes = entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); assert_eq!(restored.storage.distance_type(), dt); @@ -1020,7 +1041,8 @@ mod tests { index: FlatIndex::default(), storage, }; - let bytes = entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); @@ -1103,7 +1125,8 @@ mod tests { storage, }; - let bytes = entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); @@ -1144,7 +1167,8 @@ mod tests { index: FlatIndex::default(), storage, }; - let bytes = entry.serialize().unwrap(); + let mut bytes = Vec::new(); + entry.serialize(&mut bytes).unwrap(); let restored = PartitionEntry::::deserialize(bytes.into()).unwrap(); assert_eq!(restored.storage.distance_type(), dt); diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index ffd502550c2..4dce97ebdb1 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ 
b/rust/lance/src/index/vector/ivf/v2.rs @@ -26,7 +26,7 @@ use lance_core::utils::tokio::spawn_cpu; use lance_core::utils::tracing::{IO_TYPE_LOAD_VECTOR_PART, TRACE_IO_EVENTS}; use lance_core::{Error, ROW_ID, Result}; use lance_encoding::decoder::{DecoderPlugins, FilterExpression}; -use lance_file::reader::{FileReader, FileReaderOptions}; +use lance_file::reader::{CachedFileMetadata, FileReader, FileReaderOptions}; use lance_index::frag_reuse::FragReuseIndex; use lance_index::metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector}; use lance_index::vector::flat::index::{FlatIndex, FlatQuantizer}; @@ -98,7 +98,7 @@ impl CacheKey for IVFPartit format!("ivf-{}", self.partition_id).into() } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { // Using type_name is safe here: the impl is in the same crate as the // types, so the monomorphized pointer is consistent. std::any::type_name::>() @@ -153,7 +153,7 @@ impl IVFIndex { ) -> Result { let io_parallelism = object_store.io_parallelism(); let scheduler_config = SchedulerConfig::max_bandwidth(&object_store); - let scheduler = ScanScheduler::new(object_store, scheduler_config); + let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config)); let uri = index_dir.child(uuid.as_str()).child(INDEX_FILE_NAME); let cached_size = file_sizes @@ -168,6 +168,11 @@ impl IVFIndex { FileReaderOptions::default(), ) .await?; + // Cache file metadata so reconstruct_typed can skip the metadata read. 
+ file_metadata_cache + .with_key_prefix(uri.as_ref()) + .insert_with_key(&FileMetadataCacheKey, index_reader.metadata().clone()) + .await; let index_metadata: IndexMetadata = serde_json::from_str( index_reader .schema() @@ -199,21 +204,22 @@ impl IVFIndex { .get(INDEX_AUXILIARY_FILE_NAME) .map(|&size| CachedFileSize::new(size)) .unwrap_or_else(CachedFileSize::unknown); + let aux_path = index_dir + .child(uuid.as_str()) + .child(INDEX_AUXILIARY_FILE_NAME); let storage_reader = FileReader::try_open( - scheduler - .open_file( - &index_dir - .child(uuid.as_str()) - .child(INDEX_AUXILIARY_FILE_NAME), - &aux_cached_size, - ) - .await?, + scheduler.open_file(&aux_path, &aux_cached_size).await?, None, Arc::::default(), file_metadata_cache, FileReaderOptions::default(), ) .await?; + // Cache aux file metadata for reconstruction. + file_metadata_cache + .with_key_prefix(aux_path.as_ref()) + .insert_with_key(&FileMetadataCacheKey, storage_reader.metadata().clone()) + .await; let storage = IvfQuantizationStorage::try_new(storage_reader, frag_reuse_index.clone()).await?; @@ -639,6 +645,8 @@ impl VectorIndex for IVFInd let (sub_index_type, quantization_type) = self.sub_index_type(); // Convert local path back to object_store Path (undo to_local_path's "/" prefix) let index_file_path = self.uri.trim_start_matches('/').to_string(); + let index_meta = self.reader.metadata(); + let aux_meta = self.storage.reader().metadata(); Some(Box::new(IvfIndexState { index_file_path, uuid: self.uuid.clone(), @@ -650,6 +658,8 @@ impl VectorIndex for IVFInd sub_index_type, quantization_type, cache_key_prefix: self.index_cache.prefix().to_string(), + index_file_size: index_meta.file_size(), + aux_file_size: aux_meta.file_size(), })) } } @@ -659,6 +669,60 @@ pub type IvfPq = IVFIndex; pub type IvfHnswSqIndex = IVFIndex; pub type IvfHnswPqIndex = IVFIndex; +/// CacheKey for file metadata, matching the key used by fragment reads. 
+struct FileMetadataCacheKey; + +impl CacheKey for FileMetadataCacheKey { + type ValueType = CachedFileMetadata; + + fn key(&self) -> std::borrow::Cow<'_, str> { + "".into() + } + + fn type_name(&self) -> &'static str { + "FileMetadata" + } +} + +/// Open a FileReader, using cached file metadata when available to avoid IO. +async fn open_reader_cached( + scheduler: &Arc, + path: &Path, + cache: &LanceCache, + known_file_size: u64, +) -> Result { + let file_cache = cache.with_key_prefix(path.as_ref()); + let cached_size = if known_file_size > 0 { + CachedFileSize::new(known_file_size) + } else { + CachedFileSize::unknown() + }; + let file_scheduler = scheduler.open_file(path, &cached_size).await?; + + if let Some(cached_meta) = file_cache.get_with_key(&FileMetadataCacheKey).await { + let encodings_io = Arc::new(lance_file::LanceEncodingsIo::new(file_scheduler)); + FileReader::try_open_with_file_metadata( + encodings_io, + path.clone(), + None, + Arc::::default(), + cached_meta, + cache, + FileReaderOptions::default(), + ) + .await + } else { + FileReader::try_open( + file_scheduler, + None, + Arc::::default(), + cache, + FileReaderOptions::default(), + ) + .await + } +} + /// Reconstruct a concrete `IVFIndex` from cached state. 
async fn reconstruct_typed( state: IvfIndexState, @@ -671,20 +735,16 @@ where { let io_parallelism = object_store.io_parallelism(); let scheduler_config = SchedulerConfig::max_bandwidth(&object_store); - let scheduler = ScanScheduler::new(object_store, scheduler_config); + let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config)); let index_path = Path::parse(&state.index_file_path) .map_err(|e| Error::io(format!("invalid index path: {e}")))?; - // Re-open index FileReader (cheap if file metadata cache is warm) - let index_reader = FileReader::try_open( - scheduler - .open_file(&index_path, &CachedFileSize::unknown()) - .await?, - None, - Arc::::default(), + let index_reader = open_reader_cached( + &scheduler, + &index_path, file_metadata_cache, - FileReaderOptions::default(), + state.index_file_size, ) .await?; @@ -697,14 +757,11 @@ where .unwrap_or(""); let aux_path = Path::parse(format!("{}/{}", parent_str, INDEX_AUXILIARY_FILE_NAME)) .map_err(|e| Error::io(format!("invalid aux path: {e}")))?; - let storage_reader = FileReader::try_open( - scheduler - .open_file(&aux_path, &CachedFileSize::unknown()) - .await?, - None, - Arc::::default(), + let storage_reader = open_reader_cached( + &scheduler, + &aux_path, file_metadata_cache, - FileReaderOptions::default(), + state.aux_file_size, ) .await?; @@ -3869,4 +3926,53 @@ mod tests { let stats = dataset.object_store().io_stats_incremental(); assert_io_eq!(stats, read_iops, 0, "second prewarm should not perform IO"); } + + #[tokio::test] + async fn test_reconstruct_from_cache_zero_io() { + use lance_io::assert_io_eq; + + let test_dir = TempStrDir::default(); + let test_uri = test_dir.as_str(); + let (mut dataset, _) = generate_test_dataset::(test_uri, 0.0..1.0).await; + + let params = VectorIndexParams::with_ivf_pq_params( + DistanceType::L2, + IvfBuildParams::new(4), + PQBuildParams::default(), + ); + dataset + .create_index( + &["vector"], + IndexType::Vector, + Some("my_idx".to_owned()), + &params, + 
true, + ) + .await + .unwrap(); + + // First open: populates file metadata cache and VectorIndexData cache. + let indices = dataset.load_indices_by_name("my_idx").await.unwrap(); + let uuid = indices[0].uuid.to_string(); + dataset + .open_vector_index("vector", &uuid, &NoOpMetricsCollector) + .await + .unwrap(); + + // Reset IO stats, then open again — should reconstruct from cache. + dataset.object_store().io_stats_incremental(); + + dataset + .open_vector_index("vector", &uuid, &NoOpMetricsCollector) + .await + .unwrap(); + + let stats = dataset.object_store().io_stats_incremental(); + assert_io_eq!( + stats, + read_iops, + 0, + "reconstructing from cached state should not perform IO" + ); + } } diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs index aa450483553..7242c0cca6a 100644 --- a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -235,7 +235,7 @@ mod tests { Cow::Borrowed(self.0) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "TestUnsized" } } diff --git a/rust/lance/src/session/caches.rs b/rust/lance/src/session/caches.rs index e4ac180d563..2654e356ac1 100644 --- a/rust/lance/src/session/caches.rs +++ b/rust/lance/src/session/caches.rs @@ -82,7 +82,7 @@ impl CacheKey for ManifestKey<'_> { Cow::Owned(format!("manifest/{}", self.version)) } } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "Manifest" } } @@ -97,7 +97,7 @@ impl CacheKey for TransactionKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("txn/{}", self.version)) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "Transaction" } } @@ -119,7 +119,7 @@ impl CacheKey for DeletionFileKey<'_> { self.deletion_file.file_type.suffix() )) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "DeletionVector" } } @@ -134,7 +134,7 @@ impl CacheKey for RowAddrMaskKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_addr_mask/{}", self.version)) } - 
fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "RowAddrMask" } } @@ -149,7 +149,7 @@ impl CacheKey for RowIdIndexKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_id_index/{}", self.version)) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "RowIdIndex" } } @@ -164,7 +164,7 @@ impl CacheKey for RowIdSequenceKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_id_sequence/{}", self.fragment_id)) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "RowIdSequence" } } diff --git a/rust/lance/src/session/index_caches.rs b/rust/lance/src/session/index_caches.rs index c3430f4c840..04aa9791c8d 100644 --- a/rust/lance/src/session/index_caches.rs +++ b/rust/lance/src/session/index_caches.rs @@ -89,7 +89,7 @@ impl CacheKey for FragReuseIndexKey<'_> { Cow::Owned(format!("frag_reuse/{}", self.uuid)) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "FragReuseIndex" } } @@ -106,7 +106,7 @@ impl CacheKey for IndexMetadataKey { Cow::Owned(self.version.to_string()) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "Vec" } } @@ -137,7 +137,7 @@ impl CacheKey for ScalarIndexDetailsKey<'_> { Cow::Owned(format!("type/{}", self.uuid)) } - fn type_id(&self) -> &'static str { + fn type_name(&self) -> &'static str { "ScalarIndexDetails" } } From a1fb0ba6420bba402a8bee197c993e9d2385fcc6 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 20 Mar 2026 16:43:24 -0700 Subject: [PATCH 16/24] refactor: move serialization/reconstruction code to PR #6223 Move VectorIndexData, IvfIndexState, partition_serde, cacheable_state, and zero-IO reconstruction out of this PR to keep it focused on the pluggable cache backend. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-file/src/reader.rs | 10 - rust/lance-index/src/vector.rs | 212 --- rust/lance-index/src/vector/storage.rs | 22 - rust/lance/src/index.rs | 55 +- rust/lance/src/index/vector/ivf.rs | 1 - .../src/index/vector/ivf/partition_serde.rs | 1177 ----------------- rust/lance/src/index/vector/ivf/v2.rs | 375 +----- 7 files changed, 27 insertions(+), 1825 deletions(-) delete mode 100644 rust/lance/src/index/vector/ivf/partition_serde.rs diff --git a/rust/lance-file/src/reader.rs b/rust/lance-file/src/reader.rs index 3a3f41854c0..29c1aa3ccc0 100644 --- a/rust/lance-file/src/reader.rs +++ b/rust/lance-file/src/reader.rs @@ -105,16 +105,6 @@ pub struct CachedFileMetadata { pub minor_version: u16, } -impl CachedFileMetadata { - /// Total file size in bytes. - pub fn file_size(&self) -> u64 { - self.num_data_bytes - + self.num_global_buffer_bytes - + self.num_column_metadata_bytes - + self.num_footer_bytes - } -} - impl DeepSizeOf for CachedFileMetadata { // TODO: include size for `column_metadatas` and `column_infos`. 
fn deep_size_of_children(&self, context: &mut Context) -> usize { diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs index 0a1b50297d3..0fbff4475cb 100644 --- a/rust/lance-index/src/vector.rs +++ b/rust/lance-index/src/vector.rs @@ -11,14 +11,12 @@ use std::{collections::HashMap, sync::Arc}; use arrow_array::{ArrayRef, Float32Array, RecordBatch, UInt32Array}; use arrow_schema::Field; use async_trait::async_trait; -use bytes::Bytes; use datafusion::execution::SendableRecordBatchStream; use deepsize::DeepSizeOf; use ivf::storage::IvfModel; use lance_core::{ROW_ID_FIELD, Result}; use lance_io::traits::Reader; use lance_linalg::distance::DistanceType; -use prost::Message; use quantizer::{QuantizationType, Quantizer}; use std::sync::LazyLock; use v3::subindex::SubIndexType; @@ -142,210 +140,6 @@ impl From for pb::VectorMetricType { } } -/// Serializable snapshot of a vector index, suitable for disk caching. -/// -/// Implementations must be cheaply reconstructable into a live -/// [`VectorIndex`] given an ObjectStore, file metadata cache, and partition -/// cache. The reconstruction cost should be dominated by re-opening -/// `FileReader`s, which is cheap when the file metadata cache is warm. -pub trait VectorIndexData: Send + Sync + DeepSizeOf + std::fmt::Debug { - /// Serialize this state into `writer`. Called on a blocking thread by - /// the disk cache codec. - fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()>; - - /// Tag used to dispatch deserialization to the correct concrete type. - fn index_type_tag(&self) -> &'static str; - - /// Downcast to `&dyn Any` for concrete type access during reconstruction. - fn as_any(&self) -> &dyn Any; -} - -/// Deserialize a [`VectorIndexData`] from bytes previously written by -/// [`VectorIndexData::write_to`]. -pub fn deserialize_vector_index_data(data: Bytes) -> Result> { - // Currently only IVF indices support disk caching. 
The serialization - // format is self-describing (IvfIndexState header), so no external tag - // is needed yet. When additional index types are added, prepend a - // version/tag byte to the wire format. - let state = IvfIndexState::deserialize(data)?; - Ok(Arc::new(state)) -} - -/// Serializable state of an IVF index, sufficient to reconstruct the index -/// without re-reading global buffers from object storage. -/// -/// Produced by [`VectorIndex::cacheable_state`] and consumed by a -/// reconstruction function that re-opens FileReaders using cached file metadata. -#[derive(Debug, Clone)] -pub struct IvfIndexState { - /// Object-store path to the index file (before `to_local_path` conversion). - pub index_file_path: String, - pub uuid: String, - pub ivf: IvfModel, - pub distance_type: DistanceType, - pub sub_index_metadata: Vec, - /// JSON serialization of `Q::Metadata` (quantizer-specific metadata). - pub quantizer_metadata_json: String, - /// Large quantizer data (PQ codebook, RQ rotation matrix) from `extra_metadata()`. - pub quantizer_extra_data: Option>, - pub sub_index_type: SubIndexType, - pub quantization_type: QuantizationType, - /// The cache key prefix used by the original index's WeakLanceCache. - /// Needed to reconnect the reconstructed index to the shared cache backend. - pub cache_key_prefix: String, - /// File sizes for the index and auxiliary files, used to avoid HEAD requests - /// when reconstructing from cache. - pub index_file_size: u64, - pub aux_file_size: u64, -} - -/// Serialization header for [`IvfIndexState`]. 
-#[derive(serde::Serialize, serde::Deserialize)] -struct IvfIndexStateHeader { - index_file_path: String, - uuid: String, - distance_type: String, - sub_index_metadata: Vec, - sub_index_type: String, - quantization_type: String, - quantizer_metadata_json: String, - #[serde(default)] - cache_key_prefix: String, - #[serde(default)] - index_file_size: u64, - #[serde(default)] - aux_file_size: u64, -} - -impl IvfIndexState { - /// Wire format: - /// `[header_json_len: u64 LE][header JSON][ivf_pb_len: u64 LE][ivf protobuf] - /// [extra_len: u64 LE][extra bytes]` - pub fn serialize(&self) -> Result> { - let header = IvfIndexStateHeader { - index_file_path: self.index_file_path.clone(), - uuid: self.uuid.clone(), - distance_type: self.distance_type.to_string(), - sub_index_metadata: self.sub_index_metadata.clone(), - sub_index_type: self.sub_index_type.to_string(), - quantization_type: self.quantization_type.to_string(), - quantizer_metadata_json: self.quantizer_metadata_json.clone(), - cache_key_prefix: self.cache_key_prefix.clone(), - index_file_size: self.index_file_size, - aux_file_size: self.aux_file_size, - }; - let header_json = serde_json::to_vec(&header) - .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?; - - let ivf_pb = pb::Ivf::try_from(&self.ivf)?; - let ivf_bytes = ivf_pb.encode_to_vec(); - - let extra = self.quantizer_extra_data.as_deref().unwrap_or(&[]); - - let total = 8 + header_json.len() + 8 + ivf_bytes.len() + 8 + extra.len(); - let mut buf = Vec::with_capacity(total); - buf.extend_from_slice(&(header_json.len() as u64).to_le_bytes()); - buf.extend_from_slice(&header_json); - buf.extend_from_slice(&(ivf_bytes.len() as u64).to_le_bytes()); - buf.extend_from_slice(&ivf_bytes); - buf.extend_from_slice(&(extra.len() as u64).to_le_bytes()); - buf.extend_from_slice(extra); - Ok(buf) - } - - pub fn deserialize(data: Bytes) -> Result { - let mut offset = 0; - - let read_u64 = |data: &[u8], off: &mut usize| -> Result { - if *off + 8 > 
data.len() { - return Err(lance_core::Error::io("IvfIndexState data truncated")); - } - let val = u64::from_le_bytes(data[*off..*off + 8].try_into().unwrap()); - *off += 8; - Ok(val) - }; - - let header_len = read_u64(&data, &mut offset)? as usize; - if offset + header_len > data.len() { - return Err(lance_core::Error::io("IvfIndexState header truncated")); - } - let header: IvfIndexStateHeader = - serde_json::from_slice(&data[offset..offset + header_len]) - .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?; - offset += header_len; - - let ivf_len = read_u64(&data, &mut offset)? as usize; - if offset + ivf_len > data.len() { - return Err(lance_core::Error::io("IvfIndexState IVF data truncated")); - } - let ivf_pb = pb::Ivf::decode(&data[offset..offset + ivf_len]) - .map_err(|e| lance_core::Error::io(format!("IvfIndexState IVF decode: {e}")))?; - let ivf = IvfModel::try_from(ivf_pb)?; - offset += ivf_len; - - let extra_len = read_u64(&data, &mut offset)? as usize; - if offset + extra_len > data.len() { - return Err(lance_core::Error::io("IvfIndexState extra data truncated")); - } - let quantizer_extra_data = if extra_len > 0 { - Some(data[offset..offset + extra_len].to_vec()) - } else { - None - }; - - let distance_type = DistanceType::try_from(header.distance_type.as_str())?; - let sub_index_type = SubIndexType::try_from(header.sub_index_type.as_str())?; - let quantization_type = header.quantization_type.parse::()?; - - Ok(Self { - index_file_path: header.index_file_path, - uuid: header.uuid, - ivf, - distance_type, - sub_index_metadata: header.sub_index_metadata, - quantizer_metadata_json: header.quantizer_metadata_json, - quantizer_extra_data, - sub_index_type, - quantization_type, - cache_key_prefix: header.cache_key_prefix, - index_file_size: header.index_file_size, - aux_file_size: header.aux_file_size, - }) - } -} - -impl DeepSizeOf for IvfIndexState { - fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize { - 
self.index_file_path.deep_size_of_children(context) - + self.uuid.deep_size_of_children(context) - + self.ivf.deep_size_of_children(context) - + self.sub_index_metadata.deep_size_of_children(context) - + self.quantizer_metadata_json.deep_size_of_children(context) - + self - .quantizer_extra_data - .as_ref() - .map(|v| v.deep_size_of_children(context)) - .unwrap_or(0) - + self.cache_key_prefix.deep_size_of_children(context) - } -} - -impl VectorIndexData for IvfIndexState { - fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()> { - let bytes = self.serialize()?; - writer.write_all(&bytes)?; - Ok(()) - } - - fn index_type_tag(&self) -> &'static str { - "IVF" - } - - fn as_any(&self) -> &dyn Any { - self - } -} - /// Vector Index for (Approximate) Nearest Neighbor (ANN) Search. /// /// Vector indices are often built as a chain of indices. For example, IVF -> PQ @@ -470,12 +264,6 @@ pub trait VectorIndex: Send + Sync + std::fmt::Debug + Index { /// the index type of this vector index. fn sub_index_type(&self) -> (SubIndexType, QuantizationType); - - /// Export the index state needed for reconstruction from a disk cache. - /// Returns `None` if this index type doesn't support persistent caching. - fn cacheable_state(&self) -> Option> { - None - } } // it can be an IVF index or a partition of IVF index diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs index 526ba8e78d8..5a1c0e7e6f5 100644 --- a/rust/lance-index/src/vector/storage.rs +++ b/rust/lance-index/src/vector/storage.rs @@ -239,28 +239,6 @@ impl IvfQuantizationStorage { }) } - /// Construct from pre-parsed metadata, skipping global buffer reads. - /// Used when reconstructing from a disk cache. 
- pub fn from_cached( - reader: FileReader, - ivf: IvfModel, - metadata: Q::Metadata, - distance_type: DistanceType, - frag_reuse_index: Option>, - ) -> Self { - Self { - reader, - distance_type, - metadata, - ivf, - frag_reuse_index, - } - } - - pub fn reader(&self) -> &FileReader { - &self.reader - } - pub fn num_rows(&self) -> u64 { self.reader.num_rows() } diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 3c920e19426..7fc85b0b1dd 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -42,7 +42,6 @@ use lance_index::vector::flat::index::{FlatBinQuantizer, FlatIndex, FlatQuantize use lance_index::vector::hnsw::HNSW; use lance_index::vector::pq::ProductQuantizer; use lance_index::vector::sq::ScalarQuantizer; -use lance_index::vector::{IvfIndexState, VectorIndexData}; use lance_index::{DatasetIndexExt, INDEX_METADATA_SCHEMA_KEY, IndexDescription, IndexSegment}; use lance_index::{INDEX_FILE_NAME, Index, IndexType, pb, vector::VectorIndex}; use lance_index::{ @@ -130,7 +129,7 @@ impl<'a> VectorIndexCacheKey<'a> { } impl UnsizedCacheKey for VectorIndexCacheKey<'_> { - type ValueType = dyn VectorIndexData; + type ValueType = dyn VectorIndex; fn key(&self) -> std::borrow::Cow<'_, str> { if let Some(fri_uuid) = self.fri_uuid { @@ -1338,10 +1337,7 @@ impl DatasetIndexInternalExt for Dataset { uuid: &str, metrics: &dyn MetricsCollector, ) -> Result> { - // Quick cache checks for scalar and frag-reuse indices. VectorIndex - // is not checked here because the cache stores VectorIndexData (serializable - // state), not a live VectorIndex — reconstruction is handled by - // open_vector_index. + // Quick cache checks for scalar and frag-reuse indices. 
let frag_reuse_uuid = self.frag_reuse_index_uuid().await; let cache_key = ScalarIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await { @@ -1422,22 +1418,9 @@ impl DatasetIndexInternalExt for Dataset { let frag_reuse_uuid = self.frag_reuse_index_uuid().await; let cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); - // Check cache for serialized VectorIndexData and reconstruct if found. - if let Some(data) = self.index_cache.get_unsized_with_key(&cache_key).await - && let Some(state) = data.as_any().downcast_ref::() - { - log::debug!( - "Reconstructing vector index from cached state uuid: {}", - uuid - ); - let partition_cache = self.index_cache.with_key_prefix(&cache_key.key()); - return vector::ivf::v2::reconstruct_vector_index( - state.clone(), - self.object_store.clone(), - &self.metadata_cache, - partition_cache, - ) - .await; + if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await { + log::debug!("Found vector index in cache uuid: {}", uuid); + return Ok(index); } let frag_reuse_index = self.open_frag_reuse_index(metrics).await?; @@ -1501,12 +1484,9 @@ impl DatasetIndexInternalExt for Dataset { self.object_store.clone(), SchedulerConfig::max_bandwidth(&self.object_store), ); - let file_sizes = index_meta.file_size_map(); - let cached_size = file_sizes - .get(INDEX_FILE_NAME) - .map(|&size| CachedFileSize::new(size)) - .unwrap_or_else(CachedFileSize::unknown); - let file = scheduler.open_file(&index_file, &cached_size).await?; + let file = scheduler + .open_file(&index_file, &CachedFileSize::unknown()) + .await?; let reader = lance_file::reader::FileReader::try_open( file, None, @@ -1540,7 +1520,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1553,7 +1532,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, 
self.metadata_cache.as_ref(), index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1572,7 +1550,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1586,7 +1563,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1600,7 +1576,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1617,7 +1592,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, &file_metadata_cache, index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1631,7 +1605,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1645,7 +1618,6 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, - file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1664,14 +1636,9 @@ impl DatasetIndexInternalExt for Dataset { }; let index = index?; metrics.record_index_load(); - // Cache the serializable state, not the live index. The live index - // holds FileReader handles that can't survive serialization; the - // state can be cheaply reconstructed on the next cache hit. 
- if let Some(state) = index.cacheable_state() { - self.index_cache - .insert_unsized_with_key(&cache_key, Arc::from(state)) - .await; - } + self.index_cache + .insert_unsized_with_key(&cache_key, index.clone()) + .await; Ok(index) } diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs index 3768fad045c..c26da61d7ef 100644 --- a/rust/lance/src/index/vector/ivf.rs +++ b/rust/lance/src/index/vector/ivf.rs @@ -102,7 +102,6 @@ use uuid::Uuid; pub mod builder; pub mod io; -pub mod partition_serde; pub mod v2; // Cache wrapper for vector index trait objects diff --git a/rust/lance/src/index/vector/ivf/partition_serde.rs b/rust/lance/src/index/vector/ivf/partition_serde.rs deleted file mode 100644 index 3db0dcc634a..00000000000 --- a/rust/lance/src/index/vector/ivf/partition_serde.rs +++ /dev/null @@ -1,1177 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -//! Serialization and zero-copy deserialization for IVF partition cache entries. -//! -//! The format is a simple binary layout designed for ephemeral caching (not stable across versions): -//! -//! ```text -//! [header_len: u64 LE] -//! [header: JSON bytes] -//! [sub_index IPC file bytes] -//! [... quantizer-specific IPC sections ...] -//! [storage batch IPC file bytes] -//! ``` -//! -//! Each IPC section is a complete Arrow IPC file. On deserialization, the IPC -//! sections are read zero-copy using [`FileDecoder`] so that Arrow arrays -//! reference the original buffer directly. 
- -use std::io::Write; -use std::sync::Arc; - -use arrow_array::{FixedSizeListArray, RecordBatch}; -use arrow_buffer::Buffer; -use arrow_ipc::convert::fb_to_schema; -use arrow_ipc::reader::{FileDecoder, read_footer_length}; -use arrow_ipc::root_as_footer; -use arrow_ipc::writer::FileWriter; -use arrow_schema::{DataType, Field, Schema}; -use bytes::Bytes; -use lance_core::{Error, Result}; -use lance_index::vector::bq::RQRotationType; -use lance_index::vector::bq::builder::RabitQuantizer; -use lance_index::vector::bq::storage::RabitQuantizationMetadata; -use lance_index::vector::flat::index::{FlatMetadata, FlatQuantizer}; -use lance_index::vector::pq::ProductQuantizer; -use lance_index::vector::pq::storage::ProductQuantizationMetadata; -use lance_index::vector::quantizer::{Quantization, QuantizerStorage}; -use lance_index::vector::sq::ScalarQuantizer; -use lance_index::vector::sq::storage::ScalarQuantizationMetadata; -use lance_index::vector::storage::VectorStore; -use lance_index::vector::v3::subindex::IvfSubIndex; -use lance_linalg::distance::DistanceType; -use serde::{Deserialize, Serialize}; - -use super::v2::PartitionEntry; - -/// Serialization interface for spilling cache entries to an external store. -/// -/// `serialize` writes the entry into the provided writer and returns the -/// number of bytes written. `deserialize` reconstructs the entry from a -/// contiguous `Bytes` buffer (typically obtained by reading back whatever -/// was written). 
-pub trait Spillable: Sized { - fn serialize(&self, writer: &mut dyn Write) -> Result; - fn deserialize(data: Bytes) -> Result; -} - -// --------------------------------------------------------------------------- -// Common helpers -// --------------------------------------------------------------------------- - -fn distance_type_to_u8(dt: DistanceType) -> u8 { - match dt { - DistanceType::L2 => 0, - DistanceType::Cosine => 1, - DistanceType::Dot => 2, - DistanceType::Hamming => 3, - } -} - -fn u8_to_distance_type(v: u8) -> Result { - match v { - 0 => Ok(DistanceType::L2), - 1 => Ok(DistanceType::Cosine), - 2 => Ok(DistanceType::Dot), - 3 => Ok(DistanceType::Hamming), - _ => Err(Error::io(format!("unknown distance type: {v}"))), - } -} - -fn rotation_type_to_u8(rt: RQRotationType) -> u8 { - match rt { - RQRotationType::Matrix => 0, - RQRotationType::Fast => 1, - } -} - -fn u8_to_rotation_type(v: u8) -> Result { - match v { - 0 => Ok(RQRotationType::Matrix), - 1 => Ok(RQRotationType::Fast), - _ => Err(Error::io(format!("unknown rotation type: {v}"))), - } -} - -/// Write one or more RecordBatches as a complete Arrow IPC file into a Vec. -/// -/// Panics if `batches` is empty (caller is responsible for checking). -fn write_ipc_batches(batches: &[RecordBatch]) -> Result> { - let mut buf = Vec::new(); - let mut writer = FileWriter::try_new(&mut buf, batches[0].schema_ref())?; - for batch in batches { - writer.write(batch)?; - } - writer.finish()?; - Ok(buf) -} - -/// Write a single RecordBatch as a complete Arrow IPC file into a Vec. -fn write_ipc(batch: &RecordBatch) -> Result> { - write_ipc_batches(std::slice::from_ref(batch)) -} - -/// Decode the IPC footer and schema from a `Buffer`, returning the decoder and -/// the list of record-batch blocks. Zero-copy: all returned data references -/// the original buffer. 
-fn parse_ipc_footer(data: &Buffer) -> Result<(FileDecoder, Vec)> { - let trailer_start = data - .len() - .checked_sub(10) - .ok_or_else(|| Error::io("IPC section too small to contain footer".to_string()))?; - let footer_len = read_footer_length( - data[trailer_start..] - .try_into() - .map_err(|_| Error::io("IPC section too small for footer length".to_string()))?, - )?; - let footer_start = trailer_start - .checked_sub(footer_len) - .ok_or_else(|| Error::io("IPC footer length exceeds section size".to_string()))?; - let footer = root_as_footer(&data[footer_start..trailer_start]) - .map_err(|e| Error::io(format!("failed to parse IPC footer: {e}")))?; - - let schema = - Arc::new(fb_to_schema(footer.schema().ok_or_else(|| { - Error::io("IPC footer missing schema".to_string()) - })?)); - - let mut decoder = FileDecoder::new(schema, footer.version()); - - for block in footer.dictionaries().iter().flatten() { - let block_len = block.bodyLength() as usize + block.metaDataLength() as usize; - let block_data = data.slice_with_length(block.offset() as usize, block_len); - decoder.read_dictionary(block, &block_data)?; - } - - let batch_blocks: Vec = footer - .recordBatches() - .map(|b| b.iter().copied().collect()) - .unwrap_or_default(); - - Ok((decoder, batch_blocks)) -} - -/// Read all RecordBatches from an Arrow IPC file stored in a `Buffer`, zero-copy. -/// -/// The returned arrays reference slices of the provided buffer directly. -fn read_ipc_all_zero_copy(data: Buffer) -> Result> { - let (decoder, batch_blocks) = parse_ipc_footer(&data)?; - batch_blocks - .iter() - .map(|block| { - let block_len = block.bodyLength() as usize + block.metaDataLength() as usize; - let block_data = data.slice_with_length(block.offset() as usize, block_len); - decoder - .read_record_batch(block, &block_data)? - .ok_or_else(|| Error::io("IPC record batch was None".to_string())) - }) - .collect() -} - -/// Read a single RecordBatch from an Arrow IPC file stored in a `Buffer`, zero-copy. 
-/// -/// The returned `RecordBatch`'s arrays reference slices of the provided buffer -/// directly, avoiding copies. -fn read_ipc_zero_copy(data: Buffer) -> Result { - let (decoder, batch_blocks) = parse_ipc_footer(&data)?; - if batch_blocks.is_empty() { - return Err(Error::io("IPC file contains no record batches".to_string())); - } - let block = &batch_blocks[0]; - let block_len = block.bodyLength() as usize + block.metaDataLength() as usize; - let block_data = data.slice_with_length(block.offset() as usize, block_len); - decoder - .read_record_batch(block, &block_data)? - .ok_or_else(|| Error::io("IPC record batch was None".to_string())) -} - -/// Wrap a `FixedSizeListArray` in a single-column RecordBatch with the given column name. -fn fsl_to_batch(arr: &FixedSizeListArray, name: &str) -> Result { - let field = Field::new( - name, - DataType::FixedSizeList( - Arc::new(Field::new("item", arr.value_type(), true)), - arr.value_length(), - ), - false, - ); - let schema = Arc::new(Schema::new(vec![field])); - Ok(RecordBatch::try_new(schema, vec![Arc::new(arr.clone())])?) -} - -/// Extract a `FixedSizeListArray` from the first column of a RecordBatch. -fn batch_to_fsl(batch: &RecordBatch) -> Result { - let col = batch.column(0); - col.as_any() - .downcast_ref::() - .cloned() - .ok_or_else(|| Error::io("column is not FixedSizeListArray".to_string())) -} - -fn codebook_to_batch(codebook: &FixedSizeListArray) -> Result { - fsl_to_batch(codebook, "codebook") -} - -fn batch_to_codebook(batch: &RecordBatch) -> Result { - batch_to_fsl(batch) -} - -// --------------------------------------------------------------------------- -// PQ -// --------------------------------------------------------------------------- - -#[derive(Serialize, Deserialize)] -struct PqPartitionHeader { - distance_type: u8, - nbits: u32, - num_sub_vectors: usize, - dimension: usize, - transposed: bool, - /// Length of the sub-index IPC section in bytes. 
- sub_index_len: u64, - /// Length of the codebook IPC section in bytes. - codebook_len: u64, - /// Length of the storage batch IPC section in bytes. - storage_len: u64, -} - -impl Spillable for PartitionEntry { - /// Serialize this partition entry to bytes. - /// - /// The sub-index, PQ codebook, and storage batch are each written as Arrow - /// IPC file sections, preceded by a small JSON header containing scalar - /// metadata and section lengths. - fn serialize(&self, writer: &mut dyn Write) -> Result { - let metadata = self.storage.metadata(); - let distance_type = self.storage.distance_type(); - - // Serialize the three Arrow sections. - let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; - let codebook = metadata.codebook.as_ref().ok_or_else(|| { - Error::io("PQ metadata missing codebook during serialization".to_string()) - })?; - let codebook_ipc = write_ipc(&codebook_to_batch(codebook)?)?; - let storage_batches: Vec<_> = self.storage.to_batches()?.collect(); - let storage_ipc = if storage_batches.len() == 1 { - write_ipc(&storage_batches[0])? 
- } else { - return Err(Error::io( - "expected exactly one storage batch for PQ storage".to_string(), - )); - }; - - let header = PqPartitionHeader { - distance_type: distance_type_to_u8(distance_type), - nbits: metadata.nbits, - num_sub_vectors: metadata.num_sub_vectors, - dimension: metadata.dimension, - transposed: metadata.transposed, - sub_index_len: sub_index_ipc.len() as u64, - codebook_len: codebook_ipc.len() as u64, - storage_len: storage_ipc.len() as u64, - }; - - let header_json = serde_json::to_vec(&header)?; - let total_len = - 8 + header_json.len() + sub_index_ipc.len() + codebook_ipc.len() + storage_ipc.len(); - - writer.write_all(&(header_json.len() as u64).to_le_bytes())?; - writer.write_all(&header_json)?; - writer.write_all(&sub_index_ipc)?; - writer.write_all(&codebook_ipc)?; - writer.write_all(&storage_ipc)?; - - Ok(total_len) - } - - /// Deserialize a partition entry from bytes, zero-copy for Arrow data. - /// - /// The Arrow IPC sections are decoded using [`FileDecoder`] so that the - /// resulting arrays reference slices of the provided `Bytes` buffer directly. 
- fn deserialize(data: Bytes) -> Result { - if data.len() < 8 { - return Err(Error::io("partition data too small".to_string())); - } - - let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; - let header_end = 8 + header_len; - if data.len() < header_end { - return Err(Error::io("partition data truncated in header".to_string())); - } - - let header: PqPartitionHeader = serde_json::from_slice(&data[8..header_end])?; - let distance_type = u8_to_distance_type(header.distance_type)?; - - let sub_index_start = header_end; - let sub_index_end = sub_index_start + header.sub_index_len as usize; - let codebook_start = sub_index_end; - let codebook_end = codebook_start + header.codebook_len as usize; - let storage_start = codebook_end; - let storage_end = storage_start + header.storage_len as usize; - - if data.len() < storage_end { - return Err(Error::io( - "partition data truncated in IPC sections".to_string(), - )); - } - - // Zero-copy: create Buffer slices backed by the original Bytes. 
- let buffer = Buffer::from(data); - let sub_index_buf = - buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); - let codebook_buf = buffer.slice_with_length(codebook_start, header.codebook_len as usize); - let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize); - - let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; - let codebook_batch = read_ipc_zero_copy(codebook_buf)?; - let storage_batch = read_ipc_zero_copy(storage_buf)?; - - let index = S::load(sub_index_batch)?; - let codebook = batch_to_codebook(&codebook_batch)?; - - let metadata = ProductQuantizationMetadata { - codebook_position: 0, - nbits: header.nbits, - num_sub_vectors: header.num_sub_vectors, - dimension: header.dimension, - codebook: Some(codebook), - codebook_tensor: Vec::new(), - transposed: header.transposed, - }; - - let storage = ::Storage::try_from_batch( - storage_batch, - &metadata, - distance_type, - None, - )?; - - Ok(Self { index, storage }) - } -} - -// --------------------------------------------------------------------------- -// Flat -// --------------------------------------------------------------------------- - -#[derive(Serialize, Deserialize)] -struct FlatPartitionHeader { - distance_type: u8, - dim: usize, - sub_index_len: u64, - storage_len: u64, -} - -impl Spillable for PartitionEntry { - /// Serialize this partition entry to bytes. - fn serialize(&self, writer: &mut dyn Write) -> Result { - let metadata = self.storage.metadata(); - let distance_type = self.storage.distance_type(); - - let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; - let storage_batches: Vec<_> = self.storage.to_batches()?.collect(); - let storage_ipc = if storage_batches.len() == 1 { - write_ipc(&storage_batches[0])? 
- } else { - return Err(Error::io( - "expected exactly one storage batch for Flat storage".to_string(), - )); - }; - - let header = FlatPartitionHeader { - distance_type: distance_type_to_u8(distance_type), - dim: metadata.dim, - sub_index_len: sub_index_ipc.len() as u64, - storage_len: storage_ipc.len() as u64, - }; - - let header_json = serde_json::to_vec(&header)?; - let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len(); - - writer.write_all(&(header_json.len() as u64).to_le_bytes())?; - writer.write_all(&header_json)?; - writer.write_all(&sub_index_ipc)?; - writer.write_all(&storage_ipc)?; - - Ok(total_len) - } - - /// Deserialize a partition entry from bytes, zero-copy for Arrow data. - fn deserialize(data: Bytes) -> Result { - if data.len() < 8 { - return Err(Error::io("partition data too small".to_string())); - } - let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; - let header_end = 8 + header_len; - if data.len() < header_end { - return Err(Error::io("partition data truncated in header".to_string())); - } - - let header: FlatPartitionHeader = serde_json::from_slice(&data[8..header_end])?; - let distance_type = u8_to_distance_type(header.distance_type)?; - - let sub_index_start = header_end; - let sub_index_end = sub_index_start + header.sub_index_len as usize; - let storage_start = sub_index_end; - let storage_end = storage_start + header.storage_len as usize; - - if data.len() < storage_end { - return Err(Error::io( - "partition data truncated in IPC sections".to_string(), - )); - } - - let buffer = Buffer::from(data); - let sub_index_buf = - buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); - let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize); - - let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; - let storage_batch = read_ipc_zero_copy(storage_buf)?; - - let index = S::load(sub_index_batch)?; - let metadata = FlatMetadata { dim: 
header.dim }; - let storage = ::Storage::try_from_batch( - storage_batch, - &metadata, - distance_type, - None, - )?; - - Ok(Self { index, storage }) - } -} - -// --------------------------------------------------------------------------- -// SQ -// --------------------------------------------------------------------------- - -#[derive(Serialize, Deserialize)] -struct SqPartitionHeader { - distance_type: u8, - num_bits: u16, - dim: usize, - bounds_start: f64, - bounds_end: f64, - sub_index_len: u64, - storage_len: u64, -} - -impl Spillable for PartitionEntry { - /// Serialize this partition entry to bytes. - /// - /// Multiple SQ storage chunks are concatenated into a single IPC section. - fn serialize(&self, writer: &mut dyn Write) -> Result { - let metadata = self.storage.metadata(); - let distance_type = self.storage.distance_type(); - - let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; - - // Write all SQ chunks as multiple record batches in one IPC file, avoiding copies. - let batches: Vec<_> = self.storage.to_batches()?.collect(); - if batches.is_empty() { - return Err(Error::io("SQ storage has no batches".to_string())); - } - let storage_ipc = write_ipc_batches(&batches)?; - - let header = SqPartitionHeader { - distance_type: distance_type_to_u8(distance_type), - num_bits: metadata.num_bits, - dim: metadata.dim, - bounds_start: metadata.bounds.start, - bounds_end: metadata.bounds.end, - sub_index_len: sub_index_ipc.len() as u64, - storage_len: storage_ipc.len() as u64, - }; - - let header_json = serde_json::to_vec(&header)?; - let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len(); - - writer.write_all(&(header_json.len() as u64).to_le_bytes())?; - writer.write_all(&header_json)?; - writer.write_all(&sub_index_ipc)?; - writer.write_all(&storage_ipc)?; - - Ok(total_len) - } - - /// Deserialize a partition entry from bytes, zero-copy for Arrow data. 
- fn deserialize(data: Bytes) -> Result { - if data.len() < 8 { - return Err(Error::io("partition data too small".to_string())); - } - let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; - let header_end = 8 + header_len; - if data.len() < header_end { - return Err(Error::io("partition data truncated in header".to_string())); - } - - let header: SqPartitionHeader = serde_json::from_slice(&data[8..header_end])?; - let distance_type = u8_to_distance_type(header.distance_type)?; - - let sub_index_start = header_end; - let sub_index_end = sub_index_start + header.sub_index_len as usize; - let storage_start = sub_index_end; - let storage_end = storage_start + header.storage_len as usize; - - if data.len() < storage_end { - return Err(Error::io( - "partition data truncated in IPC sections".to_string(), - )); - } - - let buffer = Buffer::from(data); - let sub_index_buf = - buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); - let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize); - - let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; - let storage_batches = read_ipc_all_zero_copy(storage_buf)?; - - let index = S::load(sub_index_batch)?; - let metadata = ScalarQuantizationMetadata { - dim: header.dim, - num_bits: header.num_bits, - bounds: header.bounds_start..header.bounds_end, - }; - let storage = ::Storage::try_new( - metadata.num_bits, - distance_type, - metadata.bounds, - storage_batches, - None, - )?; - - Ok(Self { index, storage }) - } -} - -// --------------------------------------------------------------------------- -// RabitQ -// --------------------------------------------------------------------------- - -#[derive(Serialize, Deserialize)] -struct RabitPartitionHeader { - distance_type: u8, - num_bits: u8, - code_dim: u32, - /// 0 = Matrix, 1 = Fast - rotation_type: u8, - /// Fast rotation signs (only set when rotation_type == Fast). 
- fast_rotation_signs: Option>, - sub_index_len: u64, - /// Length of the rotation matrix IPC section; 0 when rotation_type == Fast. - rotate_mat_len: u64, - storage_len: u64, -} - -impl Spillable for PartitionEntry { - /// Serialize this partition entry to bytes. - /// - /// For Matrix rotation the rotation matrix is stored as an Arrow IPC section. - /// For Fast rotation the signs are stored compactly in the JSON header. - /// - /// The storage batch is stored with already-packed codes so deserialization - /// can skip re-packing. - fn serialize(&self, writer: &mut dyn Write) -> Result { - let metadata = self.storage.metadata(); - let distance_type = self.storage.distance_type(); - - let sub_index_ipc = write_ipc(&self.index.to_batch()?)?; - - let rotate_mat_ipc = match metadata.rotation_type { - RQRotationType::Matrix => { - let mat = metadata.rotate_mat.as_ref().ok_or_else(|| { - Error::io( - "RabitQ Matrix metadata missing rotate_mat during serialization" - .to_string(), - ) - })?; - write_ipc(&fsl_to_batch(mat, "rotate_mat")?)? - } - RQRotationType::Fast => Vec::new(), - }; - - let storage_batches: Vec<_> = self.storage.to_batches()?.collect(); - let storage_ipc = if storage_batches.len() == 1 { - write_ipc(&storage_batches[0])? 
- } else { - return Err(Error::io( - "expected exactly one storage batch for RabitQ storage".to_string(), - )); - }; - - let header = RabitPartitionHeader { - distance_type: distance_type_to_u8(distance_type), - num_bits: metadata.num_bits, - code_dim: metadata.code_dim, - rotation_type: rotation_type_to_u8(metadata.rotation_type), - fast_rotation_signs: metadata.fast_rotation_signs.clone(), - sub_index_len: sub_index_ipc.len() as u64, - rotate_mat_len: rotate_mat_ipc.len() as u64, - storage_len: storage_ipc.len() as u64, - }; - - let header_json = serde_json::to_vec(&header)?; - let total_len = - 8 + header_json.len() + sub_index_ipc.len() + rotate_mat_ipc.len() + storage_ipc.len(); - - writer.write_all(&(header_json.len() as u64).to_le_bytes())?; - writer.write_all(&header_json)?; - writer.write_all(&sub_index_ipc)?; - writer.write_all(&rotate_mat_ipc)?; - writer.write_all(&storage_ipc)?; - - Ok(total_len) - } - - /// Deserialize a partition entry from bytes, zero-copy for Arrow data. 
- fn deserialize(data: Bytes) -> Result { - if data.len() < 8 { - return Err(Error::io("partition data too small".to_string())); - } - let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize; - let header_end = 8 + header_len; - if data.len() < header_end { - return Err(Error::io("partition data truncated in header".to_string())); - } - - let header: RabitPartitionHeader = serde_json::from_slice(&data[8..header_end])?; - let distance_type = u8_to_distance_type(header.distance_type)?; - let rotation_type = u8_to_rotation_type(header.rotation_type)?; - - let sub_index_start = header_end; - let sub_index_end = sub_index_start + header.sub_index_len as usize; - let rotate_mat_start = sub_index_end; - let rotate_mat_end = rotate_mat_start + header.rotate_mat_len as usize; - let storage_start = rotate_mat_end; - let storage_end = storage_start + header.storage_len as usize; - - if data.len() < storage_end { - return Err(Error::io( - "partition data truncated in IPC sections".to_string(), - )); - } - - let buffer = Buffer::from(data); - let sub_index_buf = - buffer.slice_with_length(sub_index_start, header.sub_index_len as usize); - let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize); - - let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?; - let storage_batch = read_ipc_zero_copy(storage_buf)?; - - let rotate_mat = if header.rotate_mat_len > 0 { - let rotate_mat_buf = - buffer.slice_with_length(rotate_mat_start, header.rotate_mat_len as usize); - let mat_batch = read_ipc_zero_copy(rotate_mat_buf)?; - Some(batch_to_fsl(&mat_batch)?) - } else { - None - }; - - let index = S::load(sub_index_batch)?; - let metadata = RabitQuantizationMetadata { - rotate_mat, - rotate_mat_position: None, - fast_rotation_signs: header.fast_rotation_signs, - rotation_type, - code_dim: header.code_dim, - num_bits: header.num_bits, - // The storage batch already has packed codes; skip re-packing. 
- packed: true, - }; - let storage = ::Storage::try_from_batch( - storage_batch, - &metadata, - distance_type, - None, - )?; - - Ok(Self { index, storage }) - } -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - use std::sync::Arc; - - use arrow_array::cast::AsArray; - use arrow_array::{ - Float32Array, UInt8Array, UInt64Array, - types::{Float32Type, UInt8Type}, - }; - use arrow_schema::{DataType, Field, Schema}; - use lance_arrow::FixedSizeListArrayExt; - use lance_index::vector::bq::storage::RABIT_CODE_COLUMN; - use lance_index::vector::bq::transform::{ADD_FACTORS_COLUMN, SCALE_FACTORS_COLUMN}; - use lance_index::vector::bq::{RQRotationType, builder::RabitQuantizer}; - use lance_index::vector::flat::index::FlatIndex; - use lance_index::vector::flat::storage::FlatFloatStorage; - use lance_index::vector::sq::storage::ScalarQuantizationStorage; - - // ----- PQ helpers ------------------------------------------------------- - - fn make_test_codebook(dim: usize, num_sub_vectors: usize) -> FixedSizeListArray { - let sub_dim = dim / num_sub_vectors; - let num_centroids = 256; - let total_values = num_sub_vectors * num_centroids * sub_dim; - let values: Vec = (0..total_values).map(|i| i as f32 * 0.01).collect(); - let values_array = Float32Array::from(values); - FixedSizeListArray::try_new_from_values(values_array, sub_dim as i32).unwrap() - } - - fn make_test_pq_storage( - num_rows: usize, - dim: usize, - num_sub_vectors: usize, - ) -> ::Storage { - let codebook = make_test_codebook(dim, num_sub_vectors); - let row_ids = UInt64Array::from((0..num_rows as u64).collect::>()); - let pq_codes_flat: Vec = (0..num_rows * num_sub_vectors) - .map(|i| (i % 256) as u8) - .collect(); - let pq_codes = UInt8Array::from(pq_codes_flat); - let pq_codes_fsl = - FixedSizeListArray::try_new_from_values(pq_codes, 
num_sub_vectors as i32).unwrap(); - - let schema = Arc::new(Schema::new(vec![ - Field::new(lance_core::ROW_ID, DataType::UInt64, false), - Field::new( - lance_index::vector::PQ_CODE_COLUMN, - DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, true)), - num_sub_vectors as i32, - ), - false, - ), - ])); - - let batch = - RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(pq_codes_fsl)]).unwrap(); - - ::Storage::new( - codebook, - batch, - 8, - num_sub_vectors, - dim, - DistanceType::L2, - false, - None, - ) - .unwrap() - } - - // ----- PQ tests --------------------------------------------------------- - - #[test] - fn test_roundtrip_flat_pq() { - let dim = 128; - let num_sub_vectors = 16; - let num_rows = 100; - - let storage = make_test_pq_storage(num_rows, dim, num_sub_vectors); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - - let mut serialized = Vec::new(); - entry.serialize(&mut serialized).unwrap(); - let deserialized = - PartitionEntry::::deserialize(serialized.into()).unwrap(); - - assert_eq!(entry.storage, deserialized.storage); - } - - #[test] - fn test_roundtrip_preserves_distance_type() { - for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { - let dim = 32; - let num_sub_vectors = 4; - let codebook = make_test_codebook(dim, num_sub_vectors); - let row_ids = UInt64Array::from(vec![0u64, 1, 2]); - let pq_codes = UInt8Array::from(vec![0u8; 3 * num_sub_vectors]); - let pq_codes_fsl = - FixedSizeListArray::try_new_from_values(pq_codes, num_sub_vectors as i32).unwrap(); - - let schema = Arc::new(Schema::new(vec![ - Field::new(lance_core::ROW_ID, DataType::UInt64, false), - Field::new( - lance_index::vector::PQ_CODE_COLUMN, - DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, true)), - num_sub_vectors as i32, - ), - false, - ), - ])); - let batch = - RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(pq_codes_fsl)]) - .unwrap(); - - let storage 
= ::Storage::new( - codebook, - batch, - 8, - num_sub_vectors, - dim, - dt, - false, - None, - ) - .unwrap(); - - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - assert_eq!( - restored.storage.distance_type(), - entry.storage.distance_type() - ); - } - } - - #[test] - fn test_empty_partition() { - let dim = 16; - let num_sub_vectors = 2; - let storage = make_test_pq_storage(0, dim, num_sub_vectors); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - - let mut serialized = Vec::new(); - entry.serialize(&mut serialized).unwrap(); - let deserialized = - PartitionEntry::::deserialize(serialized.into()).unwrap(); - assert_eq!(entry.storage, deserialized.storage); - } - - #[test] - fn test_truncated_data_errors() { - assert!( - PartitionEntry::::deserialize(Bytes::from_static( - b"short" - )) - .is_err() - ); - } - - // ----- Flat helpers ----------------------------------------------------- - - fn make_flat_storage(num_rows: usize, dim: usize) -> FlatFloatStorage { - let values: Vec = (0..num_rows * dim).map(|i| i as f32 * 0.01).collect(); - let values_array = Float32Array::from(values); - let vectors = FixedSizeListArray::try_new_from_values(values_array, dim as i32).unwrap(); - FlatFloatStorage::new(vectors, DistanceType::L2) - } - - // ----- Flat tests ------------------------------------------------------- - - #[test] - fn test_roundtrip_flat_flat() { - let storage = make_flat_storage(50, 64); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - - assert_eq!( - restored.storage.metadata().dim, - entry.storage.metadata().dim - ); - assert_eq!( - restored.storage.distance_type(), - 
entry.storage.distance_type() - ); - assert_eq!(restored.storage.len(), entry.storage.len()); - let orig_batch = entry.storage.to_batches().unwrap().next().unwrap(); - let rest_batch = restored.storage.to_batches().unwrap().next().unwrap(); - assert_eq!(orig_batch, rest_batch); - } - - #[test] - fn test_flat_distance_types() { - for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { - let values = Float32Array::from(vec![1.0f32; 32]); - let vectors = FixedSizeListArray::try_new_from_values(values, 32).unwrap(); - let storage = FlatFloatStorage::new(vectors, dt); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - assert_eq!(restored.storage.distance_type(), dt); - } - } - - // ----- SQ helpers ------------------------------------------------------- - - fn make_sq_storage( - num_rows: usize, - dim: usize, - distance_type: DistanceType, - ) -> ScalarQuantizationStorage { - let row_ids = UInt64Array::from_iter_values(0..num_rows as u64); - let sq_codes_flat: Vec = (0..num_rows * dim).map(|i| (i % 256) as u8).collect(); - let sq_codes = UInt8Array::from(sq_codes_flat); - let sq_codes_fsl = FixedSizeListArray::try_new_from_values(sq_codes, dim as i32).unwrap(); - - let schema = Arc::new(Schema::new(vec![ - Field::new(lance_core::ROW_ID, DataType::UInt64, false), - Field::new( - lance_index::vector::SQ_CODE_COLUMN, - DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, true)), - dim as i32, - ), - false, - ), - ])); - let batch = - RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(sq_codes_fsl)]).unwrap(); - - ScalarQuantizationStorage::try_new(8, distance_type, -1.0..1.0, [batch], None).unwrap() - } - - // ----- SQ tests --------------------------------------------------------- - - #[test] - fn test_roundtrip_flat_sq() { - let storage = 
make_sq_storage(100, 64, DistanceType::L2); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - - let m = entry.storage.metadata(); - let rm = restored.storage.metadata(); - assert_eq!(rm.dim, m.dim); - assert_eq!(rm.num_bits, m.num_bits); - assert_eq!(rm.bounds, m.bounds); - assert_eq!( - restored.storage.distance_type(), - entry.storage.distance_type() - ); - assert_eq!(restored.storage.len(), entry.storage.len()); - - // Verify row IDs are preserved. - let orig_ids: Vec = entry.storage.row_ids().copied().collect(); - let rest_ids: Vec = restored.storage.row_ids().copied().collect(); - assert_eq!(orig_ids, rest_ids); - } - - #[test] - fn test_sq_distance_types() { - for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { - let storage = make_sq_storage(10, 16, dt); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - assert_eq!(restored.storage.distance_type(), dt); - } - } - - #[test] - fn test_sq_multiple_chunks_no_copy() { - // Build SQ storage with multiple chunks by appending batches separately. 
- let dim = 16usize; - let make_batch = |start: u64, n: usize| { - let row_ids = UInt64Array::from_iter_values(start..start + n as u64); - let codes = UInt8Array::from(vec![0u8; n * dim]); - let fsl = FixedSizeListArray::try_new_from_values(codes, dim as i32).unwrap(); - let schema = Arc::new(Schema::new(vec![ - Field::new(lance_core::ROW_ID, DataType::UInt64, false), - Field::new( - lance_index::vector::SQ_CODE_COLUMN, - DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, true)), - dim as i32, - ), - false, - ), - ])); - RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(fsl)]).unwrap() - }; - // Three chunks with 10 rows each. - let storage = ScalarQuantizationStorage::try_new( - 8, - DistanceType::L2, - -1.0..1.0, - [make_batch(0, 10), make_batch(10, 10), make_batch(20, 10)], - None, - ) - .unwrap(); - assert_eq!(storage.len(), 30); - - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - - assert_eq!(restored.storage.len(), 30); - let orig_ids: Vec = entry.storage.row_ids().copied().collect(); - let rest_ids: Vec = restored.storage.row_ids().copied().collect(); - assert_eq!(orig_ids, rest_ids); - } - - // ----- RabitQ helpers --------------------------------------------------- - - fn make_rabit_storage_fast( - num_rows: usize, - code_dim: usize, - distance_type: DistanceType, - ) -> ::Storage { - use lance_arrow::FixedSizeListArrayExt; - - let quantizer = RabitQuantizer::new_with_rotation::( - 1, - code_dim as i32, - RQRotationType::Fast, - ); - // Generate float vectors and quantize them to binary codes. 
- let values: Vec = (0..num_rows * code_dim) - .map(|i| (i % 100) as f32 / 100.0 - 0.5) - .collect(); - let values_arr = Float32Array::from(values); - let vectors = FixedSizeListArray::try_new_from_values(values_arr, code_dim as i32).unwrap(); - let codes = quantizer - .quantize(&vectors) - .unwrap() - .as_fixed_size_list() - .clone(); - - let metadata = quantizer.metadata(None); - let batch = RecordBatch::try_from_iter(vec![ - ( - lance_core::ROW_ID, - Arc::new(UInt64Array::from_iter_values(0..num_rows as u64)) - as Arc, - ), - ( - RABIT_CODE_COLUMN, - Arc::new(codes) as Arc, - ), - ( - ADD_FACTORS_COLUMN, - Arc::new(Float32Array::from_iter_values( - (0..num_rows).map(|i| i as f32 * 0.1), - )) as Arc, - ), - ( - SCALE_FACTORS_COLUMN, - Arc::new(Float32Array::from_iter_values( - (0..num_rows).map(|i| i as f32 * 0.01 + 0.5), - )) as Arc, - ), - ]) - .unwrap(); - - ::Storage::try_from_batch( - batch, - &metadata, - distance_type, - None, - ) - .unwrap() - } - - // ----- RabitQ tests ----------------------------------------------------- - - #[test] - fn test_roundtrip_flat_rabitq_fast() { - let num_rows = 50; - let code_dim = 64; - let storage = make_rabit_storage_fast(num_rows, code_dim, DistanceType::L2); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - - let m = entry.storage.metadata(); - let rm = restored.storage.metadata(); - assert_eq!(rm.num_bits, m.num_bits); - assert_eq!(rm.code_dim, m.code_dim); - assert_eq!(rm.rotation_type, m.rotation_type); - assert_eq!(rm.fast_rotation_signs, m.fast_rotation_signs); - assert!(rm.packed); - assert_eq!( - restored.storage.distance_type(), - entry.storage.distance_type() - ); - assert_eq!(restored.storage.len(), entry.storage.len()); - - // Verify row IDs are preserved. 
- let orig_ids: Vec = entry.storage.row_ids().copied().collect(); - let rest_ids: Vec = restored.storage.row_ids().copied().collect(); - assert_eq!(orig_ids, rest_ids); - - // Verify codes are preserved. - let orig_batch = entry.storage.to_batches().unwrap().next().unwrap(); - let rest_batch = restored.storage.to_batches().unwrap().next().unwrap(); - let orig_codes = orig_batch[RABIT_CODE_COLUMN].as_fixed_size_list(); - let rest_codes = rest_batch[RABIT_CODE_COLUMN].as_fixed_size_list(); - assert_eq!( - orig_codes.values().as_primitive::().values(), - rest_codes.values().as_primitive::().values(), - ); - } - - #[test] - fn test_rabitq_distance_types() { - for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { - let storage = make_rabit_storage_fast(10, 32, dt); - let entry = PartitionEntry:: { - index: FlatIndex::default(), - storage, - }; - let mut bytes = Vec::new(); - entry.serialize(&mut bytes).unwrap(); - let restored = - PartitionEntry::::deserialize(bytes.into()).unwrap(); - assert_eq!(restored.storage.distance_type(), dt); - } - } -} diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 4dce97ebdb1..26776e52e12 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -26,19 +26,18 @@ use lance_core::utils::tokio::spawn_cpu; use lance_core::utils::tracing::{IO_TYPE_LOAD_VECTOR_PART, TRACE_IO_EVENTS}; use lance_core::{Error, ROW_ID, Result}; use lance_encoding::decoder::{DecoderPlugins, FilterExpression}; -use lance_file::reader::{CachedFileMetadata, FileReader, FileReaderOptions}; +use lance_file::reader::{FileReader, FileReaderOptions}; use lance_index::frag_reuse::FragReuseIndex; use lance_index::metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector}; +use lance_index::vector::VectorIndexCacheEntry; use lance_index::vector::flat::index::{FlatIndex, FlatQuantizer}; use lance_index::vector::hnsw::HNSW; use 
lance_index::vector::ivf::storage::IvfModel; use lance_index::vector::pq::ProductQuantizer; -use lance_index::vector::quantizer::QuantizerMetadata; use lance_index::vector::quantizer::{QuantizationType, Quantizer}; use lance_index::vector::sq::ScalarQuantizer; use lance_index::vector::storage::VectorStore; use lance_index::vector::v3::subindex::SubIndexType; -use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry, VectorIndexData}; use lance_index::{ INDEX_AUXILIARY_FILE_NAME, INDEX_FILE_NAME, Index, IndexType, pb, vector::{ @@ -149,30 +148,22 @@ impl IVFIndex { frag_reuse_index: Option>, file_metadata_cache: &LanceCache, index_cache: LanceCache, - file_sizes: HashMap, ) -> Result { let io_parallelism = object_store.io_parallelism(); let scheduler_config = SchedulerConfig::max_bandwidth(&object_store); - let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config)); + let scheduler = ScanScheduler::new(object_store, scheduler_config); let uri = index_dir.child(uuid.as_str()).child(INDEX_FILE_NAME); - let cached_size = file_sizes - .get(INDEX_FILE_NAME) - .map(|&size| CachedFileSize::new(size)) - .unwrap_or_else(CachedFileSize::unknown); let index_reader = FileReader::try_open( - scheduler.open_file(&uri, &cached_size).await?, + scheduler + .open_file(&uri, &CachedFileSize::unknown()) + .await?, None, Arc::::default(), file_metadata_cache, FileReaderOptions::default(), ) .await?; - // Cache file metadata so reconstruct_typed can skip the metadata read. 
- file_metadata_cache - .with_key_prefix(uri.as_ref()) - .insert_with_key(&FileMetadataCacheKey, index_reader.metadata().clone()) - .await; let index_metadata: IndexMetadata = serde_json::from_str( index_reader .schema() @@ -200,26 +191,21 @@ impl IVFIndex { .ok_or(Error::index(format!("{} not found", S::metadata_key())))?; let sub_index_metadata: Vec = serde_json::from_str(sub_index_metadata)?; - let aux_cached_size = file_sizes - .get(INDEX_AUXILIARY_FILE_NAME) - .map(|&size| CachedFileSize::new(size)) - .unwrap_or_else(CachedFileSize::unknown); - let aux_path = index_dir - .child(uuid.as_str()) - .child(INDEX_AUXILIARY_FILE_NAME); let storage_reader = FileReader::try_open( - scheduler.open_file(&aux_path, &aux_cached_size).await?, + scheduler + .open_file( + &index_dir + .child(uuid.as_str()) + .child(INDEX_AUXILIARY_FILE_NAME), + &CachedFileSize::unknown(), + ) + .await?, None, Arc::::default(), file_metadata_cache, FileReaderOptions::default(), ) .await?; - // Cache aux file metadata for reconstruction. - file_metadata_cache - .with_key_prefix(aux_path.as_ref()) - .insert_with_key(&FileMetadataCacheKey, storage_reader.metadata().clone()) - .await; let storage = IvfQuantizationStorage::try_new(storage_reader, frag_reuse_index.clone()).await?; @@ -239,35 +225,6 @@ impl IVFIndex { }) } - /// Reconstruct from cached state, skipping global buffer reads. 
- #[allow(clippy::too_many_arguments)] - pub(crate) fn from_cached_state( - uri: String, - uuid: String, - ivf: IvfModel, - reader: FileReader, - storage: IvfQuantizationStorage, - sub_index_metadata: Vec, - distance_type: DistanceType, - index_cache: LanceCache, - io_parallelism: usize, - ) -> Self { - let num_partitions = ivf.num_partitions(); - Self { - uri, - uuid, - ivf, - reader, - storage, - partition_locks: PartitionLoadLock::new(num_partitions), - sub_index_metadata, - distance_type, - index_cache: WeakLanceCache::from(&index_cache), - io_parallelism, - _marker: PhantomData, - } - } - #[instrument(level = "debug", skip(self, metrics))] pub async fn load_partition( &self, @@ -638,30 +595,6 @@ impl VectorIndex for IVFInd fn metric_type(&self) -> DistanceType { self.distance_type } - - fn cacheable_state(&self) -> Option> { - let extra_data = self.storage.metadata().extra_metadata().ok().flatten(); - let metadata_json = serde_json::to_string(self.storage.metadata()).ok()?; - let (sub_index_type, quantization_type) = self.sub_index_type(); - // Convert local path back to object_store Path (undo to_local_path's "/" prefix) - let index_file_path = self.uri.trim_start_matches('/').to_string(); - let index_meta = self.reader.metadata(); - let aux_meta = self.storage.reader().metadata(); - Some(Box::new(IvfIndexState { - index_file_path, - uuid: self.uuid.clone(), - ivf: self.ivf.clone(), - distance_type: self.distance_type, - sub_index_metadata: self.sub_index_metadata.clone(), - quantizer_metadata_json: metadata_json, - quantizer_extra_data: extra_data.map(|b| b.to_vec()), - sub_index_type, - quantization_type, - cache_key_prefix: self.index_cache.prefix().to_string(), - index_file_size: index_meta.file_size(), - aux_file_size: aux_meta.file_size(), - })) - } } pub type IvfFlatIndex = IVFIndex; @@ -669,224 +602,6 @@ pub type IvfPq = IVFIndex; pub type IvfHnswSqIndex = IVFIndex; pub type IvfHnswPqIndex = IVFIndex; -/// CacheKey for file metadata, matching the key 
used by fragment reads. -struct FileMetadataCacheKey; - -impl CacheKey for FileMetadataCacheKey { - type ValueType = CachedFileMetadata; - - fn key(&self) -> std::borrow::Cow<'_, str> { - "".into() - } - - fn type_name(&self) -> &'static str { - "FileMetadata" - } -} - -/// Open a FileReader, using cached file metadata when available to avoid IO. -async fn open_reader_cached( - scheduler: &Arc, - path: &Path, - cache: &LanceCache, - known_file_size: u64, -) -> Result { - let file_cache = cache.with_key_prefix(path.as_ref()); - let cached_size = if known_file_size > 0 { - CachedFileSize::new(known_file_size) - } else { - CachedFileSize::unknown() - }; - let file_scheduler = scheduler.open_file(path, &cached_size).await?; - - if let Some(cached_meta) = file_cache.get_with_key(&FileMetadataCacheKey).await { - let encodings_io = Arc::new(lance_file::LanceEncodingsIo::new(file_scheduler)); - FileReader::try_open_with_file_metadata( - encodings_io, - path.clone(), - None, - Arc::::default(), - cached_meta, - cache, - FileReaderOptions::default(), - ) - .await - } else { - FileReader::try_open( - file_scheduler, - None, - Arc::::default(), - cache, - FileReaderOptions::default(), - ) - .await - } -} - -/// Reconstruct a concrete `IVFIndex` from cached state. 
-async fn reconstruct_typed( - state: IvfIndexState, - object_store: Arc, - file_metadata_cache: &LanceCache, - index_cache: LanceCache, -) -> Result> -where - Q::Metadata: serde::de::DeserializeOwned, -{ - let io_parallelism = object_store.io_parallelism(); - let scheduler_config = SchedulerConfig::max_bandwidth(&object_store); - let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config)); - - let index_path = Path::parse(&state.index_file_path) - .map_err(|e| Error::io(format!("invalid index path: {e}")))?; - - let index_reader = open_reader_cached( - &scheduler, - &index_path, - file_metadata_cache, - state.index_file_size, - ) - .await?; - - // Derive aux file path: replace the filename with INDEX_AUXILIARY_FILE_NAME. - // index_path is like "path/to/{uuid}/index.lance", aux is "path/to/{uuid}/aux.lance". - let index_path_str = index_path.as_ref(); - let parent_str = index_path_str - .rsplit_once('/') - .map(|(p, _)| p) - .unwrap_or(""); - let aux_path = Path::parse(format!("{}/{}", parent_str, INDEX_AUXILIARY_FILE_NAME)) - .map_err(|e| Error::io(format!("invalid aux path: {e}")))?; - let storage_reader = open_reader_cached( - &scheduler, - &aux_path, - file_metadata_cache, - state.aux_file_size, - ) - .await?; - - // Parse quantizer metadata from cached JSON - let mut metadata: Q::Metadata = serde_json::from_str(&state.quantizer_metadata_json)?; - if let Some(extra) = state.quantizer_extra_data { - metadata.parse_buffer(extra.into())?; - } - - let storage = IvfQuantizationStorage::from_cached( - storage_reader, - state.ivf.clone(), - metadata, - state.distance_type, - None, // frag_reuse_index not cached - ); - - let index = IVFIndex::::from_cached_state( - to_local_path(&index_path), - state.uuid, - state.ivf, - index_reader, - storage, - state.sub_index_metadata, - state.distance_type, - index_cache, - io_parallelism, - ); - - Ok(Arc::new(index)) -} - -/// Reconstruct a `dyn VectorIndex` from a cached [`IvfIndexState`], dispatching -/// on 
the stored sub-index and quantization types. -pub async fn reconstruct_vector_index( - state: IvfIndexState, - object_store: Arc, - file_metadata_cache: &LanceCache, - index_cache: LanceCache, -) -> Result> { - use lance_index::vector::bq::builder::RabitQuantizer; - - // Extract type tags before consuming state. - let sub_idx = state.sub_index_type.to_string(); - let quant = state.quantization_type.to_string(); - - match (sub_idx.as_str(), quant.as_str()) { - ("FLAT", "FLAT") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - ("FLAT", "PQ") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - ("FLAT", "SQ") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - ("FLAT", "RQ") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - ("HNSW", "PQ") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - ("HNSW", "SQ") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - ("HNSW", "FLAT") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - ("HNSW", "RQ") => { - reconstruct_typed::( - state, - object_store, - file_metadata_cache, - index_cache, - ) - .await - } - (s, q) => Err(Error::index(format!( - "unsupported index type for reconstruction: sub_index={s}, quantization={q}" - ))), - } -} - #[cfg(test)] mod tests { use std::collections::HashSet; @@ -937,7 +652,7 @@ mod tests { use lance_index::vector::{ pq::storage::ProductQuantizationMetadata, storage::STORAGE_METADATA_KEY, }; - use lance_index::{DatasetIndexExt, IndexSegment, IndexType}; + use lance_index::{DatasetIndexExt, IndexType}; use lance_index::{INDEX_AUXILIARY_FILE_NAME, metrics::NoOpMetricsCollector}; use 
lance_index::{optimize::OptimizeOptions, scalar::IndexReader}; use lance_index::{scalar::IndexWriter, vector::hnsw::builder::HnswBuildParams}; @@ -1751,16 +1466,7 @@ mod tests { .unwrap(); dataset - .commit_existing_index_segments( - index_name, - "vector", - vec![IndexSegment::new( - shared_uuid, - dataset.fragment_bitmap.as_ref().clone(), - Arc::new(crate::index::vector_index_details()), - IndexType::IvfPq.version(), - )], - ) + .commit_existing_index(index_name, "vector", shared_uuid) .await .unwrap(); } @@ -3926,53 +3632,4 @@ mod tests { let stats = dataset.object_store().io_stats_incremental(); assert_io_eq!(stats, read_iops, 0, "second prewarm should not perform IO"); } - - #[tokio::test] - async fn test_reconstruct_from_cache_zero_io() { - use lance_io::assert_io_eq; - - let test_dir = TempStrDir::default(); - let test_uri = test_dir.as_str(); - let (mut dataset, _) = generate_test_dataset::(test_uri, 0.0..1.0).await; - - let params = VectorIndexParams::with_ivf_pq_params( - DistanceType::L2, - IvfBuildParams::new(4), - PQBuildParams::default(), - ); - dataset - .create_index( - &["vector"], - IndexType::Vector, - Some("my_idx".to_owned()), - &params, - true, - ) - .await - .unwrap(); - - // First open: populates file metadata cache and VectorIndexData cache. - let indices = dataset.load_indices_by_name("my_idx").await.unwrap(); - let uuid = indices[0].uuid.to_string(); - dataset - .open_vector_index("vector", &uuid, &NoOpMetricsCollector) - .await - .unwrap(); - - // Reset IO stats, then open again — should reconstruct from cache.
- dataset.object_store().io_stats_incremental(); - - dataset - .open_vector_index("vector", &uuid, &NoOpMetricsCollector) - .await - .unwrap(); - - let stats = dataset.object_store().io_stats_incremental(); - assert_io_eq!( - stats, - read_iops, - 0, - "reconstructing from cached state should not perform IO" - ); - } } From c85f9b5a2a3931e2b3d1932a2653466feab88bd8 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 20 Mar 2026 17:09:30 -0700 Subject: [PATCH 17/24] fix: update commit_existing_index to commit_existing_index_segments The method was renamed in #6209 but the test call site in v2.rs was not updated during the merge. Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance/src/index/vector/ivf/v2.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 26776e52e12..d7c7db5e61a 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -652,7 +652,7 @@ mod tests { use lance_index::vector::{ pq::storage::ProductQuantizationMetadata, storage::STORAGE_METADATA_KEY, }; - use lance_index::{DatasetIndexExt, IndexType}; + use lance_index::{DatasetIndexExt, IndexSegment, IndexType}; use lance_index::{INDEX_AUXILIARY_FILE_NAME, metrics::NoOpMetricsCollector}; use lance_index::{optimize::OptimizeOptions, scalar::IndexReader}; use lance_index::{scalar::IndexWriter, vector::hnsw::builder::HnswBuildParams}; @@ -1466,7 +1466,16 @@ mod tests { .unwrap(); dataset - .commit_existing_index(index_name, "vector", shared_uuid) + .commit_existing_index_segments( + index_name, + "vector", + vec![IndexSegment::new( + shared_uuid, + dataset.fragment_bitmap.as_ref().clone(), + Arc::new(crate::index::vector_index_details()), + IndexType::IvfPq.version(), + )], + ) .await .unwrap(); } From 32d8c62cb2e48f4c0c57a123494771610b81797b Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 20 Mar 2026 21:39:38 -0700 Subject: [PATCH 18/24] 
fix: restore file_sizes optimization and vector cache check The file_sizes parameter on IVFIndex::try_new and the file_size_map() usage in open_vector_index were from merged PR #5497, not the serialization PR. Restoring them avoids unnecessary HEAD requests. Also restores vector index cache check in open_generic_index. Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance/src/index.rs | 28 +++++++++++++++++++++++---- rust/lance/src/index/vector/ivf/v2.rs | 15 ++++++++++---- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 7fc85b0b1dd..97b25da6767 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -1337,13 +1337,22 @@ impl DatasetIndexInternalExt for Dataset { uuid: &str, metrics: &dyn MetricsCollector, ) -> Result> { - // Quick cache checks for scalar and frag-reuse indices. + // Checking for cache existence is cheap so we just check both scalar and vector caches let frag_reuse_uuid = self.frag_reuse_index_uuid().await; let cache_key = ScalarIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await { return Ok(index.as_index()); } + let vector_cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); + if let Some(index) = self + .index_cache + .get_unsized_with_key(&vector_cache_key) + .await + { + return Ok(index.as_index()); + } + let frag_reuse_cache_key = FragReuseIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref()); if let Some(index) = self.index_cache.get_with_key(&frag_reuse_cache_key).await { return Ok(index.as_index()); @@ -1484,9 +1493,12 @@ impl DatasetIndexInternalExt for Dataset { self.object_store.clone(), SchedulerConfig::max_bandwidth(&self.object_store), ); - let file = scheduler - .open_file(&index_file, &CachedFileSize::unknown()) - .await?; + let file_sizes = index_meta.file_size_map(); + let cached_size = file_sizes + .get(INDEX_FILE_NAME) + .map(|&size| 
CachedFileSize::new(size)) + .unwrap_or_else(CachedFileSize::unknown); + let file = scheduler.open_file(&index_file, &cached_size).await?; let reader = lance_file::reader::FileReader::try_open( file, None, @@ -1520,6 +1532,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1532,6 +1545,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1550,6 +1564,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1563,6 +1578,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1576,6 +1592,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1592,6 +1609,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, &file_metadata_cache, index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1605,6 +1623,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) @@ -1618,6 +1637,7 @@ impl DatasetIndexInternalExt for Dataset { frag_reuse_index, self.metadata_cache.as_ref(), index_cache, + file_sizes, ) .await?; Ok(Arc::new(ivf) as Arc) diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index d7c7db5e61a..9561c187b18 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -148,16 +148,19 @@ impl IVFIndex { frag_reuse_index: Option>, file_metadata_cache: &LanceCache, index_cache: LanceCache, + file_sizes: HashMap, ) -> Result { let 
io_parallelism = object_store.io_parallelism(); let scheduler_config = SchedulerConfig::max_bandwidth(&object_store); let scheduler = ScanScheduler::new(object_store, scheduler_config); let uri = index_dir.child(uuid.as_str()).child(INDEX_FILE_NAME); + let cached_size = file_sizes + .get(INDEX_FILE_NAME) + .map(|&size| CachedFileSize::new(size)) + .unwrap_or_else(CachedFileSize::unknown); let index_reader = FileReader::try_open( - scheduler - .open_file(&uri, &CachedFileSize::unknown()) - .await?, + scheduler.open_file(&uri, &cached_size).await?, None, Arc::::default(), file_metadata_cache, @@ -191,13 +194,17 @@ impl IVFIndex { .ok_or(Error::index(format!("{} not found", S::metadata_key())))?; let sub_index_metadata: Vec = serde_json::from_str(sub_index_metadata)?; + let aux_cached_size = file_sizes + .get(INDEX_AUXILIARY_FILE_NAME) + .map(|&size| CachedFileSize::new(size)) + .unwrap_or_else(CachedFileSize::unknown); let storage_reader = FileReader::try_open( scheduler .open_file( &index_dir .child(uuid.as_str()) .child(INDEX_AUXILIARY_FILE_NAME), - &CachedFileSize::unknown(), + &aux_cached_size, ) .await?, None, From 8c3ef350b886fd1fdbec0982813628fae0251d39 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 26 Mar 2026 14:11:22 -0700 Subject: [PATCH 19/24] refactor: presize cache keys, extract size helper, document type_name contract - Presize `make_cache_key` Vec to avoid intermediate String allocation - Extract `cache_entry_size` helper to replace magic `+ 8` pattern - Document `type_name` uniqueness requirement on CacheKey/UnsizedCacheKey - Remove unused derives from SubIndexType (confirmed compiles without them) - Use moka's `weighted_size()` instead of iterating in `approx_size_bytes` Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache/keys.rs | 21 +++++++++++++++++---- rust/lance-core/src/cache/mod.rs | 17 ++++++++++++----- rust/lance-core/src/cache/moka.rs | 2 +- rust/lance-index/src/vector/v3/subindex.rs | 1 - 4 files changed, 30 
insertions(+), 11 deletions(-) diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs index db412cc632f..164cd043952 100644 --- a/rust/lance-core/src/cache/keys.rs +++ b/rust/lance-core/src/cache/keys.rs @@ -20,12 +20,17 @@ pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) { /// Build a key: `prefix/user_key\0type_name`. pub(super) fn make_cache_key(prefix: &str, key: &str, type_name: &str) -> Vec { - let full_key = if prefix.is_empty() { - key.to_string() + let user_key_len = if prefix.is_empty() { + key.len() } else { - format!("{}/{}", prefix, key) + prefix.len() + 1 + key.len() }; - let mut bytes = full_key.into_bytes(); + let mut bytes = Vec::with_capacity(user_key_len + 1 + type_name.len()); + if !prefix.is_empty() { + bytes.extend_from_slice(prefix.as_bytes()); + bytes.push(b'/'); + } + bytes.extend_from_slice(key.as_bytes()); bytes.push(0); bytes.extend_from_slice(type_name.as_bytes()); bytes @@ -38,6 +43,11 @@ pub trait CacheKey { /// Short, stable string that distinguishes this value type from others in /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_name`). + /// + /// **Must be unique per value type.** If two `CacheKey` impls return the + /// same `type_name` but different `ValueType`s, entries will collide and + /// downcasts will fail silently (returning `None` on get). + /// /// Must be consistent across crate boundaries — use a short literal, not /// `std::any::type_name` pointers. fn type_name(&self) -> &'static str; @@ -48,5 +58,8 @@ pub trait UnsizedCacheKey { fn key(&self) -> Cow<'_, str>; + /// Short, stable string that distinguishes this value type from others in + /// the cache. Must be unique per value type — collisions cause silent + /// downcast failures. 
fn type_name(&self) -> &'static str; } diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs index 6bdd0c07152..bfa0af34dae 100644 --- a/rust/lance-core/src/cache/mod.rs +++ b/rust/lance-core/src/cache/mod.rs @@ -35,6 +35,11 @@ pub use deepsize::{Context, DeepSizeOf}; use keys::make_cache_key; +/// Size of a cached `Arc`, accounting for the Arc overhead (two atomic counters). +fn cache_entry_size(value: &T) -> usize { + value.deep_size_of() + std::mem::size_of::() * 2 +} + // --------------------------------------------------------------------------- // LanceCache — typed wrapper around dyn CacheBackend // --------------------------------------------------------------------------- @@ -141,7 +146,7 @@ impl LanceCache { type_name: &str, metadata: Arc, ) { - let size = metadata.deep_size_of() + 8; + let size = cache_entry_size(&*metadata); let cache_key = make_cache_key(&self.prefix, key, type_name); self.cache.insert(&cache_key, metadata, size).await; } @@ -188,12 +193,13 @@ impl LanceCache { let typed_loader = Box::pin(async move { let value = loader().await?; let arc = Arc::new(value); - let size = arc.deep_size_of() + 8; + let size = cache_entry_size(&*arc); Ok((arc as CacheEntry, size)) }); let entry = self.cache.get_or_insert(&cache_key, typed_loader).await?; + // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses. // Track hit/miss based on whether we got a pre-existing entry. // (Approximate: we can't distinguish "backend had it" from "loader ran" // without a richer return type. Count all get_or_insert as misses for now.) @@ -203,6 +209,7 @@ impl LanceCache { } // -- Unsized insert/get --------------------------------------------------- + // TODO: can we unify some of these methods? 
async fn insert_unsized_with_id( &self, @@ -359,7 +366,7 @@ impl WeakLanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { if let Some(cache) = self.inner.upgrade() { - let size = value.deep_size_of() + 8; + let size = cache_entry_size(&*value); let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); cache.insert(&key, value, size).await; true @@ -388,7 +395,7 @@ impl WeakLanceCache { let typed_loader = Box::pin(async move { let value = loader().await?; let arc = Arc::new(value); - let size = arc.deep_size_of() + 8; + let size = cache_entry_size(&*arc); Ok((arc as CacheEntry, size)) }); let entry = cache.get_or_insert(&key, typed_loader).await?; @@ -424,7 +431,7 @@ impl WeakLanceCache { { if let Some(cache) = self.inner.upgrade() { let wrapper = Arc::new(value); - let size = wrapper.deep_size_of() + 8; + let size = cache_entry_size(&*wrapper); let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); cache.insert(&key, wrapper, size).await; } else { diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs index 6a2cd673409..9ab6702e455 100644 --- a/rust/lance-core/src/cache/moka.rs +++ b/rust/lance-core/src/cache/moka.rs @@ -120,6 +120,6 @@ impl CacheBackend for MokaCacheBackend { } fn approx_size_bytes(&self) -> usize { - self.cache.iter().map(|(_, v)| v.size_bytes).sum() + self.cache.weighted_size() as usize } } diff --git a/rust/lance-index/src/vector/v3/subindex.rs b/rust/lance-index/src/vector/v3/subindex.rs index dd5d2b078a9..af0bb337352 100644 --- a/rust/lance-index/src/vector/v3/subindex.rs +++ b/rust/lance-index/src/vector/v3/subindex.rs @@ -59,7 +59,6 @@ pub trait IvfSubIndex: Send + Sync + Debug + DeepSizeOf { fn to_batch(&self) -> Result; } -#[derive(Debug, Clone, Copy)] pub enum SubIndexType { Flat, Hnsw, From 19aa33833df578c7866707728f61c5ff1435f278 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 26 Mar 2026 14:12:47 -0700 Subject: [PATCH 20/24] refactor: inline 
single-caller private cache methods into public API Inline `get_or_insert_with_id`, `insert_unsized_with_id`, and `get_unsized_with_id` into their sole public callers. Keep `insert_with_id` and `get_with_id` as shared helpers since they're used by both sized and unsized paths. Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache/mod.rs | 79 ++++++++------------------------ 1 file changed, 20 insertions(+), 59 deletions(-) diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs index bfa0af34dae..eb7c4c40bfe 100644 --- a/rust/lance-core/src/cache/mod.rs +++ b/rust/lance-core/src/cache/mod.rs @@ -177,59 +177,6 @@ impl LanceCache { } } - async fn get_or_insert_with_id( - &self, - key: &str, - type_name: &str, - loader: F, - ) -> Result> - where - F: FnOnce() -> Fut + Send, - Fut: Future> + Send, - { - let cache_key = make_cache_key(&self.prefix, key, type_name); - - // Type-erase the loader into a pinned future for the backend. - let typed_loader = Box::pin(async move { - let value = loader().await?; - let arc = Arc::new(value); - let size = cache_entry_size(&*arc); - Ok((arc as CacheEntry, size)) - }); - - let entry = self.cache.get_or_insert(&cache_key, typed_loader).await?; - - // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses. - // Track hit/miss based on whether we got a pre-existing entry. - // (Approximate: we can't distinguish "backend had it" from "loader ran" - // without a richer return type. Count all get_or_insert as misses for now.) - self.misses.fetch_add(1, Ordering::Relaxed); - - Ok(entry.downcast::().unwrap()) - } - - // -- Unsized insert/get --------------------------------------------------- - // TODO: can we unify some of these methods? 
- - async fn insert_unsized_with_id( - &self, - key: &str, - type_name: &str, - metadata: Arc, - ) { - self.insert_with_id(key, type_name, Arc::new(metadata)) - .await - } - - async fn get_unsized_with_id( - &self, - key: &str, - type_name: &str, - ) -> Option> { - let outer = self.get_with_id::>(key, type_name).await?; - Some(outer.as_ref().clone()) - } - // -- Stats / clear -------------------------------------------------------- pub async fn stats(&self) -> CacheStats { @@ -280,9 +227,21 @@ impl LanceCache { F: FnOnce() -> Fut + Send, Fut: Future> + Send, { - let type_name = cache_key.type_name(); - let key_str = cache_key.key().into_owned(); - Box::pin(self.get_or_insert_with_id(&key_str, type_name, loader)).await + let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + + let typed_loader = Box::pin(async move { + let value = loader().await?; + let arc = Arc::new(value); + let size = cache_entry_size(&*arc); + Ok((arc as CacheEntry, size)) + }); + + let entry = self.cache.get_or_insert(&key, typed_loader).await?; + + // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses. 
+ self.misses.fetch_add(1, Ordering::Relaxed); + + Ok(entry.downcast::().unwrap()) } pub async fn insert_unsized_with_key(&self, cache_key: &K, metadata: Arc) @@ -290,7 +249,7 @@ impl LanceCache { K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.insert_unsized_with_id(&cache_key.key(), cache_key.type_name(), metadata) + self.insert_with_id(&cache_key.key(), cache_key.type_name(), Arc::new(metadata)) .boxed() .await } @@ -300,9 +259,11 @@ impl LanceCache { K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.get_unsized_with_id::(&cache_key.key(), cache_key.type_name()) + let outer = self + .get_with_id::>(&cache_key.key(), cache_key.type_name()) .boxed() - .await + .await?; + Some(outer.as_ref().clone()) } } From c3f2cb600cfd88560209b88472e33dfc6947825f Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 26 Mar 2026 14:18:01 -0700 Subject: [PATCH 21/24] refactor: replace opaque byte keys with InternalCacheKey, add was_cached Replace `&[u8]` keys in `CacheBackend` with a structured `InternalCacheKey` type that provides direct access to prefix, key, and type_name fields. This eliminates the need for `parse_cache_key()` and `make_cache_key()`. Also change `get_or_insert` to return `(CacheEntry, bool)` where the bool indicates whether the entry was already cached. This enables accurate hit/miss tracking instead of counting all get_or_insert calls as misses. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache/backend.rs | 26 ++++--- rust/lance-core/src/cache/keys.rs | 62 ++++++++------- rust/lance-core/src/cache/mod.rs | 108 +++++++++++++++------------ rust/lance-core/src/cache/moka.rs | 33 +++++--- 4 files changed, 131 insertions(+), 98 deletions(-) diff --git a/rust/lance-core/src/cache/backend.rs b/rust/lance-core/src/cache/backend.rs index 970fb75888c..e929bff3529 100644 --- a/rust/lance-core/src/cache/backend.rs +++ b/rust/lance-core/src/cache/backend.rs @@ -10,31 +10,33 @@ use futures::Future; use crate::Result; +use super::keys::InternalCacheKey; + /// A type-erased cache entry. pub type CacheEntry = Arc; /// Low-level pluggable cache backend. /// -/// Implementations store entries keyed by opaque byte slices. +/// Implementations store entries keyed by [`InternalCacheKey`], which provides +/// structured access to the prefix, user key, and type name components. /// The [`LanceCache`](super::LanceCache) wrapper handles key construction and type safety; /// backend authors do not need to worry about key encoding. -/// -/// Keys are structured as `user_key\0type_name` where `type_name` comes from -/// [`CacheKey::type_name()`](super::CacheKey::type_name). Backend authors who need to -/// inspect keys can use [`parse_cache_key()`](super::parse_cache_key) to split them. #[async_trait] pub trait CacheBackend: Send + Sync + std::fmt::Debug { - /// Look up an entry by its opaque key. - async fn get(&self, key: &[u8]) -> Option; + /// Look up an entry by its key. + async fn get(&self, key: &InternalCacheKey) -> Option; /// Store an entry. `size_bytes` is used for eviction accounting. - async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize); + async fn insert(&self, key: &InternalCacheKey, entry: CacheEntry, size_bytes: usize); /// Get an existing entry or compute it from `loader`. 
/// /// Implementations should deduplicate concurrent loads for the same key /// so the loader runs at most once. /// + /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry + /// was already present in the cache (the loader was not invoked). + /// /// The loader is a pinned, boxed future rather than a generic closure /// because `async_trait` erases the `Self` lifetime, making it impossible /// to express a generic closure whose returned future borrows from the @@ -45,12 +47,12 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { /// this method — implementations must not store it beyond the call. async fn get_or_insert<'a>( &self, - key: &[u8], + key: &InternalCacheKey, loader: Pin> + Send + 'a>>, - ) -> Result; + ) -> Result<(CacheEntry, bool)>; - /// Remove all entries whose key starts with `prefix`. - async fn invalidate_prefix(&self, prefix: &[u8]); + /// Remove all entries whose prefix starts with the given string. + async fn invalidate_prefix(&self, prefix: &str); /// Remove all entries. async fn clear(&self); diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs index 164cd043952..d4afe55370f 100644 --- a/rust/lance-core/src/cache/keys.rs +++ b/rust/lance-core/src/cache/keys.rs @@ -1,39 +1,45 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::borrow::Cow; +use std::{borrow::Cow, sync::Arc}; -/// Cache keys are structured as `user_key\0type_name`. +/// Structured cache key used by [`CacheBackend`](super::CacheBackend). /// -/// This function splits an opaque cache key into the user-visible portion -/// and the type_name string. Backend implementations can use this to inspect keys. -/// Returns `(empty slice, "")` if no separator is found. 
-pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) { - if let Some(sep) = key.iter().position(|&b| b == 0) { - let user_key = &key[..sep]; - let type_name = std::str::from_utf8(&key[sep + 1..]).unwrap_or(""); - (user_key, type_name) - } else { - (key, "") - } +/// Composed of a prefix (scoping the key to a dataset/index), a user key +/// (identifying the specific entry), and a type name (distinguishing value +/// types that share the same user key). +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct InternalCacheKey { + prefix: Arc, + key: Arc, + type_name: &'static str, } -/// Build a key: `prefix/user_key\0type_name`. -pub(super) fn make_cache_key(prefix: &str, key: &str, type_name: &str) -> Vec { - let user_key_len = if prefix.is_empty() { - key.len() - } else { - prefix.len() + 1 + key.len() - }; - let mut bytes = Vec::with_capacity(user_key_len + 1 + type_name.len()); - if !prefix.is_empty() { - bytes.extend_from_slice(prefix.as_bytes()); - bytes.push(b'/'); +impl InternalCacheKey { + pub fn new(prefix: Arc, key: Arc, type_name: &'static str) -> Self { + Self { + prefix, + key, + type_name, + } + } + + pub fn prefix(&self) -> &str { + &self.prefix + } + + pub fn key(&self) -> &str { + &self.key + } + + pub fn type_name(&self) -> &'static str { + self.type_name + } + + /// Returns true if this key's prefix starts with the given string. + pub fn has_prefix(&self, prefix: &str) -> bool { + self.prefix.starts_with(prefix) } - bytes.extend_from_slice(key.as_bytes()); - bytes.push(0); - bytes.extend_from_slice(type_name.as_bytes()); - bytes } pub trait CacheKey { diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs index eb7c4c40bfe..d338d69036f 100644 --- a/rust/lance-core/src/cache/mod.rs +++ b/rust/lance-core/src/cache/mod.rs @@ -6,20 +6,20 @@ //! This module provides a two-layer caching system: //! //! - [`CacheBackend`] is the low-level, pluggable trait that custom cache implementations -//! can implement. 
It uses opaque byte keys and type-erased entries. -//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag -//! encoding), type-safe get/insert, and DeepSizeOf-based size computation. +//! can implement. It uses [`InternalCacheKey`] keys and type-erased entries. +//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag), +//! type-safe get/insert, and DeepSizeOf-based size computation. //! //! Cache keys are handled by the [`keys`] submodule: [`CacheKey`] / [`UnsizedCacheKey`] -//! define the typed key interface, and [`parse_cache_key`] lets backends inspect the -//! encoded `user_key\0type_name` format. +//! define the typed key interface, and [`InternalCacheKey`] is the structured key passed +//! to backends. mod backend; mod keys; mod moka; pub use backend::{CacheBackend, CacheEntry}; -pub use keys::{CacheKey, UnsizedCacheKey, parse_cache_key}; +pub use keys::{CacheKey, InternalCacheKey, UnsizedCacheKey}; pub use moka::MokaCacheBackend; use std::sync::{ @@ -33,13 +33,17 @@ use crate::Result; pub use deepsize::{Context, DeepSizeOf}; -use keys::make_cache_key; - /// Size of a cached `Arc`, accounting for the Arc overhead (two atomic counters). fn cache_entry_size(value: &T) -> usize { value.deep_size_of() + std::mem::size_of::() * 2 } +/// Build an [`InternalCacheKey`] from a cache's prefix, a user key string, +/// and a type name. 
+fn build_key(prefix: &Arc, key: &str, type_name: &'static str) -> InternalCacheKey { + InternalCacheKey::new(prefix.clone(), Arc::from(key), type_name) +} + // --------------------------------------------------------------------------- // LanceCache — typed wrapper around dyn CacheBackend // --------------------------------------------------------------------------- @@ -51,7 +55,7 @@ fn cache_entry_size(value: &T) -> usize { #[derive(Clone)] pub struct LanceCache { cache: Arc, - prefix: String, + prefix: Arc, hits: Arc, misses: Arc, } @@ -74,7 +78,7 @@ impl LanceCache { pub fn with_capacity(capacity: usize) -> Self { Self { cache: Arc::new(MokaCacheBackend::with_capacity(capacity)), - prefix: String::new(), + prefix: Arc::from(""), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), } @@ -84,7 +88,7 @@ impl LanceCache { pub fn with_backend(backend: Arc) -> Self { Self { cache: backend, - prefix: String::new(), + prefix: Arc::from(""), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), } @@ -93,7 +97,7 @@ impl LanceCache { pub fn no_cache() -> Self { Self { cache: Arc::new(MokaCacheBackend::no_cache()), - prefix: String::new(), + prefix: Arc::from(""), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), } @@ -104,7 +108,7 @@ impl LanceCache { pub fn with_backend_and_prefix(backend: Arc, prefix: String) -> Self { Self { cache: backend, - prefix, + prefix: Arc::from(prefix), hits: Arc::new(AtomicU64::new(0)), misses: Arc::new(AtomicU64::new(0)), } @@ -114,16 +118,16 @@ impl LanceCache { pub fn with_key_prefix(&self, prefix: &str) -> Self { Self { cache: self.cache.clone(), - prefix: format!("{}{}/", self.prefix, prefix), + prefix: Arc::from(format!("{}{}/", self.prefix, prefix)), hits: self.hits.clone(), misses: self.misses.clone(), } } - /// Invalidate all entries whose key starts with the given prefix. + /// Invalidate all entries whose prefix starts with the given string. 
pub async fn invalidate_prefix(&self, prefix: &str) { - let prefix_bytes = format!("{}{}", self.prefix, prefix).into_bytes(); - self.cache.invalidate_prefix(&prefix_bytes).await; + let full_prefix = format!("{}{}", self.prefix, prefix); + self.cache.invalidate_prefix(&full_prefix).await; } pub async fn size(&self) -> usize { @@ -138,25 +142,25 @@ impl LanceCache { self.cache.size_bytes().await } - // -- Sized insert/get (internal, used by CacheKey methods) ---------------- + // -- Sized insert/get (internal, shared by sized and unsized paths) -------- async fn insert_with_id( &self, key: &str, - type_name: &str, + type_name: &'static str, metadata: Arc, ) { let size = cache_entry_size(&*metadata); - let cache_key = make_cache_key(&self.prefix, key, type_name); + let cache_key = build_key(&self.prefix, key, type_name); self.cache.insert(&cache_key, metadata, size).await; } async fn get_with_id( &self, key: &str, - type_name: &str, + type_name: &'static str, ) -> Option> { - let cache_key = make_cache_key(&self.prefix, key, type_name); + let cache_key = build_key(&self.prefix, key, type_name); if let Some(entry) = self.cache.get(&cache_key).await { match entry.downcast::() { Ok(val) => { @@ -227,7 +231,7 @@ impl LanceCache { F: FnOnce() -> Fut + Send, Fut: Future> + Send, { - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); let typed_loader = Box::pin(async move { let value = loader().await?; @@ -236,10 +240,13 @@ impl LanceCache { Ok((arc as CacheEntry, size)) }); - let entry = self.cache.get_or_insert(&key, typed_loader).await?; + let (entry, was_cached) = self.cache.get_or_insert(&key, typed_loader).await?; - // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses. 
- self.misses.fetch_add(1, Ordering::Relaxed); + if was_cached { + self.hits.fetch_add(1, Ordering::Relaxed); + } else { + self.misses.fetch_add(1, Ordering::Relaxed); + } Ok(entry.downcast::().unwrap()) } @@ -276,7 +283,7 @@ impl LanceCache { #[derive(Clone, Debug)] pub struct WeakLanceCache { inner: std::sync::Weak, - prefix: String, + prefix: Arc, hits: Arc, misses: Arc, } @@ -294,7 +301,7 @@ impl WeakLanceCache { pub fn with_key_prefix(&self, prefix: &str) -> Self { Self { inner: self.inner.clone(), - prefix: format!("{}{}/", self.prefix, prefix), + prefix: Arc::from(format!("{}{}/", self.prefix, prefix)), hits: self.hits.clone(), misses: self.misses.clone(), } @@ -311,7 +318,7 @@ impl WeakLanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { let cache = self.inner.upgrade()?; - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); if let Some(entry) = cache.get(&key).await { self.hits.fetch_add(1, Ordering::Relaxed); Some(entry.downcast::().unwrap()) @@ -328,7 +335,7 @@ impl WeakLanceCache { { if let Some(cache) = self.inner.upgrade() { let size = cache_entry_size(&*value); - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); cache.insert(&key, value, size).await; true } else { @@ -352,15 +359,19 @@ impl WeakLanceCache { Fut: Future> + Send, { if let Some(cache) = self.inner.upgrade() { - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); let typed_loader = Box::pin(async move { let value = loader().await?; let arc = Arc::new(value); let size = cache_entry_size(&*arc); Ok((arc as CacheEntry, size)) }); - let entry = cache.get_or_insert(&key, typed_loader).await?; - self.misses.fetch_add(1, Ordering::Relaxed); + let (entry, was_cached) 
= cache.get_or_insert(&key, typed_loader).await?; + if was_cached { + self.hits.fetch_add(1, Ordering::Relaxed); + } else { + self.misses.fetch_add(1, Ordering::Relaxed); + } Ok(entry.downcast::().unwrap()) } else { log::warn!("WeakLanceCache: cache no longer available, computing without caching"); @@ -374,7 +385,7 @@ impl WeakLanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { let cache = self.inner.upgrade()?; - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); if let Some(entry) = cache.get(&key).await { entry .downcast::>() @@ -393,7 +404,7 @@ impl WeakLanceCache { if let Some(cache) = self.inner.upgrade() { let wrapper = Arc::new(value); let size = cache_entry_size(&*wrapper); - let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); cache.insert(&key, wrapper, size).await; } else { log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item"); @@ -610,8 +621,10 @@ mod tests { .await .unwrap(); assert_eq!(*v, vec![1, 2, 3]); + assert_eq!(cache.stats().await.misses, 1); + assert_eq!(cache.stats().await.hits, 0); - // Second call should not invoke loader + // Second call should not invoke loader and should be a hit let v: Arc> = cache .get_or_insert_with_key(TestKey::>::new("k"), || async { panic!("should not be called") @@ -619,6 +632,7 @@ mod tests { .await .unwrap(); assert_eq!(*v, vec![1, 2, 3]); + assert_eq!(cache.stats().await.hits, 1); } #[tokio::test] @@ -628,7 +642,7 @@ mod tests { #[derive(Debug)] struct HashMapBackend { - map: Mutex, (CacheEntry, usize)>>, + map: Mutex>, } impl HashMapBackend { @@ -641,35 +655,35 @@ mod tests { #[async_trait] impl CacheBackend for HashMapBackend { - async fn get(&self, key: &[u8]) -> Option { + async fn get(&self, key: &InternalCacheKey) -> Option { 
self.map.lock().await.get(key).map(|(e, _)| e.clone()) } - async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) { + async fn insert(&self, key: &InternalCacheKey, entry: CacheEntry, size_bytes: usize) { self.map .lock() .await - .insert(key.to_vec(), (entry, size_bytes)); + .insert(key.clone(), (entry, size_bytes)); } async fn get_or_insert<'a>( &self, - key: &[u8], + key: &InternalCacheKey, loader: std::pin::Pin< Box> + Send + 'a>, >, - ) -> Result { + ) -> Result<(CacheEntry, bool)> { if let Some((entry, _)) = self.map.lock().await.get(key) { - Ok(entry.clone()) + Ok((entry.clone(), true)) } else { let (entry, size) = loader.await?; self.map .lock() .await - .insert(key.to_vec(), (entry.clone(), size)); - Ok(entry) + .insert(key.clone(), (entry.clone(), size)); + Ok((entry, false)) } } - async fn invalidate_prefix(&self, prefix: &[u8]) { - self.map.lock().await.retain(|k, _| !k.starts_with(prefix)); + async fn invalidate_prefix(&self, prefix: &str) { + self.map.lock().await.retain(|k, _| !k.has_prefix(prefix)); } async fn clear(&self) { self.map.lock().await.clear(); diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs index 9ab6702e455..a977ea0a61f 100644 --- a/rust/lance-core/src/cache/moka.rs +++ b/rust/lance-core/src/cache/moka.rs @@ -2,6 +2,8 @@ // SPDX-FileCopyrightText: Copyright The Lance Authors use std::pin::Pin; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; use async_trait::async_trait; use futures::Future; @@ -9,6 +11,7 @@ use futures::Future; use crate::Result; use super::backend::{CacheBackend, CacheEntry}; +use super::keys::InternalCacheKey; /// Internal record stored in the moka cache. #[derive(Clone, Debug)] @@ -22,7 +25,7 @@ struct MokaCacheEntry { /// Provides weighted-capacity eviction and concurrent-load deduplication /// via moka's built-in `optionally_get_with`. 
pub struct MokaCacheBackend { - cache: moka::future::Cache, MokaCacheEntry>, + cache: moka::future::Cache, } impl std::fmt::Debug for MokaCacheBackend { @@ -52,26 +55,31 @@ impl MokaCacheBackend { #[async_trait] impl CacheBackend for MokaCacheBackend { - async fn get(&self, key: &[u8]) -> Option { + async fn get(&self, key: &InternalCacheKey) -> Option { self.cache.get(key).await.map(|r| r.entry) } - async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) { + async fn insert(&self, key: &InternalCacheKey, entry: CacheEntry, size_bytes: usize) { self.cache - .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes }) + .insert(key.clone(), MokaCacheEntry { entry, size_bytes }) .await; } async fn get_or_insert<'a>( &self, - key: &[u8], + key: &InternalCacheKey, loader: Pin> + Send + 'a>>, - ) -> Result { + ) -> Result<(CacheEntry, bool)> { // Use moka's built-in dedup: optionally_get_with runs the init future // at most once per key, even under concurrent access. let (error_tx, error_rx) = tokio::sync::oneshot::channel(); + // Track whether the loader actually ran (= cache miss). 
+ let was_miss = Arc::new(AtomicBool::new(false)); + let was_miss_clone = was_miss.clone(); + let init = async move { + was_miss_clone.store(true, Ordering::Relaxed); match loader.await { Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }), Err(e) => { @@ -81,9 +89,12 @@ impl CacheBackend for MokaCacheBackend { } }; - let owned_key = key.to_vec(); + let owned_key = key.clone(); match self.cache.optionally_get_with(owned_key, init).await { - Some(record) => Ok(record.entry), + Some(record) => { + let was_cached = !was_miss.load(Ordering::Relaxed); + Ok((record.entry, was_cached)) + } None => match error_rx.await { Ok(err) => Err(err), Err(_) => Err(crate::Error::internal( @@ -93,10 +104,10 @@ impl CacheBackend for MokaCacheBackend { } } - async fn invalidate_prefix(&self, prefix: &[u8]) { - let prefix = prefix.to_vec(); + async fn invalidate_prefix(&self, prefix: &str) { + let prefix = prefix.to_owned(); self.cache - .invalidate_entries_if(move |key, _value| key.starts_with(&prefix)) + .invalidate_entries_if(move |key, _value| key.has_prefix(&prefix)) .expect("Cache configured correctly"); } From 1494cd7d7667dcb182190c9d8d63f2469e004651 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 26 Mar 2026 14:59:21 -0700 Subject: [PATCH 22/24] fix: revert approx_size_bytes to iterate entries, fix rustdoc link `weighted_size()` can be stale without `run_pending_tasks()` (which is async). Revert `approx_size_bytes` to iterating entries so the synchronous `DeepSizeOf` path returns accurate values. Also remove reference to private `keys` module in module-level doc. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache/mod.rs | 5 ++--- rust/lance-core/src/cache/moka.rs | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs index d338d69036f..3eba0415f04 100644 --- a/rust/lance-core/src/cache/mod.rs +++ b/rust/lance-core/src/cache/mod.rs @@ -10,9 +10,8 @@ //! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag), //! type-safe get/insert, and DeepSizeOf-based size computation. //! -//! Cache keys are handled by the [`keys`] submodule: [`CacheKey`] / [`UnsizedCacheKey`] -//! define the typed key interface, and [`InternalCacheKey`] is the structured key passed -//! to backends. +//! [`CacheKey`] / [`UnsizedCacheKey`] define the typed key interface, and +//! [`InternalCacheKey`] is the structured key passed to backends. mod backend; mod keys; diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs index a977ea0a61f..13a6c7e976c 100644 --- a/rust/lance-core/src/cache/moka.rs +++ b/rust/lance-core/src/cache/moka.rs @@ -131,6 +131,9 @@ impl CacheBackend for MokaCacheBackend { } fn approx_size_bytes(&self) -> usize { - self.cache.weighted_size() as usize + // Iterate rather than using `weighted_size()` because moka's + // weighted_size can be stale without `run_pending_tasks()`, which + // is async and can't be called from this synchronous context. + self.cache.iter().map(|(_, v)| v.size_bytes).sum() } } From ae4783a897ceef7974bdda68206e32fb496d7a36 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 27 Mar 2026 14:45:52 -0700 Subject: [PATCH 23/24] refactor: remove unused `&self` from CacheKey::type_name and UnsizedCacheKey::type_name No implementation uses `self`, so make `type_name` an associated function instead of a method. Update all call sites to use `K::type_name()`. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache/keys.rs | 4 ++-- rust/lance-core/src/cache/mod.rs | 24 +++++++++---------- .../src/encodings/logical/primitive.rs | 2 +- rust/lance-file/src/previous/reader.rs | 2 +- rust/lance-index/src/scalar/bitmap.rs | 2 +- rust/lance-index/src/scalar/btree.rs | 2 +- rust/lance-index/src/scalar/inverted/index.rs | 4 ++-- rust/lance-index/src/scalar/ngram.rs | 2 +- rust/lance-index/src/scalar/rtree.rs | 2 +- rust/lance/src/dataset/fragment.rs | 2 +- rust/lance/src/index.rs | 8 +++---- rust/lance/src/index/vector/ivf.rs | 2 +- rust/lance/src/index/vector/ivf/v2.rs | 2 +- rust/lance/src/session.rs | 2 +- rust/lance/src/session/caches.rs | 12 +++++----- rust/lance/src/session/index_caches.rs | 6 ++--- 16 files changed, 39 insertions(+), 39 deletions(-) diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs index d4afe55370f..5182c3e8e72 100644 --- a/rust/lance-core/src/cache/keys.rs +++ b/rust/lance-core/src/cache/keys.rs @@ -56,7 +56,7 @@ pub trait CacheKey { /// /// Must be consistent across crate boundaries — use a short literal, not /// `std::any::type_name` pointers. - fn type_name(&self) -> &'static str; + fn type_name() -> &'static str; } pub trait UnsizedCacheKey { @@ -67,5 +67,5 @@ pub trait UnsizedCacheKey { /// Short, stable string that distinguishes this value type from others in /// the cache. Must be unique per value type — collisions cause silent /// downcast failures. 
- fn type_name(&self) -> &'static str; + fn type_name() -> &'static str; } diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs index 3eba0415f04..01014997d7d 100644 --- a/rust/lance-core/src/cache/mod.rs +++ b/rust/lance-core/src/cache/mod.rs @@ -204,7 +204,7 @@ impl LanceCache { K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.insert_with_id(&cache_key.key(), cache_key.type_name(), metadata) + self.insert_with_id(&cache_key.key(), K::type_name(), metadata) .boxed() .await } @@ -214,7 +214,7 @@ impl LanceCache { K: CacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.get_with_id::(&cache_key.key(), cache_key.type_name()) + self.get_with_id::(&cache_key.key(), K::type_name()) .boxed() .await } @@ -230,7 +230,7 @@ impl LanceCache { F: FnOnce() -> Fut + Send, Fut: Future> + Send, { - let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), K::type_name()); let typed_loader = Box::pin(async move { let value = loader().await?; @@ -255,7 +255,7 @@ impl LanceCache { K: UnsizedCacheKey, K::ValueType: DeepSizeOf + Send + Sync + 'static, { - self.insert_with_id(&cache_key.key(), cache_key.type_name(), Arc::new(metadata)) + self.insert_with_id(&cache_key.key(), K::type_name(), Arc::new(metadata)) .boxed() .await } @@ -266,7 +266,7 @@ impl LanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { let outer = self - .get_with_id::>(&cache_key.key(), cache_key.type_name()) + .get_with_id::>(&cache_key.key(), K::type_name()) .boxed() .await?; Some(outer.as_ref().clone()) @@ -317,7 +317,7 @@ impl WeakLanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { let cache = self.inner.upgrade()?; - let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), K::type_name()); if let Some(entry) = cache.get(&key).await { self.hits.fetch_add(1, Ordering::Relaxed); 
Some(entry.downcast::().unwrap()) @@ -334,7 +334,7 @@ impl WeakLanceCache { { if let Some(cache) = self.inner.upgrade() { let size = cache_entry_size(&*value); - let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), K::type_name()); cache.insert(&key, value, size).await; true } else { @@ -358,7 +358,7 @@ impl WeakLanceCache { Fut: Future> + Send, { if let Some(cache) = self.inner.upgrade() { - let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), K::type_name()); let typed_loader = Box::pin(async move { let value = loader().await?; let arc = Arc::new(value); @@ -384,7 +384,7 @@ impl WeakLanceCache { K::ValueType: DeepSizeOf + Send + Sync + 'static, { let cache = self.inner.upgrade()?; - let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), K::type_name()); if let Some(entry) = cache.get(&key).await { entry .downcast::>() @@ -403,7 +403,7 @@ impl WeakLanceCache { if let Some(cache) = self.inner.upgrade() { let wrapper = Arc::new(value); let size = cache_entry_size(&*wrapper); - let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name()); + let key = build_key(&self.prefix, &cache_key.key(), K::type_name()); cache.insert(&key, wrapper, size).await; } else { log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item"); @@ -470,7 +470,7 @@ mod tests { fn key(&self) -> std::borrow::Cow<'_, str> { std::borrow::Cow::Borrowed(&self.key) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { std::any::type_name::() } } @@ -495,7 +495,7 @@ mod tests { fn key(&self) -> std::borrow::Cow<'_, str> { std::borrow::Cow::Borrowed(&self.key) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { std::any::type_name::() } } diff --git 
a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs index ba8a551f737..f4f8b6e8627 100644 --- a/rust/lance-encoding/src/encodings/logical/primitive.rs +++ b/rust/lance-encoding/src/encodings/logical/primitive.rs @@ -3417,7 +3417,7 @@ impl CacheKey for FieldDataCacheKey { self.column_index.to_string().into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "FieldData" } } diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs index fac113b4c10..0edaa7eb972 100644 --- a/rust/lance-file/src/previous/reader.rs +++ b/rust/lance-file/src/previous/reader.rs @@ -90,7 +90,7 @@ impl CacheKey for StringCacheKey<'_, T> { self.key.into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { std::any::type_name::() } } diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs index d10829b151c..05405344efd 100644 --- a/rust/lance-index/src/scalar/bitmap.rs +++ b/rust/lance-index/src/scalar/bitmap.rs @@ -129,7 +129,7 @@ impl CacheKey for BitmapKey { format!("{}", self.value.0).into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "Bitmap" } } diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs index 48db3d43b11..3fa05dfd131 100644 --- a/rust/lance-index/src/scalar/btree.rs +++ b/rust/lance-index/src/scalar/btree.rs @@ -991,7 +991,7 @@ impl CacheKey for BTreePageKey { format!("page-{}", self.page_number).into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "BTreePage" } } diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs index e5caf09cd78..88cecddd697 100644 --- a/rust/lance-index/src/scalar/inverted/index.rs +++ b/rust/lance-index/src/scalar/inverted/index.rs @@ -1889,7 +1889,7 @@ impl CacheKey for PostingListKey { format!("postings-{}", 
self.token_id).into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "PostingList" } } @@ -1906,7 +1906,7 @@ impl CacheKey for PositionKey { format!("positions-{}", self.token_id).into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "Position" } } diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs index 2a439ae6b34..4e614d99d99 100644 --- a/rust/lance-index/src/scalar/ngram.rs +++ b/rust/lance-index/src/scalar/ngram.rs @@ -171,7 +171,7 @@ impl CacheKey for NGramPostingListKey { format!("posting-list-{}", self.row_offset).into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "NGramPostingList" } } diff --git a/rust/lance-index/src/scalar/rtree.rs b/rust/lance-index/src/scalar/rtree.rs index 225e3be6e2a..920a59bb4b2 100644 --- a/rust/lance-index/src/scalar/rtree.rs +++ b/rust/lance-index/src/scalar/rtree.rs @@ -250,7 +250,7 @@ impl CacheKey for RTreeCacheKey { } } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "RTree" } } diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs index 81e1473c921..986fe8a8443 100644 --- a/rust/lance/src/dataset/fragment.rs +++ b/rust/lance/src/dataset/fragment.rs @@ -1880,7 +1880,7 @@ impl CacheKey for FileMetadataCacheKey { "".into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "FileMetadata" } } diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 97b25da6767..a1f41ead087 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -111,7 +111,7 @@ impl UnsizedCacheKey for ScalarIndexCacheKey<'_> { } } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "ScalarIndex" } } @@ -139,7 +139,7 @@ impl UnsizedCacheKey for VectorIndexCacheKey<'_> { } } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "VectorIndex" } } @@ -167,7 +167,7 @@ impl CacheKey for 
FragReuseIndexCacheKey<'_> { } } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "FragReuseIndex" } } @@ -195,7 +195,7 @@ impl CacheKey for MemWalCacheKey<'_> { } } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "MemWalIndex" } } diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs index c26da61d7ef..cc243eac887 100644 --- a/rust/lance/src/index/vector/ivf.rs +++ b/rust/lance/src/index/vector/ivf.rs @@ -124,7 +124,7 @@ impl UnsizedCacheKey for LegacyIVFPartitionKey { format!("ivf-{}", self.partition_id).into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "LegacyIVFPartition" } } diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 9561c187b18..5da12b687a7 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -97,7 +97,7 @@ impl CacheKey for IVFPartit format!("ivf-{}", self.partition_id).into() } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { // Using type_name is safe here: the impl is in the same crate as the // types, so the monomorphized pointer is consistent. 
std::any::type_name::>() diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs index 7242c0cca6a..b032cbaa15e 100644 --- a/rust/lance/src/session.rs +++ b/rust/lance/src/session.rs @@ -235,7 +235,7 @@ mod tests { Cow::Borrowed(self.0) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "TestUnsized" } } diff --git a/rust/lance/src/session/caches.rs b/rust/lance/src/session/caches.rs index 2654e356ac1..55f78a5068f 100644 --- a/rust/lance/src/session/caches.rs +++ b/rust/lance/src/session/caches.rs @@ -82,7 +82,7 @@ impl CacheKey for ManifestKey<'_> { Cow::Owned(format!("manifest/{}", self.version)) } } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "Manifest" } } @@ -97,7 +97,7 @@ impl CacheKey for TransactionKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("txn/{}", self.version)) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "Transaction" } } @@ -119,7 +119,7 @@ impl CacheKey for DeletionFileKey<'_> { self.deletion_file.file_type.suffix() )) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "DeletionVector" } } @@ -134,7 +134,7 @@ impl CacheKey for RowAddrMaskKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_addr_mask/{}", self.version)) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "RowAddrMask" } } @@ -149,7 +149,7 @@ impl CacheKey for RowIdIndexKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_id_index/{}", self.version)) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "RowIdIndex" } } @@ -164,7 +164,7 @@ impl CacheKey for RowIdSequenceKey { fn key(&self) -> Cow<'_, str> { Cow::Owned(format!("row_id_sequence/{}", self.fragment_id)) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "RowIdSequence" } } diff --git a/rust/lance/src/session/index_caches.rs b/rust/lance/src/session/index_caches.rs index 04aa9791c8d..43443b5dd34 
100644 --- a/rust/lance/src/session/index_caches.rs +++ b/rust/lance/src/session/index_caches.rs @@ -89,7 +89,7 @@ impl CacheKey for FragReuseIndexKey<'_> { Cow::Owned(format!("frag_reuse/{}", self.uuid)) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "FragReuseIndex" } } @@ -106,7 +106,7 @@ impl CacheKey for IndexMetadataKey { Cow::Owned(self.version.to_string()) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "Vec" } } @@ -137,7 +137,7 @@ impl CacheKey for ScalarIndexDetailsKey<'_> { Cow::Owned(format!("type/{}", self.uuid)) } - fn type_name(&self) -> &'static str { + fn type_name() -> &'static str { "ScalarIndexDetails" } } From 8b6967163a069e80e0fc9ad02d752e7d2a7087fb Mon Sep 17 00:00:00 2001 From: Will Jones Date: Sun, 29 Mar 2026 15:09:40 -0700 Subject: [PATCH 24/24] refactor: clarify cache module API surfaces, address review feedback Restructure the cache module so the two audiences are clear: - mod.rs: user-facing API (LanceCache, CacheKey, UnsizedCacheKey) - backend.rs: implementor-facing API (CacheBackend, InternalCacheKey) Delete keys.rs, moving CacheKey/UnsizedCacheKey into mod.rs and InternalCacheKey into backend.rs. Also: rename has_prefix -> starts_with, trim verbose get_or_insert doc, add shared-buffer note to approx_size_bytes, improve type_name docs, explain std::any::type_name usage in previous/reader.rs. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/lance-core/src/cache/backend.rs | 74 ++++++++++++++++++----- rust/lance-core/src/cache/keys.rs | 71 ---------------------- rust/lance-core/src/cache/mod.rs | 84 ++++++++++++++++++++++---- rust/lance-core/src/cache/moka.rs | 5 +- rust/lance-file/src/previous/reader.rs | 3 + 5 files changed, 135 insertions(+), 102 deletions(-) delete mode 100644 rust/lance-core/src/cache/keys.rs diff --git a/rust/lance-core/src/cache/backend.rs b/rust/lance-core/src/cache/backend.rs index e929bff3529..54b24944ab0 100644 --- a/rust/lance-core/src/cache/backend.rs +++ b/rust/lance-core/src/cache/backend.rs @@ -1,6 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors +//! Backend interface for cache implementors. +//! +//! This module defines the trait that custom cache backends must implement, +//! along with the key and entry types they operate on. Most callers should +//! use [`LanceCache`](super::LanceCache) instead of interacting with +//! backends directly. + use std::any::Any; use std::pin::Pin; use std::sync::Arc; @@ -10,17 +17,60 @@ use futures::Future; use crate::Result; -use super::keys::InternalCacheKey; - /// A type-erased cache entry. pub type CacheEntry = Arc; +/// Structured cache key passed to [`CacheBackend`] methods. +/// +/// Composed of three parts: +/// - **prefix**: scopes the key to a dataset or index (e.g. `"s3://bucket/dataset/"`) +/// - **key**: identifies the specific entry (e.g. `"42"` for a version number) +/// - **type_name**: distinguishes different value types stored under the same +/// user key (e.g. `"Vec"`) +/// +/// [`LanceCache`](super::LanceCache) constructs these automatically from +/// [`CacheKey`](super::CacheKey) values; backend authors receive them +/// ready-made. 
+#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct InternalCacheKey { + prefix: Arc, + key: Arc, + type_name: &'static str, +} + +impl InternalCacheKey { + pub fn new(prefix: Arc, key: Arc, type_name: &'static str) -> Self { + Self { + prefix, + key, + type_name, + } + } + + pub fn prefix(&self) -> &str { + &self.prefix + } + + pub fn key(&self) -> &str { + &self.key + } + + pub fn type_name(&self) -> &'static str { + self.type_name + } + + /// Returns true if this key's prefix starts with the given string. + pub fn starts_with(&self, prefix: &str) -> bool { + self.prefix.starts_with(prefix) + } +} + /// Low-level pluggable cache backend. /// -/// Implementations store entries keyed by [`InternalCacheKey`], which provides -/// structured access to the prefix, user key, and type name components. -/// The [`LanceCache`](super::LanceCache) wrapper handles key construction and type safety; -/// backend authors do not need to worry about key encoding. +/// Implementations store entries keyed by [`InternalCacheKey`] and return +/// type-erased [`CacheEntry`] values. +/// [`LanceCache`](super::LanceCache) handles key construction and type safety; +/// backend authors only need to implement storage and eviction. #[async_trait] pub trait CacheBackend: Send + Sync + std::fmt::Debug { /// Look up an entry by its key. @@ -36,15 +86,6 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { /// /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry /// was already present in the cache (the loader was not invoked). - /// - /// The loader is a pinned, boxed future rather than a generic closure - /// because `async_trait` erases the `Self` lifetime, making it impossible - /// to express a generic closure whose returned future borrows from the - /// caller. Boxing the future once at the call site (in `LanceCache`) - /// avoids this lifetime conflict while keeping the trait object-safe. 
- /// - /// The future borrows from the caller's scope and will be `.await`ed within - /// this method — implementations must not store it beyond the call. async fn get_or_insert<'a>( &self, key: &InternalCacheKey, @@ -72,6 +113,9 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug { /// Approximate weighted size in bytes, callable from synchronous contexts. /// Used by `DeepSizeOf` to report cache memory usage. /// Backends that cannot provide this cheaply should return 0. + /// + /// Assumes entries do not share underlying buffers; if they do, the + /// returned total may overcount. fn approx_size_bytes(&self) -> usize { 0 } diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs deleted file mode 100644 index 5182c3e8e72..00000000000 --- a/rust/lance-core/src/cache/keys.rs +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The Lance Authors - -use std::{borrow::Cow, sync::Arc}; - -/// Structured cache key used by [`CacheBackend`](super::CacheBackend). -/// -/// Composed of a prefix (scoping the key to a dataset/index), a user key -/// (identifying the specific entry), and a type name (distinguishing value -/// types that share the same user key). -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct InternalCacheKey { - prefix: Arc, - key: Arc, - type_name: &'static str, -} - -impl InternalCacheKey { - pub fn new(prefix: Arc, key: Arc, type_name: &'static str) -> Self { - Self { - prefix, - key, - type_name, - } - } - - pub fn prefix(&self) -> &str { - &self.prefix - } - - pub fn key(&self) -> &str { - &self.key - } - - pub fn type_name(&self) -> &'static str { - self.type_name - } - - /// Returns true if this key's prefix starts with the given string. 
- pub fn has_prefix(&self, prefix: &str) -> bool { - self.prefix.starts_with(prefix) - } -} - -pub trait CacheKey { - type ValueType: 'static; - - fn key(&self) -> Cow<'_, str>; - - /// Short, stable string that distinguishes this value type from others in - /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_name`). - /// - /// **Must be unique per value type.** If two `CacheKey` impls return the - /// same `type_name` but different `ValueType`s, entries will collide and - /// downcasts will fail silently (returning `None` on get). - /// - /// Must be consistent across crate boundaries — use a short literal, not - /// `std::any::type_name` pointers. - fn type_name() -> &'static str; -} - -pub trait UnsizedCacheKey { - type ValueType: 'static + ?Sized; - - fn key(&self) -> Cow<'_, str>; - - /// Short, stable string that distinguishes this value type from others in - /// the cache. Must be unique per value type — collisions cause silent - /// downcast failures. - fn type_name() -> &'static str; -} diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs index 01014997d7d..43bb233df72 100644 --- a/rust/lance-core/src/cache/mod.rs +++ b/rust/lance-core/src/cache/mod.rs @@ -1,26 +1,28 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -//! Cache implementation +//! Lance cache system. //! -//! This module provides a two-layer caching system: +//! ## For cache users //! -//! - [`CacheBackend`] is the low-level, pluggable trait that custom cache implementations -//! can implement. It uses [`InternalCacheKey`] keys and type-erased entries. -//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag), -//! type-safe get/insert, and DeepSizeOf-based size computation. +//! Use [`LanceCache`] (or [`WeakLanceCache`]) to store and retrieve typed +//! values. Define a [`CacheKey`] (or [`UnsizedCacheKey`] for trait objects) to +//! 
describe what you're caching and its type. //! -//! [`CacheKey`] / [`UnsizedCacheKey`] define the typed key interface, and -//! [`InternalCacheKey`] is the structured key passed to backends. +//! ## For backend implementors +//! +//! Implement [`CacheBackend`] to provide a custom storage layer (disk, Redis, +//! etc.). Backends receive [`InternalCacheKey`] keys and type-erased +//! [`CacheEntry`] values — the typed wrapping is handled by [`LanceCache`]. +//! See the [`backend`] module for details. -mod backend; -mod keys; +pub mod backend; mod moka; -pub use backend::{CacheBackend, CacheEntry}; -pub use keys::{CacheKey, InternalCacheKey, UnsizedCacheKey}; +pub use backend::{CacheBackend, CacheEntry, InternalCacheKey}; pub use moka::MokaCacheBackend; +use std::borrow::Cow; use std::sync::{ Arc, atomic::{AtomicU64, Ordering}, @@ -32,6 +34,62 @@ use crate::Result; pub use deepsize::{Context, DeepSizeOf}; +// --------------------------------------------------------------------------- +// CacheKey / UnsizedCacheKey — typed key traits for cache users +// --------------------------------------------------------------------------- + +/// Typed cache key for sized value types. +/// +/// Implement this trait to define a new type of cached entry. [`LanceCache`] +/// uses the key string and type name to construct an [`InternalCacheKey`] +/// for the backend. +/// +/// # Example +/// +/// ```ignore +/// struct MyKey { id: u64 } +/// +/// impl CacheKey for MyKey { +/// type ValueType = MyData; +/// fn key(&self) -> Cow<'_, str> { self.id.to_string().into() } +/// fn type_name() -> &'static str { "MyData" } +/// } +/// ``` +pub trait CacheKey { + type ValueType: 'static; + + fn key(&self) -> Cow<'_, str>; + + /// Short, stable string identifying this value type. + /// + /// Two `CacheKey` impls that store different `ValueType`s **must** return + /// different type names; if they collide, gets will silently return `None` + /// due to failed downcasts. 
+ /// + /// Use a short literal (e.g. `"Vec"`), not + /// `std::any::type_name` — the latter is not guaranteed stable across + /// compiler versions or build configurations. + fn type_name() -> &'static str; +} + +/// Like [`CacheKey`] but for unsized value types (e.g. `dyn Trait`). +/// +/// The cache wraps values in an extra `Arc` layer internally; callers pass +/// and receive `Arc` where `T: ?Sized`. +pub trait UnsizedCacheKey { + type ValueType: 'static + ?Sized; + + fn key(&self) -> Cow<'_, str>; + + /// Short, stable string identifying this value type. + /// See [`CacheKey::type_name`] for requirements. + fn type_name() -> &'static str; +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + /// Size of a cached `Arc`, accounting for the Arc overhead (two atomic counters). fn cache_entry_size(value: &T) -> usize { value.deep_size_of() + std::mem::size_of::() * 2 @@ -682,7 +740,7 @@ mod tests { } } async fn invalidate_prefix(&self, prefix: &str) { - self.map.lock().await.retain(|k, _| !k.has_prefix(prefix)); + self.map.lock().await.retain(|k, _| !k.starts_with(prefix)); } async fn clear(&self) { self.map.lock().await.clear(); diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs index 13a6c7e976c..05cb1e5909f 100644 --- a/rust/lance-core/src/cache/moka.rs +++ b/rust/lance-core/src/cache/moka.rs @@ -10,8 +10,7 @@ use futures::Future; use crate::Result; -use super::backend::{CacheBackend, CacheEntry}; -use super::keys::InternalCacheKey; +use super::backend::{CacheBackend, CacheEntry, InternalCacheKey}; /// Internal record stored in the moka cache. 
#[derive(Clone, Debug)] @@ -107,7 +106,7 @@ impl CacheBackend for MokaCacheBackend { async fn invalidate_prefix(&self, prefix: &str) { let prefix = prefix.to_owned(); self.cache - .invalidate_entries_if(move |key, _value| key.has_prefix(&prefix)) + .invalidate_entries_if(move |key, _value| key.starts_with(&prefix)) .expect("Cache configured correctly"); } diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs index 0edaa7eb972..9e1fc175d04 100644 --- a/rust/lance-file/src/previous/reader.rs +++ b/rust/lance-file/src/previous/reader.rs @@ -91,6 +91,9 @@ impl CacheKey for StringCacheKey<'_, T> { } fn type_name() -> &'static str { + // This is a private, crate-internal key that is only instantiated with + // a single concrete T within one build, so std::any::type_name is fine + // here — there is no cross-crate collision risk. std::any::type_name::() } }