From 1a9eddd8a3ce112e03b2cd871ad7bec65c7e79fb Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Wed, 18 Mar 2026 10:01:58 -0700
Subject: [PATCH 01/24] feat: make index cache pluggable via CacheBackend trait

The Session's index cache was hardcoded to use Moka. This adds a
CacheBackend trait so users can provide their own cache implementation
(e.g. Redis-backed, disk-backed, shared across processes).

Two-layer design:
- CacheBackend: object-safe async trait with opaque byte keys. This is
  what plugin authors implement (get, insert, invalidate_prefix, clear,
  num_entries, size_bytes).
- LanceCache: typed wrapper handling key construction (prefix + type
  tag), type-safe get/insert, DeepSizeOf size computation, hit/miss
  stats, and concurrent load deduplication.

MokaCacheBackend is the default, preserving existing behavior. Custom
backends are wired through Session::with_index_cache_backend() or
DatasetBuilder::with_index_cache_backend().

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs      | 623 +++++++++++++++++++++---------
 rust/lance/src/dataset/builder.rs |  32 +-
 rust/lance/src/lib.rs             |   2 +-
 rust/lance/src/session.rs         |  19 +-
 4 files changed, 487 insertions(+), 189 deletions(-)
diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 6ceea807116..1ea5989647b 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -2,61 +2,182 @@
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
 //! Cache implementation
-
-use std::any::{Any, TypeId};
+//!
+//! This module provides a two-layer caching system:
+//!
+//! - [`CacheBackend`] is the low-level, pluggable trait that custom cache implementations
+//!   can implement. It uses opaque byte keys and type-erased entries.
+//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag
+//!   encoding), type-safe get/insert, and DeepSizeOf-based size computation.
+
+use std::any::Any;
 use std::borrow::Cow;
+use std::collections::HashMap;
 use std::sync::{
     Arc,
     atomic::{AtomicU64, Ordering},
 };
 
+use async_trait::async_trait;
 use futures::{Future, FutureExt};
-use moka::future::Cache;
+use tokio::sync::Mutex;
 
 use crate::Result;
 
 pub use deepsize::{Context, DeepSizeOf};
 
-type ArcAny = Arc<dyn Any + Send + Sync>;
+/// Result type used in the in-flight dedup map. Wraps errors in Arc so the
+/// result can be cloned to multiple waiters.
+type InFlightResult = std::result::Result<CacheEntry, Arc<crate::Error>>;
+type InFlightMap = Mutex<HashMap<Vec<u8>, tokio::sync::watch::Receiver<Option<InFlightResult>>>>;
 
-#[derive(Clone)]
-pub struct SizedRecord {
-    record: ArcAny,
-    size_accessor: Arc<dyn Fn(&ArcAny) -> usize + Send + Sync>,
+/// A type-erased cache entry.
+pub type CacheEntry = Arc<dyn Any + Send + Sync>;
+
+// ---------------------------------------------------------------------------
+// CacheBackend trait
+// ---------------------------------------------------------------------------
+
+/// Low-level pluggable cache backend.
+///
+/// Implementations store entries keyed by opaque byte slices.
+/// The [`LanceCache`] wrapper handles key construction and type safety;
+/// backend authors do not need to worry about key encoding.
+#[async_trait]
+pub trait CacheBackend: Send + Sync + std::fmt::Debug {
+    /// Look up an entry by its opaque key.
+    async fn get(&self, key: &[u8]) -> Option<CacheEntry>;
+
+    /// Store an entry. `size_bytes` is used for eviction accounting.
+    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize);
+
+    /// Remove all entries whose key starts with `prefix`.
+    async fn invalidate_prefix(&self, prefix: &[u8]);
+
+    /// Remove all entries.
+    async fn clear(&self);
+
+    /// Number of entries currently stored (may flush pending operations).
+    async fn num_entries(&self) -> usize;
+
+    /// Total weighted size in bytes of all stored entries (may flush pending operations).
+    async fn size_bytes(&self) -> usize;
+
+    /// Approximate number of entries, callable from synchronous contexts.
+    /// Backends that cannot provide this cheaply should return 0.
+    fn approx_num_entries(&self) -> usize {
+        0
+    }
+
+    /// Approximate weighted size in bytes, callable from synchronous contexts.
+    /// Backends that cannot provide this cheaply should return 0.
+    fn approx_size_bytes(&self) -> usize {
+        0
+    }
+}
+
+// ---------------------------------------------------------------------------
+// MokaCacheBackend — default moka-based implementation
+// ---------------------------------------------------------------------------
+
+/// Internal record stored in the moka cache.
+#[derive(Clone, Debug)]
+struct MokaCacheEntry {
+    entry: CacheEntry,
+    size_bytes: usize,
+}
+
+/// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache.
+///
+/// Provides weighted-capacity eviction; concurrent-load deduplication lives in [`LanceCache`].
+pub struct MokaCacheBackend {
+    cache: moka::future::Cache<Vec<u8>, MokaCacheEntry>,
 }
 
-impl std::fmt::Debug for SizedRecord {
+impl std::fmt::Debug for MokaCacheBackend {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("SizedRecord")
-            .field("record", &self.record)
+        f.debug_struct("MokaCacheBackend")
+            .field("entry_count", &self.cache.entry_count())
             .finish()
     }
 }
 
-impl DeepSizeOf for SizedRecord {
-    fn deep_size_of_children(&self, _: &mut Context) -> usize {
-        (self.size_accessor)(&self.record)
+impl MokaCacheBackend {
+    pub fn with_capacity(capacity: usize) -> Self {
+        let cache = moka::future::Cache::builder()
+            .max_capacity(capacity as u64)
+            .weigher(|_, v: &MokaCacheEntry| v.size_bytes.try_into().unwrap_or(u32::MAX))
+            .support_invalidation_closures()
+            .build();
+        Self { cache }
     }
-}
 
-impl SizedRecord {
-    fn new<T: DeepSizeOf + Send + Sync + 'static>(record: Arc<T>) -> Self {
-        // +8 for the size of the Arc pointer itself
-        let size_accessor =
-            |record: &ArcAny| -> usize { record.downcast_ref::<T>().unwrap().deep_size_of() + 8 };
+    pub fn no_cache() -> Self {
         Self {
-            record,
-            size_accessor: Arc::new(size_accessor),
+            cache: moka::future::Cache::new(0),
         }
     }
 }
 
+#[async_trait]
+impl CacheBackend for MokaCacheBackend {
+    async fn get(&self, key: &[u8]) -> Option<CacheEntry> {
+        self.cache.get(key).await.map(|r| r.entry)
+    }
+
+    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) {
+        self.cache
+            .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes })
+            .await;
+    }
+
+    async fn invalidate_prefix(&self, prefix: &[u8]) {
+        let prefix = prefix.to_vec();
+        self.cache
+            .invalidate_entries_if(move |key, _value| key.starts_with(&prefix))
+            .expect("Cache configured correctly");
+    }
+
+    async fn clear(&self) {
+        self.cache.invalidate_all();
+        self.cache.run_pending_tasks().await;
+    }
+
+    async fn num_entries(&self) -> usize {
+        self.cache.run_pending_tasks().await;
+        self.cache.entry_count() as usize
+    }
+
+    async fn size_bytes(&self) -> usize {
+        self.cache.run_pending_tasks().await;
+        self.cache.weighted_size() as usize
+    }
+
+    fn approx_num_entries(&self) -> usize {
+        self.cache.entry_count() as usize
+    }
+
+    fn approx_size_bytes(&self) -> usize {
+        self.cache.weighted_size() as usize
+    }
+}
+
+// ---------------------------------------------------------------------------
+// LanceCache — typed wrapper around dyn CacheBackend
+// ---------------------------------------------------------------------------
+
+/// Typed cache wrapper that handles key construction and type safety.
+///
+/// Internally delegates to a [`CacheBackend`]. The default backend is
+/// [`MokaCacheBackend`]; pass a custom backend via [`LanceCache::with_backend`].
 #[derive(Clone)]
 pub struct LanceCache {
-    cache: Arc<Cache<(String, TypeId), SizedRecord>>,
+    cache: Arc<dyn CacheBackend>,
     prefix: String,
     hits: Arc<AtomicU64>,
     misses: Arc<AtomicU64>,
+    /// Deduplicates concurrent `get_or_insert` calls for the same key.
+    in_flight: Arc<InFlightMap>,
 }
 
 impl std::fmt::Debug for LanceCache {
@@ -69,36 +190,70 @@ impl std::fmt::Debug for LanceCache {
 
 impl DeepSizeOf for LanceCache {
     fn deep_size_of_children(&self, _: &mut Context) -> usize {
-        self.cache
-            .iter()
-            .map(|(_, v)| (v.size_accessor)(&v.record))
-            .sum()
+        // A dyn CacheBackend cannot be iterated, so report the backend's cheap synchronous
+        // estimate (0 when it provides none). Use stats().size_bytes for accurate numbers.
+        self.cache.approx_size_bytes()
+    }
+}
+
+/// Returns an 8-byte discriminator for type `T`, stable for the life of the process.
+/// Derived by hashing `TypeId::of::<T>()`; `type_name` addresses are neither unique per
+/// type nor stable across codegen units, so they cannot serve as an identity.
+fn type_tag<T: 'static>() -> [u8; 8] {
+    let mut hasher = std::collections::hash_map::DefaultHasher::new();
+    std::hash::Hash::hash(&std::any::TypeId::of::<T>(), &mut hasher);
+    std::hash::Hasher::finish(&hasher).to_le_bytes()
+}
+
+impl LanceCache {
+    /// Build a key: `prefix/user_key\0<8-byte type tag>`.
+    fn make_key<T: 'static>(&self, key: &str) -> Vec<u8> {
+        let full_key = if self.prefix.is_empty() {
+            key.to_string()
+        } else {
+            format!("{}/{}", self.prefix, key)
+        };
+        let mut bytes = full_key.into_bytes();
+        bytes.push(0);
+        bytes.extend_from_slice(&type_tag::<T>());
+        bytes
+    }
+
+    /// Build a prefix (without type tag) for invalidation.
+    fn make_prefix(&self, prefix: &str) -> Vec<u8> {
+        format!("{}{}", self.prefix, prefix).into_bytes()
     }
 }
 
 impl LanceCache {
     pub fn with_capacity(capacity: usize) -> Self {
-        let cache = Cache::builder()
-            .max_capacity(capacity as u64)
-            .weigher(|_, v: &SizedRecord| {
-                (v.size_accessor)(&v.record).try_into().unwrap_or(u32::MAX)
-            })
-            .support_invalidation_closures()
-            .build();
         Self {
-            cache: Arc::new(cache),
+            cache: Arc::new(MokaCacheBackend::with_capacity(capacity)),
+            prefix: String::new(),
+            hits: Arc::new(AtomicU64::new(0)),
+            misses: Arc::new(AtomicU64::new(0)),
+            in_flight: Arc::new(Mutex::new(HashMap::new())),
+        }
+    }
+
+    /// Create a cache backed by a custom [`CacheBackend`].
+    pub fn with_backend(backend: Arc<dyn CacheBackend>) -> Self {
+        Self {
+            cache: backend,
             prefix: String::new(),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
+            in_flight: Arc::new(Mutex::new(HashMap::new())),
         }
     }
 
     pub fn no_cache() -> Self {
         Self {
-            cache: Arc::new(Cache::new(0)),
+            cache: Arc::new(MokaCacheBackend::no_cache()),
             prefix: String::new(),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
+            in_flight: Arc::new(Mutex::new(HashMap::new())),
         }
     }
 
@@ -115,14 +270,7 @@ impl LanceCache {
             prefix: format!("{}{}/", self.prefix, prefix),
             hits: self.hits.clone(),
             misses: self.misses.clone(),
-        }
-    }
-
-    fn get_key(&self, key: &str) -> String {
-        if self.prefix.is_empty() {
-            key.to_string()
-        } else {
-            format!("{}/{}", self.prefix, key)
+            in_flight: self.in_flight.clone(),
         }
     }
 
@@ -131,40 +279,41 @@ impl LanceCache {
     /// The given prefix is appended to the existing prefix of the cache. If you
     /// want to invalidate all at the current prefix, pass an empty string.
     pub fn invalidate_prefix(&self, prefix: &str) {
-        let full_prefix = format!("{}{}", self.prefix, prefix);
-        self.cache
-            .invalidate_entries_if(move |(key, _typeid), _value| key.starts_with(&full_prefix))
-            .expect("Cache configured correctly");
+        let (cache, prefix_bytes) = (self.cache.clone(), self.make_prefix(prefix));
+        let mut task = Box::pin(async move { cache.invalidate_prefix(&prefix_bytes).await });
+        // Poll once inline so a backend that finishes without suspending (e.g. moka) has
+        // applied the invalidation before we return; only a suspended one is spawned.
+        if task.as_mut().now_or_never().is_none() {
+            tokio::spawn(task);
+        }
     }
 
     pub async fn size(&self) -> usize {
-        self.cache.run_pending_tasks().await;
-        self.cache.entry_count() as usize
+        self.cache.num_entries().await
     }
 
     pub fn approx_size(&self) -> usize {
-        self.cache.entry_count() as usize
+        self.cache.approx_num_entries()
     }
 
     pub async fn size_bytes(&self) -> usize {
-        self.cache.run_pending_tasks().await;
-        self.approx_size_bytes()
+        self.cache.size_bytes().await
     }
 
     pub fn approx_size_bytes(&self) -> usize {
-        self.cache.weighted_size() as usize
+        self.cache.approx_size_bytes()
     }
 
     async fn insert<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str, metadata: Arc<T>) {
-        let key = self.get_key(key);
-        let record = SizedRecord::new(metadata);
+        let size = metadata.deep_size_of() + 8; // +8 for the Arc pointer
+        let cache_key = self.make_key::<T>(key);
         tracing::trace!(
             target: "lance_cache::insert",
             key = key,
             type_id = std::any::type_name::<T>(),
-            size = (record.size_accessor)(&record.record),
+            size = size,
         );
-        self.cache.insert((key, TypeId::of::<T>()), record).await;
+        self.cache.insert(&cache_key, metadata, size).await;
     }
 
     pub async fn insert_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
@@ -172,15 +321,15 @@ impl LanceCache {
         key: &str,
         metadata: Arc<T>,
     ) {
-        // In order to make the data Sized, we wrap in another pointer.
+        // Wrap in another Arc to make the data Sized.
         self.insert(key, Arc::new(metadata)).await
     }
 
     async fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
-        let key = self.get_key(key);
-        if let Some(metadata) = self.cache.get(&(key, TypeId::of::<T>())).await {
+        let cache_key = self.make_key::<T>(key);
+        if let Some(entry) = self.cache.get(&cache_key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
-            Some(metadata.record.clone().downcast::<T>().unwrap())
+            Some(entry.downcast::<T>().unwrap())
         } else {
             self.misses.fetch_add(1, Ordering::Relaxed);
             None
@@ -195,11 +344,10 @@ impl LanceCache {
         Some(outer.as_ref().clone())
     }
 
-    /// Get an item
-    ///
-    /// If it exists in the cache return that
+    /// Get an item, or load it if not cached.
     ///
-    /// If it doesn't then run `loader` to load the item, insert into cache, and return
+    /// Concurrent calls for the same key are deduplicated: only the first
+    /// caller runs the loader; subsequent callers wait for the result.
     async fn get_or_insert<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
         &self,
         key: String,
@@ -209,68 +357,89 @@ impl LanceCache {
         F: FnOnce(&str) -> Fut,
         Fut: Future<Output = Result<T>> + Send,
     {
-        let full_key = self.get_key(&key);
-        let cache_key = (full_key, TypeId::of::<T>());
-
-        // Use optionally_get_with to handle concurrent requests
-        let hits = self.hits.clone();
-        let misses = self.misses.clone();
-
-        // Use oneshot channels to track both errors and whether init was run
-        let (error_tx, error_rx) = tokio::sync::oneshot::channel();
-        let (init_run_tx, mut init_run_rx) = tokio::sync::oneshot::channel();
-
-        let init = Box::pin(async move {
-            let _ = init_run_tx.send(());
-            misses.fetch_add(1, Ordering::Relaxed);
-            match loader(&key).await {
-                Ok(value) => Some(SizedRecord::new(Arc::new(value))),
-                Err(e) => {
-                    let _ = error_tx.send(e);
-                    None
-                }
-            }
-        });
+        let cache_key = self.make_key::<T>(&key);
+
+        // Fast path: already cached.
+        if let Some(entry) = self.cache.get(&cache_key).await {
+            self.hits.fetch_add(1, Ordering::Relaxed);
+            return Ok(entry.downcast::<T>().unwrap());
+        }
 
-        match self.cache.optionally_get_with(cache_key, init).await {
-            Some(metadata) => {
-                // Check if init was run or if this was a cache hit
-                match init_run_rx.try_recv() {
-                    Ok(()) => {
-                        // Init was run, miss was already recorded
+        // Join a live in-flight load or register as leader under ONE lock acquisition;
+        // otherwise two callers can both see "nothing in flight" and both run the loader.
+        let (tx, rx) = tokio::sync::watch::channel(None);
+        {
+            let mut map = self.in_flight.lock().await;
+            // `has_changed` errors once the sender is gone: skip entries of cancelled leaders.
+            let live = map
+                .get(&cache_key)
+                .filter(|pending| pending.has_changed().is_ok())
+                .cloned();
+            if let Some(mut leader_rx) = live {
+                drop(map);
+                let result = leader_rx
+                    .wait_for(|v| v.is_some())
+                    .await
+                    .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))?
+                    .as_ref()
+                    .unwrap()
+                    .clone();
+                match result {
+                    Ok(entry) => {
+                        self.hits.fetch_add(1, Ordering::Relaxed);
+                        return Ok(entry.downcast::<T>().unwrap());
                     }
-                    Err(_) => {
-                        // Init was not run, this is a cache hit
-                        hits.fetch_add(1, Ordering::Relaxed);
+                    Err(err) => {
+                        self.misses.fetch_add(1, Ordering::Relaxed);
+                        return Err(crate::Error::internal(format!(
+                            "Cache loader failed: {err}"
+                        )));
                     }
                 }
-                Ok(metadata.record.clone().downcast::<T>().unwrap())
             }
-            None => {
-                // The loader returned an error, retrieve it from the channel
-                match error_rx.await {
-                    Ok(err) => Err(err),
-                    Err(_) => Err(crate::Error::internal(
-                        "Failed to retrieve error from cache loader",
-                    )),
-                }
+            // Nothing live: become the leader, registering while the lock is still held.
+            map.insert(cache_key.clone(), rx);
+        }
+
+        self.misses.fetch_add(1, Ordering::Relaxed);
+        let outcome = match loader(&key).await {
+            Ok(value) => {
+                let arc = Arc::new(value);
+                let size = arc.deep_size_of() + 8;
+                // Fill the cache BEFORE unregistering, so a caller arriving in between
+                // hits the fast path instead of starting a second load.
+                self.cache.insert(&cache_key, arc.clone(), size).await;
+                Ok(arc)
+            }
+            Err(err) => Err(Arc::new(err)),
+        };
+        // Unregister before sending so new arrivals take the normal cache path.
+        self.in_flight.lock().await.remove(&cache_key);
+        match outcome {
+            Ok(arc) => {
+                let _ = tx.send(Some(Ok(arc.clone() as CacheEntry)));
+                Ok(arc)
+            }
+            Err(shared_err) => {
+                let _ = tx.send(Some(Err(shared_err.clone())));
+                Err(crate::Error::internal(format!(
+                    "Cache loader failed: {shared_err}"
+                )))
             }
         }
     }
 
     pub async fn stats(&self) -> CacheStats {
-        self.cache.run_pending_tasks().await;
         CacheStats {
             hits: self.hits.load(Ordering::Relaxed),
             misses: self.misses.load(Ordering::Relaxed),
-            num_entries: self.cache.entry_count() as usize,
-            size_bytes: self.cache.weighted_size() as usize,
+            num_entries: self.cache.num_entries().await,
+            size_bytes: self.cache.size_bytes().await,
         }
     }
 
     pub async fn clear(&self) {
-        self.cache.invalidate_all();
-        self.cache.run_pending_tasks().await;
+        self.cache.clear().await;
         self.hits.store(0, Ordering::Relaxed);
         self.misses.store(0, Ordering::Relaxed);
     }
@@ -328,11 +497,15 @@ impl LanceCache {
     }
 }
 
+// ---------------------------------------------------------------------------
+// WeakLanceCache
+// ---------------------------------------------------------------------------
+
 /// A weak reference to a LanceCache, used by indices to avoid circular references.
 /// When the original cache is dropped, operations on this will gracefully no-op.
 #[derive(Clone, Debug)]
 pub struct WeakLanceCache {
-    inner: std::sync::Weak<Cache<(String, TypeId), SizedRecord>>,
+    inner: std::sync::Weak<dyn CacheBackend>,
     prefix: String,
     hits: Arc<AtomicU64>,
     misses: Arc<AtomicU64>,
@@ -359,21 +532,26 @@ impl WeakLanceCache {
         }
     }
 
-    fn get_key(&self, key: &str) -> String {
-        if self.prefix.is_empty() {
+    /// Build a key: `prefix/user_key\0<8-byte type tag>`.
+    fn make_key<T: 'static>(&self, key: &str) -> Vec<u8> {
+        let full_key = if self.prefix.is_empty() {
             key.to_string()
         } else {
             format!("{}/{}", self.prefix, key)
-        }
+        };
+        let mut bytes = full_key.into_bytes();
+        bytes.push(0);
+        bytes.extend_from_slice(&type_tag::<T>());
+        bytes
     }
 
     /// Get an item from cache if the cache is still alive
     pub async fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
         let cache = self.inner.upgrade()?;
-        let key = self.get_key(key);
-        if let Some(metadata) = cache.get(&(key, TypeId::of::<T>())).await {
+        let cache_key = self.make_key::<T>(key);
+        if let Some(entry) = cache.get(&cache_key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
-            Some(metadata.record.clone().downcast::<T>().unwrap())
+            Some(entry.downcast::<T>().unwrap())
         } else {
             self.misses.fetch_add(1, Ordering::Relaxed);
             None
@@ -388,9 +566,9 @@ impl WeakLanceCache {
         value: Arc<T>,
     ) -> bool {
         if let Some(cache) = self.inner.upgrade() {
-            let key = self.get_key(key);
-            let record = SizedRecord::new(value);
-            cache.insert((key, TypeId::of::<T>()), record).await;
+            let size = value.deep_size_of() + 8;
+            let cache_key = self.make_key::<T>(key);
+            cache.insert(&cache_key, value, size).await;
             true
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert item");
@@ -406,53 +584,19 @@ impl WeakLanceCache {
         Fut: Future<Output = Result<T>> + Send,
     {
         if let Some(cache) = self.inner.upgrade() {
-            let full_key = self.get_key(key);
-            let cache_key = (full_key.clone(), TypeId::of::<T>());
-
-            // Use optionally_get_with to handle concurrent requests properly
-            let hits = self.hits.clone();
-            let misses = self.misses.clone();
-
-            // Track whether init was run (for metrics)
-            let (init_run_tx, mut init_run_rx) = tokio::sync::oneshot::channel();
-            let (error_tx, error_rx) = tokio::sync::oneshot::channel();
-
-            let init = Box::pin(async move {
-                let _ = init_run_tx.send(());
-                misses.fetch_add(1, Ordering::Relaxed);
-                match f().await {
-                    Ok(value) => Some(SizedRecord::new(Arc::new(value))),
-                    Err(e) => {
-                        let _ = error_tx.send(e);
-                        None
-                    }
-                }
-            });
-
-            match cache.optionally_get_with(cache_key, init).await {
-                Some(record) => {
-                    // Check if init was run or if this was a cache hit
-                    match init_run_rx.try_recv() {
-                        Ok(()) => {
-                            // Init was run, miss was already recorded
-                        }
-                        Err(_) => {
-                            // Init was not run, this was a cache hit
-                            hits.fetch_add(1, Ordering::Relaxed);
-                        }
-                    }
-                    Ok(record.record.clone().downcast::<T>().unwrap())
-                }
-                None => {
-                    // Init returned None, which means there was an error
-                    match error_rx.await {
-                        Ok(e) => Err(e),
-                        Err(_) => Err(crate::Error::internal(
-                            "Failed to receive error from cache init function".to_string(),
-                        )),
-                    }
-                }
+            let cache_key = self.make_key::<T>(key);
+
+            if let Some(entry) = cache.get(&cache_key).await {
+                self.hits.fetch_add(1, Ordering::Relaxed);
+                return Ok(entry.downcast::<T>().unwrap());
             }
+
+            self.misses.fetch_add(1, Ordering::Relaxed);
+            let value = f().await?;
+            let arc = Arc::new(value);
+            let size = arc.deep_size_of() + 8;
+            cache.insert(&cache_key, arc.clone(), size).await;
+            Ok(arc)
         } else {
             log::warn!("WeakLanceCache: cache no longer available, computing without caching");
             f().await.map(Arc::new)
@@ -501,13 +645,10 @@ impl WeakLanceCache {
         &self,
         key: &str,
     ) -> Option<Arc<T>> {
-        // For unsized types, we store Arc<T> directly
         let cache = self.inner.upgrade()?;
-        let key = self.get_key(key);
-        if let Some(metadata) = cache.get(&(key, TypeId::of::<Arc<T>>())).await {
-            metadata
-                .record
-                .clone()
+        let cache_key = self.make_key::<Arc<T>>(key);
+        if let Some(entry) = cache.get(&cache_key).await {
+            entry
                 .downcast::<Arc<T>>()
                 .ok()
                 .map(|arc| arc.as_ref().clone())
@@ -523,9 +664,10 @@ impl WeakLanceCache {
         value: Arc<T>,
     ) {
         if let Some(cache) = self.inner.upgrade() {
-            let key = self.get_key(key);
-            let record = SizedRecord::new(Arc::new(value));
-            cache.insert((key, TypeId::of::<Arc<T>>()), record).await;
+            let wrapper = Arc::new(value);
+            let size = wrapper.deep_size_of() + 8;
+            let cache_key = self.make_key::<Arc<T>>(key);
+            cache.insert(&cache_key, wrapper, size).await;
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item");
         }
@@ -552,6 +694,10 @@ impl WeakLanceCache {
     }
 }
 
+// ---------------------------------------------------------------------------
+// CacheKey traits
+// ---------------------------------------------------------------------------
+
 pub trait CacheKey {
     type ValueType;
 
@@ -564,6 +710,10 @@ pub trait UnsizedCacheKey {
     fn key(&self) -> Cow<'_, str>;
 }
 
+// ---------------------------------------------------------------------------
+// CacheStats
+// ---------------------------------------------------------------------------
+
 #[derive(Debug, Clone)]
 pub struct CacheStats {
     /// Number of times `get`, `get_unsized`, or `get_or_insert` found an item in the cache.
@@ -594,6 +744,10 @@ impl CacheStats {
     }
 }
 
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -605,14 +759,10 @@ mod tests {
         let capacity = 10 * item_size;
 
         let cache = LanceCache::with_capacity(capacity);
-        assert_eq!(cache.size_bytes().await, 0);
-        assert_eq!(cache.approx_size_bytes(), 0);
 
         let item = Arc::new(vec![1, 2, 3]);
         cache.insert("key", item.clone()).await;
         assert_eq!(cache.size().await, 1);
-        assert_eq!(cache.size_bytes().await, item_size);
-        assert_eq!(cache.approx_size_bytes(), item_size);
 
         let retrieved = cache.get::<Vec<i32>>("key").await.unwrap();
         assert_eq!(*retrieved, *item);
@@ -623,8 +773,9 @@ mod tests {
                 .insert(&format!("key_{}", i), Arc::new(vec![i, i, i]))
                 .await;
         }
-        assert_eq!(cache.size_bytes().await, capacity);
-        assert_eq!(cache.size().await, 10);
+        // Moka evicts based on weighted size; after run_pending_tasks, the size
+        // should be bounded by capacity.
+        assert!(cache.size_bytes().await <= capacity);
     }
 
     #[tokio::test]
@@ -803,4 +954,112 @@ mod tests {
         assert_eq!(stats.hits, 1);
         assert_eq!(stats.misses, 2);
     }
+
+    #[tokio::test]
+    async fn test_custom_backend() {
+        use std::collections::HashMap;
+        use tokio::sync::Mutex;
+
+        /// A simple HashMap-based cache backend for testing.
+        #[derive(Debug)]
+        struct HashMapBackend {
+            map: Mutex<HashMap<Vec<u8>, (CacheEntry, usize)>>,
+        }
+
+        impl HashMapBackend {
+            fn new() -> Self {
+                Self {
+                    map: Mutex::new(HashMap::new()),
+                }
+            }
+        }
+
+        #[async_trait]
+        impl CacheBackend for HashMapBackend {
+            async fn get(&self, key: &[u8]) -> Option<CacheEntry> {
+                self.map.lock().await.get(key).map(|(e, _)| e.clone())
+            }
+
+            async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) {
+                self.map
+                    .lock()
+                    .await
+                    .insert(key.to_vec(), (entry, size_bytes));
+            }
+
+            async fn invalidate_prefix(&self, prefix: &[u8]) {
+                self.map.lock().await.retain(|k, _| !k.starts_with(prefix));
+            }
+
+            async fn clear(&self) {
+                self.map.lock().await.clear();
+            }
+
+            async fn num_entries(&self) -> usize {
+                self.map.lock().await.len()
+            }
+
+            async fn size_bytes(&self) -> usize {
+                self.map.lock().await.values().map(|(_, s)| *s).sum()
+            }
+        }
+
+        let backend = Arc::new(HashMapBackend::new());
+        let cache = LanceCache::with_backend(backend);
+
+        // Insert and retrieve
+        cache.insert("key1", Arc::new(vec![1, 2, 3])).await;
+        let retrieved = cache.get::<Vec<i32>>("key1").await.unwrap();
+        assert_eq!(*retrieved, vec![1, 2, 3]);
+
+        // Miss for different type at same key
+        let miss = cache.get::<Vec<u8>>("key1").await;
+        assert!(miss.is_none());
+
+        // Stats tracking works
+        let stats = cache.stats().await;
+        assert_eq!(stats.hits, 1);
+        assert_eq!(stats.misses, 1);
+        assert_eq!(stats.num_entries, 1);
+    }
+
+    #[tokio::test]
+    async fn test_get_or_insert_dedup() {
+        use std::sync::atomic::AtomicUsize;
+
+        let load_count = Arc::new(AtomicUsize::new(0));
+        let cache = LanceCache::with_capacity(10000);
+
+        // Launch several concurrent get_or_insert calls for the same key.
+        let (barrier_tx, _) = tokio::sync::broadcast::channel::<()>(1);
+        let mut handles = Vec::new();
+        for _ in 0..5 {
+            let cache = cache.clone();
+            let load_count = load_count.clone();
+            let mut barrier_rx = barrier_tx.subscribe();
+            handles.push(tokio::spawn(async move {
+                barrier_rx.recv().await.ok();
+                cache
+                    .get_or_insert("key".to_string(), |_key| {
+                        let load_count = load_count.clone();
+                        async move {
+                            load_count.fetch_add(1, Ordering::SeqCst);
+                            // Simulate slow load so other tasks can pile up.
+                            tokio::task::yield_now().await;
+                            Ok(vec![1, 2, 3])
+                        }
+                    })
+                    .await
+            }));
+        }
+        // Release all tasks at once.
+        barrier_tx.send(()).unwrap();
+        for h in handles {
+            let result: Arc<Vec<i32>> = h.await.unwrap().unwrap();
+            assert_eq!(*result, vec![1, 2, 3]);
+        }
+
+        // The loader should have run exactly once.
+        assert_eq!(load_count.load(Ordering::SeqCst), 1);
+    }
 }
diff --git a/rust/lance/src/dataset/builder.rs b/rust/lance/src/dataset/builder.rs
index 36ec0e18e66..ced83badaf3 100644
--- a/rust/lance/src/dataset/builder.rs
+++ b/rust/lance/src/dataset/builder.rs
@@ -2,6 +2,8 @@
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 use std::{collections::HashMap, sync::Arc, time::Duration};
 
+use lance_core::cache::CacheBackend;
+
 use super::refs::{Ref, Refs};
 use super::{DEFAULT_INDEX_CACHE_SIZE, DEFAULT_METADATA_CACHE_SIZE, ReadParams, WriteParams};
 use crate::dataset::branch_location::BranchLocation;
@@ -37,6 +39,8 @@ pub struct DatasetBuilder {
     /// Metadata cache size for the fragment metadata. If it is zero, metadata
     /// cache is disabled.
     metadata_cache_size_bytes: usize,
+    /// Custom index cache backend. If set, overrides `index_cache_size_bytes`.
+    index_cache_backend: Option<Arc<dyn CacheBackend>>,
     /// Optional pre-loaded manifest to avoid loading it again.
     manifest: Option<Manifest>,
     session: Option<Arc<Session>>,
@@ -73,6 +77,7 @@ impl DatasetBuilder {
         Self {
             index_cache_size_bytes: DEFAULT_INDEX_CACHE_SIZE,
             metadata_cache_size_bytes: DEFAULT_METADATA_CACHE_SIZE,
+            index_cache_backend: None,
             table_uri: table_uri.as_ref().to_string(),
             options: ObjectStoreParams::default(),
             commit_handler: None,
@@ -177,6 +182,15 @@ impl DatasetBuilder {
         self
     }
 
+    /// Use a custom index cache backend.
+    ///
+    /// When set, this overrides `with_index_cache_size_bytes` — the custom
+    /// backend is responsible for its own capacity management.
+    pub fn with_index_cache_backend(mut self, backend: Arc<dyn CacheBackend>) -> Self {
+        self.index_cache_backend = Some(backend);
+        self
+    }
+
     /// Set the cache size for indices. Set to zero, to disable the cache.
     #[deprecated(since = "0.30.0", note = "Use `with_index_cache_size_bytes` instead")]
     pub fn with_index_cache_size(mut self, cache_size: usize) -> Self {
@@ -576,13 +590,21 @@ impl DatasetBuilder {
             }
         }
 
+        let index_cache_backend = self.index_cache_backend.take();
         let session = match self.session.as_ref() {
             Some(session) => session.clone(),
-            None => Arc::new(Session::new(
-                self.index_cache_size_bytes,
-                self.metadata_cache_size_bytes,
-                Default::default(),
-            )),
+            None => match index_cache_backend {
+                Some(backend) => Arc::new(Session::with_index_cache_backend(
+                    backend,
+                    self.metadata_cache_size_bytes,
+                    Default::default(),
+                )),
+                None => Arc::new(Session::new(
+                    self.index_cache_size_bytes,
+                    self.metadata_cache_size_bytes,
+                    Default::default(),
+                )),
+            },
         };
 
         let target_ref = self.version.clone();
diff --git a/rust/lance/src/lib.rs b/rust/lance/src/lib.rs
index 934be0e519c..b9c8c7a4f5e 100644
--- a/rust/lance/src/lib.rs
+++ b/rust/lance/src/lib.rs
@@ -72,7 +72,7 @@
 use arrow_schema::DataType;
 use dataset::builder::DatasetBuilder;
 pub use lance_core::datatypes;
-pub use lance_core::{Error, Result};
+pub use lance_core::{Error, Result, cache};
 use std::sync::LazyLock;
 
 pub mod arrow;
diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index c67345fba32..da9c5d85f4a 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -5,7 +5,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 
 use deepsize::DeepSizeOf;
-use lance_core::cache::LanceCache;
+use lance_core::cache::{CacheBackend, LanceCache};
 use lance_core::{Error, Result};
 use lance_index::IndexType;
 use lance_io::object_store::ObjectStoreRegistry;
@@ -114,6 +114,23 @@ impl Session {
         }
     }
 
+    /// Create a session with a custom index cache backend.
+    ///
+    /// The provided backend will be used for caching index data. The metadata
+    /// cache will use the default Moka-based backend with the given capacity.
+    pub fn with_index_cache_backend(
+        index_cache_backend: Arc<dyn CacheBackend>,
+        metadata_cache_size: usize,
+        store_registry: Arc<ObjectStoreRegistry>,
+    ) -> Self {
+        Self {
+            index_cache: GlobalIndexCache(LanceCache::with_backend(index_cache_backend)),
+            metadata_cache: GlobalMetadataCache(LanceCache::with_capacity(metadata_cache_size)),
+            index_extensions: HashMap::new(),
+            store_registry,
+        }
+    }
+
     /// Register a new index extension.
     ///
     /// A name can only be registered once per type of index extension.

From 8ad33f57644b9dd6bd3d924b84a1e8a42a4cfe86 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Wed, 18 Mar 2026 20:04:13 -0700
Subject: [PATCH 02/24] feat: pipe type_id through CacheKey to backend

Add type_name()/type_id() to CacheKey and UnsizedCacheKey traits so
backends can identify the type of cached entries. Add parse_cache_key()
utility for backends to extract (user_key, type_id) from opaque key
bytes.

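CacheKey-based methods now pass the key's type_id through to the
backend. Methods that take a bare string key have no CacheKey to ask,
so they derive the id from the value type instead: type_id_of::<T>(),
or type_id_of::<Arc<T>>() for the unsized variants.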

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs | 135 ++++++++++++++++++++++++++++-------
 1 file changed, 109 insertions(+), 26 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 1ea5989647b..0312c84ac8c 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -196,18 +196,26 @@ impl DeepSizeOf for LanceCache {
     }
 }
 
-/// Returns a stable 8-byte discriminator for type `T`.
+/// Returns the type_id for type `T`, derived from the pointer of its
+/// [`std::any::type_name`]. Stable within a single process lifetime.
+pub fn type_id_of<T: 'static>() -> u64 {
+    std::any::type_name::<T>().as_ptr() as u64
+}
+
+/// Cache keys are structured as `user_key\0<8-byte type_id>`.
 ///
-/// Uses the pointer of `std::any::type_name::<T>()`, which is a `&'static str`
-/// with a process-lifetime-stable address. This is unique per monomorphized type
-/// and avoids `transmute` on `TypeId`.
-fn type_tag<T: 'static>() -> [u8; 8] {
-    (std::any::type_name::<T>().as_ptr() as u64).to_le_bytes()
+/// This function splits an opaque cache key into the user-visible portion
+/// and the type_id. Backend implementations can use this to inspect keys.
+pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) {
+    let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap();
+    // Everything before the trailing \0 + 8-byte tag.
+    let user_key = &key[..key.len() - 9];
+    (user_key, u64::from_le_bytes(type_id_bytes))
 }
 
 impl LanceCache {
-    /// Build a key: `prefix/user_key\0<8-byte type tag>`.
-    fn make_key<T: 'static>(&self, key: &str) -> Vec<u8> {
+    /// Build a key: `prefix/user_key\0<8-byte type_id>`.
+    fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec<u8> {
         let full_key = if self.prefix.is_empty() {
             key.to_string()
         } else {
@@ -215,7 +223,7 @@ impl LanceCache {
         };
         let mut bytes = full_key.into_bytes();
         bytes.push(0);
-        bytes.extend_from_slice(&type_tag::<T>());
+        bytes.extend_from_slice(&type_id.to_le_bytes());
         bytes
     }
 
@@ -304,9 +312,14 @@ impl LanceCache {
         self.cache.approx_size_bytes()
     }
 
-    async fn insert<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str, metadata: Arc<T>) {
+    async fn insert_with_id<T: DeepSizeOf + Send + Sync + 'static>(
+        &self,
+        key: &str,
+        type_id: u64,
+        metadata: Arc<T>,
+    ) {
         let size = metadata.deep_size_of() + 8; // +8 for the Arc pointer
-        let cache_key = self.make_key::<T>(key);
+        let cache_key = self.make_key_with_id(key, type_id);
         tracing::trace!(
             target: "lance_cache::insert",
             key = key,
@@ -316,17 +329,35 @@ impl LanceCache {
         self.cache.insert(&cache_key, metadata, size).await;
     }
 
+    #[cfg(test)]
+    async fn insert<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str, metadata: Arc<T>) {
+        self.insert_with_id(key, type_id_of::<T>(), metadata).await
+    }
+
+    async fn insert_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
+        &self,
+        key: &str,
+        type_id: u64,
+        metadata: Arc<T>,
+    ) {
+        self.insert_with_id(key, type_id, Arc::new(metadata)).await
+    }
+
     pub async fn insert_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
         metadata: Arc<T>,
     ) {
-        // Wrap in another Arc to make the data Sized.
-        self.insert(key, Arc::new(metadata)).await
+        self.insert_unsized_with_id(key, type_id_of::<Arc<T>>(), metadata)
+            .await
     }
 
-    async fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
-        let cache_key = self.make_key::<T>(key);
+    async fn get_with_id<T: Send + Sync + 'static>(
+        &self,
+        key: &str,
+        type_id: u64,
+    ) -> Option<Arc<T>> {
+        let cache_key = self.make_key_with_id(key, type_id);
         if let Some(entry) = self.cache.get(&cache_key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
             Some(entry.downcast::<T>().unwrap())
@@ -336,28 +367,42 @@ impl LanceCache {
         }
     }
 
-    pub async fn get_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
+    #[cfg(test)]
+    async fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
+        self.get_with_id(key, type_id_of::<T>()).await
+    }
+
+    async fn get_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
+        type_id: u64,
     ) -> Option<Arc<T>> {
-        let outer = self.get::<Arc<T>>(key).await?;
+        let outer = self.get_with_id::<Arc<T>>(key, type_id).await?;
         Some(outer.as_ref().clone())
     }
 
+    pub async fn get_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
+        &self,
+        key: &str,
+    ) -> Option<Arc<T>> {
+        self.get_unsized_with_id(key, type_id_of::<Arc<T>>()).await
+    }
+
     /// Get an item, or load it if not cached.
     ///
     /// Concurrent calls for the same key are deduplicated: only the first
     /// caller runs the loader; subsequent callers wait for the result.
-    async fn get_or_insert<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
+    async fn get_or_insert_with_id<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
         &self,
         key: String,
+        type_id: u64,
         loader: F,
     ) -> Result<Arc<T>>
     where
         F: FnOnce(&str) -> Fut,
         Fut: Future<Output = Result<T>> + Send,
     {
-        let cache_key = self.make_key::<T>(&key);
+        let cache_key = self.make_key_with_id(&key, type_id);
 
         // Fast path: already cached.
         if let Some(entry) = self.cache.get(&cache_key).await {
@@ -429,6 +474,20 @@ impl LanceCache {
         }
     }
 
+    #[cfg(test)]
+    async fn get_or_insert<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
+        &self,
+        key: String,
+        loader: F,
+    ) -> Result<Arc<T>>
+    where
+        F: FnOnce(&str) -> Fut,
+        Fut: Future<Output = Result<T>> + Send,
+    {
+        self.get_or_insert_with_id(key, type_id_of::<T>(), loader)
+            .await
+    }
+
     pub async fn stats(&self) -> CacheStats {
         CacheStats {
             hits: self.hits.load(Ordering::Relaxed),
@@ -450,7 +509,9 @@ impl LanceCache {
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.insert(&cache_key.key(), metadata).boxed().await
+        self.insert_with_id(&cache_key.key(), cache_key.type_id(), metadata)
+            .boxed()
+            .await
     }
 
     pub async fn get_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
@@ -458,7 +519,9 @@ impl LanceCache {
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.get::<K::ValueType>(&cache_key.key()).boxed().await
+        self.get_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_id())
+            .boxed()
+            .await
     }
 
     pub async fn get_or_insert_with_key<K, F, Fut>(
@@ -472,8 +535,9 @@ impl LanceCache {
         F: FnOnce() -> Fut,
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
+        let type_id = cache_key.type_id();
         let key_str = cache_key.key().into_owned();
-        Box::pin(self.get_or_insert(key_str, |_| loader())).await
+        Box::pin(self.get_or_insert_with_id(key_str, type_id, |_| loader())).await
     }
 
     pub async fn insert_unsized_with_key<K>(&self, cache_key: &K, metadata: Arc<K::ValueType>)
@@ -481,7 +545,7 @@ impl LanceCache {
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.insert_unsized(&cache_key.key(), metadata)
+        self.insert_unsized_with_id(&cache_key.key(), cache_key.type_id(), metadata)
             .boxed()
             .await
     }
@@ -491,7 +555,7 @@ impl LanceCache {
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.get_unsized::<K::ValueType>(&cache_key.key())
+        self.get_unsized_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_id())
             .boxed()
             .await
     }
@@ -532,8 +596,11 @@ impl WeakLanceCache {
         }
     }
 
-    /// Build a key: `prefix/user_key\0<8-byte type tag>`.
     fn make_key<T: 'static>(&self, key: &str) -> Vec<u8> {
+        self.make_key_with_id(key, type_id_of::<T>())
+    }
+
+    fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec<u8> {
         let full_key = if self.prefix.is_empty() {
             key.to_string()
         } else {
@@ -541,7 +608,7 @@ impl WeakLanceCache {
         };
         let mut bytes = full_key.into_bytes();
         bytes.push(0);
-        bytes.extend_from_slice(&type_tag::<T>());
+        bytes.extend_from_slice(&type_id.to_le_bytes());
         bytes
     }
 
@@ -702,12 +769,28 @@ pub trait CacheKey {
     type ValueType;
 
     fn key(&self) -> Cow<'_, str>;
+
+    fn type_name(&self) -> &'static str {
+        std::any::type_name::<Self::ValueType>()
+    }
+
+    fn type_id(&self) -> u64 {
+        self.type_name().as_ptr() as u64
+    }
 }
 
 pub trait UnsizedCacheKey {
     type ValueType: ?Sized;
 
     fn key(&self) -> Cow<'_, str>;
+
+    fn type_name(&self) -> &'static str {
+        std::any::type_name::<Self::ValueType>()
+    }
+
+    fn type_id(&self) -> u64 {
+        self.type_name().as_ptr() as u64
+    }
 }
 
 // ---------------------------------------------------------------------------

From 3d299f8e28bd73791be5d1057727e8a50230a517 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Wed, 18 Mar 2026 22:26:14 -0700
Subject: [PATCH 03/24] fix: address review feedback on pluggable cache

1. Remove #[cfg(test)] convenience methods; tests now use CacheKey
   via a TestKey helper, eliminating the parallel method hierarchy.

2. Fix dedup race condition: re-check the cache while holding the
   in-flight lock so no two tasks can both become leader for the
   same key.

3. Use Arc::try_unwrap on the leader error path to preserve the
   original error type when possible.

4. Make invalidate_prefix async instead of fire-and-forget spawn.

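5. Replace type_name().as_ptr() with a hash of std::any::TypeId for
   type discrimination. The id is a 64-bit hash, so it is only
   meaningful within a single process, and distinct types are told
   apart by hash value rather than by a hard uniqueness guarantee.
   Defined once in type_id_of() and used by the default
   CacheKey::type_id() and UnsizedCacheKey::type_id() implementations.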

6. Add dedup to WeakLanceCache::get_or_insert, sharing the in-flight
   map from the parent LanceCache.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs           | 463 ++++++++++++++-----------
 rust/lance-file/src/previous/reader.rs |   2 +-
 2 files changed, 269 insertions(+), 196 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 0312c84ac8c..437c4f2f6d7 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -13,6 +13,7 @@
 use std::any::Any;
 use std::borrow::Cow;
 use std::collections::HashMap;
+use std::hash::{Hash, Hasher};
 use std::sync::{
     Arc,
     atomic::{AtomicU64, Ordering},
@@ -89,7 +90,7 @@ struct MokaCacheEntry {
 
 /// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache.
 ///
-/// Provides weighted-capacity eviction and concurrent-load deduplication.
+/// Provides weighted-capacity eviction.
 pub struct MokaCacheBackend {
     cache: moka::future::Cache<Vec<u8>, MokaCacheEntry>,
 }
@@ -162,6 +163,35 @@ impl CacheBackend for MokaCacheBackend {
     }
 }
 
+// ---------------------------------------------------------------------------
+// Type identity helpers
+// ---------------------------------------------------------------------------
+
+/// Returns a stable u64 identifier for type `T`, derived from hashing its
+/// [`std::any::TypeId`]. This is guaranteed unique per type within a compilation
+/// unit and stable within a single process lifetime.
+pub fn type_id_of<T: 'static + ?Sized>() -> u64 {
+    let mut hasher = std::collections::hash_map::DefaultHasher::new();
+    std::any::TypeId::of::<T>().hash(&mut hasher);
+    hasher.finish()
+}
+
+/// Cache keys are structured as `user_key\0<8-byte type_id>`.
+///
+/// This function splits an opaque cache key into the user-visible portion
+/// and the type_id. Backend implementations can use this to inspect keys.
+///
+/// # Panics
+///
+/// Panics if `key` is shorter than 9 bytes.
+pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) {
+    assert!(key.len() >= 9, "cache key too short to parse");
+    let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap();
+    // Everything before the trailing \0 + 8-byte tag.
+    let user_key = &key[..key.len() - 9];
+    (user_key, u64::from_le_bytes(type_id_bytes))
+}
+
 // ---------------------------------------------------------------------------
 // LanceCache — typed wrapper around dyn CacheBackend
 // ---------------------------------------------------------------------------
@@ -190,29 +220,11 @@ impl std::fmt::Debug for LanceCache {
 
 impl DeepSizeOf for LanceCache {
     fn deep_size_of_children(&self, _: &mut Context) -> usize {
-        // This is a best-effort estimate; we can't iterate a dyn CacheBackend.
-        // Callers should use stats().size_bytes for accurate numbers.
+        // Can't iterate a dyn CacheBackend; use stats().size_bytes for accurate numbers.
         0
     }
 }
 
-/// Returns the type_id for type `T`, derived from the pointer of its
-/// [`std::any::type_name`]. Stable within a single process lifetime.
-pub fn type_id_of<T: 'static>() -> u64 {
-    std::any::type_name::<T>().as_ptr() as u64
-}
-
-/// Cache keys are structured as `user_key\0<8-byte type_id>`.
-///
-/// This function splits an opaque cache key into the user-visible portion
-/// and the type_id. Backend implementations can use this to inspect keys.
-pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) {
-    let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap();
-    // Everything before the trailing \0 + 8-byte tag.
-    let user_key = &key[..key.len() - 9];
-    (user_key, u64::from_le_bytes(type_id_bytes))
-}
-
 impl LanceCache {
     /// Build a key: `prefix/user_key\0<8-byte type_id>`.
     fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec<u8> {
@@ -282,18 +294,13 @@ impl LanceCache {
         }
     }
 
-    /// Invalidate all entries in the cache that start with the given prefix
+    /// Invalidate all entries in the cache that start with the given prefix.
     ///
     /// The given prefix is appended to the existing prefix of the cache. If you
     /// want to invalidate all at the current prefix, pass an empty string.
-    pub fn invalidate_prefix(&self, prefix: &str) {
+    pub async fn invalidate_prefix(&self, prefix: &str) {
         let prefix_bytes = self.make_prefix(prefix);
-        let cache = self.cache.clone();
-        // Fire-and-forget; moka's invalidate_entries_if is synchronous under the hood
-        // but our trait is async, so we spawn.
-        tokio::spawn(async move {
-            cache.invalidate_prefix(&prefix_bytes).await;
-        });
+        self.cache.invalidate_prefix(&prefix_bytes).await;
     }
 
     pub async fn size(&self) -> usize {
@@ -329,11 +336,6 @@ impl LanceCache {
         self.cache.insert(&cache_key, metadata, size).await;
     }
 
-    #[cfg(test)]
-    async fn insert<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str, metadata: Arc<T>) {
-        self.insert_with_id(key, type_id_of::<T>(), metadata).await
-    }
-
     async fn insert_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
@@ -367,11 +369,6 @@ impl LanceCache {
         }
     }
 
-    #[cfg(test)]
-    async fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
-        self.get_with_id(key, type_id_of::<T>()).await
-    }
-
     async fn get_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
@@ -410,41 +407,48 @@ impl LanceCache {
             return Ok(entry.downcast::<T>().unwrap());
         }
 
+        // Lock the in-flight map. While holding the lock, re-check the cache
+        // to close the race between the fast-path check and registration.
+        let mut map = self.in_flight.lock().await;
+
+        // Another task may have completed the load between our fast-path check
+        // and acquiring this lock.
+        if let Some(entry) = self.cache.get(&cache_key).await {
+            self.hits.fetch_add(1, Ordering::Relaxed);
+            return Ok(entry.downcast::<T>().unwrap());
+        }
+
         // Check for an in-flight load for this key.
-        {
-            let map = self.in_flight.lock().await;
-            if let Some(rx) = map.get(&cache_key) {
-                let mut rx = rx.clone();
-                drop(map);
-                // Wait until the leader finishes.
-                let result = rx
-                    .wait_for(|v| v.is_some())
-                    .await
-                    .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))?
-                    .as_ref()
-                    .unwrap()
-                    .clone();
-                match result {
-                    Ok(entry) => {
-                        self.hits.fetch_add(1, Ordering::Relaxed);
-                        return Ok(entry.downcast::<T>().unwrap());
-                    }
-                    Err(err) => {
-                        self.misses.fetch_add(1, Ordering::Relaxed);
-                        return Err(crate::Error::internal(format!(
-                            "Cache loader failed: {err}"
-                        )));
-                    }
+        if let Some(rx) = map.get(&cache_key) {
+            let mut rx = rx.clone();
+            drop(map);
+            // Wait until the leader finishes.
+            let result = rx
+                .wait_for(|v| v.is_some())
+                .await
+                .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))?
+                .as_ref()
+                .unwrap()
+                .clone();
+            match result {
+                Ok(entry) => {
+                    self.hits.fetch_add(1, Ordering::Relaxed);
+                    return Ok(entry.downcast::<T>().unwrap());
+                }
+                Err(err) => {
+                    self.misses.fetch_add(1, Ordering::Relaxed);
+                    return Err(crate::Error::internal(format!(
+                        "Cache loader failed: {err}"
+                    )));
                 }
             }
         }
 
-        // We are the leader. Register our in-flight entry.
+        // We are the leader. Register our in-flight entry while still holding
+        // the lock, so no other task can slip in between check and register.
         let (tx, rx) = tokio::sync::watch::channel(None);
-        {
-            let mut map = self.in_flight.lock().await;
-            map.insert(cache_key.clone(), rx);
-        }
+        map.insert(cache_key.clone(), rx);
+        drop(map);
 
         self.misses.fetch_add(1, Ordering::Relaxed);
         let result = loader(&key).await;
@@ -467,27 +471,14 @@ impl LanceCache {
             Err(err) => {
                 let shared_err = Arc::new(err);
                 let _ = tx.send(Some(Err(shared_err.clone())));
-                Err(crate::Error::internal(format!(
-                    "Cache loader failed: {shared_err}"
-                )))
+                // Try to recover the original error if we're the sole owner.
+                Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| {
+                    crate::Error::internal(format!("Cache loader failed: {arc}"))
+                }))
             }
         }
     }
 
-    #[cfg(test)]
-    async fn get_or_insert<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
-        &self,
-        key: String,
-        loader: F,
-    ) -> Result<Arc<T>>
-    where
-        F: FnOnce(&str) -> Fut,
-        Fut: Future<Output = Result<T>> + Send,
-    {
-        self.get_or_insert_with_id(key, type_id_of::<T>(), loader)
-            .await
-    }
-
     pub async fn stats(&self) -> CacheStats {
         CacheStats {
             hits: self.hits.load(Ordering::Relaxed),
@@ -573,6 +564,7 @@ pub struct WeakLanceCache {
     prefix: String,
     hits: Arc<AtomicU64>,
     misses: Arc<AtomicU64>,
+    in_flight: Arc<InFlightMap>,
 }
 
 impl WeakLanceCache {
@@ -583,6 +575,7 @@ impl WeakLanceCache {
             prefix: cache.prefix.clone(),
             hits: cache.hits.clone(),
             misses: cache.misses.clone(),
+            in_flight: cache.in_flight.clone(),
         }
     }
 
@@ -593,6 +586,7 @@ impl WeakLanceCache {
             prefix: format!("{}{}/", self.prefix, prefix),
             hits: self.hits.clone(),
             misses: self.misses.clone(),
+            in_flight: self.in_flight.clone(),
         }
     }
 
@@ -643,7 +637,9 @@ impl WeakLanceCache {
         }
     }
 
-    /// Get or insert an item, computing it if necessary
+    /// Get or insert an item, computing it if necessary.
+    ///
+    /// Concurrent calls for the same key are deduplicated.
     pub async fn get_or_insert<T, F, Fut>(&self, key: &str, f: F) -> Result<Arc<T>>
     where
         T: DeepSizeOf + Send + Sync + 'static,
@@ -653,17 +649,73 @@ impl WeakLanceCache {
         if let Some(cache) = self.inner.upgrade() {
             let cache_key = self.make_key::<T>(key);
 
+            // Fast path: already cached.
+            if let Some(entry) = cache.get(&cache_key).await {
+                self.hits.fetch_add(1, Ordering::Relaxed);
+                return Ok(entry.downcast::<T>().unwrap());
+            }
+
+            // Lock in-flight map. Re-check cache under lock to close the race.
+            let mut map = self.in_flight.lock().await;
+
             if let Some(entry) = cache.get(&cache_key).await {
                 self.hits.fetch_add(1, Ordering::Relaxed);
                 return Ok(entry.downcast::<T>().unwrap());
             }
 
+            if let Some(rx) = map.get(&cache_key) {
+                let mut rx = rx.clone();
+                drop(map);
+                let result = rx
+                    .wait_for(|v| v.is_some())
+                    .await
+                    .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))?
+                    .as_ref()
+                    .unwrap()
+                    .clone();
+                match result {
+                    Ok(entry) => {
+                        self.hits.fetch_add(1, Ordering::Relaxed);
+                        return Ok(entry.downcast::<T>().unwrap());
+                    }
+                    Err(err) => {
+                        self.misses.fetch_add(1, Ordering::Relaxed);
+                        return Err(crate::Error::internal(format!(
+                            "Cache loader failed: {err}"
+                        )));
+                    }
+                }
+            }
+
+            // We are the leader.
+            let (tx, rx) = tokio::sync::watch::channel(None);
+            map.insert(cache_key.clone(), rx);
+            drop(map);
+
             self.misses.fetch_add(1, Ordering::Relaxed);
-            let value = f().await?;
-            let arc = Arc::new(value);
-            let size = arc.deep_size_of() + 8;
-            cache.insert(&cache_key, arc.clone(), size).await;
-            Ok(arc)
+            let result = f().await;
+
+            {
+                let mut map = self.in_flight.lock().await;
+                map.remove(&cache_key);
+            }
+
+            match result {
+                Ok(value) => {
+                    let arc = Arc::new(value);
+                    let size = arc.deep_size_of() + 8;
+                    cache.insert(&cache_key, arc.clone(), size).await;
+                    let _ = tx.send(Some(Ok(arc.clone() as CacheEntry)));
+                    Ok(arc)
+                }
+                Err(err) => {
+                    let shared_err = Arc::new(err);
+                    let _ = tx.send(Some(Err(shared_err.clone())));
+                    Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| {
+                        crate::Error::internal(format!("Cache loader failed: {arc}"))
+                    }))
+                }
+            }
         } else {
             log::warn!("WeakLanceCache: cache no longer available, computing without caching");
             f().await.map(Arc::new)
@@ -766,21 +818,24 @@ impl WeakLanceCache {
 // ---------------------------------------------------------------------------
 
 pub trait CacheKey {
-    type ValueType;
+    type ValueType: 'static;
 
     fn key(&self) -> Cow<'_, str>;
 
+    /// Human-readable type name, for debugging and diagnostics.
     fn type_name(&self) -> &'static str {
         std::any::type_name::<Self::ValueType>()
     }
 
+    /// Stable numeric identifier used for key discrimination in the cache.
+    /// Derived from [`type_id_of`] by default.
     fn type_id(&self) -> u64 {
-        self.type_name().as_ptr() as u64
+        type_id_of::<Self::ValueType>()
     }
 }
 
 pub trait UnsizedCacheKey {
-    type ValueType: ?Sized;
+    type ValueType: 'static + ?Sized;
 
     fn key(&self) -> Cow<'_, str>;
 
@@ -789,7 +844,7 @@ pub trait UnsizedCacheKey {
     }
 
     fn type_id(&self) -> u64 {
-        self.type_name().as_ptr() as u64
+        type_id_of::<Self::ValueType>()
     }
 }
 
@@ -834,30 +889,59 @@ impl CacheStats {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::marker::PhantomData;
+
+    /// Test helper: a simple CacheKey for bare string keys.
+    struct TestKey<T: 'static> {
+        key: String,
+        _phantom: PhantomData<T>,
+    }
+
+    impl<T: 'static> TestKey<T> {
+        fn new(key: &str) -> Self {
+            Self {
+                key: key.to_string(),
+                _phantom: PhantomData,
+            }
+        }
+    }
+
+    impl<T: 'static> CacheKey for TestKey<T> {
+        type ValueType = T;
+        fn key(&self) -> Cow<'_, str> {
+            Cow::Borrowed(&self.key)
+        }
+    }
 
     #[tokio::test]
     async fn test_cache_bytes() {
         let item = Arc::new(vec![1, 2, 3]);
-        let item_size = item.deep_size_of(); // Size of Arc<Vec<i32>>
+        let item_size = item.deep_size_of();
         let capacity = 10 * item_size;
 
         let cache = LanceCache::with_capacity(capacity);
 
         let item = Arc::new(vec![1, 2, 3]);
-        cache.insert("key", item.clone()).await;
+        cache
+            .insert_with_key(&TestKey::<Vec<i32>>::new("key"), item.clone())
+            .await;
         assert_eq!(cache.size().await, 1);
 
-        let retrieved = cache.get::<Vec<i32>>("key").await.unwrap();
+        let retrieved = cache
+            .get_with_key(&TestKey::<Vec<i32>>::new("key"))
+            .await
+            .unwrap();
         assert_eq!(*retrieved, *item);
 
         // Test eviction based on size
         for i in 0..20 {
             cache
-                .insert(&format!("key_{}", i), Arc::new(vec![i, i, i]))
+                .insert_with_key(
+                    &TestKey::<Vec<i32>>::new(&format!("key_{}", i)),
+                    Arc::new(vec![i, i, i]),
+                )
                 .await;
         }
-        // Moka evicts based on weighted size; after run_pending_tasks, the size
-        // should be bounded by capacity.
         assert!(cache.size_bytes().await <= capacity);
     }
 
@@ -891,39 +975,52 @@ mod tests {
     async fn test_cache_stats_basic() {
         let cache = LanceCache::with_capacity(1000);
 
-        // Initially no hits or misses
         let stats = cache.stats().await;
         assert_eq!(stats.hits, 0);
         assert_eq!(stats.misses, 0);
 
-        // Miss on first get
-        let result = cache.get::<Vec<i32>>("nonexistent");
-        assert!(result.await.is_none());
+        // Miss
+        assert!(
+            cache
+                .get_with_key(&TestKey::<Vec<i32>>::new("nonexistent"))
+                .await
+                .is_none()
+        );
         let stats = cache.stats().await;
         assert_eq!(stats.hits, 0);
         assert_eq!(stats.misses, 1);
 
-        // Insert and then hit
-        cache.insert("key1", Arc::new(vec![1, 2, 3])).await;
-        let result = cache.get::<Vec<i32>>("key1");
-        assert!(result.await.is_some());
+        // Insert then hit
+        cache
+            .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3]))
+            .await;
+        assert!(
+            cache
+                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+                .await
+                .is_some()
+        );
         let stats = cache.stats().await;
         assert_eq!(stats.hits, 1);
         assert_eq!(stats.misses, 1);
 
         // Another hit
-        let result = cache.get::<Vec<i32>>("key1");
-        assert!(result.await.is_some());
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 2);
-        assert_eq!(stats.misses, 1);
+        assert!(
+            cache
+                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+                .await
+                .is_some()
+        );
+        assert_eq!(cache.stats().await.hits, 2);
 
         // Another miss
-        let result = cache.get::<Vec<i32>>("nonexistent2");
-        assert!(result.await.is_none());
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 2);
-        assert_eq!(stats.misses, 2);
+        assert!(
+            cache
+                .get_with_key(&TestKey::<Vec<i32>>::new("nonexistent2"))
+                .await
+                .is_none()
+        );
+        assert_eq!(cache.stats().await.misses, 2);
     }
 
     #[tokio::test]
@@ -931,41 +1028,30 @@ mod tests {
         let base_cache = LanceCache::with_capacity(1000);
         let prefixed_cache = base_cache.with_key_prefix("test");
 
-        // Stats should be shared between base and prefixed cache
-        let stats = base_cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 0);
-
-        let stats = prefixed_cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 0);
+        assert_eq!(base_cache.stats().await.hits, 0);
+        assert_eq!(prefixed_cache.stats().await.misses, 0);
 
         // Miss on prefixed cache
-        let result = prefixed_cache.get::<Vec<i32>>("key1");
-        assert!(result.await.is_none());
-
-        // Both should show the miss
-        let stats = base_cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 1);
-
-        let stats = prefixed_cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 1);
+        assert!(
+            prefixed_cache
+                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+                .await
+                .is_none()
+        );
+        assert_eq!(base_cache.stats().await.misses, 1);
+        assert_eq!(prefixed_cache.stats().await.misses, 1);
 
         // Insert through prefixed cache and hit
-        prefixed_cache.insert("key1", Arc::new(vec![1, 2, 3])).await;
-        let result = prefixed_cache.get::<Vec<i32>>("key1");
-        assert!(result.await.is_some());
-
-        // Both should show the hit
-        let stats = base_cache.stats().await;
-        assert_eq!(stats.hits, 1);
-        assert_eq!(stats.misses, 1);
-
-        let stats = prefixed_cache.stats().await;
-        assert_eq!(stats.hits, 1);
-        assert_eq!(stats.misses, 1);
+        prefixed_cache
+            .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3]))
+            .await;
+        assert!(
+            prefixed_cache
+                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+                .await
+                .is_some()
+        );
+        assert_eq!(base_cache.stats().await.hits, 1);
     }
 
     #[tokio::test]
@@ -979,71 +1065,55 @@ mod tests {
 
         let cache = LanceCache::with_capacity(1000);
 
-        // Miss on unsized get
-        let result = cache.get_unsized::<dyn MyTrait>("test");
-        assert!(result.await.is_none());
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 1);
+        assert!(cache.get_unsized::<dyn MyTrait>("test").await.is_none());
+        assert_eq!(cache.stats().await.misses, 1);
 
-        // Insert and hit on unsized
-        let item = Arc::new(MyType(42));
-        let item_dyn: Arc<dyn MyTrait> = item;
-        cache.insert_unsized("test", item_dyn).await;
+        let item: Arc<dyn MyTrait> = Arc::new(MyType(42));
+        cache.insert_unsized("test", item).await;
 
-        let result = cache.get_unsized::<dyn MyTrait>("test");
-        assert!(result.await.is_some());
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 1);
-        assert_eq!(stats.misses, 1);
+        assert!(cache.get_unsized::<dyn MyTrait>("test").await.is_some());
+        assert_eq!(cache.stats().await.hits, 1);
     }
 
     #[tokio::test]
     async fn test_cache_stats_get_or_insert() {
         let cache = LanceCache::with_capacity(1000);
 
-        // First call should be a miss and load the value
+        // First call: miss
         let result: Arc<Vec<i32>> = cache
-            .get_or_insert("key1".to_string(), |_key| async { Ok(vec![1, 2, 3]) })
+            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("key1"), || async {
+                Ok(vec![1, 2, 3])
+            })
             .await
             .unwrap();
         assert_eq!(*result, vec![1, 2, 3]);
+        assert_eq!(cache.stats().await.misses, 1);
 
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 1);
-
-        // Second call should be a hit
+        // Second call: hit
         let result: Arc<Vec<i32>> = cache
-            .get_or_insert("key1".to_string(), |_key| async {
+            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("key1"), || async {
                 panic!("Should not be called")
             })
             .await
             .unwrap();
         assert_eq!(*result, vec![1, 2, 3]);
+        assert_eq!(cache.stats().await.hits, 1);
 
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 1);
-        assert_eq!(stats.misses, 1);
-
-        // Different key should be another miss
+        // Different key: miss
         let result: Arc<Vec<i32>> = cache
-            .get_or_insert("key2".to_string(), |_key| async { Ok(vec![4, 5, 6]) })
+            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("key2"), || async {
+                Ok(vec![4, 5, 6])
+            })
             .await
             .unwrap();
         assert_eq!(*result, vec![4, 5, 6]);
-
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 1);
-        assert_eq!(stats.misses, 2);
+        assert_eq!(cache.stats().await.misses, 2);
     }
 
     #[tokio::test]
     async fn test_custom_backend() {
-        use std::collections::HashMap;
         use tokio::sync::Mutex;
 
-        /// A simple HashMap-based cache backend for testing.
         #[derive(Debug)]
         struct HashMapBackend {
             map: Mutex<HashMap<Vec<u8>, (CacheEntry, usize)>>,
@@ -1090,16 +1160,23 @@ mod tests {
         let backend = Arc::new(HashMapBackend::new());
         let cache = LanceCache::with_backend(backend);
 
-        // Insert and retrieve
-        cache.insert("key1", Arc::new(vec![1, 2, 3])).await;
-        let retrieved = cache.get::<Vec<i32>>("key1").await.unwrap();
+        cache
+            .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3]))
+            .await;
+        let retrieved = cache
+            .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+            .await
+            .unwrap();
         assert_eq!(*retrieved, vec![1, 2, 3]);
 
         // Miss for different type at same key
-        let miss = cache.get::<Vec<u8>>("key1").await;
-        assert!(miss.is_none());
+        assert!(
+            cache
+                .get_with_key(&TestKey::<Vec<u8>>::new("key1"))
+                .await
+                .is_none()
+        );
 
-        // Stats tracking works
         let stats = cache.stats().await;
         assert_eq!(stats.hits, 1);
         assert_eq!(stats.misses, 1);
@@ -1113,7 +1190,6 @@ mod tests {
         let load_count = Arc::new(AtomicUsize::new(0));
         let cache = LanceCache::with_capacity(10000);
 
-        // Launch several concurrent get_or_insert calls for the same key.
         let (barrier_tx, _) = tokio::sync::broadcast::channel::<()>(1);
         let mut handles = Vec::new();
         for _ in 0..5 {
@@ -1123,11 +1199,10 @@ mod tests {
             handles.push(tokio::spawn(async move {
                 barrier_rx.recv().await.ok();
                 cache
-                    .get_or_insert("key".to_string(), |_key| {
+                    .get_or_insert_with_key(TestKey::<Vec<i32>>::new("key"), || {
                         let load_count = load_count.clone();
                         async move {
                             load_count.fetch_add(1, Ordering::SeqCst);
-                            // Simulate slow load so other tasks can pile up.
                             tokio::task::yield_now().await;
                             Ok(vec![1, 2, 3])
                         }
@@ -1135,14 +1210,12 @@ mod tests {
                     .await
             }));
         }
-        // Release all tasks at once.
         barrier_tx.send(()).unwrap();
         for h in handles {
             let result: Arc<Vec<i32>> = h.await.unwrap().unwrap();
             assert_eq!(*result, vec![1, 2, 3]);
         }
 
-        // The loader should have run exactly once.
         assert_eq!(load_count.load(Ordering::SeqCst), 1);
     }
 }
diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs
index 863aca1afc6..a11d5be988f 100644
--- a/rust/lance-file/src/previous/reader.rs
+++ b/rust/lance-file/src/previous/reader.rs
@@ -83,7 +83,7 @@ impl<'a, T> StringCacheKey<'a, T> {
     }
 }
 
-impl<T> CacheKey for StringCacheKey<'_, T> {
+impl<T: 'static> CacheKey for StringCacheKey<'_, T> {
     type ValueType = T;
 
     fn key(&self) -> Cow<'_, str> {

From 00867ad45d96a13e28a8a696bd073fd2ac51b676 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 09:37:28 -0700
Subject: [PATCH 04/24] refactor: move get_or_insert dedup into CacheBackend

Address feedback:

1. Move get_or_insert() onto CacheBackend. The method takes a pinned
   future (not a closure), so LanceCache can type-erase the user's
   non-'static loader before passing it to the backend. Default impl
   does simple get-then-insert; MokaCacheBackend uses moka's built-in
   optionally_get_with for dedup. This eliminates duplicated dedup
   logic and the manual watch-channel machinery.

2. Restore type_name().as_ptr() for type_id derivation on CacheKey.
   Remove standalone type_id_of() function. The derivation lives in
   one place: CacheKey::type_id()/UnsizedCacheKey::type_id().

3. Remove approx_size_bytes from CacheBackend trait and Session debug
   output. Only approx_num_entries remains.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs           | 626 ++++++++-----------------
 rust/lance-file/src/previous/reader.rs |   2 +-
 rust/lance/src/session.rs              |   6 +-
 3 files changed, 199 insertions(+), 435 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 437c4f2f6d7..418efe151e6 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -12,8 +12,7 @@
 
 use std::any::Any;
 use std::borrow::Cow;
-use std::collections::HashMap;
-use std::hash::{Hash, Hasher};
+use std::pin::Pin;
 use std::sync::{
     Arc,
     atomic::{AtomicU64, Ordering},
@@ -21,17 +20,11 @@ use std::sync::{
 
 use async_trait::async_trait;
 use futures::{Future, FutureExt};
-use tokio::sync::Mutex;
 
 use crate::Result;
 
 pub use deepsize::{Context, DeepSizeOf};
 
-/// Result type used in the in-flight dedup map. Wraps errors in Arc so the
-/// result can be cloned to multiple waiters.
-type InFlightResult = std::result::Result<CacheEntry, Arc<crate::Error>>;
-type InFlightMap = Mutex<HashMap<Vec<u8>, tokio::sync::watch::Receiver<Option<InFlightResult>>>>;
-
 /// A type-erased cache entry.
 pub type CacheEntry = Arc<dyn Any + Send + Sync>;
 
@@ -52,6 +45,28 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     /// Store an entry. `size_bytes` is used for eviction accounting.
     async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize);
 
+    /// Get an existing entry or compute it from `loader`.
+    ///
+    /// Implementations should deduplicate concurrent loads for the same key
+    /// so the loader runs at most once. The default implementation does a
+    /// simple get-then-insert without deduplication.
+    ///
+    /// The loader is a pinned future that produces `(entry, size_bytes)`.
+    /// It borrows from the caller's scope and will be `.await`ed within
+    /// this method — implementations must not store it beyond the call.
+    async fn get_or_insert<'a>(
+        &self,
+        key: &[u8],
+        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
+    ) -> Result<CacheEntry> {
+        if let Some(entry) = self.get(key).await {
+            return Ok(entry);
+        }
+        let (entry, size) = loader.await?;
+        self.insert(key, entry.clone(), size).await;
+        Ok(entry)
+    }
+
     /// Remove all entries whose key starts with `prefix`.
     async fn invalidate_prefix(&self, prefix: &[u8]);
 
@@ -69,12 +84,6 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     fn approx_num_entries(&self) -> usize {
         0
     }
-
-    /// Approximate weighted size in bytes, callable from synchronous contexts.
-    /// Backends that cannot provide this cheaply should return 0.
-    fn approx_size_bytes(&self) -> usize {
-        0
-    }
 }
 
 // ---------------------------------------------------------------------------
@@ -90,7 +99,8 @@ struct MokaCacheEntry {
 
 /// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache.
 ///
-/// Provides weighted-capacity eviction.
+/// Provides weighted-capacity eviction and concurrent-load deduplication
+/// via moka's built-in `optionally_get_with`.
 pub struct MokaCacheBackend {
     cache: moka::future::Cache<Vec<u8>, MokaCacheEntry>,
 }
@@ -132,6 +142,37 @@ impl CacheBackend for MokaCacheBackend {
             .await;
     }
 
+    async fn get_or_insert<'a>(
+        &self,
+        key: &[u8],
+        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
+    ) -> Result<CacheEntry> {
+        // Use moka's built-in dedup: optionally_get_with runs the init future
+        // at most once per key, even under concurrent access.
+        let (error_tx, error_rx) = tokio::sync::oneshot::channel();
+
+        let init = async move {
+            match loader.await {
+                Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }),
+                Err(e) => {
+                    let _ = error_tx.send(e);
+                    None
+                }
+            }
+        };
+
+        let owned_key = key.to_vec();
+        match self.cache.optionally_get_with(owned_key, init).await {
+            Some(record) => Ok(record.entry),
+            None => match error_rx.await {
+                Ok(err) => Err(err),
+                Err(_) => Err(crate::Error::internal(
+                    "Failed to retrieve error from cache loader",
+                )),
+            },
+        }
+    }
+
     async fn invalidate_prefix(&self, prefix: &[u8]) {
         let prefix = prefix.to_vec();
         self.cache
@@ -157,23 +198,19 @@ impl CacheBackend for MokaCacheBackend {
     fn approx_num_entries(&self) -> usize {
         self.cache.entry_count() as usize
     }
-
-    fn approx_size_bytes(&self) -> usize {
-        self.cache.weighted_size() as usize
-    }
 }
 
 // ---------------------------------------------------------------------------
 // Type identity helpers
 // ---------------------------------------------------------------------------
 
-/// Returns a stable u64 identifier for type `T`, derived from hashing its
-/// [`std::any::TypeId`]. This is guaranteed unique per type within a compilation
-/// unit and stable within a single process lifetime.
-pub fn type_id_of<T: 'static + ?Sized>() -> u64 {
-    let mut hasher = std::collections::hash_map::DefaultHasher::new();
-    std::any::TypeId::of::<T>().hash(&mut hasher);
-    hasher.finish()
+/// Derives a process-local type tag from `type_name::<T>()`.
+///
+/// Uses the pointer of the `&'static str` returned by [`std::any::type_name`].
+/// NOTE(review): Rust does not guarantee this pointer is unique per type or
+/// identical across crates/codegen units; treat the tag as best-effort only.
+fn type_tag<T: 'static + ?Sized>() -> u64 {
+    std::any::type_name::<T>().as_ptr() as u64
 }
 
 /// Cache keys are structured as `user_key\0<8-byte type_id>`.
@@ -187,11 +224,23 @@ pub fn type_id_of<T: 'static + ?Sized>() -> u64 {
 pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) {
     assert!(key.len() >= 9, "cache key too short to parse");
     let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap();
-    // Everything before the trailing \0 + 8-byte tag.
     let user_key = &key[..key.len() - 9];
     (user_key, u64::from_le_bytes(type_id_bytes))
 }
 
+/// Build a key: `prefix/user_key\0<8-byte type_id>` (a prefix ending in `/` yields `//`).
+fn make_cache_key(prefix: &str, key: &str, type_id: u64) -> Vec<u8> {
+    let full_key = if prefix.is_empty() {
+        key.to_string()
+    } else {
+        format!("{}/{}", prefix, key)
+    };
+    let mut bytes = full_key.into_bytes();
+    bytes.push(0);
+    bytes.extend_from_slice(&type_id.to_le_bytes());
+    bytes
+}
+
 // ---------------------------------------------------------------------------
 // LanceCache — typed wrapper around dyn CacheBackend
 // ---------------------------------------------------------------------------
@@ -206,8 +255,6 @@ pub struct LanceCache {
     prefix: String,
     hits: Arc<AtomicU64>,
     misses: Arc<AtomicU64>,
-    /// Deduplicates concurrent `get_or_insert` calls for the same key.
-    in_flight: Arc<InFlightMap>,
 }
 
 impl std::fmt::Debug for LanceCache {
@@ -225,26 +272,6 @@ impl DeepSizeOf for LanceCache {
     }
 }
 
-impl LanceCache {
-    /// Build a key: `prefix/user_key\0<8-byte type_id>`.
-    fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec<u8> {
-        let full_key = if self.prefix.is_empty() {
-            key.to_string()
-        } else {
-            format!("{}/{}", self.prefix, key)
-        };
-        let mut bytes = full_key.into_bytes();
-        bytes.push(0);
-        bytes.extend_from_slice(&type_id.to_le_bytes());
-        bytes
-    }
-
-    /// Build a prefix (without type tag) for invalidation.
-    fn make_prefix(&self, prefix: &str) -> Vec<u8> {
-        format!("{}{}", self.prefix, prefix).into_bytes()
-    }
-}
-
 impl LanceCache {
     pub fn with_capacity(capacity: usize) -> Self {
         Self {
@@ -252,7 +279,6 @@ impl LanceCache {
             prefix: String::new(),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
-            in_flight: Arc::new(Mutex::new(HashMap::new())),
         }
     }
 
@@ -263,7 +289,6 @@ impl LanceCache {
             prefix: String::new(),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
-            in_flight: Arc::new(Mutex::new(HashMap::new())),
         }
     }
 
@@ -273,33 +298,22 @@ impl LanceCache {
             prefix: String::new(),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
-            in_flight: Arc::new(Mutex::new(HashMap::new())),
         }
     }
 
-    /// Appends a prefix to the cache key
-    ///
-    /// If this cache already has a prefix, the new prefix will be appended to
-    /// the existing one.
-    ///
-    /// Prefixes are used to create a namespace for the cache keys to avoid
-    /// collisions between different caches.
+    /// Appends a prefix to the cache key.
     pub fn with_key_prefix(&self, prefix: &str) -> Self {
         Self {
             cache: self.cache.clone(),
             prefix: format!("{}{}/", self.prefix, prefix),
             hits: self.hits.clone(),
             misses: self.misses.clone(),
-            in_flight: self.in_flight.clone(),
         }
     }
 
-    /// Invalidate all entries in the cache that start with the given prefix.
-    ///
-    /// The given prefix is appended to the existing prefix of the cache. If you
-    /// want to invalidate all at the current prefix, pass an empty string.
+    /// Invalidate all entries whose key starts with the given prefix.
     pub async fn invalidate_prefix(&self, prefix: &str) {
-        let prefix_bytes = self.make_prefix(prefix);
+        let prefix_bytes = format!("{}{}", self.prefix, prefix).into_bytes();
         self.cache.invalidate_prefix(&prefix_bytes).await;
     }
 
@@ -315,9 +329,7 @@ impl LanceCache {
         self.cache.size_bytes().await
     }
 
-    pub fn approx_size_bytes(&self) -> usize {
-        self.cache.approx_size_bytes()
-    }
+    // -- Sized insert/get (internal, used by CacheKey methods) ----------------
 
     async fn insert_with_id<T: DeepSizeOf + Send + Sync + 'static>(
         &self,
@@ -325,17 +337,58 @@ impl LanceCache {
         type_id: u64,
         metadata: Arc<T>,
     ) {
-        let size = metadata.deep_size_of() + 8; // +8 for the Arc pointer
-        let cache_key = self.make_key_with_id(key, type_id);
-        tracing::trace!(
-            target: "lance_cache::insert",
-            key = key,
-            type_id = std::any::type_name::<T>(),
-            size = size,
-        );
+        let size = metadata.deep_size_of() + 8;
+        let cache_key = make_cache_key(&self.prefix, key, type_id);
         self.cache.insert(&cache_key, metadata, size).await;
     }
 
+    async fn get_with_id<T: Send + Sync + 'static>(
+        &self,
+        key: &str,
+        type_id: u64,
+    ) -> Option<Arc<T>> {
+        let cache_key = make_cache_key(&self.prefix, key, type_id);
+        if let Some(entry) = self.cache.get(&cache_key).await {
+            self.hits.fetch_add(1, Ordering::Relaxed);
+            Some(entry.downcast::<T>().unwrap())
+        } else {
+            self.misses.fetch_add(1, Ordering::Relaxed);
+            None
+        }
+    }
+
+    /// Get-or-load through the backend; a hit is recorded only if `loader` never ran.
+    async fn get_or_insert_with_id<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
+        &self,
+        key: &str,
+        type_id: u64,
+        loader: F,
+    ) -> Result<Arc<T>>
+    where
+        F: FnOnce() -> Fut + Send,
+        Fut: Future<Output = Result<T>> + Send,
+    {
+        let cache_key = make_cache_key(&self.prefix, key, type_id);
+        // Set when our loader starts; lets us tell a cache hit from a fresh load.
+        let loader_ran = std::sync::atomic::AtomicBool::new(false);
+        let loader_ran_ref = &loader_ran;
+        // Type-erase the loader into a pinned future for the backend.
+        let typed_loader = Box::pin(async move {
+            loader_ran_ref.store(true, Ordering::Relaxed);
+            let value = loader().await?;
+            let arc = Arc::new(value);
+            let size = arc.deep_size_of() + 8;
+            Ok((arc as CacheEntry, size))
+        });
+        let result = self.cache.get_or_insert(&cache_key, typed_loader).await;
+        let is_hit = result.is_ok() && !loader_ran.load(Ordering::Relaxed);
+        let counter = if is_hit { &self.hits } else { &self.misses };
+        counter.fetch_add(1, Ordering::Relaxed);
+        Ok(result?.downcast::<T>().unwrap())
+    }
+
+    // -- Unsized insert/get ---------------------------------------------------
+
     async fn insert_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
@@ -350,25 +403,10 @@ impl LanceCache {
         key: &str,
         metadata: Arc<T>,
     ) {
-        self.insert_unsized_with_id(key, type_id_of::<Arc<T>>(), metadata)
+        self.insert_unsized_with_id(key, type_tag::<Arc<T>>(), metadata)
             .await
     }
 
-    async fn get_with_id<T: Send + Sync + 'static>(
-        &self,
-        key: &str,
-        type_id: u64,
-    ) -> Option<Arc<T>> {
-        let cache_key = self.make_key_with_id(key, type_id);
-        if let Some(entry) = self.cache.get(&cache_key).await {
-            self.hits.fetch_add(1, Ordering::Relaxed);
-            Some(entry.downcast::<T>().unwrap())
-        } else {
-            self.misses.fetch_add(1, Ordering::Relaxed);
-            None
-        }
-    }
-
     async fn get_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
@@ -382,102 +420,10 @@ impl LanceCache {
         &self,
         key: &str,
     ) -> Option<Arc<T>> {
-        self.get_unsized_with_id(key, type_id_of::<Arc<T>>()).await
+        self.get_unsized_with_id(key, type_tag::<Arc<T>>()).await
     }
 
-    /// Get an item, or load it if not cached.
-    ///
-    /// Concurrent calls for the same key are deduplicated: only the first
-    /// caller runs the loader; subsequent callers wait for the result.
-    async fn get_or_insert_with_id<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
-        &self,
-        key: String,
-        type_id: u64,
-        loader: F,
-    ) -> Result<Arc<T>>
-    where
-        F: FnOnce(&str) -> Fut,
-        Fut: Future<Output = Result<T>> + Send,
-    {
-        let cache_key = self.make_key_with_id(&key, type_id);
-
-        // Fast path: already cached.
-        if let Some(entry) = self.cache.get(&cache_key).await {
-            self.hits.fetch_add(1, Ordering::Relaxed);
-            return Ok(entry.downcast::<T>().unwrap());
-        }
-
-        // Lock the in-flight map. While holding the lock, re-check the cache
-        // to close the race between the fast-path check and registration.
-        let mut map = self.in_flight.lock().await;
-
-        // Another task may have completed the load between our fast-path check
-        // and acquiring this lock.
-        if let Some(entry) = self.cache.get(&cache_key).await {
-            self.hits.fetch_add(1, Ordering::Relaxed);
-            return Ok(entry.downcast::<T>().unwrap());
-        }
-
-        // Check for an in-flight load for this key.
-        if let Some(rx) = map.get(&cache_key) {
-            let mut rx = rx.clone();
-            drop(map);
-            // Wait until the leader finishes.
-            let result = rx
-                .wait_for(|v| v.is_some())
-                .await
-                .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))?
-                .as_ref()
-                .unwrap()
-                .clone();
-            match result {
-                Ok(entry) => {
-                    self.hits.fetch_add(1, Ordering::Relaxed);
-                    return Ok(entry.downcast::<T>().unwrap());
-                }
-                Err(err) => {
-                    self.misses.fetch_add(1, Ordering::Relaxed);
-                    return Err(crate::Error::internal(format!(
-                        "Cache loader failed: {err}"
-                    )));
-                }
-            }
-        }
-
-        // We are the leader. Register our in-flight entry while still holding
-        // the lock, so no other task can slip in between check and register.
-        let (tx, rx) = tokio::sync::watch::channel(None);
-        map.insert(cache_key.clone(), rx);
-        drop(map);
-
-        self.misses.fetch_add(1, Ordering::Relaxed);
-        let result = loader(&key).await;
-
-        // Clean up the in-flight entry before sending, so new arrivals
-        // go through the normal cache path.
-        {
-            let mut map = self.in_flight.lock().await;
-            map.remove(&cache_key);
-        }
-
-        match result {
-            Ok(value) => {
-                let arc = Arc::new(value);
-                let size = arc.deep_size_of() + 8;
-                self.cache.insert(&cache_key, arc.clone(), size).await;
-                let _ = tx.send(Some(Ok(arc.clone() as CacheEntry)));
-                Ok(arc)
-            }
-            Err(err) => {
-                let shared_err = Arc::new(err);
-                let _ = tx.send(Some(Err(shared_err.clone())));
-                // Try to recover the original error if we're the sole owner.
-                Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| {
-                    crate::Error::internal(format!("Cache loader failed: {arc}"))
-                }))
-            }
-        }
-    }
+    // -- Stats / clear --------------------------------------------------------
 
     pub async fn stats(&self) -> CacheStats {
         CacheStats {
@@ -494,7 +440,8 @@ impl LanceCache {
         self.misses.store(0, Ordering::Relaxed);
     }
 
-    // CacheKey-based methods
+    // -- CacheKey-based methods -----------------------------------------------
+
     pub async fn insert_with_key<K>(&self, cache_key: &K, metadata: Arc<K::ValueType>)
     where
         K: CacheKey,
@@ -523,12 +470,12 @@ impl LanceCache {
     where
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
-        F: FnOnce() -> Fut,
+        F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
         let type_id = cache_key.type_id();
         let key_str = cache_key.key().into_owned();
-        Box::pin(self.get_or_insert_with_id(key_str, type_id, |_| loader())).await
+        Box::pin(self.get_or_insert_with_id(&key_str, type_id, loader)).await
     }
 
     pub async fn insert_unsized_with_key<K>(&self, cache_key: &K, metadata: Arc<K::ValueType>)
@@ -564,52 +511,30 @@ pub struct WeakLanceCache {
     prefix: String,
     hits: Arc<AtomicU64>,
     misses: Arc<AtomicU64>,
-    in_flight: Arc<InFlightMap>,
 }
 
 impl WeakLanceCache {
-    /// Create a weak reference from a strong LanceCache
     pub fn from(cache: &LanceCache) -> Self {
         Self {
             inner: Arc::downgrade(&cache.cache),
             prefix: cache.prefix.clone(),
             hits: cache.hits.clone(),
             misses: cache.misses.clone(),
-            in_flight: cache.in_flight.clone(),
         }
     }
 
-    /// Appends a prefix to the cache key
     pub fn with_key_prefix(&self, prefix: &str) -> Self {
         Self {
             inner: self.inner.clone(),
             prefix: format!("{}{}/", self.prefix, prefix),
             hits: self.hits.clone(),
             misses: self.misses.clone(),
-            in_flight: self.in_flight.clone(),
         }
     }
 
-    fn make_key<T: 'static>(&self, key: &str) -> Vec<u8> {
-        self.make_key_with_id(key, type_id_of::<T>())
-    }
-
-    fn make_key_with_id(&self, key: &str, type_id: u64) -> Vec<u8> {
-        let full_key = if self.prefix.is_empty() {
-            key.to_string()
-        } else {
-            format!("{}/{}", self.prefix, key)
-        };
-        let mut bytes = full_key.into_bytes();
-        bytes.push(0);
-        bytes.extend_from_slice(&type_id.to_le_bytes());
-        bytes
-    }
-
-    /// Get an item from cache if the cache is still alive
     pub async fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
         let cache = self.inner.upgrade()?;
-        let cache_key = self.make_key::<T>(key);
+        let cache_key = make_cache_key(&self.prefix, key, type_tag::<T>());
         if let Some(entry) = cache.get(&cache_key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
             Some(entry.downcast::<T>().unwrap())
@@ -619,8 +544,6 @@ impl WeakLanceCache {
         }
     }
 
-    /// Insert an item if the cache is still alive
-    /// Returns true if the item was inserted, false if the cache is no longer available
     pub async fn insert<T: DeepSizeOf + Send + Sync + 'static>(
         &self,
         key: &str,
@@ -628,7 +551,7 @@ impl WeakLanceCache {
     ) -> bool {
         if let Some(cache) = self.inner.upgrade() {
             let size = value.deep_size_of() + 8;
-            let cache_key = self.make_key::<T>(key);
+            let cache_key = make_cache_key(&self.prefix, key, type_tag::<T>());
             cache.insert(&cache_key, value, size).await;
             true
         } else {
@@ -639,90 +562,30 @@ impl WeakLanceCache {
 
     /// Get or insert an item, computing it if necessary.
     ///
-    /// Concurrent calls for the same key are deduplicated.
+    /// Deduplication of concurrent loads is handled by the backend.
     pub async fn get_or_insert<T, F, Fut>(&self, key: &str, f: F) -> Result<Arc<T>>
     where
         T: DeepSizeOf + Send + Sync + 'static,
-        F: FnOnce() -> Fut,
+        F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<T>> + Send,
     {
         if let Some(cache) = self.inner.upgrade() {
-            let cache_key = self.make_key::<T>(key);
-
-            // Fast path: already cached.
-            if let Some(entry) = cache.get(&cache_key).await {
-                self.hits.fetch_add(1, Ordering::Relaxed);
-                return Ok(entry.downcast::<T>().unwrap());
-            }
-
-            // Lock in-flight map. Re-check cache under lock to close the race.
-            let mut map = self.in_flight.lock().await;
-
-            if let Some(entry) = cache.get(&cache_key).await {
-                self.hits.fetch_add(1, Ordering::Relaxed);
-                return Ok(entry.downcast::<T>().unwrap());
-            }
-
-            if let Some(rx) = map.get(&cache_key) {
-                let mut rx = rx.clone();
-                drop(map);
-                let result = rx
-                    .wait_for(|v| v.is_some())
-                    .await
-                    .map_err(|_| crate::Error::internal("In-flight cache loader was dropped"))?
-                    .as_ref()
-                    .unwrap()
-                    .clone();
-                match result {
-                    Ok(entry) => {
-                        self.hits.fetch_add(1, Ordering::Relaxed);
-                        return Ok(entry.downcast::<T>().unwrap());
-                    }
-                    Err(err) => {
-                        self.misses.fetch_add(1, Ordering::Relaxed);
-                        return Err(crate::Error::internal(format!(
-                            "Cache loader failed: {err}"
-                        )));
-                    }
-                }
-            }
-
-            // We are the leader.
-            let (tx, rx) = tokio::sync::watch::channel(None);
-            map.insert(cache_key.clone(), rx);
-            drop(map);
-
+            let cache_key = make_cache_key(&self.prefix, key, type_tag::<T>());
+            let typed_loader = Box::pin(async move {
+                let value = f().await?;
+                let arc = Arc::new(value);
+                let size = arc.deep_size_of() + 8;
+                Ok((arc as CacheEntry, size))
+            });
+            let entry = cache.get_or_insert(&cache_key, typed_loader).await?;
             self.misses.fetch_add(1, Ordering::Relaxed);
-            let result = f().await;
-
-            {
-                let mut map = self.in_flight.lock().await;
-                map.remove(&cache_key);
-            }
-
-            match result {
-                Ok(value) => {
-                    let arc = Arc::new(value);
-                    let size = arc.deep_size_of() + 8;
-                    cache.insert(&cache_key, arc.clone(), size).await;
-                    let _ = tx.send(Some(Ok(arc.clone() as CacheEntry)));
-                    Ok(arc)
-                }
-                Err(err) => {
-                    let shared_err = Arc::new(err);
-                    let _ = tx.send(Some(Err(shared_err.clone())));
-                    Err(Arc::try_unwrap(shared_err).unwrap_or_else(|arc| {
-                        crate::Error::internal(format!("Cache loader failed: {arc}"))
-                    }))
-                }
-            }
+            Ok(entry.downcast::<T>().unwrap())
         } else {
             log::warn!("WeakLanceCache: cache no longer available, computing without caching");
             f().await.map(Arc::new)
         }
     }
 
-    /// Get or insert an item with a cache key type
     pub async fn get_or_insert_with_key<K, F, Fut>(
         &self,
         cache_key: K,
@@ -731,15 +594,13 @@ impl WeakLanceCache {
     where
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
-        F: FnOnce() -> Fut,
+        F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
         let key_str = cache_key.key().into_owned();
         self.get_or_insert(&key_str, loader).await
     }
 
-    /// Insert with a cache key type
-    /// Returns true if the item was inserted, false if the cache is no longer available
     pub async fn insert_with_key<K>(&self, cache_key: &K, value: Arc<K::ValueType>) -> bool
     where
         K: CacheKey,
@@ -749,7 +610,6 @@ impl WeakLanceCache {
         self.insert(&key_str, value).await
     }
 
-    /// Get with a cache key type
     pub async fn get_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
     where
         K: CacheKey,
@@ -759,13 +619,12 @@ impl WeakLanceCache {
         self.get(&key_str).await
     }
 
-    /// Get unsized item from cache
     pub async fn get_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
     ) -> Option<Arc<T>> {
         let cache = self.inner.upgrade()?;
-        let cache_key = self.make_key::<Arc<T>>(key);
+        let cache_key = make_cache_key(&self.prefix, key, type_tag::<Arc<T>>());
         if let Some(entry) = cache.get(&cache_key).await {
             entry
                 .downcast::<Arc<T>>()
@@ -776,7 +635,6 @@ impl WeakLanceCache {
         }
     }
 
-    /// Insert unsized item into cache
     pub async fn insert_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
@@ -785,31 +643,27 @@ impl WeakLanceCache {
         if let Some(cache) = self.inner.upgrade() {
             let wrapper = Arc::new(value);
             let size = wrapper.deep_size_of() + 8;
-            let cache_key = self.make_key::<Arc<T>>(key);
+            let cache_key = make_cache_key(&self.prefix, key, type_tag::<Arc<T>>());
             cache.insert(&cache_key, wrapper, size).await;
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item");
         }
     }
 
-    /// Get unsized with a cache key type
     pub async fn get_unsized_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
     where
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        let key_str = cache_key.key();
-        self.get_unsized(&key_str).await
+        self.get_unsized(&cache_key.key()).await
     }
 
-    /// Insert unsized with a cache key type
     pub async fn insert_unsized_with_key<K>(&self, cache_key: &K, value: Arc<K::ValueType>)
     where
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        let key_str = cache_key.key();
-        self.insert_unsized(&key_str, value).await
+        self.insert_unsized(&cache_key.key(), value).await
     }
 }
 
@@ -827,10 +681,10 @@ pub trait CacheKey {
         std::any::type_name::<Self::ValueType>()
     }
 
-    /// Stable numeric identifier used for key discrimination in the cache.
-    /// Derived from [`type_id_of`] by default.
+    /// Stable numeric identifier for key discrimination in the cache.
+    /// Derived from the pointer of [`Self::type_name`] by default.
     fn type_id(&self) -> u64 {
-        type_id_of::<Self::ValueType>()
+        self.type_name().as_ptr() as u64
     }
 }
 
@@ -844,7 +698,7 @@ pub trait UnsizedCacheKey {
     }
 
     fn type_id(&self) -> u64 {
-        type_id_of::<Self::ValueType>()
+        self.type_name().as_ptr() as u64
     }
 }
 
@@ -854,13 +708,9 @@ pub trait UnsizedCacheKey {
 
 #[derive(Debug, Clone)]
 pub struct CacheStats {
-    /// Number of times `get`, `get_unsized`, or `get_or_insert` found an item in the cache.
     pub hits: u64,
-    /// Number of times `get`, `get_unsized`, or `get_or_insert` did not find an item in the cache.
     pub misses: u64,
-    /// Number of entries currently in the cache.
     pub num_entries: usize,
-    /// Total size in bytes of all entries in the cache.
     pub size_bytes: usize,
 }
 
@@ -889,9 +739,9 @@ impl CacheStats {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
     use std::marker::PhantomData;
 
-    /// Test helper: a simple CacheKey for bare string keys.
     struct TestKey<T: 'static> {
         key: String,
         _phantom: PhantomData<T>,
@@ -918,10 +768,8 @@ mod tests {
         let item = Arc::new(vec![1, 2, 3]);
         let item_size = item.deep_size_of();
         let capacity = 10 * item_size;
-
         let cache = LanceCache::with_capacity(capacity);
 
-        let item = Arc::new(vec![1, 2, 3]);
         cache
             .insert_with_key(&TestKey::<Vec<i32>>::new("key"), item.clone())
             .await;
@@ -933,7 +781,6 @@ mod tests {
             .unwrap();
         assert_eq!(*retrieved, *item);
 
-        // Test eviction based on size
         for i in 0..20 {
             cache
                 .insert_with_key(
@@ -960,154 +807,86 @@ mod tests {
             }
         }
 
-        let item = Arc::new(MyType(42));
-        let item_dyn: Arc<dyn MyTrait> = item;
-
+        let item: Arc<dyn MyTrait> = Arc::new(MyType(42));
         let cache = LanceCache::with_capacity(1000);
-        cache.insert_unsized("test", item_dyn).await;
+        cache.insert_unsized("test", item).await;
 
         let retrieved = cache.get_unsized::<dyn MyTrait>("test").await.unwrap();
-        let retrieved = retrieved.as_any().downcast_ref::<MyType>().unwrap();
-        assert_eq!(retrieved.0, 42);
+        assert_eq!(retrieved.as_any().downcast_ref::<MyType>().unwrap().0, 42);
     }
 
     #[tokio::test]
     async fn test_cache_stats_basic() {
         let cache = LanceCache::with_capacity(1000);
-
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 0);
+        assert_eq!(cache.stats().await.hits, 0);
 
         // Miss
         assert!(
             cache
-                .get_with_key(&TestKey::<Vec<i32>>::new("nonexistent"))
+                .get_with_key(&TestKey::<Vec<i32>>::new("x"))
                 .await
                 .is_none()
         );
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 0);
-        assert_eq!(stats.misses, 1);
+        assert_eq!(cache.stats().await.misses, 1);
 
         // Insert then hit
         cache
-            .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3]))
+            .insert_with_key(&TestKey::new("k"), Arc::new(vec![1, 2, 3]))
             .await;
         assert!(
             cache
-                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+                .get_with_key(&TestKey::<Vec<i32>>::new("k"))
                 .await
                 .is_some()
         );
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 1);
-        assert_eq!(stats.misses, 1);
-
-        // Another hit
-        assert!(
-            cache
-                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
-                .await
-                .is_some()
-        );
-        assert_eq!(cache.stats().await.hits, 2);
-
-        // Another miss
-        assert!(
-            cache
-                .get_with_key(&TestKey::<Vec<i32>>::new("nonexistent2"))
-                .await
-                .is_none()
-        );
-        assert_eq!(cache.stats().await.misses, 2);
+        assert_eq!(cache.stats().await.hits, 1);
     }
 
     #[tokio::test]
     async fn test_cache_stats_with_prefixes() {
-        let base_cache = LanceCache::with_capacity(1000);
-        let prefixed_cache = base_cache.with_key_prefix("test");
+        let base = LanceCache::with_capacity(1000);
+        let prefixed = base.with_key_prefix("ns");
 
-        assert_eq!(base_cache.stats().await.hits, 0);
-        assert_eq!(prefixed_cache.stats().await.misses, 0);
-
-        // Miss on prefixed cache
         assert!(
-            prefixed_cache
-                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+            prefixed
+                .get_with_key(&TestKey::<Vec<i32>>::new("k"))
                 .await
                 .is_none()
         );
-        assert_eq!(base_cache.stats().await.misses, 1);
-        assert_eq!(prefixed_cache.stats().await.misses, 1);
+        assert_eq!(base.stats().await.misses, 1);
 
-        // Insert through prefixed cache and hit
-        prefixed_cache
-            .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3]))
+        prefixed
+            .insert_with_key(&TestKey::new("k"), Arc::new(vec![1]))
             .await;
         assert!(
-            prefixed_cache
-                .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
+            prefixed
+                .get_with_key(&TestKey::<Vec<i32>>::new("k"))
                 .await
                 .is_some()
         );
-        assert_eq!(base_cache.stats().await.hits, 1);
+        assert_eq!(base.stats().await.hits, 1);
     }
 
     #[tokio::test]
-    async fn test_cache_stats_unsized() {
-        #[derive(Debug, DeepSizeOf)]
-        struct MyType(i32);
-
-        trait MyTrait: DeepSizeOf + Send + Sync + Any {}
-
-        impl MyTrait for MyType {}
-
+    async fn test_cache_get_or_insert() {
         let cache = LanceCache::with_capacity(1000);
 
-        assert!(cache.get_unsized::<dyn MyTrait>("test").await.is_none());
-        assert_eq!(cache.stats().await.misses, 1);
-
-        let item: Arc<dyn MyTrait> = Arc::new(MyType(42));
-        cache.insert_unsized("test", item).await;
-
-        assert!(cache.get_unsized::<dyn MyTrait>("test").await.is_some());
-        assert_eq!(cache.stats().await.hits, 1);
-    }
-
-    #[tokio::test]
-    async fn test_cache_stats_get_or_insert() {
-        let cache = LanceCache::with_capacity(1000);
-
-        // First call: miss
-        let result: Arc<Vec<i32>> = cache
-            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("key1"), || async {
+        let v: Arc<Vec<i32>> = cache
+            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("k"), || async {
                 Ok(vec![1, 2, 3])
             })
             .await
             .unwrap();
-        assert_eq!(*result, vec![1, 2, 3]);
-        assert_eq!(cache.stats().await.misses, 1);
+        assert_eq!(*v, vec![1, 2, 3]);
 
-        // Second call: hit
-        let result: Arc<Vec<i32>> = cache
-            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("key1"), || async {
-                panic!("Should not be called")
+        // Second call should not invoke loader
+        let v: Arc<Vec<i32>> = cache
+            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("k"), || async {
+                panic!("should not be called")
             })
             .await
             .unwrap();
-        assert_eq!(*result, vec![1, 2, 3]);
-        assert_eq!(cache.stats().await.hits, 1);
-
-        // Different key: miss
-        let result: Arc<Vec<i32>> = cache
-            .get_or_insert_with_key(TestKey::<Vec<i32>>::new("key2"), || async {
-                Ok(vec![4, 5, 6])
-            })
-            .await
-            .unwrap();
-        assert_eq!(*result, vec![4, 5, 6]);
-        assert_eq!(cache.stats().await.misses, 2);
+        assert_eq!(*v, vec![1, 2, 3]);
     }
 
     #[tokio::test]
@@ -1132,55 +911,44 @@ mod tests {
             async fn get(&self, key: &[u8]) -> Option<CacheEntry> {
                 self.map.lock().await.get(key).map(|(e, _)| e.clone())
             }
-
             async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) {
                 self.map
                     .lock()
                     .await
                     .insert(key.to_vec(), (entry, size_bytes));
             }
-
             async fn invalidate_prefix(&self, prefix: &[u8]) {
                 self.map.lock().await.retain(|k, _| !k.starts_with(prefix));
             }
-
             async fn clear(&self) {
                 self.map.lock().await.clear();
             }
-
             async fn num_entries(&self) -> usize {
                 self.map.lock().await.len()
             }
-
             async fn size_bytes(&self) -> usize {
                 self.map.lock().await.values().map(|(_, s)| *s).sum()
             }
         }
 
-        let backend = Arc::new(HashMapBackend::new());
-        let cache = LanceCache::with_backend(backend);
+        let cache = LanceCache::with_backend(Arc::new(HashMapBackend::new()));
 
         cache
-            .insert_with_key(&TestKey::new("key1"), Arc::new(vec![1, 2, 3]))
+            .insert_with_key(&TestKey::new("k"), Arc::new(vec![1, 2, 3]))
             .await;
-        let retrieved = cache
-            .get_with_key(&TestKey::<Vec<i32>>::new("key1"))
-            .await
-            .unwrap();
-        assert_eq!(*retrieved, vec![1, 2, 3]);
-
-        // Miss for different type at same key
         assert!(
             cache
-                .get_with_key(&TestKey::<Vec<u8>>::new("key1"))
+                .get_with_key(&TestKey::<Vec<i32>>::new("k"))
+                .await
+                .is_some()
+        );
+        // Different type at same key = miss
+        assert!(
+            cache
+                .get_with_key(&TestKey::<Vec<u8>>::new("k"))
                 .await
                 .is_none()
         );
-
-        let stats = cache.stats().await;
-        assert_eq!(stats.hits, 1);
-        assert_eq!(stats.misses, 1);
-        assert_eq!(stats.num_entries, 1);
     }
 
     #[tokio::test]
diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs
index a11d5be988f..884bc793130 100644
--- a/rust/lance-file/src/previous/reader.rs
+++ b/rust/lance-file/src/previous/reader.rs
@@ -238,7 +238,7 @@ impl FileReader {
         loader: F,
     ) -> Result<Arc<T>>
     where
-        F: Fn(&str) -> Fut,
+        F: Fn(&str) -> Fut + Send + Sync,
         Fut: Future<Output = Result<T>> + Send,
     {
         if let Some(cache) = cache {
diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index da9c5d85f4a..9650c3bf2d0 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -77,11 +77,7 @@ impl std::fmt::Debug for Session {
             )
             .field(
                 "file_metadata_cache",
-                &format!(
-                    "LanceCache(items={}, size_bytes={})",
-                    self.metadata_cache.0.approx_size(),
-                    self.metadata_cache.0.approx_size_bytes(),
-                ),
+                &format!("LanceCache(items={})", self.metadata_cache.0.approx_size(),),
             )
             .field(
                 "index_extensions",

From 376a2f7ef0f5e47e0c375294f72d3e30e43e87c0 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 10:05:04 -0700
Subject: [PATCH 05/24] refactor: remove non-CacheKey methods, type_tag,
 approx_size_bytes

Remove all methods that bypass CacheKey from WeakLanceCache (get,
insert, get_or_insert, get_unsized, insert_unsized). Remove
insert_unsized/get_unsized from LanceCache. Remove type_tag helper.
All cache access now goes through CacheKey/UnsizedCacheKey.

Make parse_cache_key return (empty, 0) instead of panicking on short
keys.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs | 184 ++++++++++++++---------------------
 rust/lance/src/session.rs    |  12 ++-
 2 files changed, 84 insertions(+), 112 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 418efe151e6..e6b4fa37eaa 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -204,25 +204,15 @@ impl CacheBackend for MokaCacheBackend {
 // Type identity helpers
 // ---------------------------------------------------------------------------
 
-/// Derives a stable type tag from `type_name::<T>()`.
-///
-/// Uses the pointer of the `&'static str` returned by [`std::any::type_name`].
-/// The pointer is stable for the lifetime of the process and unique per
-/// monomorphized type within a single compilation unit.
-fn type_tag<T: 'static + ?Sized>() -> u64 {
-    std::any::type_name::<T>().as_ptr() as u64
-}
-
 /// Cache keys are structured as `user_key\0<8-byte type_id>`.
 ///
 /// This function splits an opaque cache key into the user-visible portion
 /// and the type_id. Backend implementations can use this to inspect keys.
-///
-/// # Panics
-///
-/// Panics if `key` is shorter than 9 bytes.
+/// Returns `(empty slice, 0)` if the key is too short to parse.
 pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) {
-    assert!(key.len() >= 9, "cache key too short to parse");
+    if key.len() < 9 {
+        return (&[], 0);
+    }
     let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap();
     let user_key = &key[..key.len() - 9];
     (user_key, u64::from_le_bytes(type_id_bytes))
@@ -398,15 +388,6 @@ impl LanceCache {
         self.insert_with_id(key, type_id, Arc::new(metadata)).await
     }
 
-    pub async fn insert_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
-        &self,
-        key: &str,
-        metadata: Arc<T>,
-    ) {
-        self.insert_unsized_with_id(key, type_tag::<Arc<T>>(), metadata)
-            .await
-    }
-
     async fn get_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
@@ -416,13 +397,6 @@ impl LanceCache {
         Some(outer.as_ref().clone())
     }
 
-    pub async fn get_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
-        &self,
-        key: &str,
-    ) -> Option<Arc<T>> {
-        self.get_unsized_with_id(key, type_tag::<Arc<T>>()).await
-    }
-
     // -- Stats / clear --------------------------------------------------------
 
     pub async fn stats(&self) -> CacheStats {
@@ -532,27 +506,31 @@ impl WeakLanceCache {
         }
     }
 
-    pub async fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
+    pub async fn get_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
+    where
+        K: CacheKey,
+        K::ValueType: DeepSizeOf + Send + Sync + 'static,
+    {
         let cache = self.inner.upgrade()?;
-        let cache_key = make_cache_key(&self.prefix, key, type_tag::<T>());
-        if let Some(entry) = cache.get(&cache_key).await {
+        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+        if let Some(entry) = cache.get(&key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
-            Some(entry.downcast::<T>().unwrap())
+            Some(entry.downcast::<K::ValueType>().unwrap())
         } else {
             self.misses.fetch_add(1, Ordering::Relaxed);
             None
         }
     }
 
-    pub async fn insert<T: DeepSizeOf + Send + Sync + 'static>(
-        &self,
-        key: &str,
-        value: Arc<T>,
-    ) -> bool {
+    pub async fn insert_with_key<K>(&self, cache_key: &K, value: Arc<K::ValueType>) -> bool
+    where
+        K: CacheKey,
+        K::ValueType: DeepSizeOf + Send + Sync + 'static,
+    {
         if let Some(cache) = self.inner.upgrade() {
             let size = value.deep_size_of() + 8;
-            let cache_key = make_cache_key(&self.prefix, key, type_tag::<T>());
-            cache.insert(&cache_key, value, size).await;
+            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+            cache.insert(&key, value, size).await;
             true
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert item");
@@ -563,71 +541,44 @@ impl WeakLanceCache {
     /// Get or insert an item, computing it if necessary.
     ///
     /// Deduplication of concurrent loads is handled by the backend.
-    pub async fn get_or_insert<T, F, Fut>(&self, key: &str, f: F) -> Result<Arc<T>>
+    pub async fn get_or_insert_with_key<K, F, Fut>(
+        &self,
+        cache_key: K,
+        loader: F,
+    ) -> Result<Arc<K::ValueType>>
     where
-        T: DeepSizeOf + Send + Sync + 'static,
+        K: CacheKey,
+        K::ValueType: DeepSizeOf + Send + Sync + 'static,
         F: FnOnce() -> Fut + Send,
-        Fut: Future<Output = Result<T>> + Send,
+        Fut: Future<Output = Result<K::ValueType>> + Send,
     {
         if let Some(cache) = self.inner.upgrade() {
-            let cache_key = make_cache_key(&self.prefix, key, type_tag::<T>());
+            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
             let typed_loader = Box::pin(async move {
-                let value = f().await?;
+                let value = loader().await?;
                 let arc = Arc::new(value);
                 let size = arc.deep_size_of() + 8;
                 Ok((arc as CacheEntry, size))
             });
-            let entry = cache.get_or_insert(&cache_key, typed_loader).await?;
+            let entry = cache.get_or_insert(&key, typed_loader).await?;
             self.misses.fetch_add(1, Ordering::Relaxed);
-            Ok(entry.downcast::<T>().unwrap())
+            Ok(entry.downcast::<K::ValueType>().unwrap())
         } else {
             log::warn!("WeakLanceCache: cache no longer available, computing without caching");
-            f().await.map(Arc::new)
+            loader().await.map(Arc::new)
         }
     }
 
-    pub async fn get_or_insert_with_key<K, F, Fut>(
-        &self,
-        cache_key: K,
-        loader: F,
-    ) -> Result<Arc<K::ValueType>>
-    where
-        K: CacheKey,
-        K::ValueType: DeepSizeOf + Send + Sync + 'static,
-        F: FnOnce() -> Fut + Send,
-        Fut: Future<Output = Result<K::ValueType>> + Send,
-    {
-        let key_str = cache_key.key().into_owned();
-        self.get_or_insert(&key_str, loader).await
-    }
-
-    pub async fn insert_with_key<K>(&self, cache_key: &K, value: Arc<K::ValueType>) -> bool
-    where
-        K: CacheKey,
-        K::ValueType: DeepSizeOf + Send + Sync + 'static,
-    {
-        let key_str = cache_key.key().into_owned();
-        self.insert(&key_str, value).await
-    }
-
-    pub async fn get_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
+    pub async fn get_unsized_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
     where
-        K: CacheKey,
+        K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        let key_str = cache_key.key().into_owned();
-        self.get(&key_str).await
-    }
-
-    pub async fn get_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
-        &self,
-        key: &str,
-    ) -> Option<Arc<T>> {
         let cache = self.inner.upgrade()?;
-        let cache_key = make_cache_key(&self.prefix, key, type_tag::<Arc<T>>());
-        if let Some(entry) = cache.get(&cache_key).await {
+        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+        if let Some(entry) = cache.get(&key).await {
             entry
-                .downcast::<Arc<T>>()
+                .downcast::<Arc<K::ValueType>>()
                 .ok()
                 .map(|arc| arc.as_ref().clone())
         } else {
@@ -635,36 +586,20 @@ impl WeakLanceCache {
         }
     }
 
-    pub async fn insert_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
-        &self,
-        key: &str,
-        value: Arc<T>,
-    ) {
+    pub async fn insert_unsized_with_key<K>(&self, cache_key: &K, value: Arc<K::ValueType>)
+    where
+        K: UnsizedCacheKey,
+        K::ValueType: DeepSizeOf + Send + Sync + 'static,
+    {
         if let Some(cache) = self.inner.upgrade() {
             let wrapper = Arc::new(value);
             let size = wrapper.deep_size_of() + 8;
-            let cache_key = make_cache_key(&self.prefix, key, type_tag::<Arc<T>>());
-            cache.insert(&cache_key, wrapper, size).await;
+            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+            cache.insert(&key, wrapper, size).await;
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item");
         }
     }
-
-    pub async fn get_unsized_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
-    where
-        K: UnsizedCacheKey,
-        K::ValueType: DeepSizeOf + Send + Sync + 'static,
-    {
-        self.get_unsized(&cache_key.key()).await
-    }
-
-    pub async fn insert_unsized_with_key<K>(&self, cache_key: &K, value: Arc<K::ValueType>)
-    where
-        K: UnsizedCacheKey,
-        K::ValueType: DeepSizeOf + Send + Sync + 'static,
-    {
-        self.insert_unsized(&cache_key.key(), value).await
-    }
 }
 
 // ---------------------------------------------------------------------------
@@ -763,6 +698,28 @@ mod tests {
         }
     }
 
+    /// Test helper: an UnsizedCacheKey for trait object values.
+    struct TestUnsizedKey<T: 'static + ?Sized> {
+        key: String,
+        _phantom: PhantomData<T>,
+    }
+
+    impl<T: 'static + ?Sized> TestUnsizedKey<T> {
+        fn new(key: &str) -> Self {
+            Self {
+                key: key.to_string(),
+                _phantom: PhantomData,
+            }
+        }
+    }
+
+    impl<T: 'static + ?Sized> UnsizedCacheKey for TestUnsizedKey<T> {
+        type ValueType = T;
+        fn key(&self) -> Cow<'_, str> {
+            Cow::Borrowed(&self.key)
+        }
+    }
+
     #[tokio::test]
     async fn test_cache_bytes() {
         let item = Arc::new(vec![1, 2, 3]);
@@ -809,9 +766,14 @@ mod tests {
 
         let item: Arc<dyn MyTrait> = Arc::new(MyType(42));
         let cache = LanceCache::with_capacity(1000);
-        cache.insert_unsized("test", item).await;
+        cache
+            .insert_unsized_with_key(&TestUnsizedKey::<dyn MyTrait>::new("test"), item)
+            .await;
 
-        let retrieved = cache.get_unsized::<dyn MyTrait>("test").await.unwrap();
+        let retrieved = cache
+            .get_unsized_with_key(&TestUnsizedKey::<dyn MyTrait>::new("test"))
+            .await
+            .unwrap();
         assert_eq!(retrieved.as_any().downcast_ref::<MyType>().unwrap().0, 42);
     }
 
diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index 9650c3bf2d0..c27eddf7b27 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -219,7 +219,17 @@ impl Default for Session {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use lance_core::cache::UnsizedCacheKey;
     use lance_index::vector::VectorIndex;
+    use std::borrow::Cow;
+
+    struct TestUnsizedKey(&'static str);
+    impl UnsizedCacheKey for TestUnsizedKey {
+        type ValueType = dyn VectorIndex;
+        fn key(&self) -> Cow<'_, str> {
+            Cow::Borrowed(self.0)
+        }
+    }
 
     #[tokio::test]
     async fn test_disable_index_cache() {
@@ -227,7 +237,7 @@ mod tests {
         assert!(
             no_cache
                 .index_cache
-                .get_unsized::<dyn VectorIndex>("abc")
+                .get_unsized_with_key(&TestUnsizedKey("abc"))
                 .await
                 .is_none()
         );

From 74fdc2cd78b10eb54664fe3b7e54cb368b9bf4fc Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 10:30:51 -0700
Subject: [PATCH 06/24] refactor: make CacheBackend::get_or_insert required

---
 rust/lance-core/src/cache.rs | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index e6b4fa37eaa..a2bf6cf5239 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -48,8 +48,7 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     /// Get an existing entry or compute it from `loader`.
     ///
     /// Implementations should deduplicate concurrent loads for the same key
-    /// so the loader runs at most once. The default implementation does a
-    /// simple get-then-insert without deduplication.
+    /// so the loader runs at most once.
     ///
     /// The loader is a pinned future that produces `(entry, size_bytes)`.
     /// It borrows from the caller's scope and will be `.await`ed within
@@ -58,14 +57,7 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
         &self,
         key: &[u8],
         loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
-    ) -> Result<CacheEntry> {
-        if let Some(entry) = self.get(key).await {
-            return Ok(entry);
-        }
-        let (entry, size) = loader.await?;
-        self.insert(key, entry.clone(), size).await;
-        Ok(entry)
-    }
+    ) -> Result<CacheEntry>;
 
     /// Remove all entries whose key starts with `prefix`.
     async fn invalidate_prefix(&self, prefix: &[u8]);
@@ -879,6 +871,22 @@ mod tests {
                     .await
                     .insert(key.to_vec(), (entry, size_bytes));
             }
+            async fn get_or_insert<'a>(
+                &self,
+                key: &[u8],
+                loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
+            ) -> Result<CacheEntry> {
+                if let Some((entry, _)) = self.map.lock().await.get(key) {
+                    Ok(entry.clone())
+                } else {
+                    let (entry, size) = loader.await?;
+                    self.map
+                        .lock()
+                        .await
+                        .insert(key.to_vec(), (entry.clone(), size));
+                    Ok(entry)
+                }
+            }
             async fn invalidate_prefix(&self, prefix: &[u8]) {
                 self.map.lock().await.retain(|k, _| !k.starts_with(prefix));
             }

From 1ba4ac3347859f7a55c66d10a9fd1e7ed5e3b84d Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 11:10:39 -0700
Subject: [PATCH 07/24] fix: restore approx_size_bytes on CacheBackend

Restore approx_size_bytes on CacheBackend so DeepSizeOf on LanceCache
reports actual cache memory usage (used by Session::size_bytes). Fixes
test_metadata_cache_size Python test.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index a2bf6cf5239..98b7e5028e2 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -76,6 +76,13 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     fn approx_num_entries(&self) -> usize {
         0
     }
+
+    /// Approximate weighted size in bytes, callable from synchronous contexts.
+    /// Used by `DeepSizeOf` to report cache memory usage.
+    /// Backends that cannot provide this cheaply should return 0.
+    fn approx_size_bytes(&self) -> usize {
+        0
+    }
 }
 
 // ---------------------------------------------------------------------------
@@ -190,6 +197,10 @@ impl CacheBackend for MokaCacheBackend {
     fn approx_num_entries(&self) -> usize {
         self.cache.entry_count() as usize
     }
+
+    fn approx_size_bytes(&self) -> usize {
+        self.cache.iter().map(|(_, v)| v.size_bytes).sum()
+    }
 }
 
 // ---------------------------------------------------------------------------
@@ -249,8 +260,7 @@ impl std::fmt::Debug for LanceCache {
 
 impl DeepSizeOf for LanceCache {
     fn deep_size_of_children(&self, _: &mut Context) -> usize {
-        // Can't iterate a dyn CacheBackend; use stats().size_bytes for accurate numbers.
-        0
+        self.cache.approx_size_bytes()
     }
 }
 

From 135740982c73d7f671ef57c90b3783ab801cf1e8 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 16:00:37 -0700
Subject: [PATCH 08/24] refactor: replace type_id u64 with &'static str on
 CacheKey

The type_name().as_ptr() approach for type discrimination was unstable
across crate boundaries due to monomorphization. Replace with an
explicit fn type_id() -> &'static str that each CacheKey impl provides
as a short human-readable literal (e.g. 'Vec<IndexMetadata>', 'Manifest').

Key format changes from user_key\0<8 LE bytes> to user_key\0<type_id str>.
parse_cache_key() now returns (&[u8], &str).
---
 rust/lance-core/src/cache.rs                  | 66 +++++++++----------
 .../src/encodings/logical/primitive.rs        |  4 ++
 rust/lance-file/src/previous/reader.rs        |  4 ++
 rust/lance-index/src/scalar/bitmap.rs         |  4 ++
 rust/lance-index/src/scalar/btree.rs          |  4 ++
 rust/lance-index/src/scalar/inverted/index.rs |  8 +++
 rust/lance-index/src/scalar/ngram.rs          |  4 ++
 rust/lance-index/src/scalar/rtree.rs          |  4 ++
 rust/lance/src/dataset/fragment.rs            |  4 ++
 rust/lance/src/index.rs                       | 16 +++++
 rust/lance/src/index/vector/ivf.rs            |  4 ++
 rust/lance/src/index/vector/ivf/v2.rs         |  6 ++
 rust/lance/src/session.rs                     |  4 ++
 rust/lance/src/session/caches.rs              | 24 +++++--
 rust/lance/src/session/index_caches.rs        | 12 ++++
 15 files changed, 127 insertions(+), 41 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 98b7e5028e2..83c8bb09acb 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -207,22 +207,23 @@ impl CacheBackend for MokaCacheBackend {
 // Type identity helpers
 // ---------------------------------------------------------------------------
 
-/// Cache keys are structured as `user_key\0<8-byte type_id>`.
+/// Cache keys are structured as `user_key\0type_id`.
 ///
 /// This function splits an opaque cache key into the user-visible portion
-/// and the type_id. Backend implementations can use this to inspect keys.
-/// Returns `(empty slice, 0)` if the key is too short to parse.
-pub fn parse_cache_key(key: &[u8]) -> (&[u8], u64) {
-    if key.len() < 9 {
-        return (&[], 0);
-    }
-    let type_id_bytes: [u8; 8] = key[key.len() - 8..].try_into().unwrap();
-    let user_key = &key[..key.len() - 9];
-    (user_key, u64::from_le_bytes(type_id_bytes))
+/// and the type_id string. Backend implementations can use this to inspect keys.
+/// Returns `(key, "")` (the whole key as the user key) if no separator is found.
+pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) {
+    if let Some(sep) = key.iter().position(|&b| b == 0) {
+        let user_key = &key[..sep];
+        let type_id = std::str::from_utf8(&key[sep + 1..]).unwrap_or("");
+        (user_key, type_id)
+    } else {
+        (key, "")
+    }
 }
 
-/// Build a key: `prefix/user_key\0<8-byte type_id>`.
-fn make_cache_key(prefix: &str, key: &str, type_id: u64) -> Vec<u8> {
+/// Build a key: `prefix/user_key\0type_id`.
+fn make_cache_key(prefix: &str, key: &str, type_id: &str) -> Vec<u8> {
     let full_key = if prefix.is_empty() {
         key.to_string()
     } else {
@@ -230,7 +231,7 @@ fn make_cache_key(prefix: &str, key: &str, type_id: u64) -> Vec<u8> {
     };
     let mut bytes = full_key.into_bytes();
     bytes.push(0);
-    bytes.extend_from_slice(&type_id.to_le_bytes());
+    bytes.extend_from_slice(type_id.as_bytes());
     bytes
 }
 
@@ -326,7 +327,7 @@ impl LanceCache {
     async fn insert_with_id<T: DeepSizeOf + Send + Sync + 'static>(
         &self,
         key: &str,
-        type_id: u64,
+        type_id: &str,
         metadata: Arc<T>,
     ) {
         let size = metadata.deep_size_of() + 8;
@@ -337,7 +338,7 @@ impl LanceCache {
     async fn get_with_id<T: Send + Sync + 'static>(
         &self,
         key: &str,
-        type_id: u64,
+        type_id: &str,
     ) -> Option<Arc<T>> {
         let cache_key = make_cache_key(&self.prefix, key, type_id);
         if let Some(entry) = self.cache.get(&cache_key).await {
@@ -352,7 +353,7 @@ impl LanceCache {
     async fn get_or_insert_with_id<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
         &self,
         key: &str,
-        type_id: u64,
+        type_id: &str,
         loader: F,
     ) -> Result<Arc<T>>
     where
@@ -384,7 +385,7 @@ impl LanceCache {
     async fn insert_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
-        type_id: u64,
+        type_id: &str,
         metadata: Arc<T>,
     ) {
         self.insert_with_id(key, type_id, Arc::new(metadata)).await
@@ -393,7 +394,7 @@ impl LanceCache {
     async fn get_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
-        type_id: u64,
+        type_id: &str,
     ) -> Option<Arc<T>> {
         let outer = self.get_with_id::<Arc<T>>(key, type_id).await?;
         Some(outer.as_ref().clone())
@@ -613,16 +614,11 @@ pub trait CacheKey {
 
     fn key(&self) -> Cow<'_, str>;
 
-    /// Human-readable type name, for debugging and diagnostics.
-    fn type_name(&self) -> &'static str {
-        std::any::type_name::<Self::ValueType>()
-    }
-
-    /// Stable numeric identifier for key discrimination in the cache.
-    /// Derived from the pointer of [`Self::type_name`] by default.
-    fn type_id(&self) -> u64 {
-        self.type_name().as_ptr() as u64
-    }
+    /// Short, stable string that distinguishes this value type from others in
+    /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_id`).
+    /// Must be consistent across crate boundaries — use a short literal, not
+    /// `type_name` pointers.
+    fn type_id(&self) -> &'static str;
 }
 
 pub trait UnsizedCacheKey {
@@ -630,13 +626,7 @@ pub trait UnsizedCacheKey {
 
     fn key(&self) -> Cow<'_, str>;
 
-    fn type_name(&self) -> &'static str {
-        std::any::type_name::<Self::ValueType>()
-    }
-
-    fn type_id(&self) -> u64 {
-        self.type_name().as_ptr() as u64
-    }
+    fn type_id(&self) -> &'static str;
 }
 
 // ---------------------------------------------------------------------------
@@ -698,6 +688,9 @@ mod tests {
         fn key(&self) -> Cow<'_, str> {
             Cow::Borrowed(&self.key)
         }
+        fn type_id(&self) -> &'static str {
+            std::any::type_name::<T>()
+        }
     }
 
     /// Test helper: an UnsizedCacheKey for trait object values.
@@ -720,6 +713,9 @@ mod tests {
         fn key(&self) -> Cow<'_, str> {
             Cow::Borrowed(&self.key)
         }
+        fn type_id(&self) -> &'static str {
+            std::any::type_name::<T>()
+        }
     }
 
     #[tokio::test]
diff --git a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs
index b5de0912a32..232d2659631 100644
--- a/rust/lance-encoding/src/encodings/logical/primitive.rs
+++ b/rust/lance-encoding/src/encodings/logical/primitive.rs
@@ -3416,6 +3416,10 @@ impl CacheKey for FieldDataCacheKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         self.column_index.to_string().into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "FieldData"
+    }
 }
 
 impl StructuralFieldScheduler for StructuralPrimitiveFieldScheduler {
diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs
index 884bc793130..6dd40af45c1 100644
--- a/rust/lance-file/src/previous/reader.rs
+++ b/rust/lance-file/src/previous/reader.rs
@@ -89,6 +89,10 @@ impl<T: 'static> CacheKey for StringCacheKey<'_, T> {
     fn key(&self) -> Cow<'_, str> {
         self.key.into()
     }
+
+    fn type_id(&self) -> &'static str {
+        std::any::type_name::<T>()
+    }
 }
 
 impl FileReader {
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index 86931d8a64f..2ef83d92c9b 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -128,6 +128,10 @@ impl CacheKey for BitmapKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         format!("{}", self.value.0).into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "Bitmap"
+    }
 }
 
 impl BitmapIndex {
diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs
index 9930c96793e..04fc146d9aa 100644
--- a/rust/lance-index/src/scalar/btree.rs
+++ b/rust/lance-index/src/scalar/btree.rs
@@ -989,6 +989,10 @@ impl CacheKey for BTreePageKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         format!("page-{}", self.page_number).into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "BTreePage"
+    }
 }
 
 /// Note: this is very similar to the IVF index except we store the IVF part in a btree
diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs
index 53ef5fc6a66..4bebc720872 100644
--- a/rust/lance-index/src/scalar/inverted/index.rs
+++ b/rust/lance-index/src/scalar/inverted/index.rs
@@ -1602,6 +1602,10 @@ impl CacheKey for PostingListKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         format!("postings-{}", self.token_id).into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "PostingList"
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -1615,6 +1619,10 @@ impl CacheKey for PositionKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         format!("positions-{}", self.token_id).into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "Position"
+    }
 }
 
 #[derive(Debug, Clone, DeepSizeOf)]
diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs
index ce992af70ab..f03e50c9195 100644
--- a/rust/lance-index/src/scalar/ngram.rs
+++ b/rust/lance-index/src/scalar/ngram.rs
@@ -170,6 +170,10 @@ impl CacheKey for NGramPostingListKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         format!("posting-list-{}", self.row_offset).into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "NGramPostingList"
+    }
 }
 
 impl NGramPostingList {
diff --git a/rust/lance-index/src/scalar/rtree.rs b/rust/lance-index/src/scalar/rtree.rs
index 598bef43f50..37d715539eb 100644
--- a/rust/lance-index/src/scalar/rtree.rs
+++ b/rust/lance-index/src/scalar/rtree.rs
@@ -249,6 +249,10 @@ impl CacheKey for RTreeCacheKey {
             Self::Nulls => "nulls".into(),
         }
     }
+
+    fn type_id(&self) -> &'static str {
+        "RTree"
+    }
 }
 
 #[derive(Clone)]
diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs
index 5be98a9b23d..55c23211871 100644
--- a/rust/lance/src/dataset/fragment.rs
+++ b/rust/lance/src/dataset/fragment.rs
@@ -1879,6 +1879,10 @@ impl CacheKey for FileMetadataCacheKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         "".into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "FileMetadata"
+    }
 }
 
 impl From<FileFragment> for Fragment {
diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index 8c1b2404a42..16faab4e48f 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -110,6 +110,10 @@ impl UnsizedCacheKey for ScalarIndexCacheKey<'_> {
             self.uuid.into()
         }
     }
+
+    fn type_id(&self) -> &'static str {
+        "ScalarIndex"
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -134,6 +138,10 @@ impl UnsizedCacheKey for VectorIndexCacheKey<'_> {
             self.uuid.into()
         }
     }
+
+    fn type_id(&self) -> &'static str {
+        "VectorIndex"
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -158,6 +166,10 @@ impl CacheKey for FragReuseIndexCacheKey<'_> {
             self.uuid.into()
         }
     }
+
+    fn type_id(&self) -> &'static str {
+        "FragReuseIndex"
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -182,6 +194,10 @@ impl CacheKey for MemWalCacheKey<'_> {
             self.uuid.to_string().into()
         }
     }
+
+    fn type_id(&self) -> &'static str {
+        "MemWalIndex"
+    }
 }
 
 // Whether to auto-migrate a dataset when we encounter corruption.
diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs
index a39bd58e3c3..c44f73563d3 100644
--- a/rust/lance/src/index/vector/ivf.rs
+++ b/rust/lance/src/index/vector/ivf.rs
@@ -123,6 +123,10 @@ impl UnsizedCacheKey for LegacyIVFPartitionKey {
     fn key(&self) -> std::borrow::Cow<'_, str> {
         format!("ivf-{}", self.partition_id).into()
     }
+
+    fn type_id(&self) -> &'static str {
+        "LegacyIVFPartition"
+    }
 }
 
 /// IVF Index.
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index d781bb5456c..14f71612ddb 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -96,6 +96,12 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> CacheKey for IVFPartit
     fn key(&self) -> std::borrow::Cow<'_, str> {
         format!("ivf-{}", self.partition_id).into()
     }
+
+    fn type_id(&self) -> &'static str {
+        // type_name is acceptable here: the key embeds the name's bytes, not its
+        // address, and the name is expected to differ per `S`/`Q` instantiation.
+        std::any::type_name::<PartitionEntry<S, Q>>()
+    }
 }
 
 /// IVF Index.
diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index c27eddf7b27..4876224cb75 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -229,6 +229,10 @@ mod tests {
         fn key(&self) -> Cow<'_, str> {
             Cow::Borrowed(self.0)
         }
+
+        fn type_id(&self) -> &'static str {
+            "TestUnsized"
+        }
     }
 
     #[tokio::test]
diff --git a/rust/lance/src/session/caches.rs b/rust/lance/src/session/caches.rs
index 67c684c98de..e4ac180d563 100644
--- a/rust/lance/src/session/caches.rs
+++ b/rust/lance/src/session/caches.rs
@@ -75,7 +75,6 @@ pub struct ManifestKey<'a> {
 
 impl CacheKey for ManifestKey<'_> {
     type ValueType = Manifest;
-
     fn key(&self) -> Cow<'_, str> {
         if let Some(e_tag) = self.e_tag {
             Cow::Owned(format!("manifest/{}/{}", self.version, e_tag))
@@ -83,6 +82,9 @@ impl CacheKey for ManifestKey<'_> {
             Cow::Owned(format!("manifest/{}", self.version))
         }
     }
+    fn type_id(&self) -> &'static str {
+        "Manifest"
+    }
 }
 
 #[derive(Debug)]
@@ -92,10 +94,12 @@ pub struct TransactionKey {
 
 impl CacheKey for TransactionKey {
     type ValueType = Transaction;
-
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("txn/{}", self.version))
     }
+    fn type_id(&self) -> &'static str {
+        "Transaction"
+    }
 }
 
 #[derive(Debug)]
@@ -106,7 +110,6 @@ pub struct DeletionFileKey<'a> {
 
 impl CacheKey for DeletionFileKey<'_> {
     type ValueType = DeletionVector;
-
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!(
             "deletion/{}/{}/{}/{}",
@@ -116,6 +119,9 @@ impl CacheKey for DeletionFileKey<'_> {
             self.deletion_file.file_type.suffix()
         ))
     }
+    fn type_id(&self) -> &'static str {
+        "DeletionVector"
+    }
 }
 
 #[derive(Debug)]
@@ -125,10 +131,12 @@ pub struct RowAddrMaskKey {
 
 impl CacheKey for RowAddrMaskKey {
     type ValueType = RowAddrMask;
-
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_addr_mask/{}", self.version))
     }
+    fn type_id(&self) -> &'static str {
+        "RowAddrMask"
+    }
 }
 
 #[derive(Debug)]
@@ -138,10 +146,12 @@ pub struct RowIdIndexKey {
 
 impl CacheKey for RowIdIndexKey {
     type ValueType = RowIdIndex;
-
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_id_index/{}", self.version))
     }
+    fn type_id(&self) -> &'static str {
+        "RowIdIndex"
+    }
 }
 
 #[derive(Debug)]
@@ -151,10 +161,12 @@ pub struct RowIdSequenceKey {
 
 impl CacheKey for RowIdSequenceKey {
     type ValueType = RowIdSequence;
-
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_id_sequence/{}", self.fragment_id))
     }
+    fn type_id(&self) -> &'static str {
+        "RowIdSequence"
+    }
 }
 
 impl DSMetadataCache {
diff --git a/rust/lance/src/session/index_caches.rs b/rust/lance/src/session/index_caches.rs
index d9578d43112..c3430f4c840 100644
--- a/rust/lance/src/session/index_caches.rs
+++ b/rust/lance/src/session/index_caches.rs
@@ -88,6 +88,10 @@ impl CacheKey for FragReuseIndexKey<'_> {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("frag_reuse/{}", self.uuid))
     }
+
+    fn type_id(&self) -> &'static str {
+        "FragReuseIndex"
+    }
 }
 
 #[derive(Debug)]
@@ -101,6 +105,10 @@ impl CacheKey for IndexMetadataKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(self.version.to_string())
     }
+
+    fn type_id(&self) -> &'static str {
+        "Vec<IndexMetadata>"
+    }
 }
 
 pub struct ProstAny(pub Arc<prost_types::Any>);
@@ -128,4 +136,8 @@ impl CacheKey for ScalarIndexDetailsKey<'_> {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("type/{}", self.uuid))
     }
+
+    fn type_id(&self) -> &'static str {
+        "ScalarIndexDetails"
+    }
 }

From 2e7602ebcdb0b7d4da8253a5932aca1ffd552af1 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 12:19:09 -0700
Subject: [PATCH 09/24] feat: add partition serde for all quantizer types (PR
 #6223)

---
 rust/lance/src/index/vector/ivf.rs            |    4 +
 .../src/index/vector/ivf/partition_serde.rs   | 1153 +++++++++++++++++
 2 files changed, 1157 insertions(+)
 create mode 100644 rust/lance/src/index/vector/ivf/partition_serde.rs

diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs
index c44f73563d3..88460c18005 100644
--- a/rust/lance/src/index/vector/ivf.rs
+++ b/rust/lance/src/index/vector/ivf.rs
@@ -102,6 +102,7 @@ use uuid::Uuid;
 
 pub mod builder;
 pub mod io;
+pub mod partition_serde;
 pub mod v2;
 
 // Cache wrapper for vector index trait objects
@@ -2671,6 +2672,7 @@ mod tests {
             index_version: VECTOR_INDEX_VERSION as i32,
             created_at: Some(chrono::Utc::now()),
             base_id: None,
+            files: None,
         };
 
         // We need to commit this index to the dataset so that it can be found
@@ -2709,6 +2711,7 @@ mod tests {
             index_version: VECTOR_INDEX_VERSION as i32,
             created_at: None, // Test index, not setting timestamp
             base_id: None,
+            files: None,
         };
 
         let prefilter = Arc::new(DatasetPreFilter::new(dataset.clone(), &[index_meta], None));
@@ -2774,6 +2777,7 @@ mod tests {
             index_version: VECTOR_INDEX_VERSION as i32,
             created_at: Some(chrono::Utc::now()),
             base_id: None,
+            files: None,
         };
 
         // We need to commit this new index to the dataset so it can be found
diff --git a/rust/lance/src/index/vector/ivf/partition_serde.rs b/rust/lance/src/index/vector/ivf/partition_serde.rs
new file mode 100644
index 00000000000..9139e940c2a
--- /dev/null
+++ b/rust/lance/src/index/vector/ivf/partition_serde.rs
@@ -0,0 +1,1153 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+//! Serialization and zero-copy deserialization for IVF partition cache entries.
+//!
+//! The format is a simple binary layout designed for ephemeral caching (not stable across versions):
+//!
+//! ```text
+//! [header_len: u64 LE]
+//! [header: JSON bytes]
+//! [sub_index IPC file bytes]
+//! [... quantizer-specific IPC sections ...]
+//! [storage batch IPC file bytes]
+//! ```
+//!
+//! Each IPC section is a complete Arrow IPC file. On deserialization, the IPC
+//! sections are read zero-copy using [`FileDecoder`] so that Arrow arrays
+//! reference the original buffer directly.
+
+use std::sync::Arc;
+
+use arrow_array::{FixedSizeListArray, RecordBatch};
+use arrow_buffer::Buffer;
+use arrow_ipc::convert::fb_to_schema;
+use arrow_ipc::reader::{FileDecoder, read_footer_length};
+use arrow_ipc::root_as_footer;
+use arrow_ipc::writer::FileWriter;
+use arrow_schema::{DataType, Field, Schema};
+use bytes::Bytes;
+use lance_core::{Error, Result};
+use lance_index::vector::bq::RQRotationType;
+use lance_index::vector::bq::builder::RabitQuantizer;
+use lance_index::vector::bq::storage::RabitQuantizationMetadata;
+use lance_index::vector::flat::index::{FlatMetadata, FlatQuantizer};
+use lance_index::vector::pq::ProductQuantizer;
+use lance_index::vector::pq::storage::ProductQuantizationMetadata;
+use lance_index::vector::quantizer::{Quantization, QuantizerStorage};
+use lance_index::vector::sq::ScalarQuantizer;
+use lance_index::vector::sq::storage::ScalarQuantizationMetadata;
+use lance_index::vector::storage::VectorStore;
+use lance_index::vector::v3::subindex::IvfSubIndex;
+use lance_linalg::distance::DistanceType;
+use serde::{Deserialize, Serialize};
+
+use super::v2::PartitionEntry;
+
+// ---------------------------------------------------------------------------
+// Common helpers
+// ---------------------------------------------------------------------------
+
+fn distance_type_to_u8(dt: DistanceType) -> u8 {
+    match dt {
+        DistanceType::L2 => 0,
+        DistanceType::Cosine => 1,
+        DistanceType::Dot => 2,
+        DistanceType::Hamming => 3,
+    }
+}
+
+fn u8_to_distance_type(v: u8) -> Result<DistanceType> {
+    match v {
+        0 => Ok(DistanceType::L2),
+        1 => Ok(DistanceType::Cosine),
+        2 => Ok(DistanceType::Dot),
+        3 => Ok(DistanceType::Hamming),
+        _ => Err(Error::io(format!("unknown distance type: {v}"))),
+    }
+}
+
+fn rotation_type_to_u8(rt: RQRotationType) -> u8 {
+    match rt {
+        RQRotationType::Matrix => 0,
+        RQRotationType::Fast => 1,
+    }
+}
+
+fn u8_to_rotation_type(v: u8) -> Result<RQRotationType> {
+    match v {
+        0 => Ok(RQRotationType::Matrix),
+        1 => Ok(RQRotationType::Fast),
+        _ => Err(Error::io(format!("unknown rotation type: {v}"))),
+    }
+}
+
+/// Write one or more RecordBatches as a complete Arrow IPC file into a `Vec<u8>`.
+///
+/// Panics if `batches` is empty (caller is responsible for checking).
+fn write_ipc_batches(batches: &[RecordBatch]) -> Result<Vec<u8>> {
+    let mut buf = Vec::new();
+    let mut writer = FileWriter::try_new(&mut buf, batches[0].schema_ref())?;
+    for batch in batches {
+        writer.write(batch)?;
+    }
+    writer.finish()?;
+    Ok(buf)
+}
+
+/// Write a single RecordBatch as a complete Arrow IPC file into a `Vec<u8>`.
+fn write_ipc(batch: &RecordBatch) -> Result<Vec<u8>> {
+    write_ipc_batches(std::slice::from_ref(batch))
+}
+
+/// Decode the IPC footer and schema from a `Buffer`, returning the decoder and
+/// the list of record-batch blocks. Dictionary blocks are decoded from slices of
+/// `data`; the returned blocks are descriptors copied out of the footer.
+fn parse_ipc_footer(data: &Buffer) -> Result<(FileDecoder, Vec<arrow_ipc::Block>)> {
+    let trailer_start = data
+        .len()
+        .checked_sub(10)
+        .ok_or_else(|| Error::io("IPC section too small to contain footer".to_string()))?;
+    let footer_len = read_footer_length(
+        data[trailer_start..]
+            .try_into()
+            .map_err(|_| Error::io("IPC section too small for footer length".to_string()))?,
+    )?;
+    let footer_start = trailer_start
+        .checked_sub(footer_len)
+        .ok_or_else(|| Error::io("IPC footer length exceeds section size".to_string()))?;
+    let footer = root_as_footer(&data[footer_start..trailer_start])
+        .map_err(|e| Error::io(format!("failed to parse IPC footer: {e}")))?;
+
+    let schema =
+        Arc::new(fb_to_schema(footer.schema().ok_or_else(|| {
+            Error::io("IPC footer missing schema".to_string())
+        })?));
+
+    let mut decoder = FileDecoder::new(schema, footer.version());
+
+    for block in footer.dictionaries().iter().flatten() {
+        let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
+        let block_data = data.slice_with_length(block.offset() as usize, block_len);
+        decoder.read_dictionary(block, &block_data)?;
+    }
+
+    let batch_blocks: Vec<arrow_ipc::Block> = footer
+        .recordBatches()
+        .map(|b| b.iter().copied().collect())
+        .unwrap_or_default();
+
+    Ok((decoder, batch_blocks))
+}
+
+/// Read all RecordBatches from an Arrow IPC file stored in a `Buffer`, zero-copy.
+///
+/// The returned arrays reference slices of the provided buffer directly.
+fn read_ipc_all_zero_copy(data: Buffer) -> Result<Vec<RecordBatch>> {
+    let (decoder, batch_blocks) = parse_ipc_footer(&data)?;
+    batch_blocks
+        .iter()
+        .map(|block| {
+            let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
+            let block_data = data.slice_with_length(block.offset() as usize, block_len);
+            decoder
+                .read_record_batch(block, &block_data)?
+                .ok_or_else(|| Error::io("IPC record batch was None".to_string()))
+        })
+        .collect()
+}
+
+/// Read a single RecordBatch from an Arrow IPC file stored in a `Buffer`, zero-copy.
+///
+/// The returned `RecordBatch`'s arrays reference slices of the provided buffer
+/// directly, avoiding copies.
+fn read_ipc_zero_copy(data: Buffer) -> Result<RecordBatch> {
+    let (decoder, batch_blocks) = parse_ipc_footer(&data)?;
+    if batch_blocks.is_empty() {
+        return Err(Error::io("IPC file contains no record batches".to_string()));
+    }
+    let block = &batch_blocks[0];
+    let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
+    let block_data = data.slice_with_length(block.offset() as usize, block_len);
+    decoder
+        .read_record_batch(block, &block_data)?
+        .ok_or_else(|| Error::io("IPC record batch was None".to_string()))
+}
+
+/// Wrap a `FixedSizeListArray` in a single-column RecordBatch with the given column name.
+fn fsl_to_batch(arr: &FixedSizeListArray, name: &str) -> Result<RecordBatch> {
+    let field = Field::new(
+        name,
+        DataType::FixedSizeList(
+            Arc::new(Field::new("item", arr.value_type(), true)),
+            arr.value_length(),
+        ),
+        false,
+    );
+    let schema = Arc::new(Schema::new(vec![field]));
+    Ok(RecordBatch::try_new(schema, vec![Arc::new(arr.clone())])?)
+}
+
+/// Extract a `FixedSizeListArray` from the first column of a RecordBatch.
+fn batch_to_fsl(batch: &RecordBatch) -> Result<FixedSizeListArray> {
+    let col = batch.column(0);
+    col.as_any()
+        .downcast_ref::<FixedSizeListArray>()
+        .cloned()
+        .ok_or_else(|| Error::io("column is not FixedSizeListArray".to_string()))
+}
+
+fn codebook_to_batch(codebook: &FixedSizeListArray) -> Result<RecordBatch> {
+    fsl_to_batch(codebook, "codebook")
+}
+
+fn batch_to_codebook(batch: &RecordBatch) -> Result<FixedSizeListArray> {
+    batch_to_fsl(batch)
+}
+
+// ---------------------------------------------------------------------------
+// PQ
+// ---------------------------------------------------------------------------
+
+/// Scalar metadata and section lengths for a serialized PQ partition entry.
+///
+/// Encoded as JSON and written ahead of the Arrow IPC sections, which follow in
+/// the order sub-index, codebook, storage.
+#[derive(Serialize, Deserialize)]
+struct PqPartitionHeader {
+    /// Distance type of the storage, encoded with `distance_type_to_u8`.
+    distance_type: u8,
+    /// Copied from the PQ metadata field `nbits`.
+    nbits: u32,
+    /// Copied from the PQ metadata field `num_sub_vectors`.
+    num_sub_vectors: usize,
+    /// Copied from the PQ metadata field `dimension`.
+    dimension: usize,
+    /// Copied from the PQ metadata field `transposed`.
+    transposed: bool,
+    /// Length of the sub-index IPC section in bytes.
+    sub_index_len: u64,
+    /// Length of the codebook IPC section in bytes.
+    codebook_len: u64,
+    /// Length of the storage batch IPC section in bytes.
+    storage_len: u64,
+}
+
+impl<S: IvfSubIndex> PartitionEntry<S, ProductQuantizer> {
+    /// Serialize this partition entry to bytes.
+    ///
+    /// The sub-index, PQ codebook, and storage batch are each written as Arrow
+    /// IPC file sections, preceded by a small JSON header containing scalar
+    /// metadata and section lengths. The resulting layout is:
+    ///
+    /// 1. the JSON header length as a little-endian `u64`,
+    /// 2. the JSON-encoded `PqPartitionHeader`,
+    /// 3. the sub-index section,
+    /// 4. the codebook section,
+    /// 5. the storage section.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the PQ metadata has no codebook, if the storage does
+    /// not yield exactly one batch, or if encoding any section or the header fails.
+    pub fn serialize(&self) -> Result<Vec<u8>> {
+        let metadata = self.storage.metadata();
+        let distance_type = self.storage.distance_type();
+
+        // Serialize the three Arrow sections first; their lengths go into the header.
+        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
+        let codebook = metadata.codebook.as_ref().ok_or_else(|| {
+            Error::io("PQ metadata missing codebook during serialization".to_string())
+        })?;
+        let codebook_ipc = write_ipc(&codebook_to_batch(codebook)?)?;
+        let storage_batches: Vec<_> = self.storage.to_batches()?.collect();
+        let [storage_batch] = storage_batches.as_slice() else {
+            return Err(Error::io(
+                "expected exactly one storage batch for PQ storage".to_string(),
+            ));
+        };
+        let storage_ipc = write_ipc(storage_batch)?;
+
+        let header = PqPartitionHeader {
+            distance_type: distance_type_to_u8(distance_type),
+            nbits: metadata.nbits,
+            num_sub_vectors: metadata.num_sub_vectors,
+            dimension: metadata.dimension,
+            transposed: metadata.transposed,
+            sub_index_len: sub_index_ipc.len() as u64,
+            codebook_len: codebook_ipc.len() as u64,
+            storage_len: storage_ipc.len() as u64,
+        };
+
+        let header_json = serde_json::to_vec(&header)?;
+
+        // 8 bytes for the header-length prefix, then the header and the sections.
+        let total_len =
+            8 + header_json.len() + sub_index_ipc.len() + codebook_ipc.len() + storage_ipc.len();
+        let mut out = Vec::with_capacity(total_len);
+        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
+        out.extend_from_slice(&header_json);
+        out.extend_from_slice(&sub_index_ipc);
+        out.extend_from_slice(&codebook_ipc);
+        out.extend_from_slice(&storage_ipc);
+
+        Ok(out)
+    }
+
+    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
+    ///
+    /// The Arrow IPC sections are decoded using [`FileDecoder`] so that the
+    /// resulting arrays reference slices of the provided `Bytes` buffer directly.
+    ///
+    /// The rebuilt PQ metadata takes its scalar fields and codebook from the
+    /// payload; `codebook_position` is set to 0 and `codebook_tensor` is left empty.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `data` is shorter than the layout it declares, if the
+    /// JSON header or any IPC section cannot be decoded, or if the sub-index or
+    /// storage cannot be rebuilt from the decoded batches.
+    pub fn deserialize(data: Bytes) -> Result<Self> {
+        if data.len() < 8 {
+            return Err(Error::io("partition data too small".to_string()));
+        }
+
+        // Every length below is read from the payload itself, so offsets are built
+        // with checked arithmetic and validated against `data.len()`. Plain `usize`
+        // addition could wrap on a corrupt length, slip past the bounds test and
+        // then panic in the slicing that follows.
+        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap());
+        let header_end = usize::try_from(header_len)
+            .ok()
+            .and_then(|len| len.checked_add(8))
+            .filter(|&end| end <= data.len())
+            .ok_or_else(|| Error::io("partition data truncated in header".to_string()))?;
+
+        let header: PqPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
+        let distance_type = u8_to_distance_type(header.distance_type)?;
+
+        // End offset of a section of `len` bytes starting at `start`, or an error
+        // if that section would reach past the end of `data`.
+        let section_end = |start: usize, len: u64| -> Result<usize> {
+            usize::try_from(len)
+                .ok()
+                .and_then(|len| start.checked_add(len))
+                .filter(|&end| end <= data.len())
+                .ok_or_else(|| Error::io("partition data truncated in IPC sections".to_string()))
+        };
+        let sub_index_start = header_end;
+        let codebook_start = section_end(sub_index_start, header.sub_index_len)?;
+        let storage_start = section_end(codebook_start, header.codebook_len)?;
+        let storage_end = section_end(storage_start, header.storage_len)?;
+
+        // Zero-copy: create Buffer slices backed by the original Bytes.
+        let buffer = Buffer::from(data);
+        let sub_index_buf =
+            buffer.slice_with_length(sub_index_start, codebook_start - sub_index_start);
+        let codebook_buf = buffer.slice_with_length(codebook_start, storage_start - codebook_start);
+        let storage_buf = buffer.slice_with_length(storage_start, storage_end - storage_start);
+
+        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
+        let codebook_batch = read_ipc_zero_copy(codebook_buf)?;
+        let storage_batch = read_ipc_zero_copy(storage_buf)?;
+
+        let index = S::load(sub_index_batch)?;
+        let codebook = batch_to_codebook(&codebook_batch)?;
+
+        let metadata = ProductQuantizationMetadata {
+            codebook_position: 0,
+            nbits: header.nbits,
+            num_sub_vectors: header.num_sub_vectors,
+            dimension: header.dimension,
+            codebook: Some(codebook),
+            codebook_tensor: Vec::new(),
+            transposed: header.transposed,
+        };
+
+        let storage = <ProductQuantizer as Quantization>::Storage::try_from_batch(
+            storage_batch,
+            &metadata,
+            distance_type,
+            None,
+        )?;
+
+        Ok(Self { index, storage })
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Flat
+// ---------------------------------------------------------------------------
+
+/// Scalar metadata and section lengths for a serialized Flat partition entry.
+///
+/// Encoded as JSON and written ahead of the Arrow IPC sections, which follow in
+/// the order sub-index, storage.
+#[derive(Serialize, Deserialize)]
+struct FlatPartitionHeader {
+    /// Distance type of the storage, encoded with `distance_type_to_u8`.
+    distance_type: u8,
+    /// Copied from the flat metadata field `dim`.
+    dim: usize,
+    /// Length of the sub-index IPC section in bytes.
+    sub_index_len: u64,
+    /// Length of the storage batch IPC section in bytes.
+    storage_len: u64,
+}
+
+impl<S: IvfSubIndex> PartitionEntry<S, FlatQuantizer> {
+    /// Serialize this partition entry to bytes.
+    ///
+    /// The output is the JSON header length as a little-endian `u64`, the
+    /// JSON-encoded `FlatPartitionHeader`, the sub-index IPC section, and the
+    /// storage IPC section, in that order.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the storage does not yield exactly one batch, or if
+    /// encoding any section or the header fails.
+    pub fn serialize(&self) -> Result<Vec<u8>> {
+        let metadata = self.storage.metadata();
+        let distance_type = self.storage.distance_type();
+
+        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
+        let storage_batches: Vec<_> = self.storage.to_batches()?.collect();
+        let [storage_batch] = storage_batches.as_slice() else {
+            return Err(Error::io(
+                "expected exactly one storage batch for Flat storage".to_string(),
+            ));
+        };
+        let storage_ipc = write_ipc(storage_batch)?;
+
+        let header = FlatPartitionHeader {
+            distance_type: distance_type_to_u8(distance_type),
+            dim: metadata.dim,
+            sub_index_len: sub_index_ipc.len() as u64,
+            storage_len: storage_ipc.len() as u64,
+        };
+
+        let header_json = serde_json::to_vec(&header)?;
+        // 8 bytes for the header-length prefix, then the header and the sections.
+        let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len();
+        let mut out = Vec::with_capacity(total_len);
+        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
+        out.extend_from_slice(&header_json);
+        out.extend_from_slice(&sub_index_ipc);
+        out.extend_from_slice(&storage_ipc);
+        Ok(out)
+    }
+
+    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `data` is shorter than the layout it declares, if the
+    /// JSON header or any IPC section cannot be decoded, or if the sub-index or
+    /// storage cannot be rebuilt from the decoded batches.
+    pub fn deserialize(data: Bytes) -> Result<Self> {
+        if data.len() < 8 {
+            return Err(Error::io("partition data too small".to_string()));
+        }
+
+        // Every length below is read from the payload itself, so offsets are built
+        // with checked arithmetic and validated against `data.len()`. Plain `usize`
+        // addition could wrap on a corrupt length, slip past the bounds test and
+        // then panic in the slicing that follows.
+        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap());
+        let header_end = usize::try_from(header_len)
+            .ok()
+            .and_then(|len| len.checked_add(8))
+            .filter(|&end| end <= data.len())
+            .ok_or_else(|| Error::io("partition data truncated in header".to_string()))?;
+
+        let header: FlatPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
+        let distance_type = u8_to_distance_type(header.distance_type)?;
+
+        // End offset of a section of `len` bytes starting at `start`, or an error
+        // if that section would reach past the end of `data`.
+        let section_end = |start: usize, len: u64| -> Result<usize> {
+            usize::try_from(len)
+                .ok()
+                .and_then(|len| start.checked_add(len))
+                .filter(|&end| end <= data.len())
+                .ok_or_else(|| Error::io("partition data truncated in IPC sections".to_string()))
+        };
+        let sub_index_start = header_end;
+        let storage_start = section_end(sub_index_start, header.sub_index_len)?;
+        let storage_end = section_end(storage_start, header.storage_len)?;
+
+        // Zero-copy: the slices share the allocation behind the original Bytes.
+        let buffer = Buffer::from(data);
+        let sub_index_buf =
+            buffer.slice_with_length(sub_index_start, storage_start - sub_index_start);
+        let storage_buf = buffer.slice_with_length(storage_start, storage_end - storage_start);
+
+        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
+        let storage_batch = read_ipc_zero_copy(storage_buf)?;
+
+        let index = S::load(sub_index_batch)?;
+        let metadata = FlatMetadata { dim: header.dim };
+        let storage = <FlatQuantizer as Quantization>::Storage::try_from_batch(
+            storage_batch,
+            &metadata,
+            distance_type,
+            None,
+        )?;
+
+        Ok(Self { index, storage })
+    }
+}
+
+// ---------------------------------------------------------------------------
+// SQ
+// ---------------------------------------------------------------------------
+
+/// Scalar metadata and section lengths for a serialized SQ partition entry.
+///
+/// Encoded as JSON and written ahead of the Arrow IPC sections, which follow in
+/// the order sub-index, storage.
+#[derive(Serialize, Deserialize)]
+struct SqPartitionHeader {
+    /// Distance type of the storage, encoded with `distance_type_to_u8`.
+    distance_type: u8,
+    /// Copied from the SQ metadata field `num_bits`.
+    num_bits: u16,
+    /// Copied from the SQ metadata field `dim`.
+    dim: usize,
+    /// Start of the SQ metadata `bounds` range.
+    bounds_start: f64,
+    /// End of the SQ metadata `bounds` range.
+    bounds_end: f64,
+    /// Length of the sub-index IPC section in bytes.
+    sub_index_len: u64,
+    /// Length of the storage IPC section in bytes.
+    storage_len: u64,
+}
+
+impl<S: IvfSubIndex> PartitionEntry<S, ScalarQuantizer> {
+    /// Serialize this partition entry to bytes.
+    ///
+    /// Multiple SQ storage chunks are concatenated into a single IPC section.
+    ///
+    /// The output is the JSON header length as a little-endian `u64`, the
+    /// JSON-encoded `SqPartitionHeader`, the sub-index IPC section, and the
+    /// storage IPC section, in that order.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the storage yields no batches, or if encoding any
+    /// section or the header fails.
+    pub fn serialize(&self) -> Result<Vec<u8>> {
+        let metadata = self.storage.metadata();
+        let distance_type = self.storage.distance_type();
+
+        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
+
+        // Write all SQ chunks as multiple record batches in one IPC file, avoiding copies.
+        let batches: Vec<_> = self.storage.to_batches()?.collect();
+        if batches.is_empty() {
+            return Err(Error::io("SQ storage has no batches".to_string()));
+        }
+        let storage_ipc = write_ipc_batches(&batches)?;
+
+        let header = SqPartitionHeader {
+            distance_type: distance_type_to_u8(distance_type),
+            num_bits: metadata.num_bits,
+            dim: metadata.dim,
+            bounds_start: metadata.bounds.start,
+            bounds_end: metadata.bounds.end,
+            sub_index_len: sub_index_ipc.len() as u64,
+            storage_len: storage_ipc.len() as u64,
+        };
+
+        let header_json = serde_json::to_vec(&header)?;
+        // 8 bytes for the header-length prefix, then the header and the sections.
+        let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len();
+        let mut out = Vec::with_capacity(total_len);
+        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
+        out.extend_from_slice(&header_json);
+        out.extend_from_slice(&sub_index_ipc);
+        out.extend_from_slice(&storage_ipc);
+        Ok(out)
+    }
+
+    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `data` is shorter than the layout it declares, if the
+    /// JSON header or any IPC section cannot be decoded, or if the sub-index or
+    /// storage cannot be rebuilt from the decoded batches.
+    pub fn deserialize(data: Bytes) -> Result<Self> {
+        if data.len() < 8 {
+            return Err(Error::io("partition data too small".to_string()));
+        }
+
+        // Every length below is read from the payload itself, so offsets are built
+        // with checked arithmetic and validated against `data.len()`. Plain `usize`
+        // addition could wrap on a corrupt length, slip past the bounds test and
+        // then panic in the slicing that follows.
+        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap());
+        let header_end = usize::try_from(header_len)
+            .ok()
+            .and_then(|len| len.checked_add(8))
+            .filter(|&end| end <= data.len())
+            .ok_or_else(|| Error::io("partition data truncated in header".to_string()))?;
+
+        let header: SqPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
+        let distance_type = u8_to_distance_type(header.distance_type)?;
+
+        // End offset of a section of `len` bytes starting at `start`, or an error
+        // if that section would reach past the end of `data`.
+        let section_end = |start: usize, len: u64| -> Result<usize> {
+            usize::try_from(len)
+                .ok()
+                .and_then(|len| start.checked_add(len))
+                .filter(|&end| end <= data.len())
+                .ok_or_else(|| Error::io("partition data truncated in IPC sections".to_string()))
+        };
+        let sub_index_start = header_end;
+        let storage_start = section_end(sub_index_start, header.sub_index_len)?;
+        let storage_end = section_end(storage_start, header.storage_len)?;
+
+        // Zero-copy: the slices share the allocation behind the original Bytes.
+        let buffer = Buffer::from(data);
+        let sub_index_buf =
+            buffer.slice_with_length(sub_index_start, storage_start - sub_index_start);
+        let storage_buf = buffer.slice_with_length(storage_start, storage_end - storage_start);
+
+        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
+        let storage_batches = read_ipc_all_zero_copy(storage_buf)?;
+
+        let index = S::load(sub_index_batch)?;
+        let metadata = ScalarQuantizationMetadata {
+            dim: header.dim,
+            num_bits: header.num_bits,
+            bounds: header.bounds_start..header.bounds_end,
+        };
+        let storage = <ScalarQuantizer as Quantization>::Storage::try_new(
+            metadata.num_bits,
+            distance_type,
+            metadata.bounds,
+            storage_batches,
+            None,
+        )?;
+
+        Ok(Self { index, storage })
+    }
+}
+
+// ---------------------------------------------------------------------------
+// RabitQ
+// ---------------------------------------------------------------------------
+
+/// Scalar metadata and section lengths for a serialized RabitQ partition entry.
+///
+/// Encoded as JSON and written ahead of the Arrow IPC sections, which follow in
+/// the order sub-index, rotation matrix (may be empty), storage.
+#[derive(Serialize, Deserialize)]
+struct RabitPartitionHeader {
+    /// Distance type of the storage, encoded with `distance_type_to_u8`.
+    distance_type: u8,
+    /// Copied from the RabitQ metadata field `num_bits`.
+    num_bits: u8,
+    /// Copied from the RabitQ metadata field `code_dim`.
+    code_dim: u32,
+    /// 0 = Matrix, 1 = Fast
+    rotation_type: u8,
+    /// Fast rotation signs (only set when rotation_type == Fast).
+    fast_rotation_signs: Option<Vec<u8>>,
+    /// Length of the sub-index IPC section in bytes.
+    sub_index_len: u64,
+    /// Length of the rotation matrix IPC section; 0 when rotation_type == Fast.
+    rotate_mat_len: u64,
+    /// Length of the storage batch IPC section in bytes.
+    storage_len: u64,
+}
+
+impl<S: IvfSubIndex> PartitionEntry<S, RabitQuantizer> {
+    /// Serialize this partition entry to bytes.
+    ///
+    /// For Matrix rotation the rotation matrix is stored as an Arrow IPC section.
+    /// For Fast rotation the signs are stored compactly in the JSON header.
+    ///
+    /// The storage batch is stored with already-packed codes so deserialization
+    /// can skip re-packing.
+    ///
+    /// The output is the JSON header length as a little-endian `u64`, the
+    /// JSON-encoded `RabitPartitionHeader`, the sub-index IPC section, the rotation
+    /// matrix IPC section (empty for Fast rotation), and the storage IPC section.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the rotation type is Matrix but the metadata has no
+    /// rotation matrix, if the storage does not yield exactly one batch, or if
+    /// encoding any section or the header fails.
+    pub fn serialize(&self) -> Result<Vec<u8>> {
+        let metadata = self.storage.metadata();
+        let distance_type = self.storage.distance_type();
+
+        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
+
+        let rotate_mat_ipc = match metadata.rotation_type {
+            RQRotationType::Matrix => {
+                let mat = metadata.rotate_mat.as_ref().ok_or_else(|| {
+                    Error::io(
+                        "RabitQ Matrix metadata missing rotate_mat during serialization"
+                            .to_string(),
+                    )
+                })?;
+                write_ipc(&fsl_to_batch(mat, "rotate_mat")?)?
+            }
+            // Fast rotation has no matrix; the section is written with zero length.
+            RQRotationType::Fast => Vec::new(),
+        };
+
+        let storage_batches: Vec<_> = self.storage.to_batches()?.collect();
+        let [storage_batch] = storage_batches.as_slice() else {
+            return Err(Error::io(
+                "expected exactly one storage batch for RabitQ storage".to_string(),
+            ));
+        };
+        let storage_ipc = write_ipc(storage_batch)?;
+
+        let header = RabitPartitionHeader {
+            distance_type: distance_type_to_u8(distance_type),
+            num_bits: metadata.num_bits,
+            code_dim: metadata.code_dim,
+            rotation_type: rotation_type_to_u8(metadata.rotation_type),
+            fast_rotation_signs: metadata.fast_rotation_signs.clone(),
+            sub_index_len: sub_index_ipc.len() as u64,
+            rotate_mat_len: rotate_mat_ipc.len() as u64,
+            storage_len: storage_ipc.len() as u64,
+        };
+
+        let header_json = serde_json::to_vec(&header)?;
+        // 8 bytes for the header-length prefix, then the header and the sections.
+        let total_len =
+            8 + header_json.len() + sub_index_ipc.len() + rotate_mat_ipc.len() + storage_ipc.len();
+        let mut out = Vec::with_capacity(total_len);
+        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
+        out.extend_from_slice(&header_json);
+        out.extend_from_slice(&sub_index_ipc);
+        out.extend_from_slice(&rotate_mat_ipc);
+        out.extend_from_slice(&storage_ipc);
+        Ok(out)
+    }
+
+    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `data` is shorter than the layout it declares, if the
+    /// JSON header or any IPC section cannot be decoded, if the header declares
+    /// Matrix rotation but carries no rotation matrix section, or if the sub-index
+    /// or storage cannot be rebuilt from the decoded batches.
+    pub fn deserialize(data: Bytes) -> Result<Self> {
+        if data.len() < 8 {
+            return Err(Error::io("partition data too small".to_string()));
+        }
+
+        // Every length below is read from the payload itself, so offsets are built
+        // with checked arithmetic and validated against `data.len()`. Plain `usize`
+        // addition could wrap on a corrupt length, slip past the bounds test and
+        // then panic in the slicing that follows.
+        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap());
+        let header_end = usize::try_from(header_len)
+            .ok()
+            .and_then(|len| len.checked_add(8))
+            .filter(|&end| end <= data.len())
+            .ok_or_else(|| Error::io("partition data truncated in header".to_string()))?;
+
+        let header: RabitPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
+        let distance_type = u8_to_distance_type(header.distance_type)?;
+        let rotation_type = u8_to_rotation_type(header.rotation_type)?;
+
+        // End offset of a section of `len` bytes starting at `start`, or an error
+        // if that section would reach past the end of `data`.
+        let section_end = |start: usize, len: u64| -> Result<usize> {
+            usize::try_from(len)
+                .ok()
+                .and_then(|len| start.checked_add(len))
+                .filter(|&end| end <= data.len())
+                .ok_or_else(|| Error::io("partition data truncated in IPC sections".to_string()))
+        };
+        let sub_index_start = header_end;
+        let rotate_mat_start = section_end(sub_index_start, header.sub_index_len)?;
+        let storage_start = section_end(rotate_mat_start, header.rotate_mat_len)?;
+        let storage_end = section_end(storage_start, header.storage_len)?;
+        let rotate_mat_len = storage_start - rotate_mat_start;
+
+        // Zero-copy: the slices share the allocation behind the original Bytes.
+        let buffer = Buffer::from(data);
+        let sub_index_buf =
+            buffer.slice_with_length(sub_index_start, rotate_mat_start - sub_index_start);
+        let storage_buf = buffer.slice_with_length(storage_start, storage_end - storage_start);
+
+        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
+        let storage_batch = read_ipc_zero_copy(storage_buf)?;
+
+        let rotate_mat = if rotate_mat_len > 0 {
+            let rotate_mat_buf = buffer.slice_with_length(rotate_mat_start, rotate_mat_len);
+            let mat_batch = read_ipc_zero_copy(rotate_mat_buf)?;
+            Some(batch_to_fsl(&mat_batch)?)
+        } else {
+            None
+        };
+        // `serialize` never emits Matrix rotation without a matrix section, so a
+        // payload claiming that combination is corrupt.
+        if matches!(rotation_type, RQRotationType::Matrix) && rotate_mat.is_none() {
+            return Err(Error::io(
+                "RabitQ Matrix partition data is missing rotate_mat".to_string(),
+            ));
+        }
+
+        let index = S::load(sub_index_batch)?;
+        let metadata = RabitQuantizationMetadata {
+            rotate_mat,
+            rotate_mat_position: None,
+            fast_rotation_signs: header.fast_rotation_signs,
+            rotation_type,
+            code_dim: header.code_dim,
+            num_bits: header.num_bits,
+            // The storage batch already has packed codes; skip re-packing.
+            packed: true,
+        };
+        let storage = <RabitQuantizer as Quantization>::Storage::try_from_batch(
+            storage_batch,
+            &metadata,
+            distance_type,
+            None,
+        )?;
+
+        Ok(Self { index, storage })
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::Arc;
+
+    use arrow_array::cast::AsArray;
+    use arrow_array::{
+        Float32Array, UInt8Array, UInt64Array,
+        types::{Float32Type, UInt8Type},
+    };
+    use arrow_schema::{DataType, Field, Schema};
+    use lance_arrow::FixedSizeListArrayExt;
+    use lance_index::vector::bq::storage::RABIT_CODE_COLUMN;
+    use lance_index::vector::bq::transform::{ADD_FACTORS_COLUMN, SCALE_FACTORS_COLUMN};
+    use lance_index::vector::bq::{RQRotationType, builder::RabitQuantizer};
+    use lance_index::vector::flat::index::FlatIndex;
+    use lance_index::vector::flat::storage::FlatFloatStorage;
+    use lance_index::vector::sq::storage::ScalarQuantizationStorage;
+
+    // ----- PQ helpers -------------------------------------------------------
+
+    /// Build a deterministic f32 codebook for tests.
+    ///
+    /// Holds `num_sub_vectors * 256 * (dim / num_sub_vectors)` values, the i-th
+    /// being `i * 0.01`, grouped into lists of `dim / num_sub_vectors` values.
+    fn make_test_codebook(dim: usize, num_sub_vectors: usize) -> FixedSizeListArray {
+        const NUM_CENTROIDS: usize = 256;
+        let width = dim / num_sub_vectors;
+        let count = num_sub_vectors * NUM_CENTROIDS * width;
+        let flat = Float32Array::from_iter_values((0..count).map(|idx| idx as f32 * 0.01));
+        FixedSizeListArray::try_new_from_values(flat, width as i32).unwrap()
+    }
+
+    /// Build PQ storage for tests with `num_rows` rows.
+    ///
+    /// Row ids are `0..num_rows`, code bytes cycle through `0..=255`, the codebook
+    /// comes from `make_test_codebook`, and the distance type is L2.
+    fn make_test_pq_storage(
+        num_rows: usize,
+        dim: usize,
+        num_sub_vectors: usize,
+    ) -> <ProductQuantizer as Quantization>::Storage {
+        let code_width = num_sub_vectors as i32;
+        let code_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", DataType::UInt8, true)),
+            code_width,
+        );
+        let schema = Schema::new(vec![
+            Field::new(lance_core::ROW_ID, DataType::UInt64, false),
+            Field::new(lance_index::vector::PQ_CODE_COLUMN, code_type, false),
+        ]);
+
+        let ids = UInt64Array::from_iter_values(0..num_rows as u64);
+        let raw_codes =
+            UInt8Array::from_iter_values((0..num_rows * num_sub_vectors).map(|i| (i % 256) as u8));
+        let codes = FixedSizeListArray::try_new_from_values(raw_codes, code_width).unwrap();
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(ids), Arc::new(codes)]).unwrap();
+
+        <ProductQuantizer as Quantization>::Storage::new(
+            make_test_codebook(dim, num_sub_vectors),
+            batch,
+            8,
+            num_sub_vectors,
+            dim,
+            DistanceType::L2,
+            false,
+            None,
+        )
+        .unwrap()
+    }
+
+    // ----- PQ tests ---------------------------------------------------------
+
+    /// A PQ entry with 100 rows survives serialize followed by deserialize.
+    #[test]
+    fn test_roundtrip_flat_pq() {
+        // 100 rows, 128 dimensions split into 16 sub-vectors.
+        let original = PartitionEntry::<FlatIndex, ProductQuantizer> {
+            index: FlatIndex::default(),
+            storage: make_test_pq_storage(100, 128, 16),
+        };
+
+        let payload = Bytes::from(original.serialize().unwrap());
+        let roundtripped =
+            PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(payload).unwrap();
+
+        assert_eq!(original.storage, roundtripped.storage);
+    }
+
+    /// The storage's distance type survives a PQ round trip for L2, Cosine and Dot.
+    #[test]
+    fn test_roundtrip_preserves_distance_type() {
+        let dim = 32;
+        let num_sub_vectors = 4;
+        let num_rows = 3;
+
+        // The schema does not depend on the distance type, so build it once.
+        let code_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", DataType::UInt8, true)),
+            num_sub_vectors as i32,
+        );
+        let schema = Arc::new(Schema::new(vec![
+            Field::new(lance_core::ROW_ID, DataType::UInt64, false),
+            Field::new(lance_index::vector::PQ_CODE_COLUMN, code_type, false),
+        ]));
+
+        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] {
+            let ids = UInt64Array::from(vec![0u64, 1, 2]);
+            let zero_codes = UInt8Array::from(vec![0u8; num_rows * num_sub_vectors]);
+            let codes =
+                FixedSizeListArray::try_new_from_values(zero_codes, num_sub_vectors as i32)
+                    .unwrap();
+            let batch =
+                RecordBatch::try_new(schema.clone(), vec![Arc::new(ids), Arc::new(codes)]).unwrap();
+
+            let original = PartitionEntry::<FlatIndex, ProductQuantizer> {
+                index: FlatIndex::default(),
+                storage: <ProductQuantizer as Quantization>::Storage::new(
+                    make_test_codebook(dim, num_sub_vectors),
+                    batch,
+                    8,
+                    num_sub_vectors,
+                    dim,
+                    dt,
+                    false,
+                    None,
+                )
+                .unwrap(),
+            };
+
+            let payload = Bytes::from(original.serialize().unwrap());
+            let restored =
+                PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(payload).unwrap();
+            assert_eq!(
+                restored.storage.distance_type(),
+                original.storage.distance_type()
+            );
+        }
+    }
+
+    /// A PQ entry with zero rows survives serialize followed by deserialize.
+    #[test]
+    fn test_empty_partition() {
+        // 0 rows, 16 dimensions split into 2 sub-vectors.
+        let original = PartitionEntry::<FlatIndex, ProductQuantizer> {
+            index: FlatIndex::default(),
+            storage: make_test_pq_storage(0, 16, 2),
+        };
+
+        let payload = Bytes::from(original.serialize().unwrap());
+        let roundtripped =
+            PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(payload).unwrap();
+        assert_eq!(original.storage, roundtripped.storage);
+    }
+
+    #[test]
+    fn test_truncated_data_errors() {
+        assert!(
+            PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(Bytes::from_static(
+                b"short"
+            ))
+            .is_err()
+        );
+    }
+
+    // ----- Flat helpers -----------------------------------------------------
+
+    /// Build L2 flat storage with `num_rows` vectors of `dim` f32 values, where the
+    /// i-th value overall is `i * 0.01`.
+    fn make_flat_storage(num_rows: usize, dim: usize) -> FlatFloatStorage {
+        let flat = Float32Array::from_iter_values((0..num_rows * dim).map(|i| i as f32 * 0.01));
+        let vectors = FixedSizeListArray::try_new_from_values(flat, dim as i32).unwrap();
+        FlatFloatStorage::new(vectors, DistanceType::L2)
+    }
+
+    // ----- Flat tests -------------------------------------------------------
+
+    /// A Flat entry keeps its dim, distance type, length and first batch across a
+    /// round trip.
+    #[test]
+    fn test_roundtrip_flat_flat() {
+        let original = PartitionEntry::<FlatIndex, FlatQuantizer> {
+            index: FlatIndex::default(),
+            storage: make_flat_storage(50, 64),
+        };
+
+        let payload = Bytes::from(original.serialize().unwrap());
+        let restored = PartitionEntry::<FlatIndex, FlatQuantizer>::deserialize(payload).unwrap();
+
+        let (before, after) = (&original.storage, &restored.storage);
+        assert_eq!(after.metadata().dim, before.metadata().dim);
+        assert_eq!(after.distance_type(), before.distance_type());
+        assert_eq!(after.len(), before.len());
+
+        // Compare the first batch produced by each storage.
+        let before_batch = before.to_batches().unwrap().next().unwrap();
+        let after_batch = after.to_batches().unwrap().next().unwrap();
+        assert_eq!(before_batch, after_batch);
+    }
+
+    /// The distance type survives a Flat round trip for L2, Cosine and Dot.
+    #[test]
+    fn test_flat_distance_types() {
+        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] {
+            // A single 32-dimensional vector of ones.
+            let ones = Float32Array::from(vec![1.0f32; 32]);
+            let original = PartitionEntry::<FlatIndex, FlatQuantizer> {
+                index: FlatIndex::default(),
+                storage: FlatFloatStorage::new(
+                    FixedSizeListArray::try_new_from_values(ones, 32).unwrap(),
+                    dt,
+                ),
+            };
+
+            let payload = Bytes::from(original.serialize().unwrap());
+            let restored =
+                PartitionEntry::<FlatIndex, FlatQuantizer>::deserialize(payload).unwrap();
+            assert_eq!(restored.storage.distance_type(), dt);
+        }
+    }
+
+    // ----- SQ helpers -------------------------------------------------------
+
+    /// Build single-chunk SQ storage for tests with `num_rows` rows.
+    ///
+    /// Row ids are `0..num_rows`, code bytes cycle through `0..=255`, and the
+    /// bounds are `-1.0..1.0`.
+    fn make_sq_storage(
+        num_rows: usize,
+        dim: usize,
+        distance_type: DistanceType,
+    ) -> ScalarQuantizationStorage {
+        let code_width = dim as i32;
+        let code_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", DataType::UInt8, true)),
+            code_width,
+        );
+        let schema = Schema::new(vec![
+            Field::new(lance_core::ROW_ID, DataType::UInt64, false),
+            Field::new(lance_index::vector::SQ_CODE_COLUMN, code_type, false),
+        ]);
+
+        let ids = UInt64Array::from_iter_values(0..num_rows as u64);
+        let raw_codes = UInt8Array::from_iter_values((0..num_rows * dim).map(|i| (i % 256) as u8));
+        let codes = FixedSizeListArray::try_new_from_values(raw_codes, code_width).unwrap();
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(ids), Arc::new(codes)]).unwrap();
+
+        ScalarQuantizationStorage::try_new(8, distance_type, -1.0..1.0, [batch], None).unwrap()
+    }
+
+    // ----- SQ tests ---------------------------------------------------------
+
+    /// An SQ entry keeps its metadata, distance type, length and row ids across a
+    /// round trip.
+    #[test]
+    fn test_roundtrip_flat_sq() {
+        let original = PartitionEntry::<FlatIndex, ScalarQuantizer> {
+            index: FlatIndex::default(),
+            storage: make_sq_storage(100, 64, DistanceType::L2),
+        };
+
+        let payload = Bytes::from(original.serialize().unwrap());
+        let restored =
+            PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(payload).unwrap();
+
+        // Scalar metadata must match field by field.
+        let expected_meta = original.storage.metadata();
+        let actual_meta = restored.storage.metadata();
+        assert_eq!(actual_meta.dim, expected_meta.dim);
+        assert_eq!(actual_meta.num_bits, expected_meta.num_bits);
+        assert_eq!(actual_meta.bounds, expected_meta.bounds);
+        assert_eq!(
+            restored.storage.distance_type(),
+            original.storage.distance_type()
+        );
+        assert_eq!(restored.storage.len(), original.storage.len());
+
+        // Verify row IDs are preserved.
+        let expected_ids = original.storage.row_ids().copied().collect::<Vec<u64>>();
+        let actual_ids = restored.storage.row_ids().copied().collect::<Vec<u64>>();
+        assert_eq!(expected_ids, actual_ids);
+    }
+
+    /// The distance type survives an SQ round trip for L2, Cosine and Dot.
+    #[test]
+    fn test_sq_distance_types() {
+        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] {
+            let original = PartitionEntry::<FlatIndex, ScalarQuantizer> {
+                index: FlatIndex::default(),
+                storage: make_sq_storage(10, 16, dt),
+            };
+
+            let payload = Bytes::from(original.serialize().unwrap());
+            let restored =
+                PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(payload).unwrap();
+            assert_eq!(restored.storage.distance_type(), dt);
+        }
+    }
+
+    /// SQ storage built from three 10-row batches keeps its length and row ids
+    /// across a round trip.
+    #[test]
+    fn test_sq_multiple_chunks_no_copy() {
+        const DIM: usize = 16;
+
+        // Every chunk shares one schema: row ids plus all-zero SQ codes.
+        let schema = Arc::new(Schema::new(vec![
+            Field::new(lance_core::ROW_ID, DataType::UInt64, false),
+            Field::new(
+                lance_index::vector::SQ_CODE_COLUMN,
+                DataType::FixedSizeList(
+                    Arc::new(Field::new("item", DataType::UInt8, true)),
+                    DIM as i32,
+                ),
+                false,
+            ),
+        ]));
+        let chunk = |first_row: u64, rows: usize| {
+            let ids = UInt64Array::from_iter_values(first_row..first_row + rows as u64);
+            let zero_codes = UInt8Array::from(vec![0u8; rows * DIM]);
+            let codes = FixedSizeListArray::try_new_from_values(zero_codes, DIM as i32).unwrap();
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(ids), Arc::new(codes)]).unwrap()
+        };
+
+        // Three chunks with 10 rows each.
+        let storage = ScalarQuantizationStorage::try_new(
+            8,
+            DistanceType::L2,
+            -1.0..1.0,
+            [chunk(0, 10), chunk(10, 10), chunk(20, 10)],
+            None,
+        )
+        .unwrap();
+        assert_eq!(storage.len(), 30);
+
+        let original = PartitionEntry::<FlatIndex, ScalarQuantizer> {
+            index: FlatIndex::default(),
+            storage,
+        };
+        let payload = Bytes::from(original.serialize().unwrap());
+        let restored =
+            PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(payload).unwrap();
+
+        assert_eq!(restored.storage.len(), 30);
+        let expected_ids = original.storage.row_ids().copied().collect::<Vec<u64>>();
+        let actual_ids = restored.storage.row_ids().copied().collect::<Vec<u64>>();
+        assert_eq!(expected_ids, actual_ids);
+    }
+
+    // ----- RabitQ helpers ---------------------------------------------------
+
+    fn make_rabit_storage_fast(
+        num_rows: usize,
+        code_dim: usize,
+        distance_type: DistanceType,
+    ) -> <RabitQuantizer as Quantization>::Storage {
+        use lance_arrow::FixedSizeListArrayExt;
+
+        let quantizer = RabitQuantizer::new_with_rotation::<Float32Type>(
+            1,
+            code_dim as i32,
+            RQRotationType::Fast,
+        );
+        // Deterministic f32 vectors (values cycle over -0.5..=0.49), quantized into codes below.
+        let values: Vec<f32> = (0..num_rows * code_dim)
+            .map(|i| (i % 100) as f32 / 100.0 - 0.5)
+            .collect();
+        let values_arr = Float32Array::from(values);
+        let vectors = FixedSizeListArray::try_new_from_values(values_arr, code_dim as i32).unwrap();
+        let codes = quantizer
+            .quantize(&vectors)
+            .unwrap()
+            .as_fixed_size_list()
+            .clone();
+
+        let metadata = quantizer.metadata(None);
+        let batch = RecordBatch::try_from_iter(vec![
+            (
+                lance_core::ROW_ID, // row ids are 0..num_rows
+                Arc::new(UInt64Array::from_iter_values(0..num_rows as u64))
+                    as Arc<dyn arrow_array::Array>,
+            ),
+            (
+                RABIT_CODE_COLUMN,
+                Arc::new(codes) as Arc<dyn arrow_array::Array>,
+            ),
+            (
+                ADD_FACTORS_COLUMN, // synthetic per-row values, not derived from the codes
+                Arc::new(Float32Array::from_iter_values(
+                    (0..num_rows).map(|i| i as f32 * 0.1),
+                )) as Arc<dyn arrow_array::Array>,
+            ),
+            (
+                SCALE_FACTORS_COLUMN, // synthetic per-row values, not derived from the codes
+                Arc::new(Float32Array::from_iter_values(
+                    (0..num_rows).map(|i| i as f32 * 0.01 + 0.5),
+                )) as Arc<dyn arrow_array::Array>,
+            ),
+        ])
+        .unwrap();
+
+        <RabitQuantizer as Quantization>::Storage::try_from_batch(
+            batch,
+            &metadata,
+            distance_type,
+            None,
+        )
+        .unwrap()
+    }
+
+    // ----- RabitQ tests -----------------------------------------------------
+
+    #[test]
+    fn test_roundtrip_flat_rabitq_fast() {
+        let num_rows = 50;
+        let code_dim = 64;
+        let storage = make_rabit_storage_fast(num_rows, code_dim, DistanceType::L2);
+        let entry = PartitionEntry::<FlatIndex, RabitQuantizer> {
+            index: FlatIndex::default(),
+            storage,
+        };
+
+        let bytes = entry.serialize().unwrap();
+        let restored =
+            PartitionEntry::<FlatIndex, RabitQuantizer>::deserialize(bytes.into()).unwrap();
+
+        let m = entry.storage.metadata();
+        let rm = restored.storage.metadata();
+        assert_eq!(rm.num_bits, m.num_bits);
+        assert_eq!(rm.code_dim, m.code_dim);
+        assert_eq!(rm.rotation_type, m.rotation_type);
+        assert_eq!(rm.fast_rotation_signs, m.fast_rotation_signs);
+        assert!(rm.packed); // checked on the restored side only; not compared with `m.packed`
+        assert_eq!(
+            restored.storage.distance_type(),
+            entry.storage.distance_type()
+        );
+        assert_eq!(restored.storage.len(), entry.storage.len());
+
+        // Verify row IDs are preserved, including their order.
+        let orig_ids: Vec<u64> = entry.storage.row_ids().copied().collect();
+        let rest_ids: Vec<u64> = restored.storage.row_ids().copied().collect();
+        assert_eq!(orig_ids, rest_ids);
+
+        // Verify codes are preserved (only the first batch from each side is compared).
+        let orig_batch = entry.storage.to_batches().unwrap().next().unwrap();
+        let rest_batch = restored.storage.to_batches().unwrap().next().unwrap();
+        let orig_codes = orig_batch[RABIT_CODE_COLUMN].as_fixed_size_list();
+        let rest_codes = rest_batch[RABIT_CODE_COLUMN].as_fixed_size_list();
+        assert_eq!(
+            orig_codes.values().as_primitive::<UInt8Type>().values(),
+            rest_codes.values().as_primitive::<UInt8Type>().values(),
+        );
+    }
+
+    #[test]
+    fn test_rabitq_distance_types() {
+        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { // one round trip per type
+            let storage = make_rabit_storage_fast(10, 32, dt);
+            let entry = PartitionEntry::<FlatIndex, RabitQuantizer> {
+                index: FlatIndex::default(),
+                storage,
+            };
+            let bytes = entry.serialize().unwrap();
+            let restored =
+                PartitionEntry::<FlatIndex, RabitQuantizer>::deserialize(bytes.into()).unwrap();
+            assert_eq!(restored.storage.distance_type(), dt); // only the distance type is checked here
+        }
+    }
+}

From f1ed93430b09a7266abb84164d4d5653e972156f Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 12:29:58 -0700
Subject: [PATCH 10/24] chore: make index_caches module public for downstream
 codec registration

---
 rust/lance/src/session.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index 4876224cb75..a24dc82d3cb 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -17,7 +17,7 @@ use crate::session::index_caches::GlobalIndexCache;
 use self::index_extension::IndexExtension;
 
 pub(crate) mod caches;
-pub(crate) mod index_caches;
+pub mod index_caches;
 pub(crate) mod index_extension;
 
 /// A user session holds the runtime state for a [`crate::Dataset`]

From a38be575c39d998501ce5bcd9308e6f6fae77716 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 19 Mar 2026 21:31:09 -0700
Subject: [PATCH 11/24] feat: add cacheable_state() for VectorIndex disk
 caching

Add IvfIndexState struct and serialization to lance-index, enabling
IVFIndex to export its reconstructable state (IVF model, quantizer
metadata) without non-serializable handles. Add reconstruct_vector_index()
which rebuilds an IVFIndex from cached state by re-opening FileReaders
(cheap with warm metadata cache) instead of re-fetching global buffers
from object storage.

Also adds IvfQuantizationStorage::from_cached() to skip global buffer
reads during reconstruction, and Session::file_metadata_cache() to
expose the metadata cache for the reconstruction context.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-index/src/vector.rs         | 131 +++++++++++++++
 rust/lance-index/src/vector/storage.rs |  18 ++
 rust/lance/src/index/vector/ivf/v2.rs  | 221 ++++++++++++++++++++++++-
 rust/lance/src/session.rs              |   5 +
 4 files changed, 374 insertions(+), 1 deletion(-)

diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs
index 0fbff4475cb..66f423c844d 100644
--- a/rust/lance-index/src/vector.rs
+++ b/rust/lance-index/src/vector.rs
@@ -11,12 +11,14 @@ use std::{collections::HashMap, sync::Arc};
 use arrow_array::{ArrayRef, Float32Array, RecordBatch, UInt32Array};
 use arrow_schema::Field;
 use async_trait::async_trait;
+use bytes::Bytes;
 use datafusion::execution::SendableRecordBatchStream;
 use deepsize::DeepSizeOf;
 use ivf::storage::IvfModel;
 use lance_core::{ROW_ID_FIELD, Result};
 use lance_io::traits::Reader;
 use lance_linalg::distance::DistanceType;
+use prost::Message;
 use quantizer::{QuantizationType, Quantizer};
 use std::sync::LazyLock;
 use v3::subindex::SubIndexType;
@@ -140,6 +142,129 @@ impl From<DistanceType> for pb::VectorMetricType {
     }
 }
 
+/// Serializable state of an IVF index, sufficient to reconstruct the index
+/// without re-reading global buffers from object storage.
+///
+/// Produced by [`VectorIndex::cacheable_state`] and consumed by a
+/// reconstruction function that re-opens FileReaders using cached file metadata.
+pub struct IvfIndexState {
+    /// Object-store path to the index file (before `to_local_path` conversion).
+    pub index_file_path: String,
+    pub uuid: String,
+    pub ivf: IvfModel,
+    pub distance_type: DistanceType,
+    pub sub_index_metadata: Vec<String>,
+    /// JSON serialization of `Q::Metadata` (quantizer-specific metadata).
+    pub quantizer_metadata_json: String,
+    /// Bytes from `extra_metadata()` (PQ codebook, RQ rotation matrix); empty deserializes as `None`.
+    pub quantizer_extra_data: Option<Vec<u8>>,
+    pub sub_index_type: SubIndexType,
+    pub quantization_type: QuantizationType,
+}
+
+/// Serialization header for [`IvfIndexState`].
+#[derive(serde::Serialize, serde::Deserialize)]
+struct IvfIndexStateHeader { // encoded as JSON by `IvfIndexState::serialize`
+    index_file_path: String,
+    uuid: String,
+    distance_type: String, // `to_string()` form of `DistanceType`; parsed back in `deserialize`
+    sub_index_metadata: Vec<String>,
+    sub_index_type: String,
+    quantization_type: String,
+    quantizer_metadata_json: String,
+}
+
+impl IvfIndexState {
+    /// Encode `self` in the wire format (a `None` extra payload is written with length 0):
+    /// `[header_json_len: u64 LE][header JSON][ivf_pb_len: u64 LE][ivf protobuf]
+    ///  [extra_len: u64 LE][extra bytes]`
+    pub fn serialize(&self) -> Result<Vec<u8>> {
+        let header = IvfIndexStateHeader {
+            index_file_path: self.index_file_path.clone(),
+            uuid: self.uuid.clone(),
+            distance_type: self.distance_type.to_string(),
+            sub_index_metadata: self.sub_index_metadata.clone(),
+            sub_index_type: self.sub_index_type.to_string(),
+            quantization_type: self.quantization_type.to_string(),
+            quantizer_metadata_json: self.quantizer_metadata_json.clone(),
+        };
+        let header_json = serde_json::to_vec(&header)
+            .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
+
+        let ivf_pb = pb::Ivf::try_from(&self.ivf)?;
+        let ivf_bytes = ivf_pb.encode_to_vec();
+
+        let extra = self.quantizer_extra_data.as_deref().unwrap_or(&[]);
+
+        let total = 8 + header_json.len() + 8 + ivf_bytes.len() + 8 + extra.len(); // exact output size
+        let mut buf = Vec::with_capacity(total);
+        buf.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
+        buf.extend_from_slice(&header_json);
+        buf.extend_from_slice(&(ivf_bytes.len() as u64).to_le_bytes());
+        buf.extend_from_slice(&ivf_bytes);
+        buf.extend_from_slice(&(extra.len() as u64).to_le_bytes());
+        buf.extend_from_slice(extra);
+        Ok(buf)
+    }
+
+    pub fn deserialize(data: Bytes) -> Result<Self> { // inverse of `serialize`; Err on truncated/invalid data
+        let mut offset = 0;
+
+        let read_u64 = |data: &[u8], off: &mut usize| -> Result<u64> {
+            if *off + 8 > data.len() {
+                return Err(lance_core::Error::io("IvfIndexState data truncated"));
+            }
+            let val = u64::from_le_bytes(data[*off..*off + 8].try_into().unwrap());
+            *off += 8;
+            Ok(val)
+        };
+
+        let header_len = read_u64(&data, &mut offset)? as usize;
+        if header_len > data.len() - offset { // overflow-safe: offset <= data.len() here
+            return Err(lance_core::Error::io("IvfIndexState header truncated"));
+        }
+        let header: IvfIndexStateHeader =
+            serde_json::from_slice(&data[offset..offset + header_len])
+                .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
+        offset += header_len;
+
+        let ivf_len = read_u64(&data, &mut offset)? as usize;
+        if ivf_len > data.len() - offset { // overflow-safe, as above
+            return Err(lance_core::Error::io("IvfIndexState IVF data truncated"));
+        }
+        let ivf_pb = pb::Ivf::decode(&data[offset..offset + ivf_len])
+            .map_err(|e| lance_core::Error::io(format!("IvfIndexState IVF decode: {e}")))?;
+        let ivf = IvfModel::try_from(ivf_pb)?;
+        offset += ivf_len;
+
+        let extra_len = read_u64(&data, &mut offset)? as usize;
+        if extra_len > data.len() - offset { // overflow-safe, as above
+            return Err(lance_core::Error::io("IvfIndexState extra data truncated"));
+        }
+        let quantizer_extra_data = if extra_len > 0 { // an empty payload is restored as `None`
+            Some(data[offset..offset + extra_len].to_vec())
+        } else {
+            None
+        };
+
+        let distance_type = DistanceType::try_from(header.distance_type.as_str())?;
+        let sub_index_type = SubIndexType::try_from(header.sub_index_type.as_str())?;
+        let quantization_type = header.quantization_type.parse::<QuantizationType>()?;
+
+        Ok(Self {
+            index_file_path: header.index_file_path,
+            uuid: header.uuid,
+            ivf,
+            distance_type,
+            sub_index_metadata: header.sub_index_metadata,
+            quantizer_metadata_json: header.quantizer_metadata_json,
+            quantizer_extra_data,
+            sub_index_type,
+            quantization_type,
+        })
+    }
+}
+
 /// Vector Index for (Approximate) Nearest Neighbor (ANN) Search.
 ///
 /// Vector indices are often built as a chain of indices.  For example, IVF -> PQ
@@ -264,6 +389,12 @@ pub trait VectorIndex: Send + Sync + std::fmt::Debug + Index {
 
     /// the index type of this vector index.
     fn sub_index_type(&self) -> (SubIndexType, QuantizationType);
+
+    /// Export the index state needed for reconstruction from a disk cache.
+    /// Returns `None` if this index type doesn't support persistent caching.
+    fn cacheable_state(&self) -> Option<IvfIndexState> {
+        None
+    }
 }
 
 // it can be an IVF index or a partition of IVF index
diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs
index 5a1c0e7e6f5..1879774ce84 100644
--- a/rust/lance-index/src/vector/storage.rs
+++ b/rust/lance-index/src/vector/storage.rs
@@ -239,6 +239,24 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
         })
     }
 
+    /// Construct from pre-parsed metadata, skipping global buffer reads; the arguments are
+    /// stored as given, with no I/O or validation. Used when reconstructing from a disk cache.
+    pub fn from_cached(
+        reader: FileReader,
+        ivf: IvfModel,
+        metadata: Q::Metadata,
+        distance_type: DistanceType,
+        frag_reuse_index: Option<Arc<FragReuseIndex>>,
+    ) -> Self {
+        Self {
+            reader,
+            distance_type,
+            metadata,
+            ivf,
+            frag_reuse_index,
+        }
+    }
+
     pub fn num_rows(&self) -> u64 {
         self.reader.num_rows()
     }
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index 14f71612ddb..90ca1a4d619 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -29,15 +29,16 @@ use lance_encoding::decoder::{DecoderPlugins, FilterExpression};
 use lance_file::reader::{FileReader, FileReaderOptions};
 use lance_index::frag_reuse::FragReuseIndex;
 use lance_index::metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector};
-use lance_index::vector::VectorIndexCacheEntry;
 use lance_index::vector::flat::index::{FlatIndex, FlatQuantizer};
 use lance_index::vector::hnsw::HNSW;
 use lance_index::vector::ivf::storage::IvfModel;
 use lance_index::vector::pq::ProductQuantizer;
+use lance_index::vector::quantizer::QuantizerMetadata;
 use lance_index::vector::quantizer::{QuantizationType, Quantizer};
 use lance_index::vector::sq::ScalarQuantizer;
 use lance_index::vector::storage::VectorStore;
 use lance_index::vector::v3::subindex::SubIndexType;
+use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry};
 use lance_index::{
     INDEX_AUXILIARY_FILE_NAME, INDEX_FILE_NAME, Index, IndexType, pb,
     vector::{
@@ -225,6 +226,34 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
         })
     }
 
+    /// Reconstruct from cached state, skipping global buffer reads; partition locks start fresh.
+    pub(crate) fn from_cached_state(
+        uri: String,
+        uuid: String,
+        ivf: IvfModel,
+        reader: FileReader,
+        storage: IvfQuantizationStorage<Q>,
+        sub_index_metadata: Vec<String>,
+        distance_type: DistanceType,
+        index_cache: LanceCache,
+        io_parallelism: usize,
+    ) -> Self { // NOTE(review): 9 parameters — check whether clippy::too_many_arguments fires here
+        let num_partitions = ivf.num_partitions();
+        Self {
+            uri,
+            uuid,
+            ivf,
+            reader,
+            storage,
+            partition_locks: PartitionLoadLock::new(num_partitions),
+            sub_index_metadata,
+            distance_type,
+            index_cache: WeakLanceCache::from(&index_cache), // stored as a `WeakLanceCache`
+            io_parallelism,
+            _marker: PhantomData,
+        }
+    }
+
     #[instrument(level = "debug", skip(self, metrics))]
     pub async fn load_partition(
         &self,
@@ -595,6 +624,25 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> VectorIndex for IVFInd
     fn metric_type(&self) -> DistanceType {
         self.distance_type
     }
+
+    fn cacheable_state(&self) -> Option<IvfIndexState> {
+        let extra_data = self.storage.metadata().extra_metadata().ok().flatten();
+        let metadata_json = serde_json::to_string(self.storage.metadata()).ok()?;
+        let (sub_index_type, quantization_type) = self.sub_index_type();
+        // Convert local path back to object_store Path (undo to_local_path's "/" prefix)
+        let index_file_path = self.uri.trim_start_matches('/').to_string();
+        Some(IvfIndexState {
+            index_file_path,
+            uuid: self.uuid.clone(),
+            ivf: self.ivf.clone(),
+            distance_type: self.distance_type,
+            sub_index_metadata: self.sub_index_metadata.clone(),
+            quantizer_metadata_json: metadata_json,
+            quantizer_extra_data: extra_data.map(|b| b.to_vec()),
+            sub_index_type,
+            quantization_type,
+        })
+    }
 }
 
 pub type IvfFlatIndex = IVFIndex<FlatIndex, FlatQuantizer>;
@@ -602,6 +650,177 @@ pub type IvfPq = IVFIndex<FlatIndex, ProductQuantizer>;
 pub type IvfHnswSqIndex = IVFIndex<HNSW, ScalarQuantizer>;
 pub type IvfHnswPqIndex = IVFIndex<HNSW, ProductQuantizer>;
 
+/// Reconstruct a concrete `IVFIndex<S, Q>` from cached state by re-opening the index and aux files.
+async fn reconstruct_typed<S: IvfSubIndex + 'static, Q: Quantization + 'static>(
+    state: IvfIndexState,
+    object_store: Arc<ObjectStore>,
+    file_metadata_cache: &LanceCache,
+    index_cache: LanceCache,
+) -> Result<Arc<dyn VectorIndex>>
+where
+    Q::Metadata: serde::de::DeserializeOwned,
+{
+    let io_parallelism = object_store.io_parallelism();
+    let scheduler_config = SchedulerConfig::max_bandwidth(&object_store);
+    let scheduler = ScanScheduler::new(object_store, scheduler_config);
+
+    let index_path = Path::parse(&state.index_file_path)
+        .map_err(|e| Error::io(format!("invalid index path: {e}")))?;
+
+    // Re-open index FileReader (cheap if file metadata cache is warm)
+    let index_reader = FileReader::try_open(
+        scheduler
+            .open_file(&index_path, &CachedFileSize::unknown())
+            .await?,
+        None,
+        Arc::<DecoderPlugins>::default(),
+        file_metadata_cache,
+        FileReaderOptions::default(),
+    )
+    .await?;
+
+    // Derive aux file path: replace the filename with INDEX_AUXILIARY_FILE_NAME.
+    // E.g. "path/to/{uuid}/<index file>" becomes "path/to/{uuid}/<INDEX_AUXILIARY_FILE_NAME>".
+    let index_path_str = index_path.as_ref();
+    let parent_str = index_path_str
+        .rsplit_once('/')
+        .map(|(p, _)| p)
+        .unwrap_or(""); // no '/' in the path: the parent is empty
+    let aux_path = Path::parse(format!("{}/{}", parent_str, INDEX_AUXILIARY_FILE_NAME))
+        .map_err(|e| Error::io(format!("invalid aux path: {e}")))?;
+    let storage_reader = FileReader::try_open(
+        scheduler
+            .open_file(&aux_path, &CachedFileSize::unknown())
+            .await?,
+        None,
+        Arc::<DecoderPlugins>::default(),
+        file_metadata_cache,
+        FileReaderOptions::default(),
+    )
+    .await?;
+
+    // Parse quantizer metadata from cached JSON, then feed it the extra bytes when present
+    let mut metadata: Q::Metadata = serde_json::from_str(&state.quantizer_metadata_json)?;
+    if let Some(extra) = state.quantizer_extra_data {
+        metadata.parse_buffer(extra.into())?;
+    }
+
+    let storage = IvfQuantizationStorage::from_cached(
+        storage_reader,
+        state.ivf.clone(),
+        metadata,
+        state.distance_type,
+        None, // frag_reuse_index is not part of IvfIndexState, so none is attached
+    );
+
+    let index = IVFIndex::<S, Q>::from_cached_state(
+        to_local_path(&index_path),
+        state.uuid,
+        state.ivf,
+        index_reader,
+        storage,
+        state.sub_index_metadata,
+        state.distance_type,
+        index_cache,
+        io_parallelism,
+    );
+
+    Ok(Arc::new(index))
+}
+
+/// Reconstruct a `dyn VectorIndex` from a cached [`IvfIndexState`], dispatching on the stored
+/// sub-index and quantization types; unsupported combinations return an index error.
+pub async fn reconstruct_vector_index(
+    state: IvfIndexState,
+    object_store: Arc<ObjectStore>,
+    file_metadata_cache: &LanceCache,
+    index_cache: LanceCache,
+) -> Result<Arc<dyn VectorIndex>> {
+    use lance_index::vector::bq::builder::RabitQuantizer;
+
+    // Render the type tags as strings first: `state` is moved into `reconstruct_typed` below.
+    let sub_idx = state.sub_index_type.to_string();
+    let quant = state.quantization_type.to_string();
+
+    match (sub_idx.as_str(), quant.as_str()) {
+        ("FLAT", "FLAT") => {
+            reconstruct_typed::<FlatIndex, FlatQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        ("FLAT", "PQ") => {
+            reconstruct_typed::<FlatIndex, ProductQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        ("FLAT", "SQ") => {
+            reconstruct_typed::<FlatIndex, ScalarQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        ("FLAT", "RQ") => {
+            reconstruct_typed::<FlatIndex, RabitQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        ("HNSW", "PQ") => {
+            reconstruct_typed::<HNSW, ProductQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        ("HNSW", "SQ") => {
+            reconstruct_typed::<HNSW, ScalarQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        ("HNSW", "FLAT") => {
+            reconstruct_typed::<HNSW, FlatQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        ("HNSW", "RQ") => {
+            reconstruct_typed::<HNSW, RabitQuantizer>(
+                state,
+                object_store,
+                file_metadata_cache,
+                index_cache,
+            )
+            .await
+        }
+        (s, q) => Err(Error::index(format!(
+            "unsupported index type for reconstruction: sub_index={s}, quantization={q}"
+        ))),
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::collections::HashSet;
diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index a24dc82d3cb..aa450483553 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -195,6 +195,11 @@ impl Session {
         self.store_registry.clone()
     }
 
+    /// Borrow the [`LanceCache`] wrapped by the session's metadata cache (for index reconstruction).
+    pub fn file_metadata_cache(&self) -> &LanceCache {
+        &self.metadata_cache.0
+    }
+
     /// Fetch statistics for the metadata cache
     pub async fn metadata_cache_stats(&self) -> lance_core::cache::CacheStats {
         self.metadata_cache.0.stats().await

From a575f18efe54057665f848e59c197f4aa60ce7e5 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 20 Mar 2026 08:45:36 -0700
Subject: [PATCH 12/24] feat: add cache_key_prefix to IvfIndexState for
 reconstruction

Reconstructed VectorIndex instances need the original cache key prefix
to share partition entries with the two-tier cache backend. Also adds
LanceCache::with_backend_and_prefix() and WeakLanceCache::prefix().

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs          | 16 ++++++++++++++++
 rust/lance-index/src/vector.rs        |  7 +++++++
 rust/lance/src/index/vector/ivf/v2.rs |  1 +
 3 files changed, 24 insertions(+)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 83c8bb09acb..66778c5149d 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -294,6 +294,17 @@ impl LanceCache {
         }
     }
 
+    /// Create a cache over `backend` with an exact prefix string and hit/miss counters starting at 0.
+    /// Unlike `with_key_prefix`, this sets the prefix verbatim (no trailing slash added).
+    pub fn with_backend_and_prefix(backend: Arc<dyn CacheBackend>, prefix: String) -> Self {
+        Self {
+            cache: backend,
+            prefix,
+            hits: Arc::new(AtomicU64::new(0)),
+            misses: Arc::new(AtomicU64::new(0)),
+        }
+    }
+
     /// Appends a prefix to the cache key.
     pub fn with_key_prefix(&self, prefix: &str) -> Self {
         Self {
@@ -509,6 +520,11 @@ impl WeakLanceCache {
         }
     }
 
+    /// The key prefix used for all entries in this cache, borrowed as stored (no copy is made).
+    pub fn prefix(&self) -> &str {
+        &self.prefix
+    }
+
     pub async fn get_with_key<K>(&self, cache_key: &K) -> Option<Arc<K::ValueType>>
     where
         K: CacheKey,
diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs
index 66f423c844d..76e37a824aa 100644
--- a/rust/lance-index/src/vector.rs
+++ b/rust/lance-index/src/vector.rs
@@ -160,6 +160,9 @@ pub struct IvfIndexState {
     pub quantizer_extra_data: Option<Vec<u8>>,
     pub sub_index_type: SubIndexType,
     pub quantization_type: QuantizationType,
+    /// The cache key prefix used by the original index's WeakLanceCache.
+    /// Needed to reconnect the reconstructed index to the shared cache backend.
+    pub cache_key_prefix: String,
 }
 
 /// Serialization header for [`IvfIndexState`].
@@ -172,6 +175,8 @@ struct IvfIndexStateHeader {
     sub_index_type: String,
     quantization_type: String,
     quantizer_metadata_json: String,
+    #[serde(default)]
+    cache_key_prefix: String,
 }
 
 impl IvfIndexState {
@@ -187,6 +192,7 @@ impl IvfIndexState {
             sub_index_type: self.sub_index_type.to_string(),
             quantization_type: self.quantization_type.to_string(),
             quantizer_metadata_json: self.quantizer_metadata_json.clone(),
+            cache_key_prefix: self.cache_key_prefix.clone(),
         };
         let header_json = serde_json::to_vec(&header)
             .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
@@ -261,6 +267,7 @@ impl IvfIndexState {
             quantizer_extra_data,
             sub_index_type,
             quantization_type,
+            cache_key_prefix: header.cache_key_prefix,
         })
     }
 }
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index 90ca1a4d619..f952546b25f 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -641,6 +641,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> VectorIndex for IVFInd
             quantizer_extra_data: extra_data.map(|b| b.to_vec()),
             sub_index_type,
             quantization_type,
+            cache_key_prefix: self.index_cache.prefix().to_string(),
         })
     }
 }

From ddc3f773d1849fc7d079f6ed692e05692d510c91 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 20 Mar 2026 12:32:15 -0700
Subject: [PATCH 13/24] refactor: move VectorIndex reconstruction from cache to
 call site

Previously, the disk cache codec reconstructed `Arc<dyn VectorIndex>`
from `IvfIndexState` during deserialization, requiring a
`ReconstructionContext` with deferred OnceLock initialization and
sync-to-async runtime juggling. The ObjectStore in that context also
lacked proper credential wrappers.

Now the cache stores `Arc<dyn VectorIndexData>` (serializable state)
instead of `Arc<dyn VectorIndex>` (live index). Lance's
`open_vector_index()` detects cached state and reconstructs using its
own ObjectStore (with credentials) and metadata cache. This eliminates
the ReconstructionContext, OnceLock pattern, and runtime juggling.

Changes:
- Add VectorIndexData trait (lance-index) with write_to/as_any/tag
- Add DeepSizeOf impl for IvfIndexState
- Change VectorIndexCacheKey::ValueType to dyn VectorIndexData
- Add reconstruction-from-cache path in open_vector_index()
- Fix panicking downcast in LanceCache::get_with_id (return None)
- Add Debug/Clone/Copy derives to SubIndexType

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache.rs               | 15 ++++-
 rust/lance-index/src/vector.rs             | 64 +++++++++++++++++++++-
 rust/lance-index/src/vector/v3/subindex.rs |  1 +
 rust/lance/src/index.rs                    | 51 +++++++++++------
 rust/lance/src/index/vector/ivf/v2.rs      |  8 +--
 5 files changed, 115 insertions(+), 24 deletions(-)

diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache.rs
index 66778c5149d..2013522ec62 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache.rs
@@ -353,8 +353,19 @@ impl LanceCache {
     ) -> Option<Arc<T>> {
         let cache_key = make_cache_key(&self.prefix, key, type_id);
         if let Some(entry) = self.cache.get(&cache_key).await {
-            self.hits.fetch_add(1, Ordering::Relaxed);
-            Some(entry.downcast::<T>().unwrap())
+            match entry.downcast::<T>() {
+                Ok(val) => {
+                    self.hits.fetch_add(1, Ordering::Relaxed);
+                    Some(val)
+                }
+                Err(_) => {
+                    // Type mismatch: the backend returned a different concrete
+                    // type than expected (e.g. a disk cache may store
+                    // intermediate state). Treat as a miss.
+                    self.misses.fetch_add(1, Ordering::Relaxed);
+                    None
+                }
+            }
         } else {
             self.misses.fetch_add(1, Ordering::Relaxed);
             None
diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs
index 76e37a824aa..973a7339cd4 100644
--- a/rust/lance-index/src/vector.rs
+++ b/rust/lance-index/src/vector.rs
@@ -142,11 +142,41 @@ impl From<DistanceType> for pb::VectorMetricType {
     }
 }
 
+/// Serializable snapshot of a vector index, suitable for disk caching.
+///
+/// Implementations must be cheaply reconstructable into a live
+/// [`VectorIndex`] given an ObjectStore, file metadata cache, and partition
+/// cache. The reconstruction cost should be dominated by re-opening
+/// `FileReader`s, which is cheap when the file metadata cache is warm.
+pub trait VectorIndexData: Send + Sync + DeepSizeOf + std::fmt::Debug {
+    /// Serialize this state into `writer`. Called on a blocking thread by
+    /// the disk cache codec.
+    fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()>;
+
+    /// Tag used to dispatch deserialization to the correct concrete type.
+    fn index_type_tag(&self) -> &'static str;
+
+    /// Downcast to `&dyn Any` for concrete type access during reconstruction.
+    fn as_any(&self) -> &dyn Any;
+}
+
+/// Deserialize a [`VectorIndexData`] from bytes previously written by
+/// [`VectorIndexData::write_to`].
+pub fn deserialize_vector_index_data(data: Bytes) -> Result<Arc<dyn VectorIndexData>> {
+    // Currently only IVF indices support disk caching. The serialization
+    // format is self-describing (IvfIndexState header), so no external tag
+    // is needed yet. When additional index types are added, prepend a
+    // version/tag byte to the wire format.
+    let state = IvfIndexState::deserialize(data)?;
+    Ok(Arc::new(state))
+}
+
 /// Serializable state of an IVF index, sufficient to reconstruct the index
 /// without re-reading global buffers from object storage.
 ///
 /// Produced by [`VectorIndex::cacheable_state`] and consumed by a
 /// reconstruction function that re-opens FileReaders using cached file metadata.
+#[derive(Debug, Clone)]
 pub struct IvfIndexState {
     /// Object-store path to the index file (before `to_local_path` conversion).
     pub index_file_path: String,
@@ -272,6 +302,38 @@ impl IvfIndexState {
     }
 }
 
+impl DeepSizeOf for IvfIndexState {
+    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
+        self.index_file_path.deep_size_of_children(context)
+            + self.uuid.deep_size_of_children(context)
+            + self.ivf.deep_size_of_children(context)
+            + self.sub_index_metadata.deep_size_of_children(context)
+            + self.quantizer_metadata_json.deep_size_of_children(context)
+            + self
+                .quantizer_extra_data
+                .as_ref()
+                .map(|v| v.deep_size_of_children(context))
+                .unwrap_or(0)
+            + self.cache_key_prefix.deep_size_of_children(context)
+    }
+}
+
+impl VectorIndexData for IvfIndexState {
+    fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()> {
+        let bytes = self.serialize()?;
+        writer.write_all(&bytes)?;
+        Ok(())
+    }
+
+    fn index_type_tag(&self) -> &'static str {
+        "IVF"
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
 /// Vector Index for (Approximate) Nearest Neighbor (ANN) Search.
 ///
 /// Vector indices are often built as a chain of indices.  For example, IVF -> PQ
@@ -399,7 +461,7 @@ pub trait VectorIndex: Send + Sync + std::fmt::Debug + Index {
 
     /// Export the index state needed for reconstruction from a disk cache.
     /// Returns `None` if this index type doesn't support persistent caching.
-    fn cacheable_state(&self) -> Option<IvfIndexState> {
+    fn cacheable_state(&self) -> Option<Box<dyn VectorIndexData>> {
         None
     }
 }
diff --git a/rust/lance-index/src/vector/v3/subindex.rs b/rust/lance-index/src/vector/v3/subindex.rs
index af0bb337352..dd5d2b078a9 100644
--- a/rust/lance-index/src/vector/v3/subindex.rs
+++ b/rust/lance-index/src/vector/v3/subindex.rs
@@ -59,6 +59,7 @@ pub trait IvfSubIndex: Send + Sync + Debug + DeepSizeOf {
     fn to_batch(&self) -> Result<RecordBatch>;
 }
 
+#[derive(Debug, Clone, Copy)]
 pub enum SubIndexType {
     Flat,
     Hnsw,
diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index 16faab4e48f..aacc0284fdc 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -42,6 +42,7 @@ use lance_index::vector::flat::index::{FlatBinQuantizer, FlatIndex, FlatQuantize
 use lance_index::vector::hnsw::HNSW;
 use lance_index::vector::pq::ProductQuantizer;
 use lance_index::vector::sq::ScalarQuantizer;
+use lance_index::vector::{IvfIndexState, VectorIndexData};
 use lance_index::{DatasetIndexExt, INDEX_METADATA_SCHEMA_KEY, IndexDescription};
 use lance_index::{INDEX_FILE_NAME, Index, IndexType, pb, vector::VectorIndex};
 use lance_index::{
@@ -129,7 +130,7 @@ impl<'a> VectorIndexCacheKey<'a> {
 }
 
 impl UnsizedCacheKey for VectorIndexCacheKey<'_> {
-    type ValueType = dyn VectorIndex;
+    type ValueType = dyn VectorIndexData;
 
     fn key(&self) -> std::borrow::Cow<'_, str> {
         if let Some(fri_uuid) = self.fri_uuid {
@@ -1296,22 +1297,16 @@ impl DatasetIndexInternalExt for Dataset {
         uuid: &str,
         metrics: &dyn MetricsCollector,
     ) -> Result<Arc<dyn Index>> {
-        // Checking for cache existence is cheap so we just check both scalar and vector caches
+        // Quick cache checks for scalar and frag-reuse indices. VectorIndex
+        // is not checked here because the cache stores VectorIndexData (serializable
+        // state), not a live VectorIndex — reconstruction is handled by
+        // open_vector_index.
         let frag_reuse_uuid = self.frag_reuse_index_uuid().await;
         let cache_key = ScalarIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
         if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await {
             return Ok(index.as_index());
         }
 
-        let vector_cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
-        if let Some(index) = self
-            .index_cache
-            .get_unsized_with_key(&vector_cache_key)
-            .await
-        {
-            return Ok(index.as_index());
-        }
-
         let frag_reuse_cache_key = FragReuseIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
         if let Some(index) = self.index_cache.get_with_key(&frag_reuse_cache_key).await {
             return Ok(index.as_index());
@@ -1378,9 +1373,26 @@ impl DatasetIndexInternalExt for Dataset {
         let frag_reuse_uuid = self.frag_reuse_index_uuid().await;
         let cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
 
-        if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await {
-            log::debug!("Found vector index in cache uuid: {}", uuid);
-            return Ok(index);
+        // Check cache for serialized VectorIndexData and reconstruct if found.
+        if let Some(data) = self.index_cache.get_unsized_with_key(&cache_key).await {
+            if let Some(state) = data.as_any().downcast_ref::<IvfIndexState>() {
+                log::debug!(
+                    "Reconstructing vector index from cached state uuid: {}",
+                    uuid
+                );
+                let partition_cache = self.index_cache.with_key_prefix(&cache_key.key());
+                // Namespace the file metadata cache by the index file path,
+                // matching what the full-load path does.
+                let index_path = object_store::path::Path::from(state.index_file_path.as_str());
+                let fmc = self.metadata_cache.file_metadata_cache(&index_path);
+                return vector::ivf::v2::reconstruct_vector_index(
+                    state.clone(),
+                    self.object_store.clone(),
+                    &fmc,
+                    partition_cache,
+                )
+                .await;
+            }
         }
 
         let frag_reuse_index = self.open_frag_reuse_index(metrics).await?;
@@ -1596,9 +1608,14 @@ impl DatasetIndexInternalExt for Dataset {
         };
         let index = index?;
         metrics.record_index_load();
-        self.index_cache
-            .insert_unsized_with_key(&cache_key, index.clone())
-            .await;
+        // Cache the serializable state, not the live index. The live index
+        // holds FileReader handles that can't survive serialization; the
+        // state can be cheaply reconstructed on the next cache hit.
+        if let Some(state) = index.cacheable_state() {
+            self.index_cache
+                .insert_unsized_with_key(&cache_key, Arc::from(state))
+                .await;
+        }
         Ok(index)
     }
 
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index f952546b25f..b3724285bd3 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -38,7 +38,7 @@ use lance_index::vector::quantizer::{QuantizationType, Quantizer};
 use lance_index::vector::sq::ScalarQuantizer;
 use lance_index::vector::storage::VectorStore;
 use lance_index::vector::v3::subindex::SubIndexType;
-use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry};
+use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry, VectorIndexData};
 use lance_index::{
     INDEX_AUXILIARY_FILE_NAME, INDEX_FILE_NAME, Index, IndexType, pb,
     vector::{
@@ -625,13 +625,13 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> VectorIndex for IVFInd
         self.distance_type
     }
 
-    fn cacheable_state(&self) -> Option<IvfIndexState> {
+    fn cacheable_state(&self) -> Option<Box<dyn VectorIndexData>> {
         let extra_data = self.storage.metadata().extra_metadata().ok().flatten();
         let metadata_json = serde_json::to_string(self.storage.metadata()).ok()?;
         let (sub_index_type, quantization_type) = self.sub_index_type();
         // Convert local path back to object_store Path (undo to_local_path's "/" prefix)
         let index_file_path = self.uri.trim_start_matches('/').to_string();
-        Some(IvfIndexState {
+        Some(Box::new(IvfIndexState {
             index_file_path,
             uuid: self.uuid.clone(),
             ivf: self.ivf.clone(),
@@ -642,7 +642,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> VectorIndex for IVFInd
             sub_index_type,
             quantization_type,
             cache_key_prefix: self.index_cache.prefix().to_string(),
-        })
+        }))
     }
 }
 

From 4fdbe5199816ba265af84a366fb433070a4d1fb1 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 20 Mar 2026 14:02:49 -0700
Subject: [PATCH 14/24] fix: allow clippy::too_many_arguments on from_cached_state

---
 rust/lance/src/index/vector/ivf/v2.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index 00e076d0a62..ffd502550c2 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -234,6 +234,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
     }
 
     /// Reconstruct from cached state, skipping global buffer reads.
+    #[allow(clippy::too_many_arguments)]
     pub(crate) fn from_cached_state(
         uri: String,
         uuid: String,

From 9ff1ab961b6c9f71369a3f00cb338b14b46df7c8 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 20 Mar 2026 16:06:34 -0700
Subject: [PATCH 15/24] refactor: address PR review feedback

- Split cache.rs into submodules (backend, keys, moka, mod)
- Rename CacheKey::type_id() to type_name() across all implementors
- Improve CacheBackend and get_or_insert docs
- Add Spillable trait with writer-based serialize for partition_serde
- Cache file metadata and file sizes to enable zero-IO reconstruction
- Add test_reconstruct_from_cache_zero_io test

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache/backend.rs          |  76 ++++
 rust/lance-core/src/cache/keys.rs             |  52 +++
 .../lance-core/src/{cache.rs => cache/mod.rs} | 324 +++---------------
 rust/lance-core/src/cache/moka.rs             | 125 +++++++
 .../src/encodings/logical/primitive.rs        |   2 +-
 rust/lance-file/src/previous/reader.rs        |   2 +-
 rust/lance-file/src/reader.rs                 |  10 +
 rust/lance-index/src/scalar/bitmap.rs         |   2 +-
 rust/lance-index/src/scalar/btree.rs          |   2 +-
 rust/lance-index/src/scalar/inverted/index.rs |   4 +-
 rust/lance-index/src/scalar/ngram.rs          |   2 +-
 rust/lance-index/src/scalar/rtree.rs          |   2 +-
 rust/lance-index/src/vector.rs                |  12 +
 rust/lance-index/src/vector/storage.rs        |   4 +
 rust/lance/src/dataset/fragment.rs            |   2 +-
 rust/lance/src/index.rs                       |  14 +-
 rust/lance/src/index/vector/ivf.rs            |   2 +-
 .../src/index/vector/ivf/partition_serde.rs   | 124 ++++---
 rust/lance/src/index/vector/ivf/v2.rs         | 160 +++++++--
 rust/lance/src/session.rs                     |   2 +-
 rust/lance/src/session/caches.rs              |  12 +-
 rust/lance/src/session/index_caches.rs        |   6 +-
 22 files changed, 563 insertions(+), 378 deletions(-)
 create mode 100644 rust/lance-core/src/cache/backend.rs
 create mode 100644 rust/lance-core/src/cache/keys.rs
 rename rust/lance-core/src/{cache.rs => cache/mod.rs} (70%)
 create mode 100644 rust/lance-core/src/cache/moka.rs

diff --git a/rust/lance-core/src/cache/backend.rs b/rust/lance-core/src/cache/backend.rs
new file mode 100644
index 00000000000..970fb75888c
--- /dev/null
+++ b/rust/lance-core/src/cache/backend.rs
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::any::Any;
+use std::pin::Pin;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use futures::Future;
+
+use crate::Result;
+
+/// A type-erased cache entry.
+pub type CacheEntry = Arc<dyn Any + Send + Sync>;
+
+/// Low-level pluggable cache backend.
+///
+/// Implementations store entries keyed by opaque byte slices.
+/// The [`LanceCache`](super::LanceCache) wrapper handles key construction and type safety;
+/// backend authors do not need to worry about key encoding.
+///
+/// Keys are structured as `user_key\0type_name` where `type_name` comes from
+/// [`CacheKey::type_name()`](super::CacheKey::type_name). Backend authors who need to
+/// inspect keys can use [`parse_cache_key()`](super::parse_cache_key) to split them.
+#[async_trait]
+pub trait CacheBackend: Send + Sync + std::fmt::Debug {
+    /// Look up an entry by its opaque key.
+    async fn get(&self, key: &[u8]) -> Option<CacheEntry>;
+
+    /// Store an entry. `size_bytes` is used for eviction accounting.
+    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize);
+
+    /// Get an existing entry or compute it from `loader`.
+    ///
+    /// Implementations should deduplicate concurrent loads for the same key
+    /// so the loader runs at most once.
+    ///
+    /// The loader is a pinned, boxed future rather than a generic closure
+    /// because `async_trait` erases the `Self` lifetime, making it impossible
+    /// to express a generic closure whose returned future borrows from the
+    /// caller. Boxing the future once at the call site (in `LanceCache`)
+    /// avoids this lifetime conflict while keeping the trait object-safe.
+    ///
+    /// The future borrows from the caller's scope and will be `.await`ed within
+    /// this method — implementations must not store it beyond the call.
+    async fn get_or_insert<'a>(
+        &self,
+        key: &[u8],
+        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
+    ) -> Result<CacheEntry>;
+
+    /// Remove all entries whose key starts with `prefix`.
+    async fn invalidate_prefix(&self, prefix: &[u8]);
+
+    /// Remove all entries.
+    async fn clear(&self);
+
+    /// Number of entries currently stored (may flush pending operations).
+    async fn num_entries(&self) -> usize;
+
+    /// Total weighted size in bytes of all stored entries (may flush pending operations).
+    async fn size_bytes(&self) -> usize;
+
+    /// Approximate number of entries, callable from synchronous contexts.
+    /// Backends that cannot provide this cheaply should return 0.
+    fn approx_num_entries(&self) -> usize {
+        0
+    }
+
+    /// Approximate weighted size in bytes, callable from synchronous contexts.
+    /// Used by `DeepSizeOf` to report cache memory usage.
+    /// Backends that cannot provide this cheaply should return 0.
+    fn approx_size_bytes(&self) -> usize {
+        0
+    }
+}
diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs
new file mode 100644
index 00000000000..db412cc632f
--- /dev/null
+++ b/rust/lance-core/src/cache/keys.rs
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::borrow::Cow;
+
+/// Cache keys are structured as `user_key\0type_name`.
+///
+/// This function splits an opaque cache key into the user-visible portion
+/// and the type_name string. Backend implementations can use this to inspect keys.
+/// Returns `(key, "")` (the whole input as the user key) if no separator is found.
+pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) {
+    if let Some(sep) = key.iter().position(|&b| b == 0) {
+        let user_key = &key[..sep];
+        let type_name = std::str::from_utf8(&key[sep + 1..]).unwrap_or("");
+        (user_key, type_name)
+    } else {
+        (key, "")
+    }
+}
+
+/// Build a key: `prefix/user_key\0type_name`.
+pub(super) fn make_cache_key(prefix: &str, key: &str, type_name: &str) -> Vec<u8> {
+    let full_key = if prefix.is_empty() {
+        key.to_string()
+    } else {
+        format!("{}/{}", prefix, key)
+    };
+    let mut bytes = full_key.into_bytes();
+    bytes.push(0);
+    bytes.extend_from_slice(type_name.as_bytes());
+    bytes
+}
+
+pub trait CacheKey {
+    type ValueType: 'static;
+
+    fn key(&self) -> Cow<'_, str>;
+
+    /// Short, stable string that distinguishes this value type from others in
+    /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_name`).
+    /// Must be consistent across crate boundaries — use a short literal, not the
+    /// output of `std::any::type_name`, which is not guaranteed to be stable.
+    fn type_name(&self) -> &'static str;
+}
+
+pub trait UnsizedCacheKey {
+    type ValueType: 'static + ?Sized;
+
+    fn key(&self) -> Cow<'_, str>;
+
+    fn type_name(&self) -> &'static str;
+}
diff --git a/rust/lance-core/src/cache.rs b/rust/lance-core/src/cache/mod.rs
similarity index 70%
rename from rust/lance-core/src/cache.rs
rename to rust/lance-core/src/cache/mod.rs
index 2013522ec62..6bdd0c07152 100644
--- a/rust/lance-core/src/cache.rs
+++ b/rust/lance-core/src/cache/mod.rs
@@ -9,231 +9,31 @@
 //!   can implement. It uses opaque byte keys and type-erased entries.
 //! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag
 //!   encoding), type-safe get/insert, and DeepSizeOf-based size computation.
+//!
+//! Cache keys are handled by the [`keys`] submodule: [`CacheKey`] / [`UnsizedCacheKey`]
+//! define the typed key interface, and [`parse_cache_key`] lets backends inspect the
+//! encoded `user_key\0type_name` format.
+
+mod backend;
+mod keys;
+mod moka;
+
+pub use backend::{CacheBackend, CacheEntry};
+pub use keys::{CacheKey, UnsizedCacheKey, parse_cache_key};
+pub use moka::MokaCacheBackend;
 
-use std::any::Any;
-use std::borrow::Cow;
-use std::pin::Pin;
 use std::sync::{
     Arc,
     atomic::{AtomicU64, Ordering},
 };
 
-use async_trait::async_trait;
 use futures::{Future, FutureExt};
 
 use crate::Result;
 
 pub use deepsize::{Context, DeepSizeOf};
 
-/// A type-erased cache entry.
-pub type CacheEntry = Arc<dyn Any + Send + Sync>;
-
-// ---------------------------------------------------------------------------
-// CacheBackend trait
-// ---------------------------------------------------------------------------
-
-/// Low-level pluggable cache backend.
-///
-/// Implementations store entries keyed by opaque byte slices.
-/// The [`LanceCache`] wrapper handles key construction and type safety;
-/// backend authors do not need to worry about key encoding.
-#[async_trait]
-pub trait CacheBackend: Send + Sync + std::fmt::Debug {
-    /// Look up an entry by its opaque key.
-    async fn get(&self, key: &[u8]) -> Option<CacheEntry>;
-
-    /// Store an entry. `size_bytes` is used for eviction accounting.
-    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize);
-
-    /// Get an existing entry or compute it from `loader`.
-    ///
-    /// Implementations should deduplicate concurrent loads for the same key
-    /// so the loader runs at most once.
-    ///
-    /// The loader is a pinned future that produces `(entry, size_bytes)`.
-    /// It borrows from the caller's scope and will be `.await`ed within
-    /// this method — implementations must not store it beyond the call.
-    async fn get_or_insert<'a>(
-        &self,
-        key: &[u8],
-        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
-    ) -> Result<CacheEntry>;
-
-    /// Remove all entries whose key starts with `prefix`.
-    async fn invalidate_prefix(&self, prefix: &[u8]);
-
-    /// Remove all entries.
-    async fn clear(&self);
-
-    /// Number of entries currently stored (may flush pending operations).
-    async fn num_entries(&self) -> usize;
-
-    /// Total weighted size in bytes of all stored entries (may flush pending operations).
-    async fn size_bytes(&self) -> usize;
-
-    /// Approximate number of entries, callable from synchronous contexts.
-    /// Backends that cannot provide this cheaply should return 0.
-    fn approx_num_entries(&self) -> usize {
-        0
-    }
-
-    /// Approximate weighted size in bytes, callable from synchronous contexts.
-    /// Used by `DeepSizeOf` to report cache memory usage.
-    /// Backends that cannot provide this cheaply should return 0.
-    fn approx_size_bytes(&self) -> usize {
-        0
-    }
-}
-
-// ---------------------------------------------------------------------------
-// MokaCacheBackend — default moka-based implementation
-// ---------------------------------------------------------------------------
-
-/// Internal record stored in the moka cache.
-#[derive(Clone, Debug)]
-struct MokaCacheEntry {
-    entry: CacheEntry,
-    size_bytes: usize,
-}
-
-/// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache.
-///
-/// Provides weighted-capacity eviction and concurrent-load deduplication
-/// via moka's built-in `optionally_get_with`.
-pub struct MokaCacheBackend {
-    cache: moka::future::Cache<Vec<u8>, MokaCacheEntry>,
-}
-
-impl std::fmt::Debug for MokaCacheBackend {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("MokaCacheBackend")
-            .field("entry_count", &self.cache.entry_count())
-            .finish()
-    }
-}
-
-impl MokaCacheBackend {
-    pub fn with_capacity(capacity: usize) -> Self {
-        let cache = moka::future::Cache::builder()
-            .max_capacity(capacity as u64)
-            .weigher(|_, v: &MokaCacheEntry| v.size_bytes.try_into().unwrap_or(u32::MAX))
-            .support_invalidation_closures()
-            .build();
-        Self { cache }
-    }
-
-    pub fn no_cache() -> Self {
-        Self {
-            cache: moka::future::Cache::new(0),
-        }
-    }
-}
-
-#[async_trait]
-impl CacheBackend for MokaCacheBackend {
-    async fn get(&self, key: &[u8]) -> Option<CacheEntry> {
-        self.cache.get(key).await.map(|r| r.entry)
-    }
-
-    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) {
-        self.cache
-            .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes })
-            .await;
-    }
-
-    async fn get_or_insert<'a>(
-        &self,
-        key: &[u8],
-        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
-    ) -> Result<CacheEntry> {
-        // Use moka's built-in dedup: optionally_get_with runs the init future
-        // at most once per key, even under concurrent access.
-        let (error_tx, error_rx) = tokio::sync::oneshot::channel();
-
-        let init = async move {
-            match loader.await {
-                Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }),
-                Err(e) => {
-                    let _ = error_tx.send(e);
-                    None
-                }
-            }
-        };
-
-        let owned_key = key.to_vec();
-        match self.cache.optionally_get_with(owned_key, init).await {
-            Some(record) => Ok(record.entry),
-            None => match error_rx.await {
-                Ok(err) => Err(err),
-                Err(_) => Err(crate::Error::internal(
-                    "Failed to retrieve error from cache loader",
-                )),
-            },
-        }
-    }
-
-    async fn invalidate_prefix(&self, prefix: &[u8]) {
-        let prefix = prefix.to_vec();
-        self.cache
-            .invalidate_entries_if(move |key, _value| key.starts_with(&prefix))
-            .expect("Cache configured correctly");
-    }
-
-    async fn clear(&self) {
-        self.cache.invalidate_all();
-        self.cache.run_pending_tasks().await;
-    }
-
-    async fn num_entries(&self) -> usize {
-        self.cache.run_pending_tasks().await;
-        self.cache.entry_count() as usize
-    }
-
-    async fn size_bytes(&self) -> usize {
-        self.cache.run_pending_tasks().await;
-        self.cache.weighted_size() as usize
-    }
-
-    fn approx_num_entries(&self) -> usize {
-        self.cache.entry_count() as usize
-    }
-
-    fn approx_size_bytes(&self) -> usize {
-        self.cache.iter().map(|(_, v)| v.size_bytes).sum()
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Type identity helpers
-// ---------------------------------------------------------------------------
-
-/// Cache keys are structured as `user_key\0type_id`.
-///
-/// This function splits an opaque cache key into the user-visible portion
-/// and the type_id string. Backend implementations can use this to inspect keys.
-/// Returns `(empty slice, "")` if no separator is found.
-pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) {
-    if let Some(sep) = key.iter().position(|&b| b == 0) {
-        let user_key = &key[..sep];
-        let type_id = std::str::from_utf8(&key[sep + 1..]).unwrap_or("");
-        (user_key, type_id)
-    } else {
-        (key, "")
-    }
-}
-
-/// Build a key: `prefix/user_key\0type_id`.
-fn make_cache_key(prefix: &str, key: &str, type_id: &str) -> Vec<u8> {
-    let full_key = if prefix.is_empty() {
-        key.to_string()
-    } else {
-        format!("{}/{}", prefix, key)
-    };
-    let mut bytes = full_key.into_bytes();
-    bytes.push(0);
-    bytes.extend_from_slice(type_id.as_bytes());
-    bytes
-}
+use keys::make_cache_key;
 
 // ---------------------------------------------------------------------------
 // LanceCache — typed wrapper around dyn CacheBackend
@@ -338,20 +138,20 @@ impl LanceCache {
     async fn insert_with_id<T: DeepSizeOf + Send + Sync + 'static>(
         &self,
         key: &str,
-        type_id: &str,
+        type_name: &str,
         metadata: Arc<T>,
     ) {
         let size = metadata.deep_size_of() + 8;
-        let cache_key = make_cache_key(&self.prefix, key, type_id);
+        let cache_key = make_cache_key(&self.prefix, key, type_name);
         self.cache.insert(&cache_key, metadata, size).await;
     }
 
     async fn get_with_id<T: Send + Sync + 'static>(
         &self,
         key: &str,
-        type_id: &str,
+        type_name: &str,
     ) -> Option<Arc<T>> {
-        let cache_key = make_cache_key(&self.prefix, key, type_id);
+        let cache_key = make_cache_key(&self.prefix, key, type_name);
         if let Some(entry) = self.cache.get(&cache_key).await {
             match entry.downcast::<T>() {
                 Ok(val) => {
@@ -375,14 +175,14 @@ impl LanceCache {
     async fn get_or_insert_with_id<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
         &self,
         key: &str,
-        type_id: &str,
+        type_name: &str,
         loader: F,
     ) -> Result<Arc<T>>
     where
         F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<T>> + Send,
     {
-        let cache_key = make_cache_key(&self.prefix, key, type_id);
+        let cache_key = make_cache_key(&self.prefix, key, type_name);
 
         // Type-erase the loader into a pinned future for the backend.
         let typed_loader = Box::pin(async move {
@@ -407,18 +207,19 @@ impl LanceCache {
     async fn insert_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
-        type_id: &str,
+        type_name: &str,
         metadata: Arc<T>,
     ) {
-        self.insert_with_id(key, type_id, Arc::new(metadata)).await
+        self.insert_with_id(key, type_name, Arc::new(metadata))
+            .await
     }
 
     async fn get_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
         key: &str,
-        type_id: &str,
+        type_name: &str,
     ) -> Option<Arc<T>> {
-        let outer = self.get_with_id::<Arc<T>>(key, type_id).await?;
+        let outer = self.get_with_id::<Arc<T>>(key, type_name).await?;
         Some(outer.as_ref().clone())
     }
 
@@ -446,7 +247,7 @@ impl LanceCache {
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.insert_with_id(&cache_key.key(), cache_key.type_id(), metadata)
+        self.insert_with_id(&cache_key.key(), cache_key.type_name(), metadata)
             .boxed()
             .await
     }
@@ -456,7 +257,7 @@ impl LanceCache {
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.get_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_id())
+        self.get_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_name())
             .boxed()
             .await
     }
@@ -472,9 +273,9 @@ impl LanceCache {
         F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
-        let type_id = cache_key.type_id();
+        let type_name = cache_key.type_name();
         let key_str = cache_key.key().into_owned();
-        Box::pin(self.get_or_insert_with_id(&key_str, type_id, loader)).await
+        Box::pin(self.get_or_insert_with_id(&key_str, type_name, loader)).await
     }
 
     pub async fn insert_unsized_with_key<K>(&self, cache_key: &K, metadata: Arc<K::ValueType>)
@@ -482,7 +283,7 @@ impl LanceCache {
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.insert_unsized_with_id(&cache_key.key(), cache_key.type_id(), metadata)
+        self.insert_unsized_with_id(&cache_key.key(), cache_key.type_name(), metadata)
             .boxed()
             .await
     }
@@ -492,7 +293,7 @@ impl LanceCache {
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.get_unsized_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_id())
+        self.get_unsized_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_name())
             .boxed()
             .await
     }
@@ -542,7 +343,7 @@ impl WeakLanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         let cache = self.inner.upgrade()?;
-        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
         if let Some(entry) = cache.get(&key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
             Some(entry.downcast::<K::ValueType>().unwrap())
@@ -559,7 +360,7 @@ impl WeakLanceCache {
     {
         if let Some(cache) = self.inner.upgrade() {
             let size = value.deep_size_of() + 8;
-            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             cache.insert(&key, value, size).await;
             true
         } else {
@@ -583,7 +384,7 @@ impl WeakLanceCache {
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
         if let Some(cache) = self.inner.upgrade() {
-            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             let typed_loader = Box::pin(async move {
                 let value = loader().await?;
                 let arc = Arc::new(value);
@@ -605,7 +406,7 @@ impl WeakLanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         let cache = self.inner.upgrade()?;
-        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
         if let Some(entry) = cache.get(&key).await {
             entry
                 .downcast::<Arc<K::ValueType>>()
@@ -624,7 +425,7 @@ impl WeakLanceCache {
         if let Some(cache) = self.inner.upgrade() {
             let wrapper = Arc::new(value);
             let size = wrapper.deep_size_of() + 8;
-            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_id());
+            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             cache.insert(&key, wrapper, size).await;
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item");
@@ -632,39 +433,19 @@ impl WeakLanceCache {
     }
 }
 
-// ---------------------------------------------------------------------------
-// CacheKey traits
-// ---------------------------------------------------------------------------
-
-pub trait CacheKey {
-    type ValueType: 'static;
-
-    fn key(&self) -> Cow<'_, str>;
-
-    /// Short, stable string that distinguishes this value type from others in
-    /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_id`).
-    /// Must be consistent across crate boundaries — use a short literal, not
-    /// `type_name` pointers.
-    fn type_id(&self) -> &'static str;
-}
-
-pub trait UnsizedCacheKey {
-    type ValueType: 'static + ?Sized;
-
-    fn key(&self) -> Cow<'_, str>;
-
-    fn type_id(&self) -> &'static str;
-}
-
 // ---------------------------------------------------------------------------
 // CacheStats
 // ---------------------------------------------------------------------------
 
 #[derive(Debug, Clone)]
 pub struct CacheStats {
+    /// Number of times `get`, `get_unsized`, or `get_or_insert` found an item in the cache.
     pub hits: u64,
+    /// Number of times `get`, `get_unsized`, or `get_or_insert` did not find an item in the cache.
     pub misses: u64,
+    /// Number of entries currently in the cache.
     pub num_entries: usize,
+    /// Total size in bytes of all entries in the cache.
     pub size_bytes: usize,
 }
 
@@ -686,10 +467,6 @@ impl CacheStats {
     }
 }
 
-// ---------------------------------------------------------------------------
-// Tests
-// ---------------------------------------------------------------------------
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -712,10 +489,10 @@ mod tests {
 
     impl<T: 'static> CacheKey for TestKey<T> {
         type ValueType = T;
-        fn key(&self) -> Cow<'_, str> {
-            Cow::Borrowed(&self.key)
+        fn key(&self) -> std::borrow::Cow<'_, str> {
+            std::borrow::Cow::Borrowed(&self.key)
         }
-        fn type_id(&self) -> &'static str {
+        fn type_name(&self) -> &'static str {
             std::any::type_name::<T>()
         }
     }
@@ -737,10 +514,10 @@ mod tests {
 
     impl<T: 'static + ?Sized> UnsizedCacheKey for TestUnsizedKey<T> {
         type ValueType = T;
-        fn key(&self) -> Cow<'_, str> {
-            Cow::Borrowed(&self.key)
+        fn key(&self) -> std::borrow::Cow<'_, str> {
+            std::borrow::Cow::Borrowed(&self.key)
         }
-        fn type_id(&self) -> &'static str {
+        fn type_name(&self) -> &'static str {
             std::any::type_name::<T>()
         }
     }
@@ -779,12 +556,12 @@ mod tests {
         #[derive(Debug, DeepSizeOf)]
         struct MyType(i32);
 
-        trait MyTrait: DeepSizeOf + Send + Sync + Any {
-            fn as_any(&self) -> &dyn Any;
+        trait MyTrait: DeepSizeOf + Send + Sync + std::any::Any {
+            fn as_any(&self) -> &dyn std::any::Any;
         }
 
         impl MyTrait for MyType {
-            fn as_any(&self) -> &dyn Any {
+            fn as_any(&self) -> &dyn std::any::Any {
                 self
             }
         }
@@ -878,6 +655,7 @@ mod tests {
 
     #[tokio::test]
     async fn test_custom_backend() {
+        use async_trait::async_trait;
         use tokio::sync::Mutex;
 
         #[derive(Debug)]
@@ -907,7 +685,9 @@ mod tests {
             async fn get_or_insert<'a>(
                 &self,
                 key: &[u8],
-                loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
+                loader: std::pin::Pin<
+                    Box<dyn futures::Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>,
+                >,
             ) -> Result<CacheEntry> {
                 if let Some((entry, _)) = self.map.lock().await.get(key) {
                     Ok(entry.clone())
diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs
new file mode 100644
index 00000000000..6a2cd673409
--- /dev/null
+++ b/rust/lance-core/src/cache/moka.rs
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+use std::pin::Pin;
+
+use async_trait::async_trait;
+use futures::Future;
+
+use crate::Result;
+
+use super::backend::{CacheBackend, CacheEntry};
+
+/// Internal record stored in the moka cache.
+#[derive(Clone, Debug)]
+struct MokaCacheEntry {
+    /// The cached value handed back to callers.
+    entry: CacheEntry,
+    /// Size supplied by the caller on insert; the weigher uses it as the entry's weight.
+    size_bytes: usize,
+}
+
+/// Default [`CacheBackend`] backed by a [moka](https://crates.io/crates/moka) cache.
+///
+/// Provides weighted-capacity eviction and concurrent-load deduplication
+/// via moka's built-in `optionally_get_with`.
+pub struct MokaCacheBackend {
+    cache: moka::future::Cache<Vec<u8>, MokaCacheEntry>,
+}
+
+// Hand-written so that only the entry count is printed.
+impl std::fmt::Debug for MokaCacheBackend {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("MokaCacheBackend")
+            .field("entry_count", &self.cache.entry_count())
+            .finish()
+    }
+}
+
+impl MokaCacheBackend {
+    /// Creates a backend bounded by `capacity`.
+    ///
+    /// `capacity` is handed to moka as the maximum capacity, and each entry is
+    /// weighed by its `size_bytes` (saturating at `u32::MAX`). Invalidation
+    /// closures are enabled because `invalidate_prefix` depends on them.
+    pub fn with_capacity(capacity: usize) -> Self {
+        let cache = moka::future::Cache::builder()
+            .max_capacity(capacity as u64)
+            .weigher(|_, v: &MokaCacheEntry| v.size_bytes.try_into().unwrap_or(u32::MAX))
+            .support_invalidation_closures()
+            .build();
+        Self { cache }
+    }
+
+    /// Creates a zero-capacity backend.
+    ///
+    /// Built through [`Self::with_capacity`] rather than `Cache::new(0)` so that
+    /// invalidation closures are enabled; without them `invalidate_prefix` would
+    /// fail its `expect` and panic on a `no_cache` backend.
+    pub fn no_cache() -> Self {
+        Self::with_capacity(0)
+    }
+}
+
+#[async_trait]
+impl CacheBackend for MokaCacheBackend {
+    /// Returns the entry stored under `key`, if any.
+    async fn get(&self, key: &[u8]) -> Option<CacheEntry> {
+        self.cache.get(key).await.map(|r| r.entry)
+    }
+
+    /// Stores `entry` under a copy of `key`, with `size_bytes` recorded alongside it.
+    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) {
+        self.cache
+            .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes })
+            .await;
+    }
+
+    /// Returns the entry for `key`, using `loader` to produce it when needed.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error produced by `loader` when it was delivered through the
+    /// error channel. If moka yields no value and no error was delivered, an
+    /// internal error is returned instead.
+    async fn get_or_insert<'a>(
+        &self,
+        key: &[u8],
+        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
+    ) -> Result<CacheEntry> {
+        // Use moka's built-in dedup: optionally_get_with runs the init future
+        // at most once per key, even under concurrent access.
+        //
+        // `init` has to yield an `Option`, so a loader error is passed out
+        // through this channel while `init` itself reports `None`.
+        let (error_tx, error_rx) = tokio::sync::oneshot::channel();
+
+        let init = async move {
+            match loader.await {
+                Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }),
+                Err(e) => {
+                    // Best effort: a failed send is ignored.
+                    let _ = error_tx.send(e);
+                    None
+                }
+            }
+        };
+
+        let owned_key = key.to_vec();
+        match self.cache.optionally_get_with(owned_key, init).await {
+            Some(record) => Ok(record.entry),
+            None => match error_rx.await {
+                Ok(err) => Err(err),
+                // The sender was dropped without sending an error.
+                // NOTE(review): presumably a concurrent load of the same key
+                // returned `None`, so this call's `init` never ran — confirm.
+                Err(_) => Err(crate::Error::internal(
+                    "Failed to retrieve error from cache loader",
+                )),
+            },
+        }
+    }
+
+    /// Invalidates every entry whose key starts with `prefix`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if moka rejects the invalidation predicate. Both constructors
+    /// enable invalidation closures, which this call requires.
+    async fn invalidate_prefix(&self, prefix: &[u8]) {
+        let prefix = prefix.to_vec();
+        self.cache
+            .invalidate_entries_if(move |key, _value| key.starts_with(&prefix))
+            .expect("Cache configured correctly");
+    }
+
+    /// Invalidates all entries, then runs moka's pending maintenance tasks.
+    async fn clear(&self) {
+        self.cache.invalidate_all();
+        self.cache.run_pending_tasks().await;
+    }
+
+    /// Returns moka's entry count after running its pending tasks.
+    async fn num_entries(&self) -> usize {
+        self.cache.run_pending_tasks().await;
+        self.cache.entry_count() as usize
+    }
+
+    /// Returns moka's weighted size after running its pending tasks.
+    async fn size_bytes(&self) -> usize {
+        self.cache.run_pending_tasks().await;
+        self.cache.weighted_size() as usize
+    }
+
+    /// Returns moka's current entry count without running pending tasks first.
+    fn approx_num_entries(&self) -> usize {
+        self.cache.entry_count() as usize
+    }
+
+    /// Sums `size_bytes` over all entries yielded by iterating the cache,
+    /// without running pending tasks first.
+    fn approx_size_bytes(&self) -> usize {
+        self.cache.iter().map(|(_, v)| v.size_bytes).sum()
+    }
+}
diff --git a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs
index ef1f1ca1faf..ba8a551f737 100644
--- a/rust/lance-encoding/src/encodings/logical/primitive.rs
+++ b/rust/lance-encoding/src/encodings/logical/primitive.rs
@@ -3417,7 +3417,7 @@ impl CacheKey for FieldDataCacheKey {
         self.column_index.to_string().into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "FieldData"
     }
 }
diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs
index 6dd40af45c1..fac113b4c10 100644
--- a/rust/lance-file/src/previous/reader.rs
+++ b/rust/lance-file/src/previous/reader.rs
@@ -90,7 +90,7 @@ impl<T: 'static> CacheKey for StringCacheKey<'_, T> {
         self.key.into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         std::any::type_name::<T>()
     }
 }
diff --git a/rust/lance-file/src/reader.rs b/rust/lance-file/src/reader.rs
index 29c1aa3ccc0..3a3f41854c0 100644
--- a/rust/lance-file/src/reader.rs
+++ b/rust/lance-file/src/reader.rs
@@ -105,6 +105,16 @@ pub struct CachedFileMetadata {
     pub minor_version: u16,
 }
 
+impl CachedFileMetadata {
+    /// Total file size in bytes, computed as the sum of the data, global buffer,
+    /// column metadata and footer byte counts.
+    pub fn file_size(&self) -> u64 {
+        let data_and_buffers = self.num_data_bytes + self.num_global_buffer_bytes;
+        let metadata_and_footer = self.num_column_metadata_bytes + self.num_footer_bytes;
+        data_and_buffers + metadata_and_footer
+    }
+}
+
 impl DeepSizeOf for CachedFileMetadata {
     // TODO: include size for `column_metadatas` and `column_infos`.
     fn deep_size_of_children(&self, context: &mut Context) -> usize {
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index a55b317860f..c593bb72a63 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -129,7 +129,7 @@ impl CacheKey for BitmapKey {
         format!("{}", self.value.0).into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "Bitmap"
     }
 }
diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs
index 9e208b8eea4..8fbe2377d13 100644
--- a/rust/lance-index/src/scalar/btree.rs
+++ b/rust/lance-index/src/scalar/btree.rs
@@ -990,7 +990,7 @@ impl CacheKey for BTreePageKey {
         format!("page-{}", self.page_number).into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "BTreePage"
     }
 }
diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs
index 89b64c18ffd..e5caf09cd78 100644
--- a/rust/lance-index/src/scalar/inverted/index.rs
+++ b/rust/lance-index/src/scalar/inverted/index.rs
@@ -1889,7 +1889,7 @@ impl CacheKey for PostingListKey {
         format!("postings-{}", self.token_id).into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "PostingList"
     }
 }
@@ -1906,7 +1906,7 @@ impl CacheKey for PositionKey {
         format!("positions-{}", self.token_id).into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "Position"
     }
 }
diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs
index d2095841428..2a439ae6b34 100644
--- a/rust/lance-index/src/scalar/ngram.rs
+++ b/rust/lance-index/src/scalar/ngram.rs
@@ -171,7 +171,7 @@ impl CacheKey for NGramPostingListKey {
         format!("posting-list-{}", self.row_offset).into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "NGramPostingList"
     }
 }
diff --git a/rust/lance-index/src/scalar/rtree.rs b/rust/lance-index/src/scalar/rtree.rs
index 724f5479e1c..225e3be6e2a 100644
--- a/rust/lance-index/src/scalar/rtree.rs
+++ b/rust/lance-index/src/scalar/rtree.rs
@@ -250,7 +250,7 @@ impl CacheKey for RTreeCacheKey {
         }
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "RTree"
     }
 }
diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs
index 973a7339cd4..0a1b50297d3 100644
--- a/rust/lance-index/src/vector.rs
+++ b/rust/lance-index/src/vector.rs
@@ -193,6 +193,10 @@ pub struct IvfIndexState {
     /// The cache key prefix used by the original index's WeakLanceCache.
     /// Needed to reconnect the reconstructed index to the shared cache backend.
     pub cache_key_prefix: String,
+    /// Size of the index file; together with `aux_file_size` (the auxiliary file),
+    /// it is used to avoid HEAD requests when reconstructing from cache.
+    pub index_file_size: u64,
+    pub aux_file_size: u64,
 }
 
 /// Serialization header for [`IvfIndexState`].
@@ -207,6 +211,10 @@ struct IvfIndexStateHeader {
     quantizer_metadata_json: String,
     #[serde(default)]
     cache_key_prefix: String,
+    #[serde(default)]
+    index_file_size: u64,
+    #[serde(default)]
+    aux_file_size: u64,
 }
 
 impl IvfIndexState {
@@ -223,6 +231,8 @@ impl IvfIndexState {
             quantization_type: self.quantization_type.to_string(),
             quantizer_metadata_json: self.quantizer_metadata_json.clone(),
             cache_key_prefix: self.cache_key_prefix.clone(),
+            index_file_size: self.index_file_size,
+            aux_file_size: self.aux_file_size,
         };
         let header_json = serde_json::to_vec(&header)
             .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
@@ -298,6 +308,8 @@ impl IvfIndexState {
             sub_index_type,
             quantization_type,
             cache_key_prefix: header.cache_key_prefix,
+            index_file_size: header.index_file_size,
+            aux_file_size: header.aux_file_size,
         })
     }
 }
diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs
index 1879774ce84..526ba8e78d8 100644
--- a/rust/lance-index/src/vector/storage.rs
+++ b/rust/lance-index/src/vector/storage.rs
@@ -257,6 +257,10 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
         }
     }
 
+    pub fn reader(&self) -> &FileReader {
+        &self.reader
+    }
+
     pub fn num_rows(&self) -> u64 {
         self.reader.num_rows()
     }
diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs
index 55c23211871..81e1473c921 100644
--- a/rust/lance/src/dataset/fragment.rs
+++ b/rust/lance/src/dataset/fragment.rs
@@ -1880,7 +1880,7 @@ impl CacheKey for FileMetadataCacheKey {
         "".into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "FileMetadata"
     }
 }
diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index 851cd113268..3c920e19426 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -112,7 +112,7 @@ impl UnsizedCacheKey for ScalarIndexCacheKey<'_> {
         }
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "ScalarIndex"
     }
 }
@@ -140,7 +140,7 @@ impl UnsizedCacheKey for VectorIndexCacheKey<'_> {
         }
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "VectorIndex"
     }
 }
@@ -168,7 +168,7 @@ impl CacheKey for FragReuseIndexCacheKey<'_> {
         }
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "FragReuseIndex"
     }
 }
@@ -196,7 +196,7 @@ impl CacheKey for MemWalCacheKey<'_> {
         }
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "MemWalIndex"
     }
 }
@@ -1431,14 +1431,10 @@ impl DatasetIndexInternalExt for Dataset {
                 uuid
             );
             let partition_cache = self.index_cache.with_key_prefix(&cache_key.key());
-            // Namespace the file metadata cache by the index file path,
-            // matching what the full-load path does.
-            let index_path = object_store::path::Path::from(state.index_file_path.as_str());
-            let fmc = self.metadata_cache.file_metadata_cache(&index_path);
             return vector::ivf::v2::reconstruct_vector_index(
                 state.clone(),
                 self.object_store.clone(),
-                &fmc,
+                &self.metadata_cache,
                 partition_cache,
             )
             .await;
diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs
index ec05278b311..3768fad045c 100644
--- a/rust/lance/src/index/vector/ivf.rs
+++ b/rust/lance/src/index/vector/ivf.rs
@@ -125,7 +125,7 @@ impl UnsizedCacheKey for LegacyIVFPartitionKey {
         format!("ivf-{}", self.partition_id).into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "LegacyIVFPartition"
     }
 }
diff --git a/rust/lance/src/index/vector/ivf/partition_serde.rs b/rust/lance/src/index/vector/ivf/partition_serde.rs
index 9139e940c2a..3db0dcc634a 100644
--- a/rust/lance/src/index/vector/ivf/partition_serde.rs
+++ b/rust/lance/src/index/vector/ivf/partition_serde.rs
@@ -17,6 +17,7 @@
 //! sections are read zero-copy using [`FileDecoder`] so that Arrow arrays
 //! reference the original buffer directly.
 
+use std::io::Write;
 use std::sync::Arc;
 
 use arrow_array::{FixedSizeListArray, RecordBatch};
@@ -44,6 +45,17 @@ use serde::{Deserialize, Serialize};
 
 use super::v2::PartitionEntry;
 
+/// Serialization interface for spilling cache entries to an external store.
+pub trait Spillable: Sized {
+    /// Writes the entry into `writer`.
+    ///
+    /// Returns the number of bytes written.
+    fn serialize(&self, writer: &mut dyn Write) -> Result<usize>;
+    /// Reconstructs the entry from a contiguous `Bytes` buffer, typically
+    /// obtained by reading back whatever `serialize` wrote.
+    fn deserialize(data: Bytes) -> Result<Self>;
+}
+
 // ---------------------------------------------------------------------------
 // Common helpers
 // ---------------------------------------------------------------------------
@@ -224,13 +236,13 @@ struct PqPartitionHeader {
     storage_len: u64,
 }
 
-impl<S: IvfSubIndex> PartitionEntry<S, ProductQuantizer> {
+impl<S: IvfSubIndex> Spillable for PartitionEntry<S, ProductQuantizer> {
     /// Serialize this partition entry to bytes.
     ///
     /// The sub-index, PQ codebook, and storage batch are each written as Arrow
     /// IPC file sections, preceded by a small JSON header containing scalar
     /// metadata and section lengths.
-    pub fn serialize(&self) -> Result<Vec<u8>> {
+    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
         let metadata = self.storage.metadata();
         let distance_type = self.storage.distance_type();
 
@@ -261,24 +273,23 @@ impl<S: IvfSubIndex> PartitionEntry<S, ProductQuantizer> {
         };
 
         let header_json = serde_json::to_vec(&header)?;
-
         let total_len =
             8 + header_json.len() + sub_index_ipc.len() + codebook_ipc.len() + storage_ipc.len();
-        let mut out = Vec::with_capacity(total_len);
-        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
-        out.extend_from_slice(&header_json);
-        out.extend_from_slice(&sub_index_ipc);
-        out.extend_from_slice(&codebook_ipc);
-        out.extend_from_slice(&storage_ipc);
-
-        Ok(out)
+
+        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
+        writer.write_all(&header_json)?;
+        writer.write_all(&sub_index_ipc)?;
+        writer.write_all(&codebook_ipc)?;
+        writer.write_all(&storage_ipc)?;
+
+        Ok(total_len)
     }
 
     /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
     ///
     /// The Arrow IPC sections are decoded using [`FileDecoder`] so that the
     /// resulting arrays reference slices of the provided `Bytes` buffer directly.
-    pub fn deserialize(data: Bytes) -> Result<Self> {
+    fn deserialize(data: Bytes) -> Result<Self> {
         if data.len() < 8 {
             return Err(Error::io("partition data too small".to_string()));
         }
@@ -352,9 +363,9 @@ struct FlatPartitionHeader {
     storage_len: u64,
 }
 
-impl<S: IvfSubIndex> PartitionEntry<S, FlatQuantizer> {
+impl<S: IvfSubIndex> Spillable for PartitionEntry<S, FlatQuantizer> {
     /// Serialize this partition entry to bytes.
-    pub fn serialize(&self) -> Result<Vec<u8>> {
+    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
         let metadata = self.storage.metadata();
         let distance_type = self.storage.distance_type();
 
@@ -377,16 +388,17 @@ impl<S: IvfSubIndex> PartitionEntry<S, FlatQuantizer> {
 
         let header_json = serde_json::to_vec(&header)?;
         let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len();
-        let mut out = Vec::with_capacity(total_len);
-        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
-        out.extend_from_slice(&header_json);
-        out.extend_from_slice(&sub_index_ipc);
-        out.extend_from_slice(&storage_ipc);
-        Ok(out)
+
+        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
+        writer.write_all(&header_json)?;
+        writer.write_all(&sub_index_ipc)?;
+        writer.write_all(&storage_ipc)?;
+
+        Ok(total_len)
     }
 
     /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
-    pub fn deserialize(data: Bytes) -> Result<Self> {
+    fn deserialize(data: Bytes) -> Result<Self> {
         if data.len() < 8 {
             return Err(Error::io("partition data too small".to_string()));
         }
@@ -446,11 +458,11 @@ struct SqPartitionHeader {
     storage_len: u64,
 }
 
-impl<S: IvfSubIndex> PartitionEntry<S, ScalarQuantizer> {
+impl<S: IvfSubIndex> Spillable for PartitionEntry<S, ScalarQuantizer> {
     /// Serialize this partition entry to bytes.
     ///
     /// Multiple SQ storage chunks are concatenated into a single IPC section.
-    pub fn serialize(&self) -> Result<Vec<u8>> {
+    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
         let metadata = self.storage.metadata();
         let distance_type = self.storage.distance_type();
 
@@ -475,16 +487,17 @@ impl<S: IvfSubIndex> PartitionEntry<S, ScalarQuantizer> {
 
         let header_json = serde_json::to_vec(&header)?;
         let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len();
-        let mut out = Vec::with_capacity(total_len);
-        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
-        out.extend_from_slice(&header_json);
-        out.extend_from_slice(&sub_index_ipc);
-        out.extend_from_slice(&storage_ipc);
-        Ok(out)
+
+        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
+        writer.write_all(&header_json)?;
+        writer.write_all(&sub_index_ipc)?;
+        writer.write_all(&storage_ipc)?;
+
+        Ok(total_len)
     }
 
     /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
-    pub fn deserialize(data: Bytes) -> Result<Self> {
+    fn deserialize(data: Bytes) -> Result<Self> {
         if data.len() < 8 {
             return Err(Error::io("partition data too small".to_string()));
         }
@@ -553,7 +566,7 @@ struct RabitPartitionHeader {
     storage_len: u64,
 }
 
-impl<S: IvfSubIndex> PartitionEntry<S, RabitQuantizer> {
+impl<S: IvfSubIndex> Spillable for PartitionEntry<S, RabitQuantizer> {
     /// Serialize this partition entry to bytes.
     ///
     /// For Matrix rotation the rotation matrix is stored as an Arrow IPC section.
@@ -561,7 +574,7 @@ impl<S: IvfSubIndex> PartitionEntry<S, RabitQuantizer> {
     ///
     /// The storage batch is stored with already-packed codes so deserialization
     /// can skip re-packing.
-    pub fn serialize(&self) -> Result<Vec<u8>> {
+    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
         let metadata = self.storage.metadata();
         let distance_type = self.storage.distance_type();
 
@@ -603,17 +616,18 @@ impl<S: IvfSubIndex> PartitionEntry<S, RabitQuantizer> {
         let header_json = serde_json::to_vec(&header)?;
         let total_len =
             8 + header_json.len() + sub_index_ipc.len() + rotate_mat_ipc.len() + storage_ipc.len();
-        let mut out = Vec::with_capacity(total_len);
-        out.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
-        out.extend_from_slice(&header_json);
-        out.extend_from_slice(&sub_index_ipc);
-        out.extend_from_slice(&rotate_mat_ipc);
-        out.extend_from_slice(&storage_ipc);
-        Ok(out)
+
+        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
+        writer.write_all(&header_json)?;
+        writer.write_all(&sub_index_ipc)?;
+        writer.write_all(&rotate_mat_ipc)?;
+        writer.write_all(&storage_ipc)?;
+
+        Ok(total_len)
     }
 
     /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
-    pub fn deserialize(data: Bytes) -> Result<Self> {
+    fn deserialize(data: Bytes) -> Result<Self> {
         if data.len() < 8 {
             return Err(Error::io("partition data too small".to_string()));
         }
@@ -769,7 +783,8 @@ mod tests {
             storage,
         };
 
-        let serialized = entry.serialize().unwrap();
+        let mut serialized = Vec::new();
+        entry.serialize(&mut serialized).unwrap();
         let deserialized =
             PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(serialized.into()).unwrap();
 
@@ -819,7 +834,8 @@ mod tests {
                 storage,
             };
 
-            let bytes = entry.serialize().unwrap();
+            let mut bytes = Vec::new();
+            entry.serialize(&mut bytes).unwrap();
             let restored =
                 PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(bytes.into()).unwrap();
             assert_eq!(
@@ -839,7 +855,8 @@ mod tests {
             storage,
         };
 
-        let serialized = entry.serialize().unwrap();
+        let mut serialized = Vec::new();
+        entry.serialize(&mut serialized).unwrap();
         let deserialized =
             PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(serialized.into()).unwrap();
         assert_eq!(entry.storage, deserialized.storage);
@@ -874,7 +891,8 @@ mod tests {
             storage,
         };
 
-        let bytes = entry.serialize().unwrap();
+        let mut bytes = Vec::new();
+        entry.serialize(&mut bytes).unwrap();
         let restored =
             PartitionEntry::<FlatIndex, FlatQuantizer>::deserialize(bytes.into()).unwrap();
 
@@ -902,7 +920,8 @@ mod tests {
                 index: FlatIndex::default(),
                 storage,
             };
-            let bytes = entry.serialize().unwrap();
+            let mut bytes = Vec::new();
+            entry.serialize(&mut bytes).unwrap();
             let restored =
                 PartitionEntry::<FlatIndex, FlatQuantizer>::deserialize(bytes.into()).unwrap();
             assert_eq!(restored.storage.distance_type(), dt);
@@ -948,7 +967,8 @@ mod tests {
             storage,
         };
 
-        let bytes = entry.serialize().unwrap();
+        let mut bytes = Vec::new();
+        entry.serialize(&mut bytes).unwrap();
         let restored =
             PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(bytes.into()).unwrap();
 
@@ -977,7 +997,8 @@ mod tests {
                 index: FlatIndex::default(),
                 storage,
             };
-            let bytes = entry.serialize().unwrap();
+            let mut bytes = Vec::new();
+            entry.serialize(&mut bytes).unwrap();
             let restored =
                 PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(bytes.into()).unwrap();
             assert_eq!(restored.storage.distance_type(), dt);
@@ -1020,7 +1041,8 @@ mod tests {
             index: FlatIndex::default(),
             storage,
         };
-        let bytes = entry.serialize().unwrap();
+        let mut bytes = Vec::new();
+        entry.serialize(&mut bytes).unwrap();
         let restored =
             PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(bytes.into()).unwrap();
 
@@ -1103,7 +1125,8 @@ mod tests {
             storage,
         };
 
-        let bytes = entry.serialize().unwrap();
+        let mut bytes = Vec::new();
+        entry.serialize(&mut bytes).unwrap();
         let restored =
             PartitionEntry::<FlatIndex, RabitQuantizer>::deserialize(bytes.into()).unwrap();
 
@@ -1144,7 +1167,8 @@ mod tests {
                 index: FlatIndex::default(),
                 storage,
             };
-            let bytes = entry.serialize().unwrap();
+            let mut bytes = Vec::new();
+            entry.serialize(&mut bytes).unwrap();
             let restored =
                 PartitionEntry::<FlatIndex, RabitQuantizer>::deserialize(bytes.into()).unwrap();
             assert_eq!(restored.storage.distance_type(), dt);
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index ffd502550c2..4dce97ebdb1 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -26,7 +26,7 @@ use lance_core::utils::tokio::spawn_cpu;
 use lance_core::utils::tracing::{IO_TYPE_LOAD_VECTOR_PART, TRACE_IO_EVENTS};
 use lance_core::{Error, ROW_ID, Result};
 use lance_encoding::decoder::{DecoderPlugins, FilterExpression};
-use lance_file::reader::{FileReader, FileReaderOptions};
+use lance_file::reader::{CachedFileMetadata, FileReader, FileReaderOptions};
 use lance_index::frag_reuse::FragReuseIndex;
 use lance_index::metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector};
 use lance_index::vector::flat::index::{FlatIndex, FlatQuantizer};
@@ -98,7 +98,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> CacheKey for IVFPartit
         format!("ivf-{}", self.partition_id).into()
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         // Using type_name is safe here: the impl is in the same crate as the
         // types, so the monomorphized pointer is consistent.
         std::any::type_name::<PartitionEntry<S, Q>>()
@@ -153,7 +153,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
     ) -> Result<Self> {
         let io_parallelism = object_store.io_parallelism();
         let scheduler_config = SchedulerConfig::max_bandwidth(&object_store);
-        let scheduler = ScanScheduler::new(object_store, scheduler_config);
+        let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config));
 
         let uri = index_dir.child(uuid.as_str()).child(INDEX_FILE_NAME);
         let cached_size = file_sizes
@@ -168,6 +168,11 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
             FileReaderOptions::default(),
         )
         .await?;
+        // Cache file metadata so reconstruct_typed can skip the metadata read.
+        file_metadata_cache
+            .with_key_prefix(uri.as_ref())
+            .insert_with_key(&FileMetadataCacheKey, index_reader.metadata().clone())
+            .await;
         let index_metadata: IndexMetadata = serde_json::from_str(
             index_reader
                 .schema()
@@ -199,21 +204,22 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
             .get(INDEX_AUXILIARY_FILE_NAME)
             .map(|&size| CachedFileSize::new(size))
             .unwrap_or_else(CachedFileSize::unknown);
+        let aux_path = index_dir
+            .child(uuid.as_str())
+            .child(INDEX_AUXILIARY_FILE_NAME);
         let storage_reader = FileReader::try_open(
-            scheduler
-                .open_file(
-                    &index_dir
-                        .child(uuid.as_str())
-                        .child(INDEX_AUXILIARY_FILE_NAME),
-                    &aux_cached_size,
-                )
-                .await?,
+            scheduler.open_file(&aux_path, &aux_cached_size).await?,
             None,
             Arc::<DecoderPlugins>::default(),
             file_metadata_cache,
             FileReaderOptions::default(),
         )
         .await?;
+        // Cache aux file metadata for reconstruction.
+        file_metadata_cache
+            .with_key_prefix(aux_path.as_ref())
+            .insert_with_key(&FileMetadataCacheKey, storage_reader.metadata().clone())
+            .await;
         let storage =
             IvfQuantizationStorage::try_new(storage_reader, frag_reuse_index.clone()).await?;
 
@@ -639,6 +645,8 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> VectorIndex for IVFInd
         let (sub_index_type, quantization_type) = self.sub_index_type();
         // Convert local path back to object_store Path (undo to_local_path's "/" prefix)
         let index_file_path = self.uri.trim_start_matches('/').to_string();
+        let index_meta = self.reader.metadata();
+        let aux_meta = self.storage.reader().metadata();
         Some(Box::new(IvfIndexState {
             index_file_path,
             uuid: self.uuid.clone(),
@@ -650,6 +658,8 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> VectorIndex for IVFInd
             sub_index_type,
             quantization_type,
             cache_key_prefix: self.index_cache.prefix().to_string(),
+            index_file_size: index_meta.file_size(),
+            aux_file_size: aux_meta.file_size(),
         }))
     }
 }
@@ -659,6 +669,60 @@ pub type IvfPq = IVFIndex<FlatIndex, ProductQuantizer>;
 pub type IvfHnswSqIndex = IVFIndex<HNSW, ScalarQuantizer>;
 pub type IvfHnswPqIndex = IVFIndex<HNSW, ProductQuantizer>;
 
+/// CacheKey for file metadata, matching the key used by fragment reads.
+struct FileMetadataCacheKey;
+
+impl CacheKey for FileMetadataCacheKey {
+    type ValueType = CachedFileMetadata;
+
+    fn key(&self) -> std::borrow::Cow<'_, str> {
+        "".into()
+    }
+
+    fn type_name(&self) -> &'static str {
+        "FileMetadata"
+    }
+}
+
+/// Open a FileReader, using cached file metadata when available to avoid IO.
+async fn open_reader_cached(
+    scheduler: &Arc<ScanScheduler>,
+    path: &Path,
+    cache: &LanceCache,
+    known_file_size: u64,
+) -> Result<FileReader> {
+    let file_cache = cache.with_key_prefix(path.as_ref());
+    let cached_size = if known_file_size > 0 {
+        CachedFileSize::new(known_file_size)
+    } else {
+        CachedFileSize::unknown()
+    };
+    let file_scheduler = scheduler.open_file(path, &cached_size).await?;
+
+    if let Some(cached_meta) = file_cache.get_with_key(&FileMetadataCacheKey).await {
+        let encodings_io = Arc::new(lance_file::LanceEncodingsIo::new(file_scheduler));
+        FileReader::try_open_with_file_metadata(
+            encodings_io,
+            path.clone(),
+            None,
+            Arc::<DecoderPlugins>::default(),
+            cached_meta,
+            cache,
+            FileReaderOptions::default(),
+        )
+        .await
+    } else {
+        FileReader::try_open(
+            file_scheduler,
+            None,
+            Arc::<DecoderPlugins>::default(),
+            cache,
+            FileReaderOptions::default(),
+        )
+        .await
+    }
+}
+
 /// Reconstruct a concrete `IVFIndex<S, Q>` from cached state.
 async fn reconstruct_typed<S: IvfSubIndex + 'static, Q: Quantization + 'static>(
     state: IvfIndexState,
@@ -671,20 +735,16 @@ where
 {
     let io_parallelism = object_store.io_parallelism();
     let scheduler_config = SchedulerConfig::max_bandwidth(&object_store);
-    let scheduler = ScanScheduler::new(object_store, scheduler_config);
+    let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config));
 
     let index_path = Path::parse(&state.index_file_path)
         .map_err(|e| Error::io(format!("invalid index path: {e}")))?;
 
-    // Re-open index FileReader (cheap if file metadata cache is warm)
-    let index_reader = FileReader::try_open(
-        scheduler
-            .open_file(&index_path, &CachedFileSize::unknown())
-            .await?,
-        None,
-        Arc::<DecoderPlugins>::default(),
+    let index_reader = open_reader_cached(
+        &scheduler,
+        &index_path,
         file_metadata_cache,
-        FileReaderOptions::default(),
+        state.index_file_size,
     )
     .await?;
 
@@ -697,14 +757,11 @@ where
         .unwrap_or("");
     let aux_path = Path::parse(format!("{}/{}", parent_str, INDEX_AUXILIARY_FILE_NAME))
         .map_err(|e| Error::io(format!("invalid aux path: {e}")))?;
-    let storage_reader = FileReader::try_open(
-        scheduler
-            .open_file(&aux_path, &CachedFileSize::unknown())
-            .await?,
-        None,
-        Arc::<DecoderPlugins>::default(),
+    let storage_reader = open_reader_cached(
+        &scheduler,
+        &aux_path,
         file_metadata_cache,
-        FileReaderOptions::default(),
+        state.aux_file_size,
     )
     .await?;
 
@@ -3869,4 +3926,53 @@ mod tests {
         let stats = dataset.object_store().io_stats_incremental();
         assert_io_eq!(stats, read_iops, 0, "second prewarm should not perform IO");
     }
+
+    #[tokio::test]
+    async fn test_reconstruct_from_cache_zero_io() {
+        use lance_io::assert_io_eq;
+
+        let test_dir = TempStrDir::default();
+        let test_uri = test_dir.as_str();
+        let (mut dataset, _) = generate_test_dataset::<Float32Type>(test_uri, 0.0..1.0).await;
+
+        let params = VectorIndexParams::with_ivf_pq_params(
+            DistanceType::L2,
+            IvfBuildParams::new(4),
+            PQBuildParams::default(),
+        );
+        dataset
+            .create_index(
+                &["vector"],
+                IndexType::Vector,
+                Some("my_idx".to_owned()),
+                &params,
+                true,
+            )
+            .await
+            .unwrap();
+
+        // First open: populates file metadata cache and VectorIndexData cache.
+        let indices = dataset.load_indices_by_name("my_idx").await.unwrap();
+        let uuid = indices[0].uuid.to_string();
+        dataset
+            .open_vector_index("vector", &uuid, &NoOpMetricsCollector)
+            .await
+            .unwrap();
+
+        // Reset IO stats, then open again — should reconstruct from cache.
+        dataset.object_store().io_stats_incremental();
+
+        dataset
+            .open_vector_index("vector", &uuid, &NoOpMetricsCollector)
+            .await
+            .unwrap();
+
+        let stats = dataset.object_store().io_stats_incremental();
+        assert_io_eq!(
+            stats,
+            read_iops,
+            0,
+            "reconstructing from cached state should not perform IO"
+        );
+    }
 }
diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index aa450483553..7242c0cca6a 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -235,7 +235,7 @@ mod tests {
             Cow::Borrowed(self.0)
         }
 
-        fn type_id(&self) -> &'static str {
+        fn type_name(&self) -> &'static str {
             "TestUnsized"
         }
     }
diff --git a/rust/lance/src/session/caches.rs b/rust/lance/src/session/caches.rs
index e4ac180d563..2654e356ac1 100644
--- a/rust/lance/src/session/caches.rs
+++ b/rust/lance/src/session/caches.rs
@@ -82,7 +82,7 @@ impl CacheKey for ManifestKey<'_> {
             Cow::Owned(format!("manifest/{}", self.version))
         }
     }
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "Manifest"
     }
 }
@@ -97,7 +97,7 @@ impl CacheKey for TransactionKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("txn/{}", self.version))
     }
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "Transaction"
     }
 }
@@ -119,7 +119,7 @@ impl CacheKey for DeletionFileKey<'_> {
             self.deletion_file.file_type.suffix()
         ))
     }
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "DeletionVector"
     }
 }
@@ -134,7 +134,7 @@ impl CacheKey for RowAddrMaskKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_addr_mask/{}", self.version))
     }
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "RowAddrMask"
     }
 }
@@ -149,7 +149,7 @@ impl CacheKey for RowIdIndexKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_id_index/{}", self.version))
     }
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "RowIdIndex"
     }
 }
@@ -164,7 +164,7 @@ impl CacheKey for RowIdSequenceKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_id_sequence/{}", self.fragment_id))
     }
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "RowIdSequence"
     }
 }
diff --git a/rust/lance/src/session/index_caches.rs b/rust/lance/src/session/index_caches.rs
index c3430f4c840..04aa9791c8d 100644
--- a/rust/lance/src/session/index_caches.rs
+++ b/rust/lance/src/session/index_caches.rs
@@ -89,7 +89,7 @@ impl CacheKey for FragReuseIndexKey<'_> {
         Cow::Owned(format!("frag_reuse/{}", self.uuid))
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "FragReuseIndex"
     }
 }
@@ -106,7 +106,7 @@ impl CacheKey for IndexMetadataKey {
         Cow::Owned(self.version.to_string())
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "Vec<IndexMetadata>"
     }
 }
@@ -137,7 +137,7 @@ impl CacheKey for ScalarIndexDetailsKey<'_> {
         Cow::Owned(format!("type/{}", self.uuid))
     }
 
-    fn type_id(&self) -> &'static str {
+    fn type_name(&self) -> &'static str {
         "ScalarIndexDetails"
     }
 }

From a1fb0ba6420bba402a8bee197c993e9d2385fcc6 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 20 Mar 2026 16:43:24 -0700
Subject: [PATCH 16/24] refactor: move serialization/reconstruction code to PR
 #6223

Move VectorIndexData, IvfIndexState, partition_serde, cacheable_state,
and zero-IO reconstruction out of this PR to keep it focused on the
pluggable cache backend.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-file/src/reader.rs                 |   10 -
 rust/lance-index/src/vector.rs                |  212 ---
 rust/lance-index/src/vector/storage.rs        |   22 -
 rust/lance/src/index.rs                       |   55 +-
 rust/lance/src/index/vector/ivf.rs            |    1 -
 .../src/index/vector/ivf/partition_serde.rs   | 1177 -----------------
 rust/lance/src/index/vector/ivf/v2.rs         |  375 +-----
 7 files changed, 27 insertions(+), 1825 deletions(-)
 delete mode 100644 rust/lance/src/index/vector/ivf/partition_serde.rs

diff --git a/rust/lance-file/src/reader.rs b/rust/lance-file/src/reader.rs
index 3a3f41854c0..29c1aa3ccc0 100644
--- a/rust/lance-file/src/reader.rs
+++ b/rust/lance-file/src/reader.rs
@@ -105,16 +105,6 @@ pub struct CachedFileMetadata {
     pub minor_version: u16,
 }
 
-impl CachedFileMetadata {
-    /// Total file size in bytes.
-    pub fn file_size(&self) -> u64 {
-        self.num_data_bytes
-            + self.num_global_buffer_bytes
-            + self.num_column_metadata_bytes
-            + self.num_footer_bytes
-    }
-}
-
 impl DeepSizeOf for CachedFileMetadata {
     // TODO: include size for `column_metadatas` and `column_infos`.
     fn deep_size_of_children(&self, context: &mut Context) -> usize {
diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs
index 0a1b50297d3..0fbff4475cb 100644
--- a/rust/lance-index/src/vector.rs
+++ b/rust/lance-index/src/vector.rs
@@ -11,14 +11,12 @@ use std::{collections::HashMap, sync::Arc};
 use arrow_array::{ArrayRef, Float32Array, RecordBatch, UInt32Array};
 use arrow_schema::Field;
 use async_trait::async_trait;
-use bytes::Bytes;
 use datafusion::execution::SendableRecordBatchStream;
 use deepsize::DeepSizeOf;
 use ivf::storage::IvfModel;
 use lance_core::{ROW_ID_FIELD, Result};
 use lance_io::traits::Reader;
 use lance_linalg::distance::DistanceType;
-use prost::Message;
 use quantizer::{QuantizationType, Quantizer};
 use std::sync::LazyLock;
 use v3::subindex::SubIndexType;
@@ -142,210 +140,6 @@ impl From<DistanceType> for pb::VectorMetricType {
     }
 }
 
-/// Serializable snapshot of a vector index, suitable for disk caching.
-///
-/// Implementations must be cheaply reconstructable into a live
-/// [`VectorIndex`] given an ObjectStore, file metadata cache, and partition
-/// cache. The reconstruction cost should be dominated by re-opening
-/// `FileReader`s, which is cheap when the file metadata cache is warm.
-pub trait VectorIndexData: Send + Sync + DeepSizeOf + std::fmt::Debug {
-    /// Serialize this state into `writer`. Called on a blocking thread by
-    /// the disk cache codec.
-    fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()>;
-
-    /// Tag used to dispatch deserialization to the correct concrete type.
-    fn index_type_tag(&self) -> &'static str;
-
-    /// Downcast to `&dyn Any` for concrete type access during reconstruction.
-    fn as_any(&self) -> &dyn Any;
-}
-
-/// Deserialize a [`VectorIndexData`] from bytes previously written by
-/// [`VectorIndexData::write_to`].
-pub fn deserialize_vector_index_data(data: Bytes) -> Result<Arc<dyn VectorIndexData>> {
-    // Currently only IVF indices support disk caching. The serialization
-    // format is self-describing (IvfIndexState header), so no external tag
-    // is needed yet. When additional index types are added, prepend a
-    // version/tag byte to the wire format.
-    let state = IvfIndexState::deserialize(data)?;
-    Ok(Arc::new(state))
-}
-
-/// Serializable state of an IVF index, sufficient to reconstruct the index
-/// without re-reading global buffers from object storage.
-///
-/// Produced by [`VectorIndex::cacheable_state`] and consumed by a
-/// reconstruction function that re-opens FileReaders using cached file metadata.
-#[derive(Debug, Clone)]
-pub struct IvfIndexState {
-    /// Object-store path to the index file (before `to_local_path` conversion).
-    pub index_file_path: String,
-    pub uuid: String,
-    pub ivf: IvfModel,
-    pub distance_type: DistanceType,
-    pub sub_index_metadata: Vec<String>,
-    /// JSON serialization of `Q::Metadata` (quantizer-specific metadata).
-    pub quantizer_metadata_json: String,
-    /// Large quantizer data (PQ codebook, RQ rotation matrix) from `extra_metadata()`.
-    pub quantizer_extra_data: Option<Vec<u8>>,
-    pub sub_index_type: SubIndexType,
-    pub quantization_type: QuantizationType,
-    /// The cache key prefix used by the original index's WeakLanceCache.
-    /// Needed to reconnect the reconstructed index to the shared cache backend.
-    pub cache_key_prefix: String,
-    /// File sizes for the index and auxiliary files, used to avoid HEAD requests
-    /// when reconstructing from cache.
-    pub index_file_size: u64,
-    pub aux_file_size: u64,
-}
-
-/// Serialization header for [`IvfIndexState`].
-#[derive(serde::Serialize, serde::Deserialize)]
-struct IvfIndexStateHeader {
-    index_file_path: String,
-    uuid: String,
-    distance_type: String,
-    sub_index_metadata: Vec<String>,
-    sub_index_type: String,
-    quantization_type: String,
-    quantizer_metadata_json: String,
-    #[serde(default)]
-    cache_key_prefix: String,
-    #[serde(default)]
-    index_file_size: u64,
-    #[serde(default)]
-    aux_file_size: u64,
-}
-
-impl IvfIndexState {
-    /// Wire format:
-    /// `[header_json_len: u64 LE][header JSON][ivf_pb_len: u64 LE][ivf protobuf]
-    ///  [extra_len: u64 LE][extra bytes]`
-    pub fn serialize(&self) -> Result<Vec<u8>> {
-        let header = IvfIndexStateHeader {
-            index_file_path: self.index_file_path.clone(),
-            uuid: self.uuid.clone(),
-            distance_type: self.distance_type.to_string(),
-            sub_index_metadata: self.sub_index_metadata.clone(),
-            sub_index_type: self.sub_index_type.to_string(),
-            quantization_type: self.quantization_type.to_string(),
-            quantizer_metadata_json: self.quantizer_metadata_json.clone(),
-            cache_key_prefix: self.cache_key_prefix.clone(),
-            index_file_size: self.index_file_size,
-            aux_file_size: self.aux_file_size,
-        };
-        let header_json = serde_json::to_vec(&header)
-            .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
-
-        let ivf_pb = pb::Ivf::try_from(&self.ivf)?;
-        let ivf_bytes = ivf_pb.encode_to_vec();
-
-        let extra = self.quantizer_extra_data.as_deref().unwrap_or(&[]);
-
-        let total = 8 + header_json.len() + 8 + ivf_bytes.len() + 8 + extra.len();
-        let mut buf = Vec::with_capacity(total);
-        buf.extend_from_slice(&(header_json.len() as u64).to_le_bytes());
-        buf.extend_from_slice(&header_json);
-        buf.extend_from_slice(&(ivf_bytes.len() as u64).to_le_bytes());
-        buf.extend_from_slice(&ivf_bytes);
-        buf.extend_from_slice(&(extra.len() as u64).to_le_bytes());
-        buf.extend_from_slice(extra);
-        Ok(buf)
-    }
-
-    pub fn deserialize(data: Bytes) -> Result<Self> {
-        let mut offset = 0;
-
-        let read_u64 = |data: &[u8], off: &mut usize| -> Result<u64> {
-            if *off + 8 > data.len() {
-                return Err(lance_core::Error::io("IvfIndexState data truncated"));
-            }
-            let val = u64::from_le_bytes(data[*off..*off + 8].try_into().unwrap());
-            *off += 8;
-            Ok(val)
-        };
-
-        let header_len = read_u64(&data, &mut offset)? as usize;
-        if offset + header_len > data.len() {
-            return Err(lance_core::Error::io("IvfIndexState header truncated"));
-        }
-        let header: IvfIndexStateHeader =
-            serde_json::from_slice(&data[offset..offset + header_len])
-                .map_err(|e| lance_core::Error::io(format!("IvfIndexState header: {e}")))?;
-        offset += header_len;
-
-        let ivf_len = read_u64(&data, &mut offset)? as usize;
-        if offset + ivf_len > data.len() {
-            return Err(lance_core::Error::io("IvfIndexState IVF data truncated"));
-        }
-        let ivf_pb = pb::Ivf::decode(&data[offset..offset + ivf_len])
-            .map_err(|e| lance_core::Error::io(format!("IvfIndexState IVF decode: {e}")))?;
-        let ivf = IvfModel::try_from(ivf_pb)?;
-        offset += ivf_len;
-
-        let extra_len = read_u64(&data, &mut offset)? as usize;
-        if offset + extra_len > data.len() {
-            return Err(lance_core::Error::io("IvfIndexState extra data truncated"));
-        }
-        let quantizer_extra_data = if extra_len > 0 {
-            Some(data[offset..offset + extra_len].to_vec())
-        } else {
-            None
-        };
-
-        let distance_type = DistanceType::try_from(header.distance_type.as_str())?;
-        let sub_index_type = SubIndexType::try_from(header.sub_index_type.as_str())?;
-        let quantization_type = header.quantization_type.parse::<QuantizationType>()?;
-
-        Ok(Self {
-            index_file_path: header.index_file_path,
-            uuid: header.uuid,
-            ivf,
-            distance_type,
-            sub_index_metadata: header.sub_index_metadata,
-            quantizer_metadata_json: header.quantizer_metadata_json,
-            quantizer_extra_data,
-            sub_index_type,
-            quantization_type,
-            cache_key_prefix: header.cache_key_prefix,
-            index_file_size: header.index_file_size,
-            aux_file_size: header.aux_file_size,
-        })
-    }
-}
-
-impl DeepSizeOf for IvfIndexState {
-    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
-        self.index_file_path.deep_size_of_children(context)
-            + self.uuid.deep_size_of_children(context)
-            + self.ivf.deep_size_of_children(context)
-            + self.sub_index_metadata.deep_size_of_children(context)
-            + self.quantizer_metadata_json.deep_size_of_children(context)
-            + self
-                .quantizer_extra_data
-                .as_ref()
-                .map(|v| v.deep_size_of_children(context))
-                .unwrap_or(0)
-            + self.cache_key_prefix.deep_size_of_children(context)
-    }
-}
-
-impl VectorIndexData for IvfIndexState {
-    fn write_to(&self, writer: &mut dyn std::io::Write) -> Result<()> {
-        let bytes = self.serialize()?;
-        writer.write_all(&bytes)?;
-        Ok(())
-    }
-
-    fn index_type_tag(&self) -> &'static str {
-        "IVF"
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-}
-
 /// Vector Index for (Approximate) Nearest Neighbor (ANN) Search.
 ///
 /// Vector indices are often built as a chain of indices.  For example, IVF -> PQ
@@ -470,12 +264,6 @@ pub trait VectorIndex: Send + Sync + std::fmt::Debug + Index {
 
     /// the index type of this vector index.
     fn sub_index_type(&self) -> (SubIndexType, QuantizationType);
-
-    /// Export the index state needed for reconstruction from a disk cache.
-    /// Returns `None` if this index type doesn't support persistent caching.
-    fn cacheable_state(&self) -> Option<Box<dyn VectorIndexData>> {
-        None
-    }
 }
 
 // it can be an IVF index or a partition of IVF index
diff --git a/rust/lance-index/src/vector/storage.rs b/rust/lance-index/src/vector/storage.rs
index 526ba8e78d8..5a1c0e7e6f5 100644
--- a/rust/lance-index/src/vector/storage.rs
+++ b/rust/lance-index/src/vector/storage.rs
@@ -239,28 +239,6 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
         })
     }
 
-    /// Construct from pre-parsed metadata, skipping global buffer reads.
-    /// Used when reconstructing from a disk cache.
-    pub fn from_cached(
-        reader: FileReader,
-        ivf: IvfModel,
-        metadata: Q::Metadata,
-        distance_type: DistanceType,
-        frag_reuse_index: Option<Arc<FragReuseIndex>>,
-    ) -> Self {
-        Self {
-            reader,
-            distance_type,
-            metadata,
-            ivf,
-            frag_reuse_index,
-        }
-    }
-
-    pub fn reader(&self) -> &FileReader {
-        &self.reader
-    }
-
     pub fn num_rows(&self) -> u64 {
         self.reader.num_rows()
     }
diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index 3c920e19426..7fc85b0b1dd 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -42,7 +42,6 @@ use lance_index::vector::flat::index::{FlatBinQuantizer, FlatIndex, FlatQuantize
 use lance_index::vector::hnsw::HNSW;
 use lance_index::vector::pq::ProductQuantizer;
 use lance_index::vector::sq::ScalarQuantizer;
-use lance_index::vector::{IvfIndexState, VectorIndexData};
 use lance_index::{DatasetIndexExt, INDEX_METADATA_SCHEMA_KEY, IndexDescription, IndexSegment};
 use lance_index::{INDEX_FILE_NAME, Index, IndexType, pb, vector::VectorIndex};
 use lance_index::{
@@ -130,7 +129,7 @@ impl<'a> VectorIndexCacheKey<'a> {
 }
 
 impl UnsizedCacheKey for VectorIndexCacheKey<'_> {
-    type ValueType = dyn VectorIndexData;
+    type ValueType = dyn VectorIndex;
 
     fn key(&self) -> std::borrow::Cow<'_, str> {
         if let Some(fri_uuid) = self.fri_uuid {
@@ -1338,10 +1337,7 @@ impl DatasetIndexInternalExt for Dataset {
         uuid: &str,
         metrics: &dyn MetricsCollector,
     ) -> Result<Arc<dyn Index>> {
-        // Quick cache checks for scalar and frag-reuse indices. VectorIndex
-        // is not checked here because the cache stores VectorIndexData (serializable
-        // state), not a live VectorIndex — reconstruction is handled by
-        // open_vector_index.
+        // Quick cache checks for scalar and frag-reuse indices.
         let frag_reuse_uuid = self.frag_reuse_index_uuid().await;
         let cache_key = ScalarIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
         if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await {
@@ -1422,22 +1418,9 @@ impl DatasetIndexInternalExt for Dataset {
         let frag_reuse_uuid = self.frag_reuse_index_uuid().await;
         let cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
 
-        // Check cache for serialized VectorIndexData and reconstruct if found.
-        if let Some(data) = self.index_cache.get_unsized_with_key(&cache_key).await
-            && let Some(state) = data.as_any().downcast_ref::<IvfIndexState>()
-        {
-            log::debug!(
-                "Reconstructing vector index from cached state uuid: {}",
-                uuid
-            );
-            let partition_cache = self.index_cache.with_key_prefix(&cache_key.key());
-            return vector::ivf::v2::reconstruct_vector_index(
-                state.clone(),
-                self.object_store.clone(),
-                &self.metadata_cache,
-                partition_cache,
-            )
-            .await;
+        if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await {
+            log::debug!("Found vector index in cache uuid: {}", uuid);
+            return Ok(index);
         }
 
         let frag_reuse_index = self.open_frag_reuse_index(metrics).await?;
@@ -1501,12 +1484,9 @@ impl DatasetIndexInternalExt for Dataset {
                     self.object_store.clone(),
                     SchedulerConfig::max_bandwidth(&self.object_store),
                 );
-                let file_sizes = index_meta.file_size_map();
-                let cached_size = file_sizes
-                    .get(INDEX_FILE_NAME)
-                    .map(|&size| CachedFileSize::new(size))
-                    .unwrap_or_else(CachedFileSize::unknown);
-                let file = scheduler.open_file(&index_file, &cached_size).await?;
+                let file = scheduler
+                    .open_file(&index_file, &CachedFileSize::unknown())
+                    .await?;
                 let reader = lance_file::reader::FileReader::try_open(
                     file,
                     None,
@@ -1540,7 +1520,6 @@ impl DatasetIndexInternalExt for Dataset {
                                 frag_reuse_index,
                                 self.metadata_cache.as_ref(),
                                 index_cache,
-                                file_sizes,
                             )
                             .await?;
                             Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1553,7 +1532,6 @@ impl DatasetIndexInternalExt for Dataset {
                                 frag_reuse_index,
                                 self.metadata_cache.as_ref(),
                                 index_cache,
-                                file_sizes,
                             )
                             .await?;
                             Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1572,7 +1550,6 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
-                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1586,7 +1563,6 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
-                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1600,7 +1576,6 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
-                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1617,7 +1592,6 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             &file_metadata_cache,
                             index_cache,
-                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1631,7 +1605,6 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
-                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1645,7 +1618,6 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
-                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1664,14 +1636,9 @@ impl DatasetIndexInternalExt for Dataset {
         };
         let index = index?;
         metrics.record_index_load();
-        // Cache the serializable state, not the live index. The live index
-        // holds FileReader handles that can't survive serialization; the
-        // state can be cheaply reconstructed on the next cache hit.
-        if let Some(state) = index.cacheable_state() {
-            self.index_cache
-                .insert_unsized_with_key(&cache_key, Arc::from(state))
-                .await;
-        }
+        self.index_cache
+            .insert_unsized_with_key(&cache_key, index.clone())
+            .await;
         Ok(index)
     }
 
diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs
index 3768fad045c..c26da61d7ef 100644
--- a/rust/lance/src/index/vector/ivf.rs
+++ b/rust/lance/src/index/vector/ivf.rs
@@ -102,7 +102,6 @@ use uuid::Uuid;
 
 pub mod builder;
 pub mod io;
-pub mod partition_serde;
 pub mod v2;
 
 // Cache wrapper for vector index trait objects
diff --git a/rust/lance/src/index/vector/ivf/partition_serde.rs b/rust/lance/src/index/vector/ivf/partition_serde.rs
deleted file mode 100644
index 3db0dcc634a..00000000000
--- a/rust/lance/src/index/vector/ivf/partition_serde.rs
+++ /dev/null
@@ -1,1177 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The Lance Authors
-
-//! Serialization and zero-copy deserialization for IVF partition cache entries.
-//!
-//! The format is a simple binary layout designed for ephemeral caching (not stable across versions):
-//!
-//! ```text
-//! [header_len: u64 LE]
-//! [header: JSON bytes]
-//! [sub_index IPC file bytes]
-//! [... quantizer-specific IPC sections ...]
-//! [storage batch IPC file bytes]
-//! ```
-//!
-//! Each IPC section is a complete Arrow IPC file. On deserialization, the IPC
-//! sections are read zero-copy using [`FileDecoder`] so that Arrow arrays
-//! reference the original buffer directly.
-
-use std::io::Write;
-use std::sync::Arc;
-
-use arrow_array::{FixedSizeListArray, RecordBatch};
-use arrow_buffer::Buffer;
-use arrow_ipc::convert::fb_to_schema;
-use arrow_ipc::reader::{FileDecoder, read_footer_length};
-use arrow_ipc::root_as_footer;
-use arrow_ipc::writer::FileWriter;
-use arrow_schema::{DataType, Field, Schema};
-use bytes::Bytes;
-use lance_core::{Error, Result};
-use lance_index::vector::bq::RQRotationType;
-use lance_index::vector::bq::builder::RabitQuantizer;
-use lance_index::vector::bq::storage::RabitQuantizationMetadata;
-use lance_index::vector::flat::index::{FlatMetadata, FlatQuantizer};
-use lance_index::vector::pq::ProductQuantizer;
-use lance_index::vector::pq::storage::ProductQuantizationMetadata;
-use lance_index::vector::quantizer::{Quantization, QuantizerStorage};
-use lance_index::vector::sq::ScalarQuantizer;
-use lance_index::vector::sq::storage::ScalarQuantizationMetadata;
-use lance_index::vector::storage::VectorStore;
-use lance_index::vector::v3::subindex::IvfSubIndex;
-use lance_linalg::distance::DistanceType;
-use serde::{Deserialize, Serialize};
-
-use super::v2::PartitionEntry;
-
-/// Serialization interface for spilling cache entries to an external store.
-///
-/// `serialize` writes the entry into the provided writer and returns the
-/// number of bytes written.  `deserialize` reconstructs the entry from a
-/// contiguous `Bytes` buffer (typically obtained by reading back whatever
-/// was written).
-pub trait Spillable: Sized {
-    fn serialize(&self, writer: &mut dyn Write) -> Result<usize>;
-    fn deserialize(data: Bytes) -> Result<Self>;
-}
-
-// ---------------------------------------------------------------------------
-// Common helpers
-// ---------------------------------------------------------------------------
-
-fn distance_type_to_u8(dt: DistanceType) -> u8 {
-    match dt {
-        DistanceType::L2 => 0,
-        DistanceType::Cosine => 1,
-        DistanceType::Dot => 2,
-        DistanceType::Hamming => 3,
-    }
-}
-
-fn u8_to_distance_type(v: u8) -> Result<DistanceType> {
-    match v {
-        0 => Ok(DistanceType::L2),
-        1 => Ok(DistanceType::Cosine),
-        2 => Ok(DistanceType::Dot),
-        3 => Ok(DistanceType::Hamming),
-        _ => Err(Error::io(format!("unknown distance type: {v}"))),
-    }
-}
-
-fn rotation_type_to_u8(rt: RQRotationType) -> u8 {
-    match rt {
-        RQRotationType::Matrix => 0,
-        RQRotationType::Fast => 1,
-    }
-}
-
-fn u8_to_rotation_type(v: u8) -> Result<RQRotationType> {
-    match v {
-        0 => Ok(RQRotationType::Matrix),
-        1 => Ok(RQRotationType::Fast),
-        _ => Err(Error::io(format!("unknown rotation type: {v}"))),
-    }
-}
-
-/// Write one or more RecordBatches as a complete Arrow IPC file into a Vec<u8>.
-///
-/// Panics if `batches` is empty (caller is responsible for checking).
-fn write_ipc_batches(batches: &[RecordBatch]) -> Result<Vec<u8>> {
-    let mut buf = Vec::new();
-    let mut writer = FileWriter::try_new(&mut buf, batches[0].schema_ref())?;
-    for batch in batches {
-        writer.write(batch)?;
-    }
-    writer.finish()?;
-    Ok(buf)
-}
-
-/// Write a single RecordBatch as a complete Arrow IPC file into a Vec<u8>.
-fn write_ipc(batch: &RecordBatch) -> Result<Vec<u8>> {
-    write_ipc_batches(std::slice::from_ref(batch))
-}
-
-/// Decode the IPC footer and schema from a `Buffer`, returning the decoder and
-/// the list of record-batch blocks. Zero-copy: all returned data references
-/// the original buffer.
-fn parse_ipc_footer(data: &Buffer) -> Result<(FileDecoder, Vec<arrow_ipc::Block>)> {
-    let trailer_start = data
-        .len()
-        .checked_sub(10)
-        .ok_or_else(|| Error::io("IPC section too small to contain footer".to_string()))?;
-    let footer_len = read_footer_length(
-        data[trailer_start..]
-            .try_into()
-            .map_err(|_| Error::io("IPC section too small for footer length".to_string()))?,
-    )?;
-    let footer_start = trailer_start
-        .checked_sub(footer_len)
-        .ok_or_else(|| Error::io("IPC footer length exceeds section size".to_string()))?;
-    let footer = root_as_footer(&data[footer_start..trailer_start])
-        .map_err(|e| Error::io(format!("failed to parse IPC footer: {e}")))?;
-
-    let schema =
-        Arc::new(fb_to_schema(footer.schema().ok_or_else(|| {
-            Error::io("IPC footer missing schema".to_string())
-        })?));
-
-    let mut decoder = FileDecoder::new(schema, footer.version());
-
-    for block in footer.dictionaries().iter().flatten() {
-        let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
-        let block_data = data.slice_with_length(block.offset() as usize, block_len);
-        decoder.read_dictionary(block, &block_data)?;
-    }
-
-    let batch_blocks: Vec<arrow_ipc::Block> = footer
-        .recordBatches()
-        .map(|b| b.iter().copied().collect())
-        .unwrap_or_default();
-
-    Ok((decoder, batch_blocks))
-}
-
-/// Read all RecordBatches from an Arrow IPC file stored in a `Buffer`, zero-copy.
-///
-/// The returned arrays reference slices of the provided buffer directly.
-fn read_ipc_all_zero_copy(data: Buffer) -> Result<Vec<RecordBatch>> {
-    let (decoder, batch_blocks) = parse_ipc_footer(&data)?;
-    batch_blocks
-        .iter()
-        .map(|block| {
-            let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
-            let block_data = data.slice_with_length(block.offset() as usize, block_len);
-            decoder
-                .read_record_batch(block, &block_data)?
-                .ok_or_else(|| Error::io("IPC record batch was None".to_string()))
-        })
-        .collect()
-}
-
-/// Read a single RecordBatch from an Arrow IPC file stored in a `Buffer`, zero-copy.
-///
-/// The returned `RecordBatch`'s arrays reference slices of the provided buffer
-/// directly, avoiding copies.
-fn read_ipc_zero_copy(data: Buffer) -> Result<RecordBatch> {
-    let (decoder, batch_blocks) = parse_ipc_footer(&data)?;
-    if batch_blocks.is_empty() {
-        return Err(Error::io("IPC file contains no record batches".to_string()));
-    }
-    let block = &batch_blocks[0];
-    let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
-    let block_data = data.slice_with_length(block.offset() as usize, block_len);
-    decoder
-        .read_record_batch(block, &block_data)?
-        .ok_or_else(|| Error::io("IPC record batch was None".to_string()))
-}
-
-/// Wrap a `FixedSizeListArray` in a single-column RecordBatch with the given column name.
-fn fsl_to_batch(arr: &FixedSizeListArray, name: &str) -> Result<RecordBatch> {
-    let field = Field::new(
-        name,
-        DataType::FixedSizeList(
-            Arc::new(Field::new("item", arr.value_type(), true)),
-            arr.value_length(),
-        ),
-        false,
-    );
-    let schema = Arc::new(Schema::new(vec![field]));
-    Ok(RecordBatch::try_new(schema, vec![Arc::new(arr.clone())])?)
-}
-
-/// Extract a `FixedSizeListArray` from the first column of a RecordBatch.
-fn batch_to_fsl(batch: &RecordBatch) -> Result<FixedSizeListArray> {
-    let col = batch.column(0);
-    col.as_any()
-        .downcast_ref::<FixedSizeListArray>()
-        .cloned()
-        .ok_or_else(|| Error::io("column is not FixedSizeListArray".to_string()))
-}
-
-fn codebook_to_batch(codebook: &FixedSizeListArray) -> Result<RecordBatch> {
-    fsl_to_batch(codebook, "codebook")
-}
-
-fn batch_to_codebook(batch: &RecordBatch) -> Result<FixedSizeListArray> {
-    batch_to_fsl(batch)
-}
-
-// ---------------------------------------------------------------------------
-// PQ
-// ---------------------------------------------------------------------------
-
-#[derive(Serialize, Deserialize)]
-struct PqPartitionHeader {
-    distance_type: u8,
-    nbits: u32,
-    num_sub_vectors: usize,
-    dimension: usize,
-    transposed: bool,
-    /// Length of the sub-index IPC section in bytes.
-    sub_index_len: u64,
-    /// Length of the codebook IPC section in bytes.
-    codebook_len: u64,
-    /// Length of the storage batch IPC section in bytes.
-    storage_len: u64,
-}
-
-impl<S: IvfSubIndex> Spillable for PartitionEntry<S, ProductQuantizer> {
-    /// Serialize this partition entry to bytes.
-    ///
-    /// The sub-index, PQ codebook, and storage batch are each written as Arrow
-    /// IPC file sections, preceded by a small JSON header containing scalar
-    /// metadata and section lengths.
-    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
-        let metadata = self.storage.metadata();
-        let distance_type = self.storage.distance_type();
-
-        // Serialize the three Arrow sections.
-        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
-        let codebook = metadata.codebook.as_ref().ok_or_else(|| {
-            Error::io("PQ metadata missing codebook during serialization".to_string())
-        })?;
-        let codebook_ipc = write_ipc(&codebook_to_batch(codebook)?)?;
-        let storage_batches: Vec<_> = self.storage.to_batches()?.collect();
-        let storage_ipc = if storage_batches.len() == 1 {
-            write_ipc(&storage_batches[0])?
-        } else {
-            return Err(Error::io(
-                "expected exactly one storage batch for PQ storage".to_string(),
-            ));
-        };
-
-        let header = PqPartitionHeader {
-            distance_type: distance_type_to_u8(distance_type),
-            nbits: metadata.nbits,
-            num_sub_vectors: metadata.num_sub_vectors,
-            dimension: metadata.dimension,
-            transposed: metadata.transposed,
-            sub_index_len: sub_index_ipc.len() as u64,
-            codebook_len: codebook_ipc.len() as u64,
-            storage_len: storage_ipc.len() as u64,
-        };
-
-        let header_json = serde_json::to_vec(&header)?;
-        let total_len =
-            8 + header_json.len() + sub_index_ipc.len() + codebook_ipc.len() + storage_ipc.len();
-
-        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
-        writer.write_all(&header_json)?;
-        writer.write_all(&sub_index_ipc)?;
-        writer.write_all(&codebook_ipc)?;
-        writer.write_all(&storage_ipc)?;
-
-        Ok(total_len)
-    }
-
-    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
-    ///
-    /// The Arrow IPC sections are decoded using [`FileDecoder`] so that the
-    /// resulting arrays reference slices of the provided `Bytes` buffer directly.
-    fn deserialize(data: Bytes) -> Result<Self> {
-        if data.len() < 8 {
-            return Err(Error::io("partition data too small".to_string()));
-        }
-
-        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize;
-        let header_end = 8 + header_len;
-        if data.len() < header_end {
-            return Err(Error::io("partition data truncated in header".to_string()));
-        }
-
-        let header: PqPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
-        let distance_type = u8_to_distance_type(header.distance_type)?;
-
-        let sub_index_start = header_end;
-        let sub_index_end = sub_index_start + header.sub_index_len as usize;
-        let codebook_start = sub_index_end;
-        let codebook_end = codebook_start + header.codebook_len as usize;
-        let storage_start = codebook_end;
-        let storage_end = storage_start + header.storage_len as usize;
-
-        if data.len() < storage_end {
-            return Err(Error::io(
-                "partition data truncated in IPC sections".to_string(),
-            ));
-        }
-
-        // Zero-copy: create Buffer slices backed by the original Bytes.
-        let buffer = Buffer::from(data);
-        let sub_index_buf =
-            buffer.slice_with_length(sub_index_start, header.sub_index_len as usize);
-        let codebook_buf = buffer.slice_with_length(codebook_start, header.codebook_len as usize);
-        let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize);
-
-        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
-        let codebook_batch = read_ipc_zero_copy(codebook_buf)?;
-        let storage_batch = read_ipc_zero_copy(storage_buf)?;
-
-        let index = S::load(sub_index_batch)?;
-        let codebook = batch_to_codebook(&codebook_batch)?;
-
-        let metadata = ProductQuantizationMetadata {
-            codebook_position: 0,
-            nbits: header.nbits,
-            num_sub_vectors: header.num_sub_vectors,
-            dimension: header.dimension,
-            codebook: Some(codebook),
-            codebook_tensor: Vec::new(),
-            transposed: header.transposed,
-        };
-
-        let storage = <ProductQuantizer as Quantization>::Storage::try_from_batch(
-            storage_batch,
-            &metadata,
-            distance_type,
-            None,
-        )?;
-
-        Ok(Self { index, storage })
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Flat
-// ---------------------------------------------------------------------------
-
-#[derive(Serialize, Deserialize)]
-struct FlatPartitionHeader {
-    distance_type: u8,
-    dim: usize,
-    sub_index_len: u64,
-    storage_len: u64,
-}
-
-impl<S: IvfSubIndex> Spillable for PartitionEntry<S, FlatQuantizer> {
-    /// Serialize this partition entry to bytes.
-    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
-        let metadata = self.storage.metadata();
-        let distance_type = self.storage.distance_type();
-
-        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
-        let storage_batches: Vec<_> = self.storage.to_batches()?.collect();
-        let storage_ipc = if storage_batches.len() == 1 {
-            write_ipc(&storage_batches[0])?
-        } else {
-            return Err(Error::io(
-                "expected exactly one storage batch for Flat storage".to_string(),
-            ));
-        };
-
-        let header = FlatPartitionHeader {
-            distance_type: distance_type_to_u8(distance_type),
-            dim: metadata.dim,
-            sub_index_len: sub_index_ipc.len() as u64,
-            storage_len: storage_ipc.len() as u64,
-        };
-
-        let header_json = serde_json::to_vec(&header)?;
-        let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len();
-
-        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
-        writer.write_all(&header_json)?;
-        writer.write_all(&sub_index_ipc)?;
-        writer.write_all(&storage_ipc)?;
-
-        Ok(total_len)
-    }
-
-    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
-    fn deserialize(data: Bytes) -> Result<Self> {
-        if data.len() < 8 {
-            return Err(Error::io("partition data too small".to_string()));
-        }
-        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize;
-        let header_end = 8 + header_len;
-        if data.len() < header_end {
-            return Err(Error::io("partition data truncated in header".to_string()));
-        }
-
-        let header: FlatPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
-        let distance_type = u8_to_distance_type(header.distance_type)?;
-
-        let sub_index_start = header_end;
-        let sub_index_end = sub_index_start + header.sub_index_len as usize;
-        let storage_start = sub_index_end;
-        let storage_end = storage_start + header.storage_len as usize;
-
-        if data.len() < storage_end {
-            return Err(Error::io(
-                "partition data truncated in IPC sections".to_string(),
-            ));
-        }
-
-        let buffer = Buffer::from(data);
-        let sub_index_buf =
-            buffer.slice_with_length(sub_index_start, header.sub_index_len as usize);
-        let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize);
-
-        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
-        let storage_batch = read_ipc_zero_copy(storage_buf)?;
-
-        let index = S::load(sub_index_batch)?;
-        let metadata = FlatMetadata { dim: header.dim };
-        let storage = <FlatQuantizer as Quantization>::Storage::try_from_batch(
-            storage_batch,
-            &metadata,
-            distance_type,
-            None,
-        )?;
-
-        Ok(Self { index, storage })
-    }
-}
-
-// ---------------------------------------------------------------------------
-// SQ
-// ---------------------------------------------------------------------------
-
-#[derive(Serialize, Deserialize)]
-struct SqPartitionHeader {
-    distance_type: u8,
-    num_bits: u16,
-    dim: usize,
-    bounds_start: f64,
-    bounds_end: f64,
-    sub_index_len: u64,
-    storage_len: u64,
-}
-
-impl<S: IvfSubIndex> Spillable for PartitionEntry<S, ScalarQuantizer> {
-    /// Serialize this partition entry to bytes.
-    ///
-    /// Multiple SQ storage chunks are concatenated into a single IPC section.
-    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
-        let metadata = self.storage.metadata();
-        let distance_type = self.storage.distance_type();
-
-        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
-
-        // Write all SQ chunks as multiple record batches in one IPC file, avoiding copies.
-        let batches: Vec<_> = self.storage.to_batches()?.collect();
-        if batches.is_empty() {
-            return Err(Error::io("SQ storage has no batches".to_string()));
-        }
-        let storage_ipc = write_ipc_batches(&batches)?;
-
-        let header = SqPartitionHeader {
-            distance_type: distance_type_to_u8(distance_type),
-            num_bits: metadata.num_bits,
-            dim: metadata.dim,
-            bounds_start: metadata.bounds.start,
-            bounds_end: metadata.bounds.end,
-            sub_index_len: sub_index_ipc.len() as u64,
-            storage_len: storage_ipc.len() as u64,
-        };
-
-        let header_json = serde_json::to_vec(&header)?;
-        let total_len = 8 + header_json.len() + sub_index_ipc.len() + storage_ipc.len();
-
-        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
-        writer.write_all(&header_json)?;
-        writer.write_all(&sub_index_ipc)?;
-        writer.write_all(&storage_ipc)?;
-
-        Ok(total_len)
-    }
-
-    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
-    fn deserialize(data: Bytes) -> Result<Self> {
-        if data.len() < 8 {
-            return Err(Error::io("partition data too small".to_string()));
-        }
-        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize;
-        let header_end = 8 + header_len;
-        if data.len() < header_end {
-            return Err(Error::io("partition data truncated in header".to_string()));
-        }
-
-        let header: SqPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
-        let distance_type = u8_to_distance_type(header.distance_type)?;
-
-        let sub_index_start = header_end;
-        let sub_index_end = sub_index_start + header.sub_index_len as usize;
-        let storage_start = sub_index_end;
-        let storage_end = storage_start + header.storage_len as usize;
-
-        if data.len() < storage_end {
-            return Err(Error::io(
-                "partition data truncated in IPC sections".to_string(),
-            ));
-        }
-
-        let buffer = Buffer::from(data);
-        let sub_index_buf =
-            buffer.slice_with_length(sub_index_start, header.sub_index_len as usize);
-        let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize);
-
-        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
-        let storage_batches = read_ipc_all_zero_copy(storage_buf)?;
-
-        let index = S::load(sub_index_batch)?;
-        let metadata = ScalarQuantizationMetadata {
-            dim: header.dim,
-            num_bits: header.num_bits,
-            bounds: header.bounds_start..header.bounds_end,
-        };
-        let storage = <ScalarQuantizer as Quantization>::Storage::try_new(
-            metadata.num_bits,
-            distance_type,
-            metadata.bounds,
-            storage_batches,
-            None,
-        )?;
-
-        Ok(Self { index, storage })
-    }
-}
-
-// ---------------------------------------------------------------------------
-// RabitQ
-// ---------------------------------------------------------------------------
-
-#[derive(Serialize, Deserialize)]
-struct RabitPartitionHeader {
-    distance_type: u8,
-    num_bits: u8,
-    code_dim: u32,
-    /// 0 = Matrix, 1 = Fast
-    rotation_type: u8,
-    /// Fast rotation signs (only set when rotation_type == Fast).
-    fast_rotation_signs: Option<Vec<u8>>,
-    sub_index_len: u64,
-    /// Length of the rotation matrix IPC section; 0 when rotation_type == Fast.
-    rotate_mat_len: u64,
-    storage_len: u64,
-}
-
-impl<S: IvfSubIndex> Spillable for PartitionEntry<S, RabitQuantizer> {
-    /// Serialize this partition entry to bytes.
-    ///
-    /// For Matrix rotation the rotation matrix is stored as an Arrow IPC section.
-    /// For Fast rotation the signs are stored compactly in the JSON header.
-    ///
-    /// The storage batch is stored with already-packed codes so deserialization
-    /// can skip re-packing.
-    fn serialize(&self, writer: &mut dyn Write) -> Result<usize> {
-        let metadata = self.storage.metadata();
-        let distance_type = self.storage.distance_type();
-
-        let sub_index_ipc = write_ipc(&self.index.to_batch()?)?;
-
-        let rotate_mat_ipc = match metadata.rotation_type {
-            RQRotationType::Matrix => {
-                let mat = metadata.rotate_mat.as_ref().ok_or_else(|| {
-                    Error::io(
-                        "RabitQ Matrix metadata missing rotate_mat during serialization"
-                            .to_string(),
-                    )
-                })?;
-                write_ipc(&fsl_to_batch(mat, "rotate_mat")?)?
-            }
-            RQRotationType::Fast => Vec::new(),
-        };
-
-        let storage_batches: Vec<_> = self.storage.to_batches()?.collect();
-        let storage_ipc = if storage_batches.len() == 1 {
-            write_ipc(&storage_batches[0])?
-        } else {
-            return Err(Error::io(
-                "expected exactly one storage batch for RabitQ storage".to_string(),
-            ));
-        };
-
-        let header = RabitPartitionHeader {
-            distance_type: distance_type_to_u8(distance_type),
-            num_bits: metadata.num_bits,
-            code_dim: metadata.code_dim,
-            rotation_type: rotation_type_to_u8(metadata.rotation_type),
-            fast_rotation_signs: metadata.fast_rotation_signs.clone(),
-            sub_index_len: sub_index_ipc.len() as u64,
-            rotate_mat_len: rotate_mat_ipc.len() as u64,
-            storage_len: storage_ipc.len() as u64,
-        };
-
-        let header_json = serde_json::to_vec(&header)?;
-        let total_len =
-            8 + header_json.len() + sub_index_ipc.len() + rotate_mat_ipc.len() + storage_ipc.len();
-
-        writer.write_all(&(header_json.len() as u64).to_le_bytes())?;
-        writer.write_all(&header_json)?;
-        writer.write_all(&sub_index_ipc)?;
-        writer.write_all(&rotate_mat_ipc)?;
-        writer.write_all(&storage_ipc)?;
-
-        Ok(total_len)
-    }
-
-    /// Deserialize a partition entry from bytes, zero-copy for Arrow data.
-    fn deserialize(data: Bytes) -> Result<Self> {
-        if data.len() < 8 {
-            return Err(Error::io("partition data too small".to_string()));
-        }
-        let header_len = u64::from_le_bytes(data[..8].try_into().unwrap()) as usize;
-        let header_end = 8 + header_len;
-        if data.len() < header_end {
-            return Err(Error::io("partition data truncated in header".to_string()));
-        }
-
-        let header: RabitPartitionHeader = serde_json::from_slice(&data[8..header_end])?;
-        let distance_type = u8_to_distance_type(header.distance_type)?;
-        let rotation_type = u8_to_rotation_type(header.rotation_type)?;
-
-        let sub_index_start = header_end;
-        let sub_index_end = sub_index_start + header.sub_index_len as usize;
-        let rotate_mat_start = sub_index_end;
-        let rotate_mat_end = rotate_mat_start + header.rotate_mat_len as usize;
-        let storage_start = rotate_mat_end;
-        let storage_end = storage_start + header.storage_len as usize;
-
-        if data.len() < storage_end {
-            return Err(Error::io(
-                "partition data truncated in IPC sections".to_string(),
-            ));
-        }
-
-        let buffer = Buffer::from(data);
-        let sub_index_buf =
-            buffer.slice_with_length(sub_index_start, header.sub_index_len as usize);
-        let storage_buf = buffer.slice_with_length(storage_start, header.storage_len as usize);
-
-        let sub_index_batch = read_ipc_zero_copy(sub_index_buf)?;
-        let storage_batch = read_ipc_zero_copy(storage_buf)?;
-
-        let rotate_mat = if header.rotate_mat_len > 0 {
-            let rotate_mat_buf =
-                buffer.slice_with_length(rotate_mat_start, header.rotate_mat_len as usize);
-            let mat_batch = read_ipc_zero_copy(rotate_mat_buf)?;
-            Some(batch_to_fsl(&mat_batch)?)
-        } else {
-            None
-        };
-
-        let index = S::load(sub_index_batch)?;
-        let metadata = RabitQuantizationMetadata {
-            rotate_mat,
-            rotate_mat_position: None,
-            fast_rotation_signs: header.fast_rotation_signs,
-            rotation_type,
-            code_dim: header.code_dim,
-            num_bits: header.num_bits,
-            // The storage batch already has packed codes; skip re-packing.
-            packed: true,
-        };
-        let storage = <RabitQuantizer as Quantization>::Storage::try_from_batch(
-            storage_batch,
-            &metadata,
-            distance_type,
-            None,
-        )?;
-
-        Ok(Self { index, storage })
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Tests
-// ---------------------------------------------------------------------------
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::sync::Arc;
-
-    use arrow_array::cast::AsArray;
-    use arrow_array::{
-        Float32Array, UInt8Array, UInt64Array,
-        types::{Float32Type, UInt8Type},
-    };
-    use arrow_schema::{DataType, Field, Schema};
-    use lance_arrow::FixedSizeListArrayExt;
-    use lance_index::vector::bq::storage::RABIT_CODE_COLUMN;
-    use lance_index::vector::bq::transform::{ADD_FACTORS_COLUMN, SCALE_FACTORS_COLUMN};
-    use lance_index::vector::bq::{RQRotationType, builder::RabitQuantizer};
-    use lance_index::vector::flat::index::FlatIndex;
-    use lance_index::vector::flat::storage::FlatFloatStorage;
-    use lance_index::vector::sq::storage::ScalarQuantizationStorage;
-
-    // ----- PQ helpers -------------------------------------------------------
-
-    fn make_test_codebook(dim: usize, num_sub_vectors: usize) -> FixedSizeListArray {
-        let sub_dim = dim / num_sub_vectors;
-        let num_centroids = 256;
-        let total_values = num_sub_vectors * num_centroids * sub_dim;
-        let values: Vec<f32> = (0..total_values).map(|i| i as f32 * 0.01).collect();
-        let values_array = Float32Array::from(values);
-        FixedSizeListArray::try_new_from_values(values_array, sub_dim as i32).unwrap()
-    }
-
-    fn make_test_pq_storage(
-        num_rows: usize,
-        dim: usize,
-        num_sub_vectors: usize,
-    ) -> <ProductQuantizer as Quantization>::Storage {
-        let codebook = make_test_codebook(dim, num_sub_vectors);
-        let row_ids = UInt64Array::from((0..num_rows as u64).collect::<Vec<_>>());
-        let pq_codes_flat: Vec<u8> = (0..num_rows * num_sub_vectors)
-            .map(|i| (i % 256) as u8)
-            .collect();
-        let pq_codes = UInt8Array::from(pq_codes_flat);
-        let pq_codes_fsl =
-            FixedSizeListArray::try_new_from_values(pq_codes, num_sub_vectors as i32).unwrap();
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new(lance_core::ROW_ID, DataType::UInt64, false),
-            Field::new(
-                lance_index::vector::PQ_CODE_COLUMN,
-                DataType::FixedSizeList(
-                    Arc::new(Field::new("item", DataType::UInt8, true)),
-                    num_sub_vectors as i32,
-                ),
-                false,
-            ),
-        ]));
-
-        let batch =
-            RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(pq_codes_fsl)]).unwrap();
-
-        <ProductQuantizer as Quantization>::Storage::new(
-            codebook,
-            batch,
-            8,
-            num_sub_vectors,
-            dim,
-            DistanceType::L2,
-            false,
-            None,
-        )
-        .unwrap()
-    }
-
-    // ----- PQ tests ---------------------------------------------------------
-
-    #[test]
-    fn test_roundtrip_flat_pq() {
-        let dim = 128;
-        let num_sub_vectors = 16;
-        let num_rows = 100;
-
-        let storage = make_test_pq_storage(num_rows, dim, num_sub_vectors);
-        let entry = PartitionEntry::<FlatIndex, ProductQuantizer> {
-            index: FlatIndex::default(),
-            storage,
-        };
-
-        let mut serialized = Vec::new();
-        entry.serialize(&mut serialized).unwrap();
-        let deserialized =
-            PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(serialized.into()).unwrap();
-
-        assert_eq!(entry.storage, deserialized.storage);
-    }
-
-    #[test]
-    fn test_roundtrip_preserves_distance_type() {
-        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] {
-            let dim = 32;
-            let num_sub_vectors = 4;
-            let codebook = make_test_codebook(dim, num_sub_vectors);
-            let row_ids = UInt64Array::from(vec![0u64, 1, 2]);
-            let pq_codes = UInt8Array::from(vec![0u8; 3 * num_sub_vectors]);
-            let pq_codes_fsl =
-                FixedSizeListArray::try_new_from_values(pq_codes, num_sub_vectors as i32).unwrap();
-
-            let schema = Arc::new(Schema::new(vec![
-                Field::new(lance_core::ROW_ID, DataType::UInt64, false),
-                Field::new(
-                    lance_index::vector::PQ_CODE_COLUMN,
-                    DataType::FixedSizeList(
-                        Arc::new(Field::new("item", DataType::UInt8, true)),
-                        num_sub_vectors as i32,
-                    ),
-                    false,
-                ),
-            ]));
-            let batch =
-                RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(pq_codes_fsl)])
-                    .unwrap();
-
-            let storage = <ProductQuantizer as Quantization>::Storage::new(
-                codebook,
-                batch,
-                8,
-                num_sub_vectors,
-                dim,
-                dt,
-                false,
-                None,
-            )
-            .unwrap();
-
-            let entry = PartitionEntry::<FlatIndex, ProductQuantizer> {
-                index: FlatIndex::default(),
-                storage,
-            };
-
-            let mut bytes = Vec::new();
-            entry.serialize(&mut bytes).unwrap();
-            let restored =
-                PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(bytes.into()).unwrap();
-            assert_eq!(
-                restored.storage.distance_type(),
-                entry.storage.distance_type()
-            );
-        }
-    }
-
-    #[test]
-    fn test_empty_partition() {
-        let dim = 16;
-        let num_sub_vectors = 2;
-        let storage = make_test_pq_storage(0, dim, num_sub_vectors);
-        let entry = PartitionEntry::<FlatIndex, ProductQuantizer> {
-            index: FlatIndex::default(),
-            storage,
-        };
-
-        let mut serialized = Vec::new();
-        entry.serialize(&mut serialized).unwrap();
-        let deserialized =
-            PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(serialized.into()).unwrap();
-        assert_eq!(entry.storage, deserialized.storage);
-    }
-
-    #[test]
-    fn test_truncated_data_errors() {
-        assert!(
-            PartitionEntry::<FlatIndex, ProductQuantizer>::deserialize(Bytes::from_static(
-                b"short"
-            ))
-            .is_err()
-        );
-    }
-
-    // ----- Flat helpers -----------------------------------------------------
-
-    fn make_flat_storage(num_rows: usize, dim: usize) -> FlatFloatStorage {
-        let values: Vec<f32> = (0..num_rows * dim).map(|i| i as f32 * 0.01).collect();
-        let values_array = Float32Array::from(values);
-        let vectors = FixedSizeListArray::try_new_from_values(values_array, dim as i32).unwrap();
-        FlatFloatStorage::new(vectors, DistanceType::L2)
-    }
-
-    // ----- Flat tests -------------------------------------------------------
-
-    #[test]
-    fn test_roundtrip_flat_flat() {
-        let storage = make_flat_storage(50, 64);
-        let entry = PartitionEntry::<FlatIndex, FlatQuantizer> {
-            index: FlatIndex::default(),
-            storage,
-        };
-
-        let mut bytes = Vec::new();
-        entry.serialize(&mut bytes).unwrap();
-        let restored =
-            PartitionEntry::<FlatIndex, FlatQuantizer>::deserialize(bytes.into()).unwrap();
-
-        assert_eq!(
-            restored.storage.metadata().dim,
-            entry.storage.metadata().dim
-        );
-        assert_eq!(
-            restored.storage.distance_type(),
-            entry.storage.distance_type()
-        );
-        assert_eq!(restored.storage.len(), entry.storage.len());
-        let orig_batch = entry.storage.to_batches().unwrap().next().unwrap();
-        let rest_batch = restored.storage.to_batches().unwrap().next().unwrap();
-        assert_eq!(orig_batch, rest_batch);
-    }
-
-    #[test]
-    fn test_flat_distance_types() {
-        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] {
-            let values = Float32Array::from(vec![1.0f32; 32]);
-            let vectors = FixedSizeListArray::try_new_from_values(values, 32).unwrap();
-            let storage = FlatFloatStorage::new(vectors, dt);
-            let entry = PartitionEntry::<FlatIndex, FlatQuantizer> {
-                index: FlatIndex::default(),
-                storage,
-            };
-            let mut bytes = Vec::new();
-            entry.serialize(&mut bytes).unwrap();
-            let restored =
-                PartitionEntry::<FlatIndex, FlatQuantizer>::deserialize(bytes.into()).unwrap();
-            assert_eq!(restored.storage.distance_type(), dt);
-        }
-    }
-
-    // ----- SQ helpers -------------------------------------------------------
-
-    fn make_sq_storage(
-        num_rows: usize,
-        dim: usize,
-        distance_type: DistanceType,
-    ) -> ScalarQuantizationStorage {
-        let row_ids = UInt64Array::from_iter_values(0..num_rows as u64);
-        let sq_codes_flat: Vec<u8> = (0..num_rows * dim).map(|i| (i % 256) as u8).collect();
-        let sq_codes = UInt8Array::from(sq_codes_flat);
-        let sq_codes_fsl = FixedSizeListArray::try_new_from_values(sq_codes, dim as i32).unwrap();
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new(lance_core::ROW_ID, DataType::UInt64, false),
-            Field::new(
-                lance_index::vector::SQ_CODE_COLUMN,
-                DataType::FixedSizeList(
-                    Arc::new(Field::new("item", DataType::UInt8, true)),
-                    dim as i32,
-                ),
-                false,
-            ),
-        ]));
-        let batch =
-            RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(sq_codes_fsl)]).unwrap();
-
-        ScalarQuantizationStorage::try_new(8, distance_type, -1.0..1.0, [batch], None).unwrap()
-    }
-
-    // ----- SQ tests ---------------------------------------------------------
-
-    #[test]
-    fn test_roundtrip_flat_sq() {
-        let storage = make_sq_storage(100, 64, DistanceType::L2);
-        let entry = PartitionEntry::<FlatIndex, ScalarQuantizer> {
-            index: FlatIndex::default(),
-            storage,
-        };
-
-        let mut bytes = Vec::new();
-        entry.serialize(&mut bytes).unwrap();
-        let restored =
-            PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(bytes.into()).unwrap();
-
-        let m = entry.storage.metadata();
-        let rm = restored.storage.metadata();
-        assert_eq!(rm.dim, m.dim);
-        assert_eq!(rm.num_bits, m.num_bits);
-        assert_eq!(rm.bounds, m.bounds);
-        assert_eq!(
-            restored.storage.distance_type(),
-            entry.storage.distance_type()
-        );
-        assert_eq!(restored.storage.len(), entry.storage.len());
-
-        // Verify row IDs are preserved.
-        let orig_ids: Vec<u64> = entry.storage.row_ids().copied().collect();
-        let rest_ids: Vec<u64> = restored.storage.row_ids().copied().collect();
-        assert_eq!(orig_ids, rest_ids);
-    }
-
-    #[test]
-    fn test_sq_distance_types() {
-        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] {
-            let storage = make_sq_storage(10, 16, dt);
-            let entry = PartitionEntry::<FlatIndex, ScalarQuantizer> {
-                index: FlatIndex::default(),
-                storage,
-            };
-            let mut bytes = Vec::new();
-            entry.serialize(&mut bytes).unwrap();
-            let restored =
-                PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(bytes.into()).unwrap();
-            assert_eq!(restored.storage.distance_type(), dt);
-        }
-    }
-
-    #[test]
-    fn test_sq_multiple_chunks_no_copy() {
-        // Build SQ storage with multiple chunks by appending batches separately.
-        let dim = 16usize;
-        let make_batch = |start: u64, n: usize| {
-            let row_ids = UInt64Array::from_iter_values(start..start + n as u64);
-            let codes = UInt8Array::from(vec![0u8; n * dim]);
-            let fsl = FixedSizeListArray::try_new_from_values(codes, dim as i32).unwrap();
-            let schema = Arc::new(Schema::new(vec![
-                Field::new(lance_core::ROW_ID, DataType::UInt64, false),
-                Field::new(
-                    lance_index::vector::SQ_CODE_COLUMN,
-                    DataType::FixedSizeList(
-                        Arc::new(Field::new("item", DataType::UInt8, true)),
-                        dim as i32,
-                    ),
-                    false,
-                ),
-            ]));
-            RecordBatch::try_new(schema, vec![Arc::new(row_ids), Arc::new(fsl)]).unwrap()
-        };
-        // Three chunks with 10 rows each.
-        let storage = ScalarQuantizationStorage::try_new(
-            8,
-            DistanceType::L2,
-            -1.0..1.0,
-            [make_batch(0, 10), make_batch(10, 10), make_batch(20, 10)],
-            None,
-        )
-        .unwrap();
-        assert_eq!(storage.len(), 30);
-
-        let entry = PartitionEntry::<FlatIndex, ScalarQuantizer> {
-            index: FlatIndex::default(),
-            storage,
-        };
-        let mut bytes = Vec::new();
-        entry.serialize(&mut bytes).unwrap();
-        let restored =
-            PartitionEntry::<FlatIndex, ScalarQuantizer>::deserialize(bytes.into()).unwrap();
-
-        assert_eq!(restored.storage.len(), 30);
-        let orig_ids: Vec<u64> = entry.storage.row_ids().copied().collect();
-        let rest_ids: Vec<u64> = restored.storage.row_ids().copied().collect();
-        assert_eq!(orig_ids, rest_ids);
-    }
-
-    // ----- RabitQ helpers ---------------------------------------------------
-
-    fn make_rabit_storage_fast(
-        num_rows: usize,
-        code_dim: usize,
-        distance_type: DistanceType,
-    ) -> <RabitQuantizer as Quantization>::Storage {
-        use lance_arrow::FixedSizeListArrayExt;
-
-        let quantizer = RabitQuantizer::new_with_rotation::<Float32Type>(
-            1,
-            code_dim as i32,
-            RQRotationType::Fast,
-        );
-        // Generate float vectors and quantize them to binary codes.
-        let values: Vec<f32> = (0..num_rows * code_dim)
-            .map(|i| (i % 100) as f32 / 100.0 - 0.5)
-            .collect();
-        let values_arr = Float32Array::from(values);
-        let vectors = FixedSizeListArray::try_new_from_values(values_arr, code_dim as i32).unwrap();
-        let codes = quantizer
-            .quantize(&vectors)
-            .unwrap()
-            .as_fixed_size_list()
-            .clone();
-
-        let metadata = quantizer.metadata(None);
-        let batch = RecordBatch::try_from_iter(vec![
-            (
-                lance_core::ROW_ID,
-                Arc::new(UInt64Array::from_iter_values(0..num_rows as u64))
-                    as Arc<dyn arrow_array::Array>,
-            ),
-            (
-                RABIT_CODE_COLUMN,
-                Arc::new(codes) as Arc<dyn arrow_array::Array>,
-            ),
-            (
-                ADD_FACTORS_COLUMN,
-                Arc::new(Float32Array::from_iter_values(
-                    (0..num_rows).map(|i| i as f32 * 0.1),
-                )) as Arc<dyn arrow_array::Array>,
-            ),
-            (
-                SCALE_FACTORS_COLUMN,
-                Arc::new(Float32Array::from_iter_values(
-                    (0..num_rows).map(|i| i as f32 * 0.01 + 0.5),
-                )) as Arc<dyn arrow_array::Array>,
-            ),
-        ])
-        .unwrap();
-
-        <RabitQuantizer as Quantization>::Storage::try_from_batch(
-            batch,
-            &metadata,
-            distance_type,
-            None,
-        )
-        .unwrap()
-    }
-
-    // ----- RabitQ tests -----------------------------------------------------
-
-    #[test]
-    fn test_roundtrip_flat_rabitq_fast() {
-        let num_rows = 50;
-        let code_dim = 64;
-        let storage = make_rabit_storage_fast(num_rows, code_dim, DistanceType::L2);
-        let entry = PartitionEntry::<FlatIndex, RabitQuantizer> {
-            index: FlatIndex::default(),
-            storage,
-        };
-
-        let mut bytes = Vec::new();
-        entry.serialize(&mut bytes).unwrap();
-        let restored =
-            PartitionEntry::<FlatIndex, RabitQuantizer>::deserialize(bytes.into()).unwrap();
-
-        let m = entry.storage.metadata();
-        let rm = restored.storage.metadata();
-        assert_eq!(rm.num_bits, m.num_bits);
-        assert_eq!(rm.code_dim, m.code_dim);
-        assert_eq!(rm.rotation_type, m.rotation_type);
-        assert_eq!(rm.fast_rotation_signs, m.fast_rotation_signs);
-        assert!(rm.packed);
-        assert_eq!(
-            restored.storage.distance_type(),
-            entry.storage.distance_type()
-        );
-        assert_eq!(restored.storage.len(), entry.storage.len());
-
-        // Verify row IDs are preserved.
-        let orig_ids: Vec<u64> = entry.storage.row_ids().copied().collect();
-        let rest_ids: Vec<u64> = restored.storage.row_ids().copied().collect();
-        assert_eq!(orig_ids, rest_ids);
-
-        // Verify codes are preserved.
-        let orig_batch = entry.storage.to_batches().unwrap().next().unwrap();
-        let rest_batch = restored.storage.to_batches().unwrap().next().unwrap();
-        let orig_codes = orig_batch[RABIT_CODE_COLUMN].as_fixed_size_list();
-        let rest_codes = rest_batch[RABIT_CODE_COLUMN].as_fixed_size_list();
-        assert_eq!(
-            orig_codes.values().as_primitive::<UInt8Type>().values(),
-            rest_codes.values().as_primitive::<UInt8Type>().values(),
-        );
-    }
-
-    #[test]
-    fn test_rabitq_distance_types() {
-        for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] {
-            let storage = make_rabit_storage_fast(10, 32, dt);
-            let entry = PartitionEntry::<FlatIndex, RabitQuantizer> {
-                index: FlatIndex::default(),
-                storage,
-            };
-            let mut bytes = Vec::new();
-            entry.serialize(&mut bytes).unwrap();
-            let restored =
-                PartitionEntry::<FlatIndex, RabitQuantizer>::deserialize(bytes.into()).unwrap();
-            assert_eq!(restored.storage.distance_type(), dt);
-        }
-    }
-}
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index 4dce97ebdb1..26776e52e12 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -26,19 +26,18 @@ use lance_core::utils::tokio::spawn_cpu;
 use lance_core::utils::tracing::{IO_TYPE_LOAD_VECTOR_PART, TRACE_IO_EVENTS};
 use lance_core::{Error, ROW_ID, Result};
 use lance_encoding::decoder::{DecoderPlugins, FilterExpression};
-use lance_file::reader::{CachedFileMetadata, FileReader, FileReaderOptions};
+use lance_file::reader::{FileReader, FileReaderOptions};
 use lance_index::frag_reuse::FragReuseIndex;
 use lance_index::metrics::{LocalMetricsCollector, MetricsCollector, NoOpMetricsCollector};
+use lance_index::vector::VectorIndexCacheEntry;
 use lance_index::vector::flat::index::{FlatIndex, FlatQuantizer};
 use lance_index::vector::hnsw::HNSW;
 use lance_index::vector::ivf::storage::IvfModel;
 use lance_index::vector::pq::ProductQuantizer;
-use lance_index::vector::quantizer::QuantizerMetadata;
 use lance_index::vector::quantizer::{QuantizationType, Quantizer};
 use lance_index::vector::sq::ScalarQuantizer;
 use lance_index::vector::storage::VectorStore;
 use lance_index::vector::v3::subindex::SubIndexType;
-use lance_index::vector::{IvfIndexState, VectorIndexCacheEntry, VectorIndexData};
 use lance_index::{
     INDEX_AUXILIARY_FILE_NAME, INDEX_FILE_NAME, Index, IndexType, pb,
     vector::{
@@ -149,30 +148,22 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
         frag_reuse_index: Option<Arc<FragReuseIndex>>,
         file_metadata_cache: &LanceCache,
         index_cache: LanceCache,
-        file_sizes: HashMap<String, u64>,
     ) -> Result<Self> {
         let io_parallelism = object_store.io_parallelism();
         let scheduler_config = SchedulerConfig::max_bandwidth(&object_store);
-        let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config));
+        let scheduler = ScanScheduler::new(object_store, scheduler_config);
 
         let uri = index_dir.child(uuid.as_str()).child(INDEX_FILE_NAME);
-        let cached_size = file_sizes
-            .get(INDEX_FILE_NAME)
-            .map(|&size| CachedFileSize::new(size))
-            .unwrap_or_else(CachedFileSize::unknown);
         let index_reader = FileReader::try_open(
-            scheduler.open_file(&uri, &cached_size).await?,
+            scheduler
+                .open_file(&uri, &CachedFileSize::unknown())
+                .await?,
             None,
             Arc::<DecoderPlugins>::default(),
             file_metadata_cache,
             FileReaderOptions::default(),
         )
         .await?;
-        // Cache file metadata so reconstruct_typed can skip the metadata read.
-        file_metadata_cache
-            .with_key_prefix(uri.as_ref())
-            .insert_with_key(&FileMetadataCacheKey, index_reader.metadata().clone())
-            .await;
         let index_metadata: IndexMetadata = serde_json::from_str(
             index_reader
                 .schema()
@@ -200,26 +191,21 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
             .ok_or(Error::index(format!("{} not found", S::metadata_key())))?;
         let sub_index_metadata: Vec<String> = serde_json::from_str(sub_index_metadata)?;
 
-        let aux_cached_size = file_sizes
-            .get(INDEX_AUXILIARY_FILE_NAME)
-            .map(|&size| CachedFileSize::new(size))
-            .unwrap_or_else(CachedFileSize::unknown);
-        let aux_path = index_dir
-            .child(uuid.as_str())
-            .child(INDEX_AUXILIARY_FILE_NAME);
         let storage_reader = FileReader::try_open(
-            scheduler.open_file(&aux_path, &aux_cached_size).await?,
+            scheduler
+                .open_file(
+                    &index_dir
+                        .child(uuid.as_str())
+                        .child(INDEX_AUXILIARY_FILE_NAME),
+                    &CachedFileSize::unknown(),
+                )
+                .await?,
             None,
             Arc::<DecoderPlugins>::default(),
             file_metadata_cache,
             FileReaderOptions::default(),
         )
         .await?;
-        // Cache aux file metadata for reconstruction.
-        file_metadata_cache
-            .with_key_prefix(aux_path.as_ref())
-            .insert_with_key(&FileMetadataCacheKey, storage_reader.metadata().clone())
-            .await;
         let storage =
             IvfQuantizationStorage::try_new(storage_reader, frag_reuse_index.clone()).await?;
 
@@ -239,35 +225,6 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
         })
     }
 
-    /// Reconstruct from cached state, skipping global buffer reads.
-    #[allow(clippy::too_many_arguments)]
-    pub(crate) fn from_cached_state(
-        uri: String,
-        uuid: String,
-        ivf: IvfModel,
-        reader: FileReader,
-        storage: IvfQuantizationStorage<Q>,
-        sub_index_metadata: Vec<String>,
-        distance_type: DistanceType,
-        index_cache: LanceCache,
-        io_parallelism: usize,
-    ) -> Self {
-        let num_partitions = ivf.num_partitions();
-        Self {
-            uri,
-            uuid,
-            ivf,
-            reader,
-            storage,
-            partition_locks: PartitionLoadLock::new(num_partitions),
-            sub_index_metadata,
-            distance_type,
-            index_cache: WeakLanceCache::from(&index_cache),
-            io_parallelism,
-            _marker: PhantomData,
-        }
-    }
-
     #[instrument(level = "debug", skip(self, metrics))]
     pub async fn load_partition(
         &self,
@@ -638,30 +595,6 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> VectorIndex for IVFInd
     fn metric_type(&self) -> DistanceType {
         self.distance_type
     }
-
-    fn cacheable_state(&self) -> Option<Box<dyn VectorIndexData>> {
-        let extra_data = self.storage.metadata().extra_metadata().ok().flatten();
-        let metadata_json = serde_json::to_string(self.storage.metadata()).ok()?;
-        let (sub_index_type, quantization_type) = self.sub_index_type();
-        // Convert local path back to object_store Path (undo to_local_path's "/" prefix)
-        let index_file_path = self.uri.trim_start_matches('/').to_string();
-        let index_meta = self.reader.metadata();
-        let aux_meta = self.storage.reader().metadata();
-        Some(Box::new(IvfIndexState {
-            index_file_path,
-            uuid: self.uuid.clone(),
-            ivf: self.ivf.clone(),
-            distance_type: self.distance_type,
-            sub_index_metadata: self.sub_index_metadata.clone(),
-            quantizer_metadata_json: metadata_json,
-            quantizer_extra_data: extra_data.map(|b| b.to_vec()),
-            sub_index_type,
-            quantization_type,
-            cache_key_prefix: self.index_cache.prefix().to_string(),
-            index_file_size: index_meta.file_size(),
-            aux_file_size: aux_meta.file_size(),
-        }))
-    }
 }
 
 pub type IvfFlatIndex = IVFIndex<FlatIndex, FlatQuantizer>;
@@ -669,224 +602,6 @@ pub type IvfPq = IVFIndex<FlatIndex, ProductQuantizer>;
 pub type IvfHnswSqIndex = IVFIndex<HNSW, ScalarQuantizer>;
 pub type IvfHnswPqIndex = IVFIndex<HNSW, ProductQuantizer>;
 
-/// CacheKey for file metadata, matching the key used by fragment reads.
-struct FileMetadataCacheKey;
-
-impl CacheKey for FileMetadataCacheKey {
-    type ValueType = CachedFileMetadata;
-
-    fn key(&self) -> std::borrow::Cow<'_, str> {
-        "".into()
-    }
-
-    fn type_name(&self) -> &'static str {
-        "FileMetadata"
-    }
-}
-
-/// Open a FileReader, using cached file metadata when available to avoid IO.
-async fn open_reader_cached(
-    scheduler: &Arc<ScanScheduler>,
-    path: &Path,
-    cache: &LanceCache,
-    known_file_size: u64,
-) -> Result<FileReader> {
-    let file_cache = cache.with_key_prefix(path.as_ref());
-    let cached_size = if known_file_size > 0 {
-        CachedFileSize::new(known_file_size)
-    } else {
-        CachedFileSize::unknown()
-    };
-    let file_scheduler = scheduler.open_file(path, &cached_size).await?;
-
-    if let Some(cached_meta) = file_cache.get_with_key(&FileMetadataCacheKey).await {
-        let encodings_io = Arc::new(lance_file::LanceEncodingsIo::new(file_scheduler));
-        FileReader::try_open_with_file_metadata(
-            encodings_io,
-            path.clone(),
-            None,
-            Arc::<DecoderPlugins>::default(),
-            cached_meta,
-            cache,
-            FileReaderOptions::default(),
-        )
-        .await
-    } else {
-        FileReader::try_open(
-            file_scheduler,
-            None,
-            Arc::<DecoderPlugins>::default(),
-            cache,
-            FileReaderOptions::default(),
-        )
-        .await
-    }
-}
-
-/// Reconstruct a concrete `IVFIndex<S, Q>` from cached state.
-async fn reconstruct_typed<S: IvfSubIndex + 'static, Q: Quantization + 'static>(
-    state: IvfIndexState,
-    object_store: Arc<ObjectStore>,
-    file_metadata_cache: &LanceCache,
-    index_cache: LanceCache,
-) -> Result<Arc<dyn VectorIndex>>
-where
-    Q::Metadata: serde::de::DeserializeOwned,
-{
-    let io_parallelism = object_store.io_parallelism();
-    let scheduler_config = SchedulerConfig::max_bandwidth(&object_store);
-    let scheduler = Arc::new(ScanScheduler::new(object_store, scheduler_config));
-
-    let index_path = Path::parse(&state.index_file_path)
-        .map_err(|e| Error::io(format!("invalid index path: {e}")))?;
-
-    let index_reader = open_reader_cached(
-        &scheduler,
-        &index_path,
-        file_metadata_cache,
-        state.index_file_size,
-    )
-    .await?;
-
-    // Derive aux file path: replace the filename with INDEX_AUXILIARY_FILE_NAME.
-    // index_path is like "path/to/{uuid}/index.lance", aux is "path/to/{uuid}/aux.lance".
-    let index_path_str = index_path.as_ref();
-    let parent_str = index_path_str
-        .rsplit_once('/')
-        .map(|(p, _)| p)
-        .unwrap_or("");
-    let aux_path = Path::parse(format!("{}/{}", parent_str, INDEX_AUXILIARY_FILE_NAME))
-        .map_err(|e| Error::io(format!("invalid aux path: {e}")))?;
-    let storage_reader = open_reader_cached(
-        &scheduler,
-        &aux_path,
-        file_metadata_cache,
-        state.aux_file_size,
-    )
-    .await?;
-
-    // Parse quantizer metadata from cached JSON
-    let mut metadata: Q::Metadata = serde_json::from_str(&state.quantizer_metadata_json)?;
-    if let Some(extra) = state.quantizer_extra_data {
-        metadata.parse_buffer(extra.into())?;
-    }
-
-    let storage = IvfQuantizationStorage::from_cached(
-        storage_reader,
-        state.ivf.clone(),
-        metadata,
-        state.distance_type,
-        None, // frag_reuse_index not cached
-    );
-
-    let index = IVFIndex::<S, Q>::from_cached_state(
-        to_local_path(&index_path),
-        state.uuid,
-        state.ivf,
-        index_reader,
-        storage,
-        state.sub_index_metadata,
-        state.distance_type,
-        index_cache,
-        io_parallelism,
-    );
-
-    Ok(Arc::new(index))
-}
-
-/// Reconstruct a `dyn VectorIndex` from a cached [`IvfIndexState`], dispatching
-/// on the stored sub-index and quantization types.
-pub async fn reconstruct_vector_index(
-    state: IvfIndexState,
-    object_store: Arc<ObjectStore>,
-    file_metadata_cache: &LanceCache,
-    index_cache: LanceCache,
-) -> Result<Arc<dyn VectorIndex>> {
-    use lance_index::vector::bq::builder::RabitQuantizer;
-
-    // Extract type tags before consuming state.
-    let sub_idx = state.sub_index_type.to_string();
-    let quant = state.quantization_type.to_string();
-
-    match (sub_idx.as_str(), quant.as_str()) {
-        ("FLAT", "FLAT") => {
-            reconstruct_typed::<FlatIndex, FlatQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        ("FLAT", "PQ") => {
-            reconstruct_typed::<FlatIndex, ProductQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        ("FLAT", "SQ") => {
-            reconstruct_typed::<FlatIndex, ScalarQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        ("FLAT", "RQ") => {
-            reconstruct_typed::<FlatIndex, RabitQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        ("HNSW", "PQ") => {
-            reconstruct_typed::<HNSW, ProductQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        ("HNSW", "SQ") => {
-            reconstruct_typed::<HNSW, ScalarQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        ("HNSW", "FLAT") => {
-            reconstruct_typed::<HNSW, FlatQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        ("HNSW", "RQ") => {
-            reconstruct_typed::<HNSW, RabitQuantizer>(
-                state,
-                object_store,
-                file_metadata_cache,
-                index_cache,
-            )
-            .await
-        }
-        (s, q) => Err(Error::index(format!(
-            "unsupported index type for reconstruction: sub_index={s}, quantization={q}"
-        ))),
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use std::collections::HashSet;
@@ -937,7 +652,7 @@ mod tests {
     use lance_index::vector::{
         pq::storage::ProductQuantizationMetadata, storage::STORAGE_METADATA_KEY,
     };
-    use lance_index::{DatasetIndexExt, IndexSegment, IndexType};
+    use lance_index::{DatasetIndexExt, IndexType};
     use lance_index::{INDEX_AUXILIARY_FILE_NAME, metrics::NoOpMetricsCollector};
     use lance_index::{optimize::OptimizeOptions, scalar::IndexReader};
     use lance_index::{scalar::IndexWriter, vector::hnsw::builder::HnswBuildParams};
@@ -1751,16 +1466,7 @@ mod tests {
             .unwrap();
 
         dataset
-            .commit_existing_index_segments(
-                index_name,
-                "vector",
-                vec![IndexSegment::new(
-                    shared_uuid,
-                    dataset.fragment_bitmap.as_ref().clone(),
-                    Arc::new(crate::index::vector_index_details()),
-                    IndexType::IvfPq.version(),
-                )],
-            )
+            .commit_existing_index(index_name, "vector", shared_uuid)
             .await
             .unwrap();
     }
@@ -3926,53 +3632,4 @@ mod tests {
         let stats = dataset.object_store().io_stats_incremental();
         assert_io_eq!(stats, read_iops, 0, "second prewarm should not perform IO");
     }
-
-    #[tokio::test]
-    async fn test_reconstruct_from_cache_zero_io() {
-        use lance_io::assert_io_eq;
-
-        let test_dir = TempStrDir::default();
-        let test_uri = test_dir.as_str();
-        let (mut dataset, _) = generate_test_dataset::<Float32Type>(test_uri, 0.0..1.0).await;
-
-        let params = VectorIndexParams::with_ivf_pq_params(
-            DistanceType::L2,
-            IvfBuildParams::new(4),
-            PQBuildParams::default(),
-        );
-        dataset
-            .create_index(
-                &["vector"],
-                IndexType::Vector,
-                Some("my_idx".to_owned()),
-                &params,
-                true,
-            )
-            .await
-            .unwrap();
-
-        // First open: populates file metadata cache and VectorIndexData cache.
-        let indices = dataset.load_indices_by_name("my_idx").await.unwrap();
-        let uuid = indices[0].uuid.to_string();
-        dataset
-            .open_vector_index("vector", &uuid, &NoOpMetricsCollector)
-            .await
-            .unwrap();
-
-        // Reset IO stats, then open again — should reconstruct from cache.
-        dataset.object_store().io_stats_incremental();
-
-        dataset
-            .open_vector_index("vector", &uuid, &NoOpMetricsCollector)
-            .await
-            .unwrap();
-
-        let stats = dataset.object_store().io_stats_incremental();
-        assert_io_eq!(
-            stats,
-            read_iops,
-            0,
-            "reconstructing from cached state should not perform IO"
-        );
-    }
 }

From c85f9b5a2a3931e2b3d1932a2653466feab88bd8 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 20 Mar 2026 17:09:30 -0700
Subject: [PATCH 17/24] fix: update commit_existing_index to
 commit_existing_index_segments

The method was renamed in #6209 but the test call site in v2.rs was not
updated during the merge.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance/src/index/vector/ivf/v2.rs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index 26776e52e12..d7c7db5e61a 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -652,7 +652,7 @@ mod tests {
     use lance_index::vector::{
         pq::storage::ProductQuantizationMetadata, storage::STORAGE_METADATA_KEY,
     };
-    use lance_index::{DatasetIndexExt, IndexType};
+    use lance_index::{DatasetIndexExt, IndexSegment, IndexType};
     use lance_index::{INDEX_AUXILIARY_FILE_NAME, metrics::NoOpMetricsCollector};
     use lance_index::{optimize::OptimizeOptions, scalar::IndexReader};
     use lance_index::{scalar::IndexWriter, vector::hnsw::builder::HnswBuildParams};
@@ -1466,7 +1466,16 @@ mod tests {
             .unwrap();
 
         dataset
-            .commit_existing_index(index_name, "vector", shared_uuid)
+            .commit_existing_index_segments(
+                index_name,
+                "vector",
+                vec![IndexSegment::new(
+                    shared_uuid,
+                    dataset.fragment_bitmap.as_ref().clone(),
+                    Arc::new(crate::index::vector_index_details()),
+                    IndexType::IvfPq.version(),
+                )],
+            )
             .await
             .unwrap();
     }

From 32d8c62cb2e48f4c0c57a123494771610b81797b Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 20 Mar 2026 21:39:38 -0700
Subject: [PATCH 18/24] fix: restore file_sizes optimization and vector cache
 check

The file_sizes parameter on IVFIndex::try_new and the file_size_map()
usage in open_vector_index were from merged PR #5497, not the
serialization PR. Restoring them avoids unnecessary HEAD requests.
Also restores vector index cache check in open_generic_index.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance/src/index.rs               | 28 +++++++++++++++++++++++----
 rust/lance/src/index/vector/ivf/v2.rs | 15 ++++++++++----
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index 7fc85b0b1dd..97b25da6767 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -1337,13 +1337,22 @@ impl DatasetIndexInternalExt for Dataset {
         uuid: &str,
         metrics: &dyn MetricsCollector,
     ) -> Result<Arc<dyn Index>> {
-        // Quick cache checks for scalar and frag-reuse indices.
+        // Checking for cache existence is cheap so we just check both scalar and vector caches
         let frag_reuse_uuid = self.frag_reuse_index_uuid().await;
         let cache_key = ScalarIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
         if let Some(index) = self.index_cache.get_unsized_with_key(&cache_key).await {
             return Ok(index.as_index());
         }
 
+        let vector_cache_key = VectorIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
+        if let Some(index) = self
+            .index_cache
+            .get_unsized_with_key(&vector_cache_key)
+            .await
+        {
+            return Ok(index.as_index());
+        }
+
         let frag_reuse_cache_key = FragReuseIndexCacheKey::new(uuid, frag_reuse_uuid.as_ref());
         if let Some(index) = self.index_cache.get_with_key(&frag_reuse_cache_key).await {
             return Ok(index.as_index());
@@ -1484,9 +1493,12 @@ impl DatasetIndexInternalExt for Dataset {
                     self.object_store.clone(),
                     SchedulerConfig::max_bandwidth(&self.object_store),
                 );
-                let file = scheduler
-                    .open_file(&index_file, &CachedFileSize::unknown())
-                    .await?;
+                let file_sizes = index_meta.file_size_map();
+                let cached_size = file_sizes
+                    .get(INDEX_FILE_NAME)
+                    .map(|&size| CachedFileSize::new(size))
+                    .unwrap_or_else(CachedFileSize::unknown);
+                let file = scheduler.open_file(&index_file, &cached_size).await?;
                 let reader = lance_file::reader::FileReader::try_open(
                     file,
                     None,
@@ -1520,6 +1532,7 @@ impl DatasetIndexInternalExt for Dataset {
                                 frag_reuse_index,
                                 self.metadata_cache.as_ref(),
                                 index_cache,
+                                file_sizes,
                             )
                             .await?;
                             Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1532,6 +1545,7 @@ impl DatasetIndexInternalExt for Dataset {
                                 frag_reuse_index,
                                 self.metadata_cache.as_ref(),
                                 index_cache,
+                                file_sizes,
                             )
                             .await?;
                             Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1550,6 +1564,7 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
+                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1563,6 +1578,7 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
+                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1576,6 +1592,7 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
+                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1592,6 +1609,7 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             &file_metadata_cache,
                             index_cache,
+                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1605,6 +1623,7 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
+                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
@@ -1618,6 +1637,7 @@ impl DatasetIndexInternalExt for Dataset {
                             frag_reuse_index,
                             self.metadata_cache.as_ref(),
                             index_cache,
+                            file_sizes,
                         )
                         .await?;
                         Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index d7c7db5e61a..9561c187b18 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -148,16 +148,19 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
         frag_reuse_index: Option<Arc<FragReuseIndex>>,
         file_metadata_cache: &LanceCache,
         index_cache: LanceCache,
+        file_sizes: HashMap<String, u64>,
     ) -> Result<Self> {
         let io_parallelism = object_store.io_parallelism();
         let scheduler_config = SchedulerConfig::max_bandwidth(&object_store);
         let scheduler = ScanScheduler::new(object_store, scheduler_config);
 
         let uri = index_dir.child(uuid.as_str()).child(INDEX_FILE_NAME);
+        let cached_size = file_sizes
+            .get(INDEX_FILE_NAME)
+            .map(|&size| CachedFileSize::new(size))
+            .unwrap_or_else(CachedFileSize::unknown);
         let index_reader = FileReader::try_open(
-            scheduler
-                .open_file(&uri, &CachedFileSize::unknown())
-                .await?,
+            scheduler.open_file(&uri, &cached_size).await?,
             None,
             Arc::<DecoderPlugins>::default(),
             file_metadata_cache,
@@ -191,13 +194,17 @@ impl<S: IvfSubIndex + 'static, Q: Quantization> IVFIndex<S, Q> {
             .ok_or(Error::index(format!("{} not found", S::metadata_key())))?;
         let sub_index_metadata: Vec<String> = serde_json::from_str(sub_index_metadata)?;
 
+        let aux_cached_size = file_sizes
+            .get(INDEX_AUXILIARY_FILE_NAME)
+            .map(|&size| CachedFileSize::new(size))
+            .unwrap_or_else(CachedFileSize::unknown);
         let storage_reader = FileReader::try_open(
             scheduler
                 .open_file(
                     &index_dir
                         .child(uuid.as_str())
                         .child(INDEX_AUXILIARY_FILE_NAME),
-                    &CachedFileSize::unknown(),
+                    &aux_cached_size,
                 )
                 .await?,
             None,

From 8c3ef350b886fd1fdbec0982813628fae0251d39 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 26 Mar 2026 14:11:22 -0700
Subject: [PATCH 19/24] refactor: presize cache keys, extract size helper,
 document type_name contract

- Presize `make_cache_key` Vec to avoid intermediate String allocation
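- Extract `cache_entry_size` helper to replace magic `+ 8` pattern; the
  Arc overhead is now computed as two `AtomicUsize` counters (16 bytes on
  64-bit targets), so reported entry sizes change slightly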
- Document `type_name` uniqueness requirement on CacheKey/UnsizedCacheKey
- Remove unused derives from SubIndexType (confirmed compiles without them)
- Use moka's `weighted_size()` instead of iterating in `approx_size_bytes`

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache/keys.rs          | 21 +++++++++++++++++----
 rust/lance-core/src/cache/mod.rs           | 17 ++++++++++++-----
 rust/lance-core/src/cache/moka.rs          |  2 +-
 rust/lance-index/src/vector/v3/subindex.rs |  1 -
 4 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs
index db412cc632f..164cd043952 100644
--- a/rust/lance-core/src/cache/keys.rs
+++ b/rust/lance-core/src/cache/keys.rs
@@ -20,12 +20,17 @@ pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) {
 
 /// Build a key: `prefix/user_key\0type_name`.
 pub(super) fn make_cache_key(prefix: &str, key: &str, type_name: &str) -> Vec<u8> {
-    let full_key = if prefix.is_empty() {
-        key.to_string()
+    let user_key_len = if prefix.is_empty() {
+        key.len()
     } else {
-        format!("{}/{}", prefix, key)
+        prefix.len() + 1 + key.len()
     };
-    let mut bytes = full_key.into_bytes();
+    let mut bytes = Vec::with_capacity(user_key_len + 1 + type_name.len());
+    if !prefix.is_empty() {
+        bytes.extend_from_slice(prefix.as_bytes());
+        bytes.push(b'/');
+    }
+    bytes.extend_from_slice(key.as_bytes());
     bytes.push(0);
     bytes.extend_from_slice(type_name.as_bytes());
     bytes
@@ -38,6 +43,11 @@ pub trait CacheKey {
 
     /// Short, stable string that distinguishes this value type from others in
     /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_name`).
+    ///
+    /// **Must be unique per value type.** If two `CacheKey` impls return the
+    /// same `type_name` but different `ValueType`s, entries will collide and
+    /// downcasts will fail silently (returning `None` on get).
+    ///
     /// Must be consistent across crate boundaries — use a short literal, not
     /// `std::any::type_name` pointers.
     fn type_name(&self) -> &'static str;
@@ -48,5 +58,8 @@ pub trait UnsizedCacheKey {
 
     fn key(&self) -> Cow<'_, str>;
 
+    /// Short, stable string that distinguishes this value type from others in
+    /// the cache. Must be unique per value type — collisions cause silent
+    /// downcast failures.
     fn type_name(&self) -> &'static str;
 }
diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs
index 6bdd0c07152..bfa0af34dae 100644
--- a/rust/lance-core/src/cache/mod.rs
+++ b/rust/lance-core/src/cache/mod.rs
@@ -35,6 +35,11 @@ pub use deepsize::{Context, DeepSizeOf};
 
 use keys::make_cache_key;
 
+/// Size of a cached `Arc<T>`, accounting for the Arc overhead (two atomic counters).
+fn cache_entry_size<T: DeepSizeOf + ?Sized>(value: &T) -> usize {
+    value.deep_size_of() + std::mem::size_of::<std::sync::atomic::AtomicUsize>() * 2
+}
+
 // ---------------------------------------------------------------------------
 // LanceCache — typed wrapper around dyn CacheBackend
 // ---------------------------------------------------------------------------
@@ -141,7 +146,7 @@ impl LanceCache {
         type_name: &str,
         metadata: Arc<T>,
     ) {
-        let size = metadata.deep_size_of() + 8;
+        let size = cache_entry_size(&*metadata);
         let cache_key = make_cache_key(&self.prefix, key, type_name);
         self.cache.insert(&cache_key, metadata, size).await;
     }
@@ -188,12 +193,13 @@ impl LanceCache {
         let typed_loader = Box::pin(async move {
             let value = loader().await?;
             let arc = Arc::new(value);
-            let size = arc.deep_size_of() + 8;
+            let size = cache_entry_size(&*arc);
             Ok((arc as CacheEntry, size))
         });
 
         let entry = self.cache.get_or_insert(&cache_key, typed_loader).await?;
 
+        // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses.
         // Track hit/miss based on whether we got a pre-existing entry.
         // (Approximate: we can't distinguish "backend had it" from "loader ran"
         // without a richer return type. Count all get_or_insert as misses for now.)
@@ -203,6 +209,7 @@ impl LanceCache {
     }
 
     // -- Unsized insert/get ---------------------------------------------------
+    // TODO: can we unify some of these methods?
 
     async fn insert_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
         &self,
@@ -359,7 +366,7 @@ impl WeakLanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         if let Some(cache) = self.inner.upgrade() {
-            let size = value.deep_size_of() + 8;
+            let size = cache_entry_size(&*value);
             let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             cache.insert(&key, value, size).await;
             true
@@ -388,7 +395,7 @@ impl WeakLanceCache {
             let typed_loader = Box::pin(async move {
                 let value = loader().await?;
                 let arc = Arc::new(value);
-                let size = arc.deep_size_of() + 8;
+                let size = cache_entry_size(&*arc);
                 Ok((arc as CacheEntry, size))
             });
             let entry = cache.get_or_insert(&key, typed_loader).await?;
@@ -424,7 +431,7 @@ impl WeakLanceCache {
     {
         if let Some(cache) = self.inner.upgrade() {
             let wrapper = Arc::new(value);
-            let size = wrapper.deep_size_of() + 8;
+            let size = cache_entry_size(&*wrapper);
             let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             cache.insert(&key, wrapper, size).await;
         } else {
diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs
index 6a2cd673409..9ab6702e455 100644
--- a/rust/lance-core/src/cache/moka.rs
+++ b/rust/lance-core/src/cache/moka.rs
@@ -120,6 +120,6 @@ impl CacheBackend for MokaCacheBackend {
     }
 
     fn approx_size_bytes(&self) -> usize {
-        self.cache.iter().map(|(_, v)| v.size_bytes).sum()
+        self.cache.weighted_size() as usize
     }
 }
diff --git a/rust/lance-index/src/vector/v3/subindex.rs b/rust/lance-index/src/vector/v3/subindex.rs
index dd5d2b078a9..af0bb337352 100644
--- a/rust/lance-index/src/vector/v3/subindex.rs
+++ b/rust/lance-index/src/vector/v3/subindex.rs
@@ -59,7 +59,6 @@ pub trait IvfSubIndex: Send + Sync + Debug + DeepSizeOf {
     fn to_batch(&self) -> Result<RecordBatch>;
 }
 
-#[derive(Debug, Clone, Copy)]
 pub enum SubIndexType {
     Flat,
     Hnsw,

From 19aa33833df578c7866707728f61c5ff1435f278 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 26 Mar 2026 14:12:47 -0700
Subject: [PATCH 20/24] refactor: inline single-caller private cache methods
 into public API

Inline `get_or_insert_with_id`, `insert_unsized_with_id`, and
`get_unsized_with_id` into their sole public callers. Keep
`insert_with_id` and `get_with_id` as shared helpers since they're
used by both sized and unsized paths.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache/mod.rs | 79 ++++++++------------------------
 1 file changed, 20 insertions(+), 59 deletions(-)

diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs
index bfa0af34dae..eb7c4c40bfe 100644
--- a/rust/lance-core/src/cache/mod.rs
+++ b/rust/lance-core/src/cache/mod.rs
@@ -177,59 +177,6 @@ impl LanceCache {
         }
     }
 
-    async fn get_or_insert_with_id<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
-        &self,
-        key: &str,
-        type_name: &str,
-        loader: F,
-    ) -> Result<Arc<T>>
-    where
-        F: FnOnce() -> Fut + Send,
-        Fut: Future<Output = Result<T>> + Send,
-    {
-        let cache_key = make_cache_key(&self.prefix, key, type_name);
-
-        // Type-erase the loader into a pinned future for the backend.
-        let typed_loader = Box::pin(async move {
-            let value = loader().await?;
-            let arc = Arc::new(value);
-            let size = cache_entry_size(&*arc);
-            Ok((arc as CacheEntry, size))
-        });
-
-        let entry = self.cache.get_or_insert(&cache_key, typed_loader).await?;
-
-        // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses.
-        // Track hit/miss based on whether we got a pre-existing entry.
-        // (Approximate: we can't distinguish "backend had it" from "loader ran"
-        // without a richer return type. Count all get_or_insert as misses for now.)
-        self.misses.fetch_add(1, Ordering::Relaxed);
-
-        Ok(entry.downcast::<T>().unwrap())
-    }
-
-    // -- Unsized insert/get ---------------------------------------------------
-    // TODO: can we unify some of these methods?
-
-    async fn insert_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
-        &self,
-        key: &str,
-        type_name: &str,
-        metadata: Arc<T>,
-    ) {
-        self.insert_with_id(key, type_name, Arc::new(metadata))
-            .await
-    }
-
-    async fn get_unsized_with_id<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
-        &self,
-        key: &str,
-        type_name: &str,
-    ) -> Option<Arc<T>> {
-        let outer = self.get_with_id::<Arc<T>>(key, type_name).await?;
-        Some(outer.as_ref().clone())
-    }
-
     // -- Stats / clear --------------------------------------------------------
 
     pub async fn stats(&self) -> CacheStats {
@@ -280,9 +227,21 @@ impl LanceCache {
         F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
-        let type_name = cache_key.type_name();
-        let key_str = cache_key.key().into_owned();
-        Box::pin(self.get_or_insert_with_id(&key_str, type_name, loader)).await
+        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+
+        let typed_loader = Box::pin(async move {
+            let value = loader().await?;
+            let arc = Arc::new(value);
+            let size = cache_entry_size(&*arc);
+            Ok((arc as CacheEntry, size))
+        });
+
+        let entry = self.cache.get_or_insert(&key, typed_loader).await?;
+
+        // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses.
+        self.misses.fetch_add(1, Ordering::Relaxed);
+
+        Ok(entry.downcast::<K::ValueType>().unwrap())
     }
 
     pub async fn insert_unsized_with_key<K>(&self, cache_key: &K, metadata: Arc<K::ValueType>)
@@ -290,7 +249,7 @@ impl LanceCache {
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.insert_unsized_with_id(&cache_key.key(), cache_key.type_name(), metadata)
+        self.insert_with_id(&cache_key.key(), cache_key.type_name(), Arc::new(metadata))
             .boxed()
             .await
     }
@@ -300,9 +259,11 @@ impl LanceCache {
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.get_unsized_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_name())
+        let outer = self
+            .get_with_id::<Arc<K::ValueType>>(&cache_key.key(), cache_key.type_name())
             .boxed()
-            .await
+            .await?;
+        Some(outer.as_ref().clone())
     }
 }
 

From c3f2cb600cfd88560209b88472e33dfc6947825f Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 26 Mar 2026 14:18:01 -0700
Subject: [PATCH 21/24] refactor: replace opaque byte keys with
 InternalCacheKey, add was_cached

Replace `&[u8]` keys in `CacheBackend` with a structured `InternalCacheKey`
type that provides direct access to prefix, key, and type_name fields.
This eliminates the need for `parse_cache_key()` and `make_cache_key()`.

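Also change `get_or_insert` to return `(CacheEntry, bool)` where the bool
indicates whether the entry was already cached. This enables accurate
hit/miss tracking instead of counting all get_or_insert calls as misses.

Note that `CacheBackend::invalidate_prefix` now takes a `&str` and is
specified to match against the key's prefix component, rather than
against the leading bytes of the encoded key.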

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache/backend.rs |  26 ++++---
 rust/lance-core/src/cache/keys.rs    |  62 ++++++++-------
 rust/lance-core/src/cache/mod.rs     | 108 +++++++++++++++------------
 rust/lance-core/src/cache/moka.rs    |  33 +++++---
 4 files changed, 131 insertions(+), 98 deletions(-)

diff --git a/rust/lance-core/src/cache/backend.rs b/rust/lance-core/src/cache/backend.rs
index 970fb75888c..e929bff3529 100644
--- a/rust/lance-core/src/cache/backend.rs
+++ b/rust/lance-core/src/cache/backend.rs
@@ -10,31 +10,33 @@ use futures::Future;
 
 use crate::Result;
 
+use super::keys::InternalCacheKey;
+
 /// A type-erased cache entry.
 pub type CacheEntry = Arc<dyn Any + Send + Sync>;
 
 /// Low-level pluggable cache backend.
 ///
-/// Implementations store entries keyed by opaque byte slices.
+/// Implementations store entries keyed by [`InternalCacheKey`], which provides
+/// structured access to the prefix, user key, and type name components.
 /// The [`LanceCache`](super::LanceCache) wrapper handles key construction and type safety;
 /// backend authors do not need to worry about key encoding.
-///
-/// Keys are structured as `user_key\0type_name` where `type_name` comes from
-/// [`CacheKey::type_name()`](super::CacheKey::type_name). Backend authors who need to
-/// inspect keys can use [`parse_cache_key()`](super::parse_cache_key) to split them.
 #[async_trait]
 pub trait CacheBackend: Send + Sync + std::fmt::Debug {
-    /// Look up an entry by its opaque key.
-    async fn get(&self, key: &[u8]) -> Option<CacheEntry>;
+    /// Look up an entry by its key.
+    async fn get(&self, key: &InternalCacheKey) -> Option<CacheEntry>;
 
     /// Store an entry. `size_bytes` is used for eviction accounting.
-    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize);
+    async fn insert(&self, key: &InternalCacheKey, entry: CacheEntry, size_bytes: usize);
 
     /// Get an existing entry or compute it from `loader`.
     ///
     /// Implementations should deduplicate concurrent loads for the same key
     /// so the loader runs at most once.
     ///
+    /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry
+    /// was already present in the cache (the loader was not invoked).
+    ///
     /// The loader is a pinned, boxed future rather than a generic closure
     /// because `async_trait` erases the `Self` lifetime, making it impossible
     /// to express a generic closure whose returned future borrows from the
@@ -45,12 +47,12 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     /// this method — implementations must not store it beyond the call.
     async fn get_or_insert<'a>(
         &self,
-        key: &[u8],
+        key: &InternalCacheKey,
         loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
-    ) -> Result<CacheEntry>;
+    ) -> Result<(CacheEntry, bool)>;
 
-    /// Remove all entries whose key starts with `prefix`.
-    async fn invalidate_prefix(&self, prefix: &[u8]);
+    /// Remove all entries whose prefix starts with the given string.
+    async fn invalidate_prefix(&self, prefix: &str);
 
     /// Remove all entries.
     async fn clear(&self);
diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs
index 164cd043952..d4afe55370f 100644
--- a/rust/lance-core/src/cache/keys.rs
+++ b/rust/lance-core/src/cache/keys.rs
@@ -1,39 +1,45 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-use std::borrow::Cow;
+use std::{borrow::Cow, sync::Arc};
 
-/// Cache keys are structured as `user_key\0type_name`.
+/// Structured cache key used by [`CacheBackend`](super::CacheBackend).
 ///
-/// This function splits an opaque cache key into the user-visible portion
-/// and the type_name string. Backend implementations can use this to inspect keys.
-/// Returns `(empty slice, "")` if no separator is found.
-pub fn parse_cache_key(key: &[u8]) -> (&[u8], &str) {
-    if let Some(sep) = key.iter().position(|&b| b == 0) {
-        let user_key = &key[..sep];
-        let type_name = std::str::from_utf8(&key[sep + 1..]).unwrap_or("");
-        (user_key, type_name)
-    } else {
-        (key, "")
-    }
+/// Composed of a prefix (scoping the key to a dataset/index), a user key
+/// (identifying the specific entry), and a type name (distinguishing value
+/// types that share the same user key).
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+pub struct InternalCacheKey {
+    prefix: Arc<str>,
+    key: Arc<str>,
+    type_name: &'static str,
 }
 
-/// Build a key: `prefix/user_key\0type_name`.
-pub(super) fn make_cache_key(prefix: &str, key: &str, type_name: &str) -> Vec<u8> {
-    let user_key_len = if prefix.is_empty() {
-        key.len()
-    } else {
-        prefix.len() + 1 + key.len()
-    };
-    let mut bytes = Vec::with_capacity(user_key_len + 1 + type_name.len());
-    if !prefix.is_empty() {
-        bytes.extend_from_slice(prefix.as_bytes());
-        bytes.push(b'/');
+impl InternalCacheKey {
+    pub fn new(prefix: Arc<str>, key: Arc<str>, type_name: &'static str) -> Self {
+        Self {
+            prefix,
+            key,
+            type_name,
+        }
+    }
+
+    pub fn prefix(&self) -> &str {
+        &self.prefix
+    }
+
+    pub fn key(&self) -> &str {
+        &self.key
+    }
+
+    pub fn type_name(&self) -> &'static str {
+        self.type_name
+    }
+
+    /// Returns true if this key's prefix starts with the given string.
+    pub fn has_prefix(&self, prefix: &str) -> bool {
+        self.prefix.starts_with(prefix)
     }
-    bytes.extend_from_slice(key.as_bytes());
-    bytes.push(0);
-    bytes.extend_from_slice(type_name.as_bytes());
-    bytes
 }
 
 pub trait CacheKey {
diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs
index eb7c4c40bfe..d338d69036f 100644
--- a/rust/lance-core/src/cache/mod.rs
+++ b/rust/lance-core/src/cache/mod.rs
@@ -6,20 +6,20 @@
 //! This module provides a two-layer caching system:
 //!
 //! - [`CacheBackend`] is the low-level, pluggable trait that custom cache implementations
-//!   can implement. It uses opaque byte keys and type-erased entries.
-//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag
-//!   encoding), type-safe get/insert, and DeepSizeOf-based size computation.
+//!   can implement. It uses [`InternalCacheKey`] keys and type-erased entries.
+//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag),
+//!   type-safe get/insert, and DeepSizeOf-based size computation.
 //!
 //! Cache keys are handled by the [`keys`] submodule: [`CacheKey`] / [`UnsizedCacheKey`]
-//! define the typed key interface, and [`parse_cache_key`] lets backends inspect the
-//! encoded `user_key\0type_name` format.
+//! define the typed key interface, and [`InternalCacheKey`] is the structured key passed
+//! to backends.
 
 mod backend;
 mod keys;
 mod moka;
 
 pub use backend::{CacheBackend, CacheEntry};
-pub use keys::{CacheKey, UnsizedCacheKey, parse_cache_key};
+pub use keys::{CacheKey, InternalCacheKey, UnsizedCacheKey};
 pub use moka::MokaCacheBackend;
 
 use std::sync::{
@@ -33,13 +33,17 @@ use crate::Result;
 
 pub use deepsize::{Context, DeepSizeOf};
 
-use keys::make_cache_key;
-
 /// Size of a cached `Arc<T>`, accounting for the Arc overhead (two atomic counters).
 fn cache_entry_size<T: DeepSizeOf + ?Sized>(value: &T) -> usize {
     value.deep_size_of() + std::mem::size_of::<std::sync::atomic::AtomicUsize>() * 2
 }
 
+/// Build an [`InternalCacheKey`] from a cache's prefix, a user key string,
+/// and a type name.
+fn build_key(prefix: &Arc<str>, key: &str, type_name: &'static str) -> InternalCacheKey {
+    InternalCacheKey::new(prefix.clone(), Arc::from(key), type_name)
+}
+
 // ---------------------------------------------------------------------------
 // LanceCache — typed wrapper around dyn CacheBackend
 // ---------------------------------------------------------------------------
@@ -51,7 +55,7 @@ fn cache_entry_size<T: DeepSizeOf + ?Sized>(value: &T) -> usize {
 #[derive(Clone)]
 pub struct LanceCache {
     cache: Arc<dyn CacheBackend>,
-    prefix: String,
+    prefix: Arc<str>,
     hits: Arc<AtomicU64>,
     misses: Arc<AtomicU64>,
 }
@@ -74,7 +78,7 @@ impl LanceCache {
     pub fn with_capacity(capacity: usize) -> Self {
         Self {
             cache: Arc::new(MokaCacheBackend::with_capacity(capacity)),
-            prefix: String::new(),
+            prefix: Arc::from(""),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
         }
@@ -84,7 +88,7 @@ impl LanceCache {
     pub fn with_backend(backend: Arc<dyn CacheBackend>) -> Self {
         Self {
             cache: backend,
-            prefix: String::new(),
+            prefix: Arc::from(""),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
         }
@@ -93,7 +97,7 @@ impl LanceCache {
     pub fn no_cache() -> Self {
         Self {
             cache: Arc::new(MokaCacheBackend::no_cache()),
-            prefix: String::new(),
+            prefix: Arc::from(""),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
         }
@@ -104,7 +108,7 @@ impl LanceCache {
     pub fn with_backend_and_prefix(backend: Arc<dyn CacheBackend>, prefix: String) -> Self {
         Self {
             cache: backend,
-            prefix,
+            prefix: Arc::from(prefix),
             hits: Arc::new(AtomicU64::new(0)),
             misses: Arc::new(AtomicU64::new(0)),
         }
@@ -114,16 +118,16 @@ impl LanceCache {
     pub fn with_key_prefix(&self, prefix: &str) -> Self {
         Self {
             cache: self.cache.clone(),
-            prefix: format!("{}{}/", self.prefix, prefix),
+            prefix: Arc::from(format!("{}{}/", self.prefix, prefix)),
             hits: self.hits.clone(),
             misses: self.misses.clone(),
         }
     }
 
-    /// Invalidate all entries whose key starts with the given prefix.
+    /// Invalidate all entries whose prefix starts with the given string.
     pub async fn invalidate_prefix(&self, prefix: &str) {
-        let prefix_bytes = format!("{}{}", self.prefix, prefix).into_bytes();
-        self.cache.invalidate_prefix(&prefix_bytes).await;
+        let full_prefix = format!("{}{}", self.prefix, prefix);
+        self.cache.invalidate_prefix(&full_prefix).await;
     }
 
     pub async fn size(&self) -> usize {
@@ -138,25 +142,25 @@ impl LanceCache {
         self.cache.size_bytes().await
     }
 
-    // -- Sized insert/get (internal, used by CacheKey methods) ----------------
+    // -- Sized insert/get (internal, shared by sized and unsized paths) --------
 
     async fn insert_with_id<T: DeepSizeOf + Send + Sync + 'static>(
         &self,
         key: &str,
-        type_name: &str,
+        type_name: &'static str,
         metadata: Arc<T>,
     ) {
         let size = cache_entry_size(&*metadata);
-        let cache_key = make_cache_key(&self.prefix, key, type_name);
+        let cache_key = build_key(&self.prefix, key, type_name);
         self.cache.insert(&cache_key, metadata, size).await;
     }
 
     async fn get_with_id<T: Send + Sync + 'static>(
         &self,
         key: &str,
-        type_name: &str,
+        type_name: &'static str,
     ) -> Option<Arc<T>> {
-        let cache_key = make_cache_key(&self.prefix, key, type_name);
+        let cache_key = build_key(&self.prefix, key, type_name);
         if let Some(entry) = self.cache.get(&cache_key).await {
             match entry.downcast::<T>() {
                 Ok(val) => {
@@ -227,7 +231,7 @@ impl LanceCache {
         F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
-        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+        let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
 
         let typed_loader = Box::pin(async move {
             let value = loader().await?;
@@ -236,10 +240,13 @@ impl LanceCache {
             Ok((arc as CacheEntry, size))
         });
 
-        let entry = self.cache.get_or_insert(&key, typed_loader).await?;
+        let (entry, was_cached) = self.cache.get_or_insert(&key, typed_loader).await?;
 
-        // TODO: distinguish "backend had it" from "loader ran and inserted" to track true hits vs misses.
-        self.misses.fetch_add(1, Ordering::Relaxed);
+        if was_cached {
+            self.hits.fetch_add(1, Ordering::Relaxed);
+        } else {
+            self.misses.fetch_add(1, Ordering::Relaxed);
+        }
 
         Ok(entry.downcast::<K::ValueType>().unwrap())
     }
@@ -276,7 +283,7 @@ impl LanceCache {
 #[derive(Clone, Debug)]
 pub struct WeakLanceCache {
     inner: std::sync::Weak<dyn CacheBackend>,
-    prefix: String,
+    prefix: Arc<str>,
     hits: Arc<AtomicU64>,
     misses: Arc<AtomicU64>,
 }
@@ -294,7 +301,7 @@ impl WeakLanceCache {
     pub fn with_key_prefix(&self, prefix: &str) -> Self {
         Self {
             inner: self.inner.clone(),
-            prefix: format!("{}{}/", self.prefix, prefix),
+            prefix: Arc::from(format!("{}{}/", self.prefix, prefix)),
             hits: self.hits.clone(),
             misses: self.misses.clone(),
         }
@@ -311,7 +318,7 @@ impl WeakLanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         let cache = self.inner.upgrade()?;
-        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+        let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
         if let Some(entry) = cache.get(&key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
             Some(entry.downcast::<K::ValueType>().unwrap())
@@ -328,7 +335,7 @@ impl WeakLanceCache {
     {
         if let Some(cache) = self.inner.upgrade() {
             let size = cache_entry_size(&*value);
-            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+            let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             cache.insert(&key, value, size).await;
             true
         } else {
@@ -352,15 +359,19 @@ impl WeakLanceCache {
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
         if let Some(cache) = self.inner.upgrade() {
-            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+            let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             let typed_loader = Box::pin(async move {
                 let value = loader().await?;
                 let arc = Arc::new(value);
                 let size = cache_entry_size(&*arc);
                 Ok((arc as CacheEntry, size))
             });
-            let entry = cache.get_or_insert(&key, typed_loader).await?;
-            self.misses.fetch_add(1, Ordering::Relaxed);
+            let (entry, was_cached) = cache.get_or_insert(&key, typed_loader).await?;
+            if was_cached {
+                self.hits.fetch_add(1, Ordering::Relaxed);
+            } else {
+                self.misses.fetch_add(1, Ordering::Relaxed);
+            }
             Ok(entry.downcast::<K::ValueType>().unwrap())
         } else {
             log::warn!("WeakLanceCache: cache no longer available, computing without caching");
@@ -374,7 +385,7 @@ impl WeakLanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         let cache = self.inner.upgrade()?;
-        let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+        let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
         if let Some(entry) = cache.get(&key).await {
             entry
                 .downcast::<Arc<K::ValueType>>()
@@ -393,7 +404,7 @@ impl WeakLanceCache {
         if let Some(cache) = self.inner.upgrade() {
             let wrapper = Arc::new(value);
             let size = cache_entry_size(&*wrapper);
-            let key = make_cache_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+            let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
             cache.insert(&key, wrapper, size).await;
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item");
@@ -610,8 +621,10 @@ mod tests {
             .await
             .unwrap();
         assert_eq!(*v, vec![1, 2, 3]);
+        assert_eq!(cache.stats().await.misses, 1);
+        assert_eq!(cache.stats().await.hits, 0);
 
-        // Second call should not invoke loader
+        // Second call should not invoke loader and should be a hit
         let v: Arc<Vec<i32>> = cache
             .get_or_insert_with_key(TestKey::<Vec<i32>>::new("k"), || async {
                 panic!("should not be called")
@@ -619,6 +632,7 @@ mod tests {
             .await
             .unwrap();
         assert_eq!(*v, vec![1, 2, 3]);
+        assert_eq!(cache.stats().await.hits, 1);
     }
 
     #[tokio::test]
@@ -628,7 +642,7 @@ mod tests {
 
         #[derive(Debug)]
         struct HashMapBackend {
-            map: Mutex<HashMap<Vec<u8>, (CacheEntry, usize)>>,
+            map: Mutex<HashMap<InternalCacheKey, (CacheEntry, usize)>>,
         }
 
         impl HashMapBackend {
@@ -641,35 +655,35 @@ mod tests {
 
         #[async_trait]
         impl CacheBackend for HashMapBackend {
-            async fn get(&self, key: &[u8]) -> Option<CacheEntry> {
+            async fn get(&self, key: &InternalCacheKey) -> Option<CacheEntry> {
                 self.map.lock().await.get(key).map(|(e, _)| e.clone())
             }
-            async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) {
+            async fn insert(&self, key: &InternalCacheKey, entry: CacheEntry, size_bytes: usize) {
                 self.map
                     .lock()
                     .await
-                    .insert(key.to_vec(), (entry, size_bytes));
+                    .insert(key.clone(), (entry, size_bytes));
             }
             async fn get_or_insert<'a>(
                 &self,
-                key: &[u8],
+                key: &InternalCacheKey,
                 loader: std::pin::Pin<
                     Box<dyn futures::Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>,
                 >,
-            ) -> Result<CacheEntry> {
+            ) -> Result<(CacheEntry, bool)> {
                 if let Some((entry, _)) = self.map.lock().await.get(key) {
-                    Ok(entry.clone())
+                    Ok((entry.clone(), true))
                 } else {
                     let (entry, size) = loader.await?;
                     self.map
                         .lock()
                         .await
-                        .insert(key.to_vec(), (entry.clone(), size));
-                    Ok(entry)
+                        .insert(key.clone(), (entry.clone(), size));
+                    Ok((entry, false))
                 }
             }
-            async fn invalidate_prefix(&self, prefix: &[u8]) {
-                self.map.lock().await.retain(|k, _| !k.starts_with(prefix));
+            async fn invalidate_prefix(&self, prefix: &str) {
+                self.map.lock().await.retain(|k, _| !k.has_prefix(prefix));
             }
             async fn clear(&self) {
                 self.map.lock().await.clear();
diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs
index 9ab6702e455..a977ea0a61f 100644
--- a/rust/lance-core/src/cache/moka.rs
+++ b/rust/lance-core/src/cache/moka.rs
@@ -2,6 +2,8 @@
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
 use std::pin::Pin;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
 
 use async_trait::async_trait;
 use futures::Future;
@@ -9,6 +11,7 @@ use futures::Future;
 use crate::Result;
 
 use super::backend::{CacheBackend, CacheEntry};
+use super::keys::InternalCacheKey;
 
 /// Internal record stored in the moka cache.
 #[derive(Clone, Debug)]
@@ -22,7 +25,7 @@ struct MokaCacheEntry {
 /// Provides weighted-capacity eviction and concurrent-load deduplication
 /// via moka's built-in `optionally_get_with`.
 pub struct MokaCacheBackend {
-    cache: moka::future::Cache<Vec<u8>, MokaCacheEntry>,
+    cache: moka::future::Cache<InternalCacheKey, MokaCacheEntry>,
 }
 
 impl std::fmt::Debug for MokaCacheBackend {
@@ -52,26 +55,31 @@ impl MokaCacheBackend {
 
 #[async_trait]
 impl CacheBackend for MokaCacheBackend {
-    async fn get(&self, key: &[u8]) -> Option<CacheEntry> {
+    async fn get(&self, key: &InternalCacheKey) -> Option<CacheEntry> {
         self.cache.get(key).await.map(|r| r.entry)
     }
 
-    async fn insert(&self, key: &[u8], entry: CacheEntry, size_bytes: usize) {
+    async fn insert(&self, key: &InternalCacheKey, entry: CacheEntry, size_bytes: usize) {
         self.cache
-            .insert(key.to_vec(), MokaCacheEntry { entry, size_bytes })
+            .insert(key.clone(), MokaCacheEntry { entry, size_bytes })
             .await;
     }
 
     async fn get_or_insert<'a>(
         &self,
-        key: &[u8],
+        key: &InternalCacheKey,
         loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
-    ) -> Result<CacheEntry> {
+    ) -> Result<(CacheEntry, bool)> {
         // Use moka's built-in dedup: optionally_get_with runs the init future
         // at most once per key, even under concurrent access.
         let (error_tx, error_rx) = tokio::sync::oneshot::channel();
 
+        // Track whether the loader actually ran (= cache miss).
+        let was_miss = Arc::new(AtomicBool::new(false));
+        let was_miss_clone = was_miss.clone();
+
         let init = async move {
+            was_miss_clone.store(true, Ordering::Relaxed);
             match loader.await {
                 Ok((entry, size_bytes)) => Some(MokaCacheEntry { entry, size_bytes }),
                 Err(e) => {
@@ -81,9 +89,12 @@ impl CacheBackend for MokaCacheBackend {
             }
         };
 
-        let owned_key = key.to_vec();
+        let owned_key = key.clone();
         match self.cache.optionally_get_with(owned_key, init).await {
-            Some(record) => Ok(record.entry),
+            Some(record) => {
+                let was_cached = !was_miss.load(Ordering::Relaxed);
+                Ok((record.entry, was_cached))
+            }
             None => match error_rx.await {
                 Ok(err) => Err(err),
                 Err(_) => Err(crate::Error::internal(
@@ -93,10 +104,10 @@ impl CacheBackend for MokaCacheBackend {
         }
     }
 
-    async fn invalidate_prefix(&self, prefix: &[u8]) {
-        let prefix = prefix.to_vec();
+    async fn invalidate_prefix(&self, prefix: &str) {
+        let prefix = prefix.to_owned();
         self.cache
-            .invalidate_entries_if(move |key, _value| key.starts_with(&prefix))
+            .invalidate_entries_if(move |key, _value| key.has_prefix(&prefix))
             .expect("Cache configured correctly");
     }
 

From 1494cd7d7667dcb182190c9d8d63f2469e004651 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Thu, 26 Mar 2026 14:59:21 -0700
Subject: [PATCH 22/24] fix: revert approx_size_bytes to iterate entries, fix
 rustdoc link

Moka's `weighted_size()` can be stale without `run_pending_tasks()`
(which is async). Revert `approx_size_bytes` to iterating entries so
the synchronous `DeepSizeOf` path returns accurate values.

Also remove reference to private `keys` module in module-level doc.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache/mod.rs  | 5 ++---
 rust/lance-core/src/cache/moka.rs | 5 ++++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs
index d338d69036f..3eba0415f04 100644
--- a/rust/lance-core/src/cache/mod.rs
+++ b/rust/lance-core/src/cache/mod.rs
@@ -10,9 +10,8 @@
 //! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag),
 //!   type-safe get/insert, and DeepSizeOf-based size computation.
 //!
-//! Cache keys are handled by the [`keys`] submodule: [`CacheKey`] / [`UnsizedCacheKey`]
-//! define the typed key interface, and [`InternalCacheKey`] is the structured key passed
-//! to backends.
+//! [`CacheKey`] / [`UnsizedCacheKey`] define the typed key interface, and
+//! [`InternalCacheKey`] is the structured key passed to backends.
 
 mod backend;
 mod keys;
diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs
index a977ea0a61f..13a6c7e976c 100644
--- a/rust/lance-core/src/cache/moka.rs
+++ b/rust/lance-core/src/cache/moka.rs
@@ -131,6 +131,9 @@ impl CacheBackend for MokaCacheBackend {
     }
 
     fn approx_size_bytes(&self) -> usize {
-        self.cache.weighted_size() as usize
+        // Iterate rather than using `weighted_size()` because moka's
+        // weighted_size can be stale without `run_pending_tasks()`, which
+        // is async and can't be called from this synchronous context.
+        self.cache.iter().map(|(_, v)| v.size_bytes).sum()
     }
 }

From ae4783a897ceef7974bdda68206e32fb496d7a36 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 27 Mar 2026 14:45:52 -0700
Subject: [PATCH 23/24] refactor: remove unused `&self` from
 CacheKey::type_name and UnsizedCacheKey::type_name

No implementation uses `self`, so make `type_name` an associated function
instead of a method. Update all call sites to use `K::type_name()`.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache/keys.rs             |  4 ++--
 rust/lance-core/src/cache/mod.rs              | 24 +++++++++----------
 .../src/encodings/logical/primitive.rs        |  2 +-
 rust/lance-file/src/previous/reader.rs        |  2 +-
 rust/lance-index/src/scalar/bitmap.rs         |  2 +-
 rust/lance-index/src/scalar/btree.rs          |  2 +-
 rust/lance-index/src/scalar/inverted/index.rs |  4 ++--
 rust/lance-index/src/scalar/ngram.rs          |  2 +-
 rust/lance-index/src/scalar/rtree.rs          |  2 +-
 rust/lance/src/dataset/fragment.rs            |  2 +-
 rust/lance/src/index.rs                       |  8 +++----
 rust/lance/src/index/vector/ivf.rs            |  2 +-
 rust/lance/src/index/vector/ivf/v2.rs         |  2 +-
 rust/lance/src/session.rs                     |  2 +-
 rust/lance/src/session/caches.rs              | 12 +++++-----
 rust/lance/src/session/index_caches.rs        |  6 ++---
 16 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs
index d4afe55370f..5182c3e8e72 100644
--- a/rust/lance-core/src/cache/keys.rs
+++ b/rust/lance-core/src/cache/keys.rs
@@ -56,7 +56,7 @@ pub trait CacheKey {
     ///
     /// Must be consistent across crate boundaries — use a short literal, not
     /// `std::any::type_name` pointers.
-    fn type_name(&self) -> &'static str;
+    fn type_name() -> &'static str;
 }
 
 pub trait UnsizedCacheKey {
@@ -67,5 +67,5 @@ pub trait UnsizedCacheKey {
     /// Short, stable string that distinguishes this value type from others in
     /// the cache. Must be unique per value type — collisions cause silent
     /// downcast failures.
-    fn type_name(&self) -> &'static str;
+    fn type_name() -> &'static str;
 }
diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs
index 3eba0415f04..01014997d7d 100644
--- a/rust/lance-core/src/cache/mod.rs
+++ b/rust/lance-core/src/cache/mod.rs
@@ -204,7 +204,7 @@ impl LanceCache {
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.insert_with_id(&cache_key.key(), cache_key.type_name(), metadata)
+        self.insert_with_id(&cache_key.key(), K::type_name(), metadata)
             .boxed()
             .await
     }
@@ -214,7 +214,7 @@ impl LanceCache {
         K: CacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.get_with_id::<K::ValueType>(&cache_key.key(), cache_key.type_name())
+        self.get_with_id::<K::ValueType>(&cache_key.key(), K::type_name())
             .boxed()
             .await
     }
@@ -230,7 +230,7 @@ impl LanceCache {
         F: FnOnce() -> Fut + Send,
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
-        let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+        let key = build_key(&self.prefix, &cache_key.key(), K::type_name());
 
         let typed_loader = Box::pin(async move {
             let value = loader().await?;
@@ -255,7 +255,7 @@ impl LanceCache {
         K: UnsizedCacheKey,
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
-        self.insert_with_id(&cache_key.key(), cache_key.type_name(), Arc::new(metadata))
+        self.insert_with_id(&cache_key.key(), K::type_name(), Arc::new(metadata))
             .boxed()
             .await
     }
@@ -266,7 +266,7 @@ impl LanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         let outer = self
-            .get_with_id::<Arc<K::ValueType>>(&cache_key.key(), cache_key.type_name())
+            .get_with_id::<Arc<K::ValueType>>(&cache_key.key(), K::type_name())
             .boxed()
             .await?;
         Some(outer.as_ref().clone())
@@ -317,7 +317,7 @@ impl WeakLanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         let cache = self.inner.upgrade()?;
-        let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+        let key = build_key(&self.prefix, &cache_key.key(), K::type_name());
         if let Some(entry) = cache.get(&key).await {
             self.hits.fetch_add(1, Ordering::Relaxed);
             Some(entry.downcast::<K::ValueType>().unwrap())
@@ -334,7 +334,7 @@ impl WeakLanceCache {
     {
         if let Some(cache) = self.inner.upgrade() {
             let size = cache_entry_size(&*value);
-            let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+            let key = build_key(&self.prefix, &cache_key.key(), K::type_name());
             cache.insert(&key, value, size).await;
             true
         } else {
@@ -358,7 +358,7 @@ impl WeakLanceCache {
         Fut: Future<Output = Result<K::ValueType>> + Send,
     {
         if let Some(cache) = self.inner.upgrade() {
-            let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+            let key = build_key(&self.prefix, &cache_key.key(), K::type_name());
             let typed_loader = Box::pin(async move {
                 let value = loader().await?;
                 let arc = Arc::new(value);
@@ -384,7 +384,7 @@ impl WeakLanceCache {
         K::ValueType: DeepSizeOf + Send + Sync + 'static,
     {
         let cache = self.inner.upgrade()?;
-        let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+        let key = build_key(&self.prefix, &cache_key.key(), K::type_name());
         if let Some(entry) = cache.get(&key).await {
             entry
                 .downcast::<Arc<K::ValueType>>()
@@ -403,7 +403,7 @@ impl WeakLanceCache {
         if let Some(cache) = self.inner.upgrade() {
             let wrapper = Arc::new(value);
             let size = cache_entry_size(&*wrapper);
-            let key = build_key(&self.prefix, &cache_key.key(), cache_key.type_name());
+            let key = build_key(&self.prefix, &cache_key.key(), K::type_name());
             cache.insert(&key, wrapper, size).await;
         } else {
             log::warn!("WeakLanceCache: cache no longer available, unable to insert unsized item");
@@ -470,7 +470,7 @@ mod tests {
         fn key(&self) -> std::borrow::Cow<'_, str> {
             std::borrow::Cow::Borrowed(&self.key)
         }
-        fn type_name(&self) -> &'static str {
+        fn type_name() -> &'static str {
             std::any::type_name::<T>()
         }
     }
@@ -495,7 +495,7 @@ mod tests {
         fn key(&self) -> std::borrow::Cow<'_, str> {
             std::borrow::Cow::Borrowed(&self.key)
         }
-        fn type_name(&self) -> &'static str {
+        fn type_name() -> &'static str {
             std::any::type_name::<T>()
         }
     }
diff --git a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs
index ba8a551f737..f4f8b6e8627 100644
--- a/rust/lance-encoding/src/encodings/logical/primitive.rs
+++ b/rust/lance-encoding/src/encodings/logical/primitive.rs
@@ -3417,7 +3417,7 @@ impl CacheKey for FieldDataCacheKey {
         self.column_index.to_string().into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "FieldData"
     }
 }
diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs
index fac113b4c10..0edaa7eb972 100644
--- a/rust/lance-file/src/previous/reader.rs
+++ b/rust/lance-file/src/previous/reader.rs
@@ -90,7 +90,7 @@ impl<T: 'static> CacheKey for StringCacheKey<'_, T> {
         self.key.into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         std::any::type_name::<T>()
     }
 }
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index d10829b151c..05405344efd 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -129,7 +129,7 @@ impl CacheKey for BitmapKey {
         format!("{}", self.value.0).into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "Bitmap"
     }
 }
diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs
index 48db3d43b11..3fa05dfd131 100644
--- a/rust/lance-index/src/scalar/btree.rs
+++ b/rust/lance-index/src/scalar/btree.rs
@@ -991,7 +991,7 @@ impl CacheKey for BTreePageKey {
         format!("page-{}", self.page_number).into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "BTreePage"
     }
 }
diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs
index e5caf09cd78..88cecddd697 100644
--- a/rust/lance-index/src/scalar/inverted/index.rs
+++ b/rust/lance-index/src/scalar/inverted/index.rs
@@ -1889,7 +1889,7 @@ impl CacheKey for PostingListKey {
         format!("postings-{}", self.token_id).into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "PostingList"
     }
 }
@@ -1906,7 +1906,7 @@ impl CacheKey for PositionKey {
         format!("positions-{}", self.token_id).into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "Position"
     }
 }
diff --git a/rust/lance-index/src/scalar/ngram.rs b/rust/lance-index/src/scalar/ngram.rs
index 2a439ae6b34..4e614d99d99 100644
--- a/rust/lance-index/src/scalar/ngram.rs
+++ b/rust/lance-index/src/scalar/ngram.rs
@@ -171,7 +171,7 @@ impl CacheKey for NGramPostingListKey {
         format!("posting-list-{}", self.row_offset).into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "NGramPostingList"
     }
 }
diff --git a/rust/lance-index/src/scalar/rtree.rs b/rust/lance-index/src/scalar/rtree.rs
index 225e3be6e2a..920a59bb4b2 100644
--- a/rust/lance-index/src/scalar/rtree.rs
+++ b/rust/lance-index/src/scalar/rtree.rs
@@ -250,7 +250,7 @@ impl CacheKey for RTreeCacheKey {
         }
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "RTree"
     }
 }
diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs
index 81e1473c921..986fe8a8443 100644
--- a/rust/lance/src/dataset/fragment.rs
+++ b/rust/lance/src/dataset/fragment.rs
@@ -1880,7 +1880,7 @@ impl CacheKey for FileMetadataCacheKey {
         "".into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "FileMetadata"
     }
 }
diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index 97b25da6767..a1f41ead087 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -111,7 +111,7 @@ impl UnsizedCacheKey for ScalarIndexCacheKey<'_> {
         }
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "ScalarIndex"
     }
 }
@@ -139,7 +139,7 @@ impl UnsizedCacheKey for VectorIndexCacheKey<'_> {
         }
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "VectorIndex"
     }
 }
@@ -167,7 +167,7 @@ impl CacheKey for FragReuseIndexCacheKey<'_> {
         }
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "FragReuseIndex"
     }
 }
@@ -195,7 +195,7 @@ impl CacheKey for MemWalCacheKey<'_> {
         }
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "MemWalIndex"
     }
 }
diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs
index c26da61d7ef..cc243eac887 100644
--- a/rust/lance/src/index/vector/ivf.rs
+++ b/rust/lance/src/index/vector/ivf.rs
@@ -124,7 +124,7 @@ impl UnsizedCacheKey for LegacyIVFPartitionKey {
         format!("ivf-{}", self.partition_id).into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "LegacyIVFPartition"
     }
 }
diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs
index 9561c187b18..5da12b687a7 100644
--- a/rust/lance/src/index/vector/ivf/v2.rs
+++ b/rust/lance/src/index/vector/ivf/v2.rs
@@ -97,7 +97,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> CacheKey for IVFPartit
         format!("ivf-{}", self.partition_id).into()
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         // Using type_name is safe here: the impl is in the same crate as the
         // types, so the monomorphized pointer is consistent.
         std::any::type_name::<PartitionEntry<S, Q>>()
diff --git a/rust/lance/src/session.rs b/rust/lance/src/session.rs
index 7242c0cca6a..b032cbaa15e 100644
--- a/rust/lance/src/session.rs
+++ b/rust/lance/src/session.rs
@@ -235,7 +235,7 @@ mod tests {
             Cow::Borrowed(self.0)
         }
 
-        fn type_name(&self) -> &'static str {
+        fn type_name() -> &'static str {
             "TestUnsized"
         }
     }
diff --git a/rust/lance/src/session/caches.rs b/rust/lance/src/session/caches.rs
index 2654e356ac1..55f78a5068f 100644
--- a/rust/lance/src/session/caches.rs
+++ b/rust/lance/src/session/caches.rs
@@ -82,7 +82,7 @@ impl CacheKey for ManifestKey<'_> {
             Cow::Owned(format!("manifest/{}", self.version))
         }
     }
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "Manifest"
     }
 }
@@ -97,7 +97,7 @@ impl CacheKey for TransactionKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("txn/{}", self.version))
     }
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "Transaction"
     }
 }
@@ -119,7 +119,7 @@ impl CacheKey for DeletionFileKey<'_> {
             self.deletion_file.file_type.suffix()
         ))
     }
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "DeletionVector"
     }
 }
@@ -134,7 +134,7 @@ impl CacheKey for RowAddrMaskKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_addr_mask/{}", self.version))
     }
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "RowAddrMask"
     }
 }
@@ -149,7 +149,7 @@ impl CacheKey for RowIdIndexKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_id_index/{}", self.version))
     }
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "RowIdIndex"
     }
 }
@@ -164,7 +164,7 @@ impl CacheKey for RowIdSequenceKey {
     fn key(&self) -> Cow<'_, str> {
         Cow::Owned(format!("row_id_sequence/{}", self.fragment_id))
     }
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "RowIdSequence"
     }
 }
diff --git a/rust/lance/src/session/index_caches.rs b/rust/lance/src/session/index_caches.rs
index 04aa9791c8d..43443b5dd34 100644
--- a/rust/lance/src/session/index_caches.rs
+++ b/rust/lance/src/session/index_caches.rs
@@ -89,7 +89,7 @@ impl CacheKey for FragReuseIndexKey<'_> {
         Cow::Owned(format!("frag_reuse/{}", self.uuid))
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "FragReuseIndex"
     }
 }
@@ -106,7 +106,7 @@ impl CacheKey for IndexMetadataKey {
         Cow::Owned(self.version.to_string())
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "Vec<IndexMetadata>"
     }
 }
@@ -137,7 +137,7 @@ impl CacheKey for ScalarIndexDetailsKey<'_> {
         Cow::Owned(format!("type/{}", self.uuid))
     }
 
-    fn type_name(&self) -> &'static str {
+    fn type_name() -> &'static str {
         "ScalarIndexDetails"
     }
 }

From 8b6967163a069e80e0fc9ad02d752e7d2a7087fb Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Sun, 29 Mar 2026 15:09:40 -0700
Subject: [PATCH 24/24] refactor: clarify cache module API surfaces, address
 review feedback

Restructure the cache module so the two audiences are clear:
- mod.rs: user-facing API (LanceCache, CacheKey, UnsizedCacheKey)
- backend.rs: implementor-facing API (CacheBackend, InternalCacheKey)

Delete keys.rs, moving CacheKey/UnsizedCacheKey into mod.rs and
InternalCacheKey into backend.rs.

Also: rename has_prefix -> starts_with, trim verbose get_or_insert doc,
add shared-buffer note to approx_size_bytes, improve type_name docs,
explain std::any::type_name usage in previous/reader.rs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 rust/lance-core/src/cache/backend.rs   | 74 ++++++++++++++++++-----
 rust/lance-core/src/cache/keys.rs      | 71 ----------------------
 rust/lance-core/src/cache/mod.rs       | 84 ++++++++++++++++++++++----
 rust/lance-core/src/cache/moka.rs      |  5 +-
 rust/lance-file/src/previous/reader.rs |  3 +
 5 files changed, 135 insertions(+), 102 deletions(-)
 delete mode 100644 rust/lance-core/src/cache/keys.rs

diff --git a/rust/lance-core/src/cache/backend.rs b/rust/lance-core/src/cache/backend.rs
index e929bff3529..54b24944ab0 100644
--- a/rust/lance-core/src/cache/backend.rs
+++ b/rust/lance-core/src/cache/backend.rs
@@ -1,6 +1,13 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
+//! Backend interface for cache implementors.
+//!
+//! This module defines the trait that custom cache backends must implement,
+//! along with the key and entry types they operate on. Most callers should
+//! use [`LanceCache`](super::LanceCache) instead of interacting with
+//! backends directly.
+
 use std::any::Any;
 use std::pin::Pin;
 use std::sync::Arc;
@@ -10,17 +17,60 @@ use futures::Future;
 
 use crate::Result;
 
-use super::keys::InternalCacheKey;
-
 /// A type-erased cache entry.
 pub type CacheEntry = Arc<dyn Any + Send + Sync>;
 
+/// Structured cache key passed to [`CacheBackend`] methods.
+///
+/// Composed of three parts:
+/// - **prefix**: scopes the key to a dataset or index (e.g. `"s3://bucket/dataset/"`)
+/// - **key**: identifies the specific entry (e.g. `"42"` for a version number)
+/// - **type_name**: distinguishes different value types stored under the same
+///   user key (e.g. `"Vec<IndexMetadata>"`)
+///
+/// [`LanceCache`](super::LanceCache) constructs these automatically from
+/// [`CacheKey`](super::CacheKey) values; backend authors receive them
+/// ready-made.
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+pub struct InternalCacheKey {
+    prefix: Arc<str>,
+    key: Arc<str>,
+    type_name: &'static str,
+}
+
+impl InternalCacheKey {
+    pub fn new(prefix: Arc<str>, key: Arc<str>, type_name: &'static str) -> Self {
+        Self {
+            prefix,
+            key,
+            type_name,
+        }
+    }
+
+    pub fn prefix(&self) -> &str {
+        &self.prefix
+    }
+
+    pub fn key(&self) -> &str {
+        &self.key
+    }
+
+    pub fn type_name(&self) -> &'static str {
+        self.type_name
+    }
+
+    /// Returns true if the `prefix` component (not the user key) starts with the given string.
+    pub fn starts_with(&self, prefix: &str) -> bool {
+        self.prefix.starts_with(prefix)
+    }
+}
+
 /// Low-level pluggable cache backend.
 ///
-/// Implementations store entries keyed by [`InternalCacheKey`], which provides
-/// structured access to the prefix, user key, and type name components.
-/// The [`LanceCache`](super::LanceCache) wrapper handles key construction and type safety;
-/// backend authors do not need to worry about key encoding.
+/// Implementations store entries keyed by [`InternalCacheKey`] and return
+/// type-erased [`CacheEntry`] values.
+/// [`LanceCache`](super::LanceCache) handles key construction and type safety;
+/// backend authors only need to implement storage and eviction.
 #[async_trait]
 pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     /// Look up an entry by its key.
@@ -36,15 +86,6 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     ///
     /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry
     /// was already present in the cache (the loader was not invoked).
-    ///
-    /// The loader is a pinned, boxed future rather than a generic closure
-    /// because `async_trait` erases the `Self` lifetime, making it impossible
-    /// to express a generic closure whose returned future borrows from the
-    /// caller. Boxing the future once at the call site (in `LanceCache`)
-    /// avoids this lifetime conflict while keeping the trait object-safe.
-    ///
-    /// The future borrows from the caller's scope and will be `.await`ed within
-    /// this method — implementations must not store it beyond the call.
     async fn get_or_insert<'a>(
         &self,
         key: &InternalCacheKey,
@@ -72,6 +113,9 @@ pub trait CacheBackend: Send + Sync + std::fmt::Debug {
     /// Approximate weighted size in bytes, callable from synchronous contexts.
     /// Used by `DeepSizeOf` to report cache memory usage.
     /// Backends that cannot provide this cheaply should return 0.
+    ///
+    /// Assumes entries do not share underlying buffers; if they do, the
+    /// returned total may overcount.
     fn approx_size_bytes(&self) -> usize {
         0
     }
diff --git a/rust/lance-core/src/cache/keys.rs b/rust/lance-core/src/cache/keys.rs
deleted file mode 100644
index 5182c3e8e72..00000000000
--- a/rust/lance-core/src/cache/keys.rs
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The Lance Authors
-
-use std::{borrow::Cow, sync::Arc};
-
-/// Structured cache key used by [`CacheBackend`](super::CacheBackend).
-///
-/// Composed of a prefix (scoping the key to a dataset/index), a user key
-/// (identifying the specific entry), and a type name (distinguishing value
-/// types that share the same user key).
-#[derive(Clone, Debug, Hash, PartialEq, Eq)]
-pub struct InternalCacheKey {
-    prefix: Arc<str>,
-    key: Arc<str>,
-    type_name: &'static str,
-}
-
-impl InternalCacheKey {
-    pub fn new(prefix: Arc<str>, key: Arc<str>, type_name: &'static str) -> Self {
-        Self {
-            prefix,
-            key,
-            type_name,
-        }
-    }
-
-    pub fn prefix(&self) -> &str {
-        &self.prefix
-    }
-
-    pub fn key(&self) -> &str {
-        &self.key
-    }
-
-    pub fn type_name(&self) -> &'static str {
-        self.type_name
-    }
-
-    /// Returns true if this key's prefix starts with the given string.
-    pub fn has_prefix(&self, prefix: &str) -> bool {
-        self.prefix.starts_with(prefix)
-    }
-}
-
-pub trait CacheKey {
-    type ValueType: 'static;
-
-    fn key(&self) -> Cow<'_, str>;
-
-    /// Short, stable string that distinguishes this value type from others in
-    /// the cache. Used as the suffix in the encoded cache key (`user_key\0type_name`).
-    ///
-    /// **Must be unique per value type.** If two `CacheKey` impls return the
-    /// same `type_name` but different `ValueType`s, entries will collide and
-    /// downcasts will fail silently (returning `None` on get).
-    ///
-    /// Must be consistent across crate boundaries — use a short literal, not
-    /// `std::any::type_name` pointers.
-    fn type_name() -> &'static str;
-}
-
-pub trait UnsizedCacheKey {
-    type ValueType: 'static + ?Sized;
-
-    fn key(&self) -> Cow<'_, str>;
-
-    /// Short, stable string that distinguishes this value type from others in
-    /// the cache. Must be unique per value type — collisions cause silent
-    /// downcast failures.
-    fn type_name() -> &'static str;
-}
diff --git a/rust/lance-core/src/cache/mod.rs b/rust/lance-core/src/cache/mod.rs
index 01014997d7d..43bb233df72 100644
--- a/rust/lance-core/src/cache/mod.rs
+++ b/rust/lance-core/src/cache/mod.rs
@@ -1,26 +1,28 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
-//! Cache implementation
+//! Lance cache system.
 //!
-//! This module provides a two-layer caching system:
+//! ## For cache users
 //!
-//! - [`CacheBackend`] is the low-level, pluggable trait that custom cache implementations
-//!   can implement. It uses [`InternalCacheKey`] keys and type-erased entries.
-//! - [`LanceCache`] is the typed wrapper that handles key construction (prefix + type tag),
-//!   type-safe get/insert, and DeepSizeOf-based size computation.
+//! Use [`LanceCache`] (or [`WeakLanceCache`]) to store and retrieve typed
+//! values. Define a [`CacheKey`] (or [`UnsizedCacheKey`] for trait objects) to
+//! describe what you're caching and its type.
 //!
-//! [`CacheKey`] / [`UnsizedCacheKey`] define the typed key interface, and
-//! [`InternalCacheKey`] is the structured key passed to backends.
+//! ## For backend implementors
+//!
+//! Implement [`CacheBackend`] to provide a custom storage layer (disk, Redis,
+//! etc.). Backends receive [`InternalCacheKey`] keys and type-erased
+//! [`CacheEntry`] values — the typed wrapping is handled by [`LanceCache`].
+//! See the [`backend`] module for details.
 
-mod backend;
-mod keys;
+pub mod backend;
 mod moka;
 
-pub use backend::{CacheBackend, CacheEntry};
-pub use keys::{CacheKey, InternalCacheKey, UnsizedCacheKey};
+pub use backend::{CacheBackend, CacheEntry, InternalCacheKey};
 pub use moka::MokaCacheBackend;
 
+use std::borrow::Cow;
 use std::sync::{
     Arc,
     atomic::{AtomicU64, Ordering},
@@ -32,6 +34,62 @@ use crate::Result;
 
 pub use deepsize::{Context, DeepSizeOf};
 
+// ---------------------------------------------------------------------------
+// CacheKey / UnsizedCacheKey — typed key traits for cache users
+// ---------------------------------------------------------------------------
+
+/// Typed cache key for sized value types.
+///
+/// Implement this trait to define a new type of cached entry. [`LanceCache`]
+/// uses the key string and type name to construct an [`InternalCacheKey`]
+/// for the backend.
+///
+/// # Example
+///
+/// ```ignore
+/// struct MyKey { id: u64 }
+///
+/// impl CacheKey for MyKey {
+///     type ValueType = MyData;
+///     fn key(&self) -> Cow<'_, str> { self.id.to_string().into() }
+///     fn type_name() -> &'static str { "MyData" }
+/// }
+/// ```
+pub trait CacheKey {
+    type ValueType: 'static;
+
+    fn key(&self) -> Cow<'_, str>;
+
+    /// Short, stable string identifying this value type.
+    ///
+    /// Two `CacheKey` impls that store different `ValueType`s **must** return
+    /// different type names; if they collide, gets will silently return `None`
+    /// due to failed downcasts.
+    ///
+    /// Use a short literal (e.g. `"Vec<IndexMetadata>"`), not
+    /// `std::any::type_name` — the latter is not guaranteed stable across
+    /// compiler versions or build configurations.
+    fn type_name() -> &'static str;
+}
+
+/// Like [`CacheKey`] but for unsized value types (e.g. `dyn Trait`).
+///
+/// The cache wraps values in an extra `Arc` layer internally; callers pass
+/// and receive `Arc<T>` where `T: ?Sized`.
+pub trait UnsizedCacheKey {
+    type ValueType: 'static + ?Sized;
+
+    fn key(&self) -> Cow<'_, str>;
+
+    /// Short, stable string identifying this value type.
+    /// See [`CacheKey::type_name`] for requirements.
+    fn type_name() -> &'static str;
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
 /// Size of a cached `Arc<T>`, accounting for the Arc overhead (two atomic counters).
 fn cache_entry_size<T: DeepSizeOf + ?Sized>(value: &T) -> usize {
     value.deep_size_of() + std::mem::size_of::<std::sync::atomic::AtomicUsize>() * 2
@@ -682,7 +740,7 @@ mod tests {
                 }
             }
             async fn invalidate_prefix(&self, prefix: &str) {
-                self.map.lock().await.retain(|k, _| !k.has_prefix(prefix));
+                self.map.lock().await.retain(|k, _| !k.starts_with(prefix));
             }
             async fn clear(&self) {
                 self.map.lock().await.clear();
diff --git a/rust/lance-core/src/cache/moka.rs b/rust/lance-core/src/cache/moka.rs
index 13a6c7e976c..05cb1e5909f 100644
--- a/rust/lance-core/src/cache/moka.rs
+++ b/rust/lance-core/src/cache/moka.rs
@@ -10,8 +10,7 @@ use futures::Future;
 
 use crate::Result;
 
-use super::backend::{CacheBackend, CacheEntry};
-use super::keys::InternalCacheKey;
+use super::backend::{CacheBackend, CacheEntry, InternalCacheKey};
 
 /// Internal record stored in the moka cache.
 #[derive(Clone, Debug)]
@@ -107,7 +106,7 @@ impl CacheBackend for MokaCacheBackend {
     async fn invalidate_prefix(&self, prefix: &str) {
         let prefix = prefix.to_owned();
         self.cache
-            .invalidate_entries_if(move |key, _value| key.has_prefix(&prefix))
+            .invalidate_entries_if(move |key, _value| key.starts_with(&prefix))
             .expect("Cache configured correctly");
     }
 
diff --git a/rust/lance-file/src/previous/reader.rs b/rust/lance-file/src/previous/reader.rs
index 0edaa7eb972..9e1fc175d04 100644
--- a/rust/lance-file/src/previous/reader.rs
+++ b/rust/lance-file/src/previous/reader.rs
@@ -91,6 +91,9 @@ impl<T: 'static> CacheKey for StringCacheKey<'_, T> {
     }
 
     fn type_name() -> &'static str {
+        // This is a private, crate-internal key that is only instantiated with
+        // a single concrete T within one build, so std::any::type_name is fine
+        // here — there is no cross-crate collision risk.
         std::any::type_name::<T>()
     }
 }