From 77cc936ed8241b34f3baf75a0842ddc863d367ff Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Thu, 7 May 2026 01:18:55 +0530 Subject: [PATCH 1/7] Add benchmark crate for multi-vector --- Cargo.lock | 17 + Cargo.toml | 1 + diskann-benchmark-multi-vector/Cargo.toml | 30 + diskann-benchmark-multi-vector/README.md | 136 ++ .../examples/multi-vector.json | 70 + .../examples/test.json | 47 + .../examples/tolerance.json | 16 + diskann-benchmark-multi-vector/src/bin.rs | 96 + diskann-benchmark-multi-vector/src/lib.rs | 992 ++++++++ results.json | 2150 +++++++++++++++++ 10 files changed, 3555 insertions(+) create mode 100644 diskann-benchmark-multi-vector/Cargo.toml create mode 100644 diskann-benchmark-multi-vector/README.md create mode 100644 diskann-benchmark-multi-vector/examples/multi-vector.json create mode 100644 diskann-benchmark-multi-vector/examples/test.json create mode 100644 diskann-benchmark-multi-vector/examples/tolerance.json create mode 100644 diskann-benchmark-multi-vector/src/bin.rs create mode 100644 diskann-benchmark-multi-vector/src/lib.rs create mode 100644 results.json diff --git a/Cargo.lock b/Cargo.lock index beac316c4..fc0a7cc87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -697,6 +697,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "diskann-benchmark-multi-vector" +version = "0.50.1" +dependencies = [ + "anyhow", + "diskann-benchmark-runner", + "diskann-quantization", + "diskann-utils", + "diskann-vector", + "half", + "rand 0.9.4", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.17", +] + [[package]] name = "diskann-benchmark-runner" version = "0.50.1" diff --git a/Cargo.toml b/Cargo.toml index 6f31a1ae2..13fcbdd9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ members = [ "diskann-benchmark-runner", "diskann-benchmark-core", "diskann-benchmark-simd", + "diskann-benchmark-multi-vector", "diskann-benchmark", "diskann-tools", "vectorset", diff --git a/diskann-benchmark-multi-vector/Cargo.toml 
b/diskann-benchmark-multi-vector/Cargo.toml new file mode 100644 index 000000000..f8eb937e1 --- /dev/null +++ b/diskann-benchmark-multi-vector/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "diskann-benchmark-multi-vector" +version.workspace = true +description.workspace = true +authors.workspace = true +documentation.workspace = true +license.workspace = true +edition.workspace = true + +[[bin]] +name = "benchmark-multi-vector" +path = "src/bin.rs" + +[dependencies] +anyhow.workspace = true +diskann-utils = { workspace = true, default-features = false } +half = { workspace = true, features = ["rand_distr"] } +diskann-benchmark-runner = { workspace = true } +diskann-quantization = { workspace = true } +diskann-vector = { workspace = true } +rand.workspace = true +serde = { workspace = true, features = ["derive"] } +serde_json.workspace = true +thiserror.workspace = true + +[lints] +workspace = true + +[dev-dependencies] +tempfile.workspace = true diff --git a/diskann-benchmark-multi-vector/README.md b/diskann-benchmark-multi-vector/README.md new file mode 100644 index 000000000..014a393a1 --- /dev/null +++ b/diskann-benchmark-multi-vector/README.md @@ -0,0 +1,136 @@ +# diskann-benchmark-multi-vector + +Benchmarks and regression detection for the **multi-vector distance +operations** exposed by `diskann-quantization` — `Chamfer` and `MaxSim` — +across `f32` and `f16` element types. + +## Layout + +- `src/lib.rs` — benchmark library: input/tolerance schemas, kernel + dispatch, regression checker. +- `src/bin.rs` — `benchmark-multi-vector` CLI entry point. +- `examples/multi-vector.json` — full benchmark matrix covering both + operations across the registered kernels and a representative range of + shapes. +- `examples/test.json` — minimal smoke configuration consumed by the + integration tests. +- `examples/tolerance.json` — default regression thresholds. 
+ +## Registered kernels + +The crate registers four kernels — one per `(element_type, implementation)` +pair: + +| Tag | Element | Implementation | +| -------------------------------- | ------- | -------------------- | +| `multi-vector-op-f32-optimized` | `f32` | `QueryComputer` | +| `multi-vector-op-f16-optimized` | `f16` | `QueryComputer` | +| `multi-vector-op-f32-reference` | `f32` | `Chamfer` / `MaxSim` | +| `multi-vector-op-f16-reference` | `f16` | `Chamfer` / `MaxSim` | + +The **optimized** path constructs a `QueryComputer` once per shape (which +internally selects the best available SIMD kernel for the host) and calls +`chamfer` / `max_sim` inside the timed loop. The **reference** path drives +the `Chamfer` / `MaxSim` fallback used by the `multi_vector` unit tests — +useful both as a numerical ground truth and as a baseline to measure SIMD +speedups against. + +## Time normalization + +Per-measurement latency is normalized to **nanoseconds per inner-product +call**, abbreviated `ns/IP`: + +``` +ns/IP = min_latency_µs * 1000 / (Q * D * loops_per_measurement) +``` + +Here `Q` is `num_query_vectors` and `D` is `num_doc_vectors`. Two important properties: + +- **Independent of `Q`, `D`, and `loops_per_measurement`.** Reshaping the + benchmark or scaling the loop budget does not change the work per call, so + any remaining differences reflect cache residency and SIMD utilization. +- **Approximately linear in `Dim`.** Each inner-product call is itself an + O(`Dim`) operation, so `ns/IP` grows with `Dim` — that is why the table + headers read `ns/IP @ Dim`. Compare across rows with the same `Dim`; to + compare across different `Dim`s, divide further by `Dim` to recover ns + per scalar multiply. + +This is the right metric for the two things this crate cares about: +detecting per-shape regressions (the `Dim` factor cancels) and comparing +optimized vs. reference at a fixed shape. 
+ +## Usage + +All examples below assume you are inside the crate directory and use a +small shell function for brevity: + +```bash +bench() { cargo run --release -p diskann-benchmark-multi-vector --bin benchmark-multi-vector -- "$@"; } +``` + +### Run benchmarks + +`run` executes every job in the input file and writes per-measurement +latencies plus percentiles to the output file: + +```bash +bench run --input-file examples/multi-vector.json --output-file before.json +``` + +### Regression check workflow + +The check workflow is **two-phase**: validate the tolerance file once, then +compare two recorded result files. + +**Phase 1 — preflight.** No benchmarks are executed. The verifier confirms +that every entry in `tolerance.json` matches at least one job in the input +file, and that every job is matched by exactly one entry. Run it whenever +you edit `tolerance.json`: + +```bash +bench check verify \ + --tolerances examples/tolerance.json \ + --input-file examples/multi-vector.json +``` + +**Phase 2 — comparison.** Record results before and after a code change, +then compare. The command exits non-zero if any run regresses past its +tolerance: + +```bash +# On the baseline commit +bench run --input-file examples/multi-vector.json --output-file before.json + +# On the change commit +bench run --input-file examples/multi-vector.json --output-file after.json + +# Compare +bench check run \ + --tolerances examples/tolerance.json \ + --input-file examples/multi-vector.json \ + --before before.json --after after.json \ + --output-file checks.json +``` + +A run **fails** when its post-change `ns/IP` minimum exceeds the +baseline minimum by more than `min_time_regression` (`0.05` = 5% in the +shipped `tolerance.json`). Improvements (negative change) always pass. + +### How tolerances are matched to jobs + +Each entry in `tolerance.json` has the shape `{ input, tolerance }`. 
The +`input` block acts as a **partial template** against the jobs in the input +file: any field present must match; missing fields are wildcards. + +The shipped `tolerance.json` uses an empty `"content": {}`, which matches +every `multi-vector-op` job — so a single 5% threshold applies to all four +kernels. To apply different thresholds per implementation, add more +specific entries, e.g.: + +```json +{ "input": { "type": "multi-vector-op", "content": { "implementation": "reference" } }, + "tolerance": { "type": "multi-vector-tolerance", "content": { "min_time_regression": 0.10 } } } +``` + +`check verify` will reject the file if entries overlap or leave any job +unmatched. diff --git a/diskann-benchmark-multi-vector/examples/multi-vector.json b/diskann-benchmark-multi-vector/examples/multi-vector.json new file mode 100644 index 000000000..2626e5047 --- /dev/null +++ b/diskann-benchmark-multi-vector/examples/multi-vector.json @@ -0,0 +1,70 @@ +{ + "search_directories": [], + "jobs": [ + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "implementation": "optimized", + "runs": [ + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { 
"operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 20 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }, + + { "operation": "max_sim", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 20 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float16", + "implementation": "optimized", + "runs": [ + { "operation": "chamfer", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 
}, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "implementation": "reference", + "runs": [ + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 2, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 2, "num_measurements": 50 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float16", + "implementation": "reference", + "runs": [ + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 2, "num_measurements": 50 } + ] + } + } + ] +} diff --git a/diskann-benchmark-multi-vector/examples/test.json b/diskann-benchmark-multi-vector/examples/test.json new file mode 100644 index 000000000..28e9b9d64 --- /dev/null +++ b/diskann-benchmark-multi-vector/examples/test.json @@ -0,0 +1,47 @@ +{ + "search_directories": [], + "jobs": [ + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "implementation": "optimized", + "runs": [ + { 
"operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float16", + "implementation": "optimized", + "runs": [ + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float32", + "implementation": "reference", + "runs": [ + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + }, + { + "type": "multi-vector-op", + "content": { + "element_type": "float16", + "implementation": "reference", + "runs": [ + { "operation": "max_sim", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 2, "num_measurements": 1 } + ] + } + } + ] +} diff --git a/diskann-benchmark-multi-vector/examples/tolerance.json b/diskann-benchmark-multi-vector/examples/tolerance.json new file mode 100644 index 000000000..8d5997199 --- /dev/null +++ b/diskann-benchmark-multi-vector/examples/tolerance.json @@ -0,0 +1,16 @@ +{ + "checks": [ + { + "input": { + "type": "multi-vector-op", + "content": {} + }, + "tolerance": { + "type": "multi-vector-tolerance", + "content": { + "min_time_regression": 0.05 + } + } + } + ] +} diff --git a/diskann-benchmark-multi-vector/src/bin.rs b/diskann-benchmark-multi-vector/src/bin.rs new file mode 100644 index 000000000..d595533e7 --- /dev/null +++ b/diskann-benchmark-multi-vector/src/bin.rs @@ -0,0 +1,96 @@ +/* + * Copyright (c) Microsoft Corporation. 
+ * Licensed under the MIT license. + */ + +use diskann_benchmark_multi_vector::{register, MultiVectorOp}; +use diskann_benchmark_runner::{output, registry, App, Output}; + +pub fn main() -> anyhow::Result<()> { + // Create the pocket bench application. + let app = App::parse(); + main_inner(&app, &mut output::default()) +} + +fn main_inner(app: &App, output: &mut dyn Output) -> anyhow::Result<()> { + // Register inputs and benchmarks. + let mut inputs = registry::Inputs::new(); + inputs.register::()?; + + let mut benchmarks = registry::Benchmarks::new(); + register(&mut benchmarks); + + // Here we go! + app.run(&inputs, &benchmarks, output) +} + +/////////// +// Tests // +/////////// + +#[cfg(test)] +mod tests { + use super::*; + + use std::path::{Path, PathBuf}; + + use diskann_benchmark_runner::app::{Check, Commands}; + + fn run_integration_test(input_file: &Path, output_file: &Path) { + let commands = Commands::Run { + input_file: input_file.to_str().unwrap().into(), + output_file: output_file.to_str().unwrap().into(), + dry_run: false, + allow_debug: true, + }; + + let app = App::from_commands(commands); + + let mut output = output::Memory::new(); + main_inner(&app, &mut output).unwrap(); + println!( + "output = {}", + String::from_utf8(output.into_inner()).unwrap() + ); + + assert!(output_file.exists()); + } + + fn run_check_test(input_file: &Path, tolerances: &Path) -> String { + let commands = Commands::Check(Check::Verify { + tolerances: tolerances.to_str().unwrap().into(), + input_file: input_file.to_str().unwrap().into(), + }); + + let app = App::from_commands(commands); + + let mut output = output::Memory::new(); + main_inner(&app, &mut output).unwrap(); + String::from_utf8(output.into_inner()).unwrap() + } + + #[test] + fn integration_test() { + let input_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("test.json"); + + let tempdir = tempfile::tempdir().unwrap(); + let output_path = tempdir.path().join("output.json"); + + 
run_integration_test(&input_path, &output_path); + } + + #[test] + fn check_verify() { + let input_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("test.json"); + let tolerance_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("tolerance.json"); + + let stdout = run_check_test(&input_path, &tolerance_path); + println!("stdout = {}", stdout); + } +} diff --git a/diskann-benchmark-multi-vector/src/lib.rs b/diskann-benchmark-multi-vector/src/lib.rs new file mode 100644 index 000000000..7cadf4f29 --- /dev/null +++ b/diskann-benchmark-multi-vector/src/lib.rs @@ -0,0 +1,992 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + */ + +//! Multi-vector distance benchmarks with regression detection. + +use std::{io::Write, num::NonZeroUsize}; + +use diskann_quantization::multi_vector::{Chamfer, MatRef, MaxSim, QueryComputer, Standard}; +use diskann_vector::distance::InnerProduct; +use diskann_vector::{DistanceFunctionMut, PureDistanceFunction}; +use half::f16; +use rand::{ + distr::{Distribution, StandardUniform}, + rngs::StdRng, + SeedableRng, +}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +use diskann_benchmark_runner::{ + benchmark::{PassFail, Regression}, + dispatcher::{Description, DispatchRule, FailureScore, MatchScore}, + utils::{ + datatype::{self, DataType}, + num::{relative_change, NonNegativeFinite}, + percentiles, MicroSeconds, + }, + Any, Benchmark, CheckDeserialization, Checker, Input, +}; + +//////////////// +// Public API // +//////////////// + +/// Register all multi-vector benchmarks with the runner's dispatcher. 
+pub fn register(dispatcher: &mut diskann_benchmark_runner::registry::Benchmarks) { + register_benchmarks_impl(dispatcher) +} + +/////////// +// Utils // +/////////// + +#[derive(Debug, Clone, Copy)] +struct DisplayWrapper<'a, T: ?Sized>(&'a T); + +impl std::ops::Deref for DisplayWrapper<'_, T> { + type Target = T; + fn deref(&self) -> &T { + self.0 + } +} + +//////////// +// Inputs // +//////////// + +/// The two distance operations exposed by [`QueryComputer`]. +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Operation { + Chamfer, + MaxSim, +} + +impl std::fmt::Display for Operation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let st = match self { + Self::Chamfer => "chamfer", + Self::MaxSim => "max_sim", + }; + write!(f, "{}", st) + } +} + +/// Which implementation tier to benchmark. +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +enum Implementation { + Optimized, + Reference, +} + +impl std::fmt::Display for Implementation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let st = match self { + Self::Optimized => "optimized", + Self::Reference => "reference", + }; + write!(f, "{}", st) + } +} + +/// One benchmark configuration: a single (operation, shape) measurement. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +struct Run { + operation: Operation, + num_query_vectors: NonZeroUsize, + num_doc_vectors: NonZeroUsize, + dim: NonZeroUsize, + loops_per_measurement: NonZeroUsize, + num_measurements: NonZeroUsize, +} + +/// A complete multi-vector benchmark job. +#[derive(Debug, Serialize, Deserialize)] +pub struct MultiVectorOp { + element_type: DataType, + implementation: Implementation, + runs: Vec, +} + +impl CheckDeserialization for MultiVectorOp { + fn check_deserialization(&mut self, _checker: &mut Checker) -> Result<(), anyhow::Error> { + Ok(()) + } +} + +macro_rules! 
write_field { + ($f:ident, $field:tt, $($expr:tt)*) => { + writeln!($f, "{:>18}: {}", $field, $($expr)*) + } +} + +impl MultiVectorOp { + fn summarize_fields(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write_field!(f, "element type", self.element_type)?; + write_field!(f, "implementation", self.implementation)?; + write_field!(f, "number of runs", self.runs.len())?; + Ok(()) + } +} + +impl std::fmt::Display for MultiVectorOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Multi-Vector Operation\n")?; + write_field!(f, "tag", Self::tag())?; + self.summarize_fields(f) + } +} + +impl Input for MultiVectorOp { + fn tag() -> &'static str { + "multi-vector-op" + } + + fn try_deserialize( + serialized: &serde_json::Value, + checker: &mut Checker, + ) -> anyhow::Result { + checker.any(Self::deserialize(serialized)?) + } + + fn example() -> anyhow::Result { + const NUM_QUERY_VECTORS: NonZeroUsize = NonZeroUsize::new(32).unwrap(); + const NUM_DOC_VECTORS: NonZeroUsize = NonZeroUsize::new(64).unwrap(); + const DIM: NonZeroUsize = NonZeroUsize::new(128).unwrap(); + const LOOPS_PER_MEASUREMENT: NonZeroUsize = NonZeroUsize::new(200).unwrap(); + const NUM_MEASUREMENTS: NonZeroUsize = NonZeroUsize::new(100).unwrap(); + + let runs = vec![ + Run { + operation: Operation::Chamfer, + num_query_vectors: NUM_QUERY_VECTORS, + num_doc_vectors: NUM_DOC_VECTORS, + dim: DIM, + loops_per_measurement: LOOPS_PER_MEASUREMENT, + num_measurements: NUM_MEASUREMENTS, + }, + Run { + operation: Operation::MaxSim, + num_query_vectors: NUM_QUERY_VECTORS, + num_doc_vectors: NUM_DOC_VECTORS, + dim: DIM, + loops_per_measurement: LOOPS_PER_MEASUREMENT, + num_measurements: NUM_MEASUREMENTS, + }, + ]; + + Ok(serde_json::to_value(&Self { + element_type: DataType::Float32, + implementation: Implementation::Optimized, + runs, + })?) 
+ } +} + +////////////////////// +// Regression Check // +////////////////////// + +/// Tolerance thresholds for multi-vector benchmark regression detection. +/// +/// Each field specifies the maximum allowed relative increase in the corresponding metric. +/// For example, a value of `0.05` means a 5% increase is tolerated. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +struct MultiVectorTolerance { + min_time_regression: NonNegativeFinite, +} + +impl CheckDeserialization for MultiVectorTolerance { + fn check_deserialization(&mut self, _checker: &mut Checker) -> Result<(), anyhow::Error> { + Ok(()) + } +} + +impl Input for MultiVectorTolerance { + fn tag() -> &'static str { + "multi-vector-tolerance" + } + + fn try_deserialize( + serialized: &serde_json::Value, + checker: &mut Checker, + ) -> anyhow::Result { + checker.any(Self::deserialize(serialized)?) + } + + fn example() -> anyhow::Result { + const EXAMPLE: NonNegativeFinite = match NonNegativeFinite::new(0.05) { + Ok(v) => v, + Err(_) => panic!("use a non-negative finite please"), + }; + + Ok(serde_json::to_value(MultiVectorTolerance { + min_time_regression: EXAMPLE, + })?) + } +} + +/// Per-run comparison result showing before/after percentile differences. +#[derive(Debug, Serialize)] +struct Comparison { + run: Run, + tolerance: MultiVectorTolerance, + before_min: f64, + after_min: f64, +} + +/// Aggregated result of the regression check across all runs. 
+#[derive(Debug, Serialize)] +struct CheckResult { + checks: Vec, +} + +impl std::fmt::Display for CheckResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let header = [ + "Operation", + "Q", + "D", + "Dim", + "Min Before (ns/IP @ Dim)", + "Min After (ns/IP @ Dim)", + "Change (%)", + "Remark", + ]; + + let mut table = diskann_benchmark_runner::utils::fmt::Table::new(header, self.checks.len()); + + for (i, c) in self.checks.iter().enumerate() { + let mut row = table.row(i); + let change = relative_change(c.before_min, c.after_min); + + row.insert(c.run.operation, 0); + row.insert(c.run.num_query_vectors, 1); + row.insert(c.run.num_doc_vectors, 2); + row.insert(c.run.dim, 3); + row.insert(format!("{:.3}", c.before_min), 4); + row.insert(format!("{:.3}", c.after_min), 5); + match change { + Ok(change) => { + row.insert(format!("{:.3} %", change * 100.0), 6); + if change > c.tolerance.min_time_regression.get() { + row.insert("FAIL", 7); + } + } + Err(err) => { + row.insert("invalid", 6); + row.insert(err, 7); + } + } + } + + table.fmt(f) + } +} + +//////////////////////////// +// Benchmark Registration // +//////////////////////////// + +fn register_benchmarks_impl(dispatcher: &mut diskann_benchmark_runner::registry::Benchmarks) { + // Optimized (architecture-dispatched QueryComputer). + dispatcher.register_regression( + "multi-vector-op-f32-optimized", + Kernel::::new(), + ); + dispatcher.register_regression( + "multi-vector-op-f16-optimized", + Kernel::::new(), + ); + + // Reference (Chamfer / MaxSim fallback path). + dispatcher.register_regression( + "multi-vector-op-f32-reference", + Kernel::::new(), + ); + dispatcher.register_regression( + "multi-vector-op-f16-reference", + Kernel::::new(), + ); +} + +////////////// +// Dispatch // +////////////// + +/// Dispatch marker for the [`QueryComputer`] implementation. +#[derive(Debug)] +struct Optimized; + +/// Dispatch marker for the [`Chamfer`] / [`MaxSim`] fallback. 
+#[derive(Debug)] +struct Reference; + +/// A multi-vector benchmark. +struct Kernel { + _type: std::marker::PhantomData<(I, T)>, +} + +impl Kernel { + fn new() -> Self { + Self { + _type: std::marker::PhantomData, + } + } +} + +#[derive(Debug, Error)] +#[error("implementation {0} is not registered for this benchmark")] +pub(crate) struct ImplementationMismatch(Implementation); + +impl DispatchRule for Optimized { + type Error = ImplementationMismatch; + + fn try_match(from: &Implementation) -> Result { + if *from == Implementation::Optimized { + Ok(MatchScore(0)) + } else { + Err(FailureScore(1)) + } + } + + fn convert(from: Implementation) -> Result { + if from == Implementation::Optimized { + Ok(Optimized) + } else { + Err(ImplementationMismatch(from)) + } + } + + fn description( + f: &mut std::fmt::Formatter<'_>, + from: Option<&Implementation>, + ) -> std::fmt::Result { + match from { + None => write!(f, "QueryComputer (architecture-dispatched)"), + Some(impl_) => { + if Self::try_match(impl_).is_ok() { + write!(f, "matched {}", impl_) + } else { + write!(f, "expected {}, got {}", Implementation::Optimized, impl_) + } + } + } + } +} + +impl DispatchRule for Reference { + type Error = ImplementationMismatch; + + fn try_match(from: &Implementation) -> Result { + if *from == Implementation::Reference { + Ok(MatchScore(0)) + } else { + Err(FailureScore(1)) + } + } + + fn convert(from: Implementation) -> Result { + if from == Implementation::Reference { + Ok(Reference) + } else { + Err(ImplementationMismatch(from)) + } + } + + fn description( + f: &mut std::fmt::Formatter<'_>, + from: Option<&Implementation>, + ) -> std::fmt::Result { + match from { + None => write!(f, "Chamfer / MaxSim fallback"), + Some(impl_) => { + if Self::try_match(impl_).is_ok() { + write!(f, "matched {}", impl_) + } else { + write!(f, "expected {}, got {}", Implementation::Reference, impl_) + } + } + } + } +} + +impl Benchmark for Kernel +where + datatype::Type: DispatchRule, + I: 
DispatchRule + 'static, + Kernel: RunBenchmark, + T: 'static, +{ + type Input = MultiVectorOp; + type Output = Vec; + + fn try_match(&self, from: &MultiVectorOp) -> Result { + let mut failscore: Option = None; + if datatype::Type::::try_match(&from.element_type).is_err() { + *failscore.get_or_insert(0) += 10; + } + if let Err(FailureScore(score)) = I::try_match(&from.implementation) { + *failscore.get_or_insert(0) += 2 + score; + } + + match failscore { + None => Ok(MatchScore(0)), + Some(score) => Err(FailureScore(score)), + } + } + + fn run( + &self, + input: &MultiVectorOp, + _: diskann_benchmark_runner::Checkpoint<'_>, + mut output: &mut dyn diskann_benchmark_runner::Output, + ) -> anyhow::Result { + let _ = I::convert(input.implementation)?; + writeln!(output, "{}", input)?; + let results = self.run_benchmark(input)?; + writeln!(output, "\n\n{}", DisplayWrapper(&*results))?; + Ok(results) + } + + fn description( + &self, + f: &mut std::fmt::Formatter<'_>, + input: Option<&MultiVectorOp>, + ) -> std::fmt::Result { + match input { + None => { + writeln!( + f, + "- Element Type: {}", + Description::>::new() + )?; + writeln!( + f, + "- Implementation: {}", + Description::::new() + )?; + } + Some(input) => { + if let Err(err) = datatype::Type::::try_match_verbose(&input.element_type) { + writeln!(f, "\n - Mismatched element type: {}", err)?; + } + if let Err(err) = I::try_match_verbose(&input.implementation) { + writeln!(f, "\n - Mismatched implementation: {}", err)?; + } + } + } + Ok(()) + } +} + +impl Regression for Kernel +where + datatype::Type: DispatchRule, + I: DispatchRule + 'static, + Kernel: RunBenchmark, + T: 'static, +{ + type Tolerances = MultiVectorTolerance; + type Pass = CheckResult; + type Fail = CheckResult; + + fn check( + &self, + tolerance: &MultiVectorTolerance, + _input: &MultiVectorOp, + before: &Vec, + after: &Vec, + ) -> anyhow::Result> { + anyhow::ensure!( + before.len() == after.len(), + "before has {} runs but after has {}", + 
before.len(), + after.len(), + ); + + let mut passed = true; + let checks: Vec = std::iter::zip(before.iter(), after.iter()) + .enumerate() + .map(|(i, (b, a))| { + anyhow::ensure!(b.run == a.run, "run {i} mismatched"); + + let computations_per_latency = b.computations_per_latency() as f64; + + let before_min = b.percentiles.minimum.as_f64() * 1000.0 / computations_per_latency; + let after_min = a.percentiles.minimum.as_f64() * 1000.0 / computations_per_latency; + + let comparison = Comparison { + run: b.run.clone(), + tolerance: *tolerance, + before_min, + after_min, + }; + + match relative_change(before_min, after_min) { + Ok(change) => { + if change > tolerance.min_time_regression.get() { + passed = false; + } + } + Err(_) => passed = false, + }; + + Ok(comparison) + }) + .collect::>>()?; + + let check = CheckResult { checks }; + + if passed { + Ok(PassFail::Pass(check)) + } else { + Ok(PassFail::Fail(check)) + } + } +} + +/////////////// +// Benchmark // +/////////////// + +trait RunBenchmark { + fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error>; +} + +#[derive(Debug, Serialize, Deserialize)] +struct RunResult { + /// The configuration for this run. + run: Run, + /// Per-measurement latencies (over `loops_per_measurement` calls). + latencies: Vec, + /// Latency percentiles. + percentiles: percentiles::Percentiles, +} + +impl RunResult { + fn computations_per_latency(&self) -> usize { + self.run.num_query_vectors.get() + * self.run.num_doc_vectors.get() + * self.run.loops_per_measurement.get() + } +} + +impl std::fmt::Display for DisplayWrapper<'_, [RunResult]> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_empty() { + return Ok(()); + } + + // ns/IP is normalized as `min_latency_us * 1000 / (Q * D * loops)` and is + // approximately linear in `dim`. Compare across rows with the same `Dim`; + // divide further by `Dim` to recover ns per scalar multiply. 
+ writeln!( + f, + "ns/IP = time per (query, doc) inner-product call (~ linear in Dim)" + )?; + + let header = [ + "Operation", + "Q", + "D", + "Dim", + "Min Time (ns/IP @ Dim)", + "Mean Time (ns/IP @ Dim)", + "Loops", + "Measurements", + ]; + + let mut table = diskann_benchmark_runner::utils::fmt::Table::new(header, self.len()); + + self.iter().enumerate().for_each(|(row, r)| { + let mut row = table.row(row); + + let min_latency = r + .latencies + .iter() + .min() + .copied() + .unwrap_or(MicroSeconds::new(u64::MAX)); + let mean_latency = r.percentiles.mean; + + let computations_per_latency = r.computations_per_latency() as f64; + + // Convert time from micro-seconds to nano-seconds per inner-product call + // (one (query, doc) pair, ~ linear in dim). + let min_time = min_latency.as_f64() / computations_per_latency * 1000.0; + let mean_time = mean_latency / computations_per_latency * 1000.0; + + row.insert(r.run.operation, 0); + row.insert(r.run.num_query_vectors, 1); + row.insert(r.run.num_doc_vectors, 2); + row.insert(r.run.dim, 3); + row.insert(format!("{:.3}", min_time), 4); + row.insert(format!("{:.3}", mean_time), 5); + row.insert(r.run.loops_per_measurement, 6); + row.insert(r.run.num_measurements, 7); + }); + + table.fmt(f) + } +} + +fn run_loops(run: &Run, mut body: F) -> RunResult +where + F: FnMut(), +{ + let mut latencies = Vec::with_capacity(run.num_measurements.get()); + + for _ in 0..run.num_measurements.get() { + let start = std::time::Instant::now(); + for _ in 0..run.loops_per_measurement.get() { + body(); + } + latencies.push(start.elapsed().into()); + } + + let percentiles = percentiles::compute_percentiles(&mut latencies).unwrap(); + RunResult { + run: run.clone(), + latencies, + percentiles, + } +} + +/////////////////// +// Data fixtures // +/////////////////// + +const RNG_SEED: u64 = 0x12345; + +struct Data { + query_data: Box<[T]>, + doc_data: Box<[T]>, +} + +impl Data +where + StandardUniform: Distribution, +{ + fn new(run: &Run) -> Self 
{ + let mut rng = StdRng::seed_from_u64(RNG_SEED); + let query_data: Box<[T]> = (0..run.num_query_vectors.get() * run.dim.get()) + .map(|_| StandardUniform.sample(&mut rng)) + .collect(); + let doc_data: Box<[T]> = (0..run.num_doc_vectors.get() * run.dim.get()) + .map(|_| StandardUniform.sample(&mut rng)) + .collect(); + + Self { + query_data, + doc_data, + } + } + + fn query(&self, run: &Run) -> MatRef<'_, Standard> { + MatRef::new( + Standard::new(run.num_query_vectors.get(), run.dim.get()).unwrap(), + &self.query_data, + ) + .unwrap() + } + + fn doc(&self, run: &Run) -> MatRef<'_, Standard> { + MatRef::new( + Standard::new(run.num_doc_vectors.get(), run.dim.get()).unwrap(), + &self.doc_data, + ) + .unwrap() + } +} + +///////////////////// +// Implementations // +///////////////////// + +fn run_optimized(input: &MultiVectorOp) -> anyhow::Result> +where + T: Copy, + StandardUniform: Distribution, + QueryComputer: NewFromMatRef, +{ + let mut results = Vec::with_capacity(input.runs.len()); + for run in input.runs.iter() { + let data = Data::::new(run); + let computer = as NewFromMatRef>::new_from(data.query(run)); + let doc = data.doc(run); + + let result = match run.operation { + Operation::Chamfer => run_loops(run, || { + let v = computer.chamfer(doc); + std::hint::black_box(v); + }), + Operation::MaxSim => { + let mut scores = vec![0.0f32; run.num_query_vectors.get()]; + run_loops(run, || { + computer.max_sim(doc, &mut scores); + std::hint::black_box(&mut scores); + }) + } + }; + results.push(result); + } + Ok(results) +} + +/// Drive the [`Chamfer`] / [`MaxSim`] fallback path. 
+fn run_reference(input: &MultiVectorOp) -> anyhow::Result> +where + T: Copy, + StandardUniform: Distribution, + InnerProduct: for<'a, 'b> PureDistanceFunction<&'a [T], &'b [T], f32>, +{ + let mut results = Vec::with_capacity(input.runs.len()); + for run in input.runs.iter() { + let data = Data::::new(run); + let query = data.query(run); + let doc = data.doc(run); + + let result = match run.operation { + Operation::Chamfer => run_loops(run, || { + let v = Chamfer::evaluate(query.into(), doc); + std::hint::black_box(v); + }), + Operation::MaxSim => { + let mut scores = vec![0.0f32; run.num_query_vectors.get()]; + run_loops(run, || { + let mut max_sim = MaxSim::new(&mut scores).unwrap(); + let _ = max_sim.evaluate(query.into(), doc); + std::hint::black_box(&mut scores); + }) + } + }; + results.push(result); + } + Ok(results) +} + +impl RunBenchmark for Kernel { + fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { + run_optimized::(input) + } +} + +impl RunBenchmark for Kernel { + fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { + run_optimized::(input) + } +} + +impl RunBenchmark for Kernel { + fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { + run_reference::(input) + } +} + +impl RunBenchmark for Kernel { + fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { + run_reference::(input) + } +} + +/// Element-type-erasing constructor for [`QueryComputer`]. 
+trait NewFromMatRef { + fn new_from(query: MatRef<'_, Standard>) -> QueryComputer; +} + +impl NewFromMatRef for QueryComputer { + fn new_from(query: MatRef<'_, Standard>) -> QueryComputer { + QueryComputer::::new(query) + } +} + +impl NewFromMatRef for QueryComputer { + fn new_from(query: MatRef<'_, Standard>) -> QueryComputer { + QueryComputer::::new(query) + } +} + +/////////// +// Tests // +/////////// + +#[cfg(test)] +mod tests { + use super::*; + + use diskann_benchmark_runner::{ + benchmark::{PassFail, Regression}, + utils::percentiles::compute_percentiles, + }; + + fn tiny_run(operation: Operation) -> Run { + Run { + operation, + num_query_vectors: NonZeroUsize::new(2).unwrap(), + num_doc_vectors: NonZeroUsize::new(2).unwrap(), + dim: NonZeroUsize::new(4).unwrap(), + loops_per_measurement: NonZeroUsize::new(1).unwrap(), + num_measurements: NonZeroUsize::new(1).unwrap(), + } + } + + fn tiny_op() -> MultiVectorOp { + MultiVectorOp { + element_type: DataType::Float32, + implementation: Implementation::Optimized, + runs: vec![tiny_run(Operation::Chamfer)], + } + } + + fn tiny_result(operation: Operation, minimum: u64) -> RunResult { + let run = tiny_run(operation); + let minimum = MicroSeconds::new(minimum); + let mut latencies = vec![minimum]; + let percentiles = compute_percentiles(&mut latencies).unwrap(); + RunResult { + run, + latencies, + percentiles, + } + } + + fn tolerance(limit: f64) -> MultiVectorTolerance { + MultiVectorTolerance { + min_time_regression: NonNegativeFinite::new(limit).unwrap(), + } + } + + #[test] + fn check_rejects_mismatched_runs() { + let kernel = Kernel::::new(); + + let err = kernel + .check( + &tolerance(0.0), + &tiny_op(), + &vec![tiny_result(Operation::Chamfer, 100)], + &vec![tiny_result(Operation::MaxSim, 100)], + ) + .unwrap_err(); + + assert_eq!(err.to_string(), "run 0 mismatched"); + } + + #[test] + fn check_allows_negative_relative_change() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + 
&tolerance(0.0), + &tiny_op(), + &vec![tiny_result(Operation::Chamfer, 100)], + &vec![tiny_result(Operation::Chamfer, 95)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Pass(_))); + } + + #[test] + fn check_passes_on_tolerance_boundary() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + &tolerance(0.05), + &tiny_op(), + &vec![tiny_result(Operation::Chamfer, 100)], + &vec![tiny_result(Operation::Chamfer, 105)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Pass(_))); + } + + #[test] + fn check_fails_above_tolerance_boundary() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + &tolerance(0.05), + &tiny_op(), + &vec![tiny_result(Operation::Chamfer, 100)], + &vec![tiny_result(Operation::Chamfer, 106)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Fail(_))); + } + + #[test] + fn check_result_display_includes_failure_details() { + let check = CheckResult { + checks: vec![Comparison { + run: tiny_run(Operation::Chamfer), + tolerance: tolerance(0.05), + before_min: 100.0, + after_min: 106.0, + }], + }; + + let rendered = check.to_string(); + assert!(rendered.contains("Operation"), "rendered = {rendered}"); + assert!(rendered.contains("chamfer"), "rendered = {rendered}"); + assert!(rendered.contains("100.000"), "rendered = {rendered}"); + assert!(rendered.contains("106.000"), "rendered = {rendered}"); + assert!(rendered.contains("6.000 %"), "rendered = {rendered}"); + assert!(rendered.contains("FAIL"), "rendered = {rendered}"); + } + + /// A "before" value of 0 means the measurement was too fast to obtain a + /// reliable signal, so we *could* be letting a regression through. We + /// require at least a non-zero value. 
+ #[test] + fn zero_values_rejected() { + let kernel = Kernel::::new(); + + let result = kernel + .check( + &tolerance(0.05), + &tiny_op(), + &vec![tiny_result(Operation::Chamfer, 0)], + &vec![tiny_result(Operation::Chamfer, 0)], + ) + .unwrap(); + + assert!(matches!(result, PassFail::Fail(_))); + } + + /// Sanity-check that the optimized kernel and the reference path produce + /// numerically equivalent Chamfer scores on a small fixture. + #[test] + fn optimized_chamfer_matches_reference_f32() { + let run = Run { + operation: Operation::Chamfer, + num_query_vectors: NonZeroUsize::new(5).unwrap(), + num_doc_vectors: NonZeroUsize::new(7).unwrap(), + dim: NonZeroUsize::new(16).unwrap(), + loops_per_measurement: NonZeroUsize::new(1).unwrap(), + num_measurements: NonZeroUsize::new(1).unwrap(), + }; + + let data = Data::::new(&run); + let query = data.query(&run); + let doc = data.doc(&run); + + let optimized = QueryComputer::::new(query).chamfer(doc); + let reference = Chamfer::evaluate(query.into(), doc); + + assert!( + (optimized - reference).abs() < 1e-4, + "optimized={optimized}, reference={reference}", + ); + } +} diff --git a/results.json b/results.json new file mode 100644 index 000000000..f061f6750 --- /dev/null +++ b/results.json @@ -0,0 +1,2150 @@ +[ + { + "input": { + "content": { + "element_type": "float32", + "implementation": "optimized", + "runs": [ + { + "dim": 128, + "loops_per_measurement": 500, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "chamfer" + }, + { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "chamfer" + }, + { + "dim": 384, + "loops_per_measurement": 20, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + }, + { + "dim": 256, + "loops_per_measurement": 200, + "num_doc_vectors": 16, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": 
"chamfer" + }, + { + "dim": 264, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 64, + "operation": "chamfer" + }, + { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + }, + { + "dim": 512, + "loops_per_measurement": 2, + "num_doc_vectors": 1250, + "num_measurements": 20, + "num_query_vectors": 64, + "operation": "chamfer" + }, + { + "dim": 128, + "loops_per_measurement": 200, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 64, + "operation": "chamfer" + }, + { + "dim": 512, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + }, + { + "dim": 128, + "loops_per_measurement": 500, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "max_sim" + }, + { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "max_sim" + }, + { + "dim": 384, + "loops_per_measurement": 20, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + }, + { + "dim": 256, + "loops_per_measurement": 200, + "num_doc_vectors": 16, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + }, + { + "dim": 264, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 64, + "operation": "max_sim" + }, + { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + }, + { + "dim": 512, + "loops_per_measurement": 2, + "num_doc_vectors": 1250, + "num_measurements": 20, + "num_query_vectors": 64, + "operation": "max_sim" + }, + { + "dim": 128, + "loops_per_measurement": 200, + "num_doc_vectors": 32, + 
"num_measurements": 50, + "num_query_vectors": 64, + "operation": "max_sim" + }, + { + "dim": 512, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + ] + }, + "type": "multi-vector-op" + }, + "results": [ + { + "latencies": [ + 777, + 777, + 778, + 780, + 780, + 781, + 804, + 838, + 838, + 838, + 838, + 839, + 839, + 839, + 840, + 842, + 845, + 850, + 899, + 926, + 927, + 931, + 932, + 937, + 939, + 956, + 978, + 1034, + 1035, + 1036, + 1053, + 1064, + 1065, + 1147, + 1164, + 1165, + 1165, + 1166, + 1173, + 1221, + 1323, + 1333, + 1350, + 1352, + 1353, + 1353, + 1357, + 1393, + 1529, + 1537 + ], + "percentiles": { + "mean": 1030.32, + "median": 947.5, + "minimum": 777, + "p90": 1353, + "p99": 1537 + }, + "run": { + "dim": 128, + "loops_per_measurement": 500, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "chamfer" + } + }, + { + "latencies": [ + 1029, + 1029, + 1030, + 1030, + 1030, + 1030, + 1030, + 1031, + 1032, + 1034, + 1035, + 1038, + 1050, + 1058, + 1070, + 1112, + 1112, + 1112, + 1112, + 1112, + 1112, + 1112, + 1113, + 1117, + 1119, + 1120, + 1123, + 1145, + 1146, + 1146, + 1146, + 1148, + 1152, + 1167, + 1192, + 1192, + 1192, + 1192, + 1193, + 1207, + 1235, + 1251, + 1254, + 1256, + 1257, + 1261, + 1293, + 1330, + 1330, + 1344 + ], + "percentiles": { + "mean": 1139.22, + "median": 1119.5, + "minimum": 1029, + "p90": 1261, + "p99": 1344 + }, + "run": { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "chamfer" + } + }, + { + "latencies": [ + 1210, + 1210, + 1210, + 1210, + 1210, + 1210, + 1210, + 1210, + 1210, + 1211, + 1212, + 1212, + 1212, + 1212, + 1213, + 1213, + 1213, + 1213, + 1213, + 1214, + 1217, + 1217, + 1220, + 1223, + 1225, + 1226, + 1227, + 1229, + 1231, + 1235, + 1235, + 1239, + 1239, + 1240, + 1244, + 1249, + 1252, + 1259, 
+ 1264, + 1270, + 1281, + 1294, + 1299, + 1306, + 1312, + 1315, + 1332, + 1341, + 1383, + 1484 + ], + "percentiles": { + "mean": 1246.32, + "median": 1225.5, + "minimum": 1210, + "p90": 1315, + "p99": 1484 + }, + "run": { + "dim": 384, + "loops_per_measurement": 20, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + } + }, + { + "latencies": [ + 958, + 958, + 958, + 958, + 958, + 960, + 960, + 960, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 962, + 962, + 963, + 964, + 964, + 965, + 965, + 965, + 966, + 966, + 973, + 974, + 974, + 981, + 981, + 983, + 985, + 987, + 987, + 987, + 990, + 999, + 999 + ], + "percentiles": { + "mean": 967.42, + "median": 961.0, + "minimum": 958, + "p90": 987, + "p99": 999 + }, + "run": { + "dim": 256, + "loops_per_measurement": 200, + "num_doc_vectors": 16, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + } + }, + { + "latencies": [ + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1018, + 1018, + 1018, + 1018, + 1018, + 1018, + 1018, + 1018, + 1018, + 1019, + 1019, + 1019, + 1019, + 1020, + 1020, + 1020, + 1020, + 1020, + 1020, + 1021, + 1022, + 1023, + 1023, + 1026, + 1029, + 1031, + 1032, + 1033, + 1034, + 1035, + 1036, + 1037, + 1041, + 1044, + 1044, + 1045, + 1046, + 1065 + ], + "percentiles": { + "mean": 1024.58, + "median": 1019.5, + "minimum": 1017, + "p90": 1044, + "p99": 1065 + }, + "run": { + "dim": 264, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 64, + "operation": "chamfer" + } + }, + { + "latencies": [ + 1854, + 1855, + 1855, + 1855, + 1855, + 1855, + 1856, + 1856, + 1856, + 1857, + 1857, + 1857, + 1857, + 1857, + 1857, + 1858, + 1858, + 1858, + 1858, + 1858, + 1858, + 1858, + 1859, + 1860, + 1861, + 1861, + 1863, + 1866, + 1869, + 1870, + 
1871, + 1871, + 1871, + 1872, + 1874, + 1875, + 1881, + 1883, + 1885, + 1885, + 1890, + 1892, + 1892, + 1892, + 1892, + 1899, + 1906, + 1909, + 1909, + 1916 + ], + "percentiles": { + "mean": 1870.38, + "median": 1861.0, + "minimum": 1854, + "p90": 1899, + "p99": 1916 + }, + "run": { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + } + }, + { + "latencies": [ + 3180, + 3180, + 3180, + 3180, + 3180, + 3181, + 3181, + 3181, + 3181, + 3183, + 3185, + 3187, + 3205, + 3206, + 3207, + 3208, + 3211, + 3218, + 3220, + 3268 + ], + "percentiles": { + "mean": 3196.1, + "median": 3184.0, + "minimum": 3180, + "p90": 3220, + "p99": 3268 + }, + "run": { + "dim": 512, + "loops_per_measurement": 2, + "num_doc_vectors": 1250, + "num_measurements": 20, + "num_query_vectors": 64, + "operation": "chamfer" + } + }, + { + "latencies": [ + 1784, + 1784, + 1784, + 1784, + 1784, + 1784, + 1785, + 1785, + 1790, + 1791, + 1791, + 1792, + 1792, + 1792, + 1792, + 1792, + 1792, + 1792, + 1795, + 1795, + 1796, + 1796, + 1796, + 1796, + 1798, + 1800, + 1803, + 1805, + 1814, + 1815, + 1817, + 1818, + 1821, + 1826, + 1840, + 1845, + 1856, + 1858, + 1878, + 1879, + 1879, + 1884, + 1888, + 1890, + 1893, + 1905, + 1907, + 1912, + 1918, + 1950 + ], + "percentiles": { + "mean": 1825.26, + "median": 1799.0, + "minimum": 1784, + "p90": 1905, + "p99": 1950 + }, + "run": { + "dim": 128, + "loops_per_measurement": 200, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 64, + "operation": "chamfer" + } + }, + { + "latencies": [ + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1018, + 1018, + 1018, + 1018, + 1018, + 1019, + 1020, + 1020, + 1021, + 1021, + 1022, + 1022, + 1022, + 1023, + 1027, + 1030, + 1030, + 1035, + 1043, + 1043, + 
1044, + 1045, + 1049, + 1049, + 1060 + ], + "percentiles": { + "mean": 1023.2, + "median": 1017.5, + "minimum": 1017, + "p90": 1044, + "p99": 1060 + }, + "run": { + "dim": 512, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + } + }, + { + "latencies": [ + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 567, + 569, + 569, + 569, + 569, + 569, + 569, + 570, + 570, + 570, + 570, + 570, + 570, + 570, + 570, + 570, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 571, + 574, + 578, + 578, + 594, + 595, + 598 + ], + "percentiles": { + "mean": 571.2, + "median": 570.0, + "minimum": 567, + "p90": 578, + "p99": 598 + }, + "run": { + "dim": 128, + "loops_per_measurement": 500, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "max_sim" + } + }, + { + "latencies": [ + 988, + 988, + 988, + 988, + 988, + 988, + 988, + 988, + 988, + 989, + 989, + 989, + 989, + 989, + 989, + 989, + 989, + 989, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 991, + 992, + 992, + 992, + 992, + 992, + 992, + 992, + 992, + 992, + 996, + 996, + 1004, + 1009, + 1013, + 1018, + 1020, + 1047, + 1057 + ], + "percentiles": { + "mean": 995.1, + "median": 991.0, + "minimum": 988, + "p90": 1013, + "p99": 1057 + }, + "run": { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "max_sim" + } + }, + { + "latencies": [ + 1210, + 1210, + 1210, + 1210, + 1210, + 1210, + 1210, + 1210, + 1211, + 1211, + 1211, + 1212, + 1213, + 1213, + 1213, + 1213, + 1213, + 1213, + 1213, + 1213, + 1213, + 1213, + 1214, + 1214, + 1214, + 1214, + 1214, + 1214, + 1214, + 1214, + 1214, + 1214, + 1216, + 1217, + 1217, + 1217, + 1218, + 1220, + 1222, + 1223, + 1224, + 1224, + 1225, + 1227, 
+ 1238, + 1239, + 1239, + 1241, + 1242, + 1243 + ], + "percentiles": { + "mean": 1217.74, + "median": 1214.0, + "minimum": 1210, + "p90": 1239, + "p99": 1243 + }, + "run": { + "dim": 384, + "loops_per_measurement": 20, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + }, + { + "latencies": [ + 953, + 953, + 953, + 953, + 953, + 953, + 954, + 954, + 956, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 957, + 958, + 958, + 958, + 958, + 958, + 958, + 960, + 961, + 961, + 961, + 961, + 961, + 961, + 961, + 962, + 963, + 971, + 976, + 978, + 984, + 984, + 987 + ], + "percentiles": { + "mean": 960.1, + "median": 957.0, + "minimum": 953, + "p90": 976, + "p99": 987 + }, + "run": { + "dim": 256, + "loops_per_measurement": 200, + "num_doc_vectors": 16, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + }, + { + "latencies": [ + 1016, + 1016, + 1016, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1018, + 1018, + 1018, + 1018, + 1018, + 1018, + 1018, + 1018, + 1019, + 1019, + 1019, + 1019, + 1019, + 1019, + 1019, + 1021, + 1021, + 1023, + 1023, + 1025, + 1032, + 1044, + 1045, + 1045, + 1045, + 1047, + 1052, + 1058, + 1061 + ], + "percentiles": { + "mean": 1023.46, + "median": 1018.0, + "minimum": 1016, + "p90": 1045, + "p99": 1061 + }, + "run": { + "dim": 264, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 64, + "operation": "max_sim" + } + }, + { + "latencies": [ + 1858, + 1858, + 1860, + 1860, + 1860, + 1860, + 1860, + 1860, + 1860, + 1860, + 1860, + 1861, + 1861, + 1861, + 1861, + 1861, + 1861, + 1861, + 1862, + 1863, + 1863, + 1864, + 1865, + 1867, + 1868, + 1872, + 1873, + 1876, + 1878, + 1881, + 1882, + 1883, + 1888, + 1889, + 1889, + 1890, + 
1890, + 1890, + 1891, + 1892, + 1905, + 1906, + 1908, + 1934, + 1962, + 1967, + 1974, + 1988, + 2004, + 2014 + ], + "percentiles": { + "mean": 1887.22, + "median": 1870.0, + "minimum": 1858, + "p90": 1967, + "p99": 2014 + }, + "run": { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + }, + { + "latencies": [ + 3177, + 3177, + 3177, + 3179, + 3192, + 3201, + 3212, + 3222, + 3251, + 3251, + 3255, + 3256, + 3256, + 3321, + 3381, + 3399, + 3400, + 3419, + 3422, + 3445 + ], + "percentiles": { + "mean": 3279.65, + "median": 3253.0, + "minimum": 3177, + "p90": 3422, + "p99": 3445 + }, + "run": { + "dim": 512, + "loops_per_measurement": 2, + "num_doc_vectors": 1250, + "num_measurements": 20, + "num_query_vectors": 64, + "operation": "max_sim" + } + }, + { + "latencies": [ + 1783, + 1784, + 1787, + 1791, + 1791, + 1791, + 1813, + 1838, + 1853, + 1868, + 1871, + 1882, + 1882, + 1884, + 1890, + 1899, + 1899, + 1899, + 1900, + 1901, + 1905, + 1906, + 1908, + 1909, + 1911, + 1911, + 1911, + 1911, + 1914, + 1915, + 1915, + 1916, + 1916, + 1917, + 1919, + 1922, + 1922, + 1923, + 1923, + 1925, + 1927, + 1927, + 1928, + 1929, + 1929, + 1933, + 1937, + 1938, + 1940, + 1983 + ], + "percentiles": { + "mean": 1893.52, + "median": 1911.0, + "minimum": 1783, + "p90": 1933, + "p99": 1983 + }, + "run": { + "dim": 128, + "loops_per_measurement": 200, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 64, + "operation": "max_sim" + } + }, + { + "latencies": [ + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1017, + 1020, + 1023, + 1023, + 1025, + 1028, + 1033, + 1033, + 1034, + 1037, + 1038, + 1040, + 1043, + 1044, + 1052, + 1052, + 1057, + 1060, + 1063, + 1078, + 1088, + 1088, + 1088, + 1088, + 1088, + 1088, + 1088, + 1088, + 1090, + 1090, + 1090, + 1092, + 1093, + 1093, + 1094, + 1094 + ], + 
"percentiles": { + "mean": 1049.56, + "median": 1039.0, + "minimum": 1017, + "p90": 1092, + "p99": 1094 + }, + "run": { + "dim": 512, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + } + ] + }, + { + "input": { + "content": { + "element_type": "float16", + "implementation": "optimized", + "runs": [ + { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "chamfer" + }, + { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + }, + { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "max_sim" + }, + { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + ] + }, + "type": "multi-vector-op" + }, + "results": [ + { + "latencies": [ + 1734, + 1734, + 1736, + 1736, + 1737, + 1737, + 1737, + 1738, + 1738, + 1738, + 1738, + 1739, + 1740, + 1740, + 1741, + 1744, + 1744, + 1751, + 1751, + 1753, + 1754, + 1754, + 1756, + 1759, + 1761, + 1764, + 1767, + 1767, + 1767, + 1768, + 1768, + 1769, + 1769, + 1773, + 1774, + 1775, + 1779, + 1787, + 1794, + 1808, + 1822, + 1825, + 1829, + 1829, + 1844, + 1846, + 1852, + 1859, + 1903, + 2194 + ], + "percentiles": { + "mean": 1780.44, + "median": 1762.5, + "minimum": 1734, + "p90": 1846, + "p99": 2194 + }, + "run": { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "chamfer" + } + }, + { + "latencies": [ + 2130, + 2130, + 2130, + 2131, + 2133, + 2133, + 2140, + 2142, + 2149, + 2151, + 2158, + 2160, + 2163, + 2164, + 2166, + 2167, + 2167, + 2168, + 2171, + 2173, + 2174, + 2176, + 2177, + 2178, 
+ 2178, + 2181, + 2184, + 2189, + 2195, + 2195, + 2197, + 2198, + 2198, + 2201, + 2203, + 2207, + 2215, + 2217, + 2220, + 2229, + 2240, + 2242, + 2243, + 2249, + 2250, + 2291, + 2305, + 2438, + 2613, + 2643 + ], + "percentiles": { + "mean": 2209.04, + "median": 2179.5, + "minimum": 2130, + "p90": 2291, + "p99": 2643 + }, + "run": { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + } + }, + { + "latencies": [ + 1731, + 1733, + 1737, + 1737, + 1737, + 1741, + 1741, + 1745, + 1745, + 1750, + 1750, + 1750, + 1750, + 1751, + 1754, + 1754, + 1755, + 1758, + 1758, + 1759, + 1761, + 1761, + 1766, + 1768, + 1770, + 1771, + 1771, + 1772, + 1773, + 1773, + 1775, + 1776, + 1776, + 1778, + 1785, + 1788, + 1789, + 1791, + 1795, + 1800, + 1804, + 1808, + 1814, + 1822, + 1832, + 1833, + 1834, + 1864, + 1867, + 1869 + ], + "percentiles": { + "mean": 1776.44, + "median": 1770.5, + "minimum": 1731, + "p90": 1833, + "p99": 1869 + }, + "run": { + "dim": 256, + "loops_per_measurement": 100, + "num_doc_vectors": 64, + "num_measurements": 50, + "num_query_vectors": 16, + "operation": "max_sim" + } + }, + { + "latencies": [ + 2127, + 2127, + 2129, + 2130, + 2132, + 2141, + 2142, + 2142, + 2147, + 2148, + 2149, + 2150, + 2154, + 2154, + 2159, + 2162, + 2166, + 2168, + 2170, + 2173, + 2177, + 2180, + 2180, + 2181, + 2181, + 2182, + 2183, + 2187, + 2196, + 2196, + 2199, + 2200, + 2204, + 2211, + 2213, + 2216, + 2224, + 2255, + 2256, + 2271, + 2354, + 2488, + 2493, + 2495, + 2498, + 2505, + 2525, + 2653, + 2657, + 3515 + ], + "percentiles": { + "mean": 2264.9, + "median": 2181.5, + "minimum": 2127, + "p90": 2505, + "p99": 3515 + }, + "run": { + "dim": 128, + "loops_per_measurement": 10, + "num_doc_vectors": 1250, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + } + ] + }, + { + "input": { + "content": { + "element_type": "float32", + "implementation": 
"reference", + "runs": [ + { + "dim": 128, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "chamfer" + }, + { + "dim": 384, + "loops_per_measurement": 2, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + }, + { + "dim": 128, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "max_sim" + }, + { + "dim": 384, + "loops_per_measurement": 2, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + ] + }, + "type": "multi-vector-op" + }, + "results": [ + { + "latencies": [ + 64, + 64, + 64, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 67, + 67, + 67, + 67, + 67, + 67, + 67, + 67, + 67, + 68, + 68, + 69, + 71, + 127 + ], + "percentiles": { + "mean": 67.52, + "median": 66.0, + "minimum": 64, + "p90": 68, + "p99": 127 + }, + "run": { + "dim": 128, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "chamfer" + } + }, + { + "latencies": [ + 130, + 130, + 130, + 130, + 130, + 130, + 130, + 130, + 130, + 130, + 131, + 131, + 131, + 131, + 132, + 132, + 133, + 133, + 135, + 136, + 136, + 137, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 138, + 139, + 139, + 139, + 139, + 139, + 140, + 140, + 140, + 141, + 143, + 147, + 161 + ], + "percentiles": { + "mean": 136.26, + "median": 138.0, + "minimum": 130, + "p90": 140, + "p99": 161 + }, + "run": { + "dim": 384, + "loops_per_measurement": 2, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "chamfer" + } + }, + { + "latencies": [ + 62, + 62, + 62, + 62, + 
62, + 63, + 63, + 63, + 63, + 63, + 63, + 63, + 63, + 63, + 63, + 64, + 64, + 65, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 66, + 67, + 67, + 67, + 67, + 67, + 67, + 67, + 68, + 68, + 69, + 71, + 72, + 78, + 106 + ], + "percentiles": { + "mean": 66.44, + "median": 66.0, + "minimum": 62, + "p90": 69, + "p99": 106 + }, + "run": { + "dim": 128, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "max_sim" + } + }, + { + "latencies": [ + 130, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 131, + 132, + 132, + 132, + 132, + 132, + 133, + 133, + 134, + 134, + 135, + 135, + 135, + 136, + 136, + 137, + 139, + 139, + 140, + 142, + 142, + 143, + 144, + 145, + 145, + 147, + 155, + 158 + ], + "percentiles": { + "mean": 135.18, + "median": 132.0, + "minimum": 130, + "p90": 145, + "p99": 158 + }, + "run": { + "dim": 384, + "loops_per_measurement": 2, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + } + ] + }, + { + "input": { + "content": { + "element_type": "float16", + "implementation": "reference", + "runs": [ + { + "dim": 128, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "chamfer" + }, + { + "dim": 384, + "loops_per_measurement": 2, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + ] + }, + "type": "multi-vector-op" + }, + "results": [ + { + "latencies": [ + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 73, + 74, + 74, + 74, + 74, + 74, + 74, + 75, + 75, + 76, + 76, + 76, + 76, + 76, + 76, + 77, + 77, + 77, + 77, + 77, + 77, + 77, + 77, + 77, + 78, + 78, + 78, + 79, + 80, + 80, + 80, + 84, + 87, + 92 
+ ], + "percentiles": { + "mean": 76.0, + "median": 75.5, + "minimum": 73, + "p90": 80, + "p99": 92 + }, + "run": { + "dim": 128, + "loops_per_measurement": 50, + "num_doc_vectors": 32, + "num_measurements": 50, + "num_query_vectors": 8, + "operation": "chamfer" + } + }, + { + "latencies": [ + 135, + 135, + 135, + 135, + 135, + 135, + 135, + 135, + 136, + 136, + 137, + 138, + 140, + 141, + 141, + 141, + 141, + 141, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 142, + 143, + 143, + 143, + 144, + 144, + 145, + 145, + 145, + 147, + 150, + 151, + 151, + 153, + 154, + 158, + 158 + ], + "percentiles": { + "mean": 142.36, + "median": 142.0, + "minimum": 135, + "p90": 151, + "p99": 158 + }, + "run": { + "dim": 384, + "loops_per_measurement": 2, + "num_doc_vectors": 128, + "num_measurements": 50, + "num_query_vectors": 32, + "operation": "max_sim" + } + } + ] + } +] \ No newline at end of file From 54a21ec8f274006c433fcddf111cd2580aa184e1 Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Thu, 7 May 2026 02:38:23 +0530 Subject: [PATCH 2/7] Move some repetetive code to macros and add more benchmark cases --- .../examples/multi-vector.json | 65 ++++- diskann-benchmark-multi-vector/src/lib.rs | 228 +++++++----------- 2 files changed, 141 insertions(+), 152 deletions(-) diff --git a/diskann-benchmark-multi-vector/examples/multi-vector.json b/diskann-benchmark-multi-vector/examples/multi-vector.json index 2626e5047..553a6a9d8 100644 --- a/diskann-benchmark-multi-vector/examples/multi-vector.json +++ b/diskann-benchmark-multi-vector/examples/multi-vector.json @@ -13,7 +13,7 @@ { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, { "operation": "chamfer", "num_query_vectors": 32, 
"num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, - { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 20 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }, @@ -23,7 +23,7 @@ { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, - { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 20 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } ] @@ -35,10 +35,25 @@ "element_type": "float16", "implementation": "optimized", "runs": [ + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, { "operation": "chamfer", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, 
"loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }, + + { "operation": "max_sim", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, { "operation": "max_sim", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, - { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 } + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 
1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } ] } }, @@ -48,10 +63,25 @@ "element_type": "float32", "implementation": "reference", "runs": [ - { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 }, - { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 2, "num_measurements": 50 }, - { "operation": "max_sim", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 }, - { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 2, "num_measurements": 50 } + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 
10, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }, + + { "operation": "max_sim", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } ] } }, @@ -61,8 +91,25 @@ "element_type": "float16", "implementation": "reference", "runs": [ - { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 50, "num_measurements": 50 }, - { 
"operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 2, "num_measurements": 50 } + { "operation": "chamfer", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "chamfer", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 }, + + { "operation": "max_sim", "num_query_vectors": 8, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 500, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 16, "num_doc_vectors": 64, "dim": 256, "loops_per_measurement": 100, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 128, "dim": 384, "loops_per_measurement": 20, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 16, "dim": 256, "loops_per_measurement": 200, 
"num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 264, "loops_per_measurement": 50, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 1250, "dim": 128, "loops_per_measurement": 10, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 1250, "dim": 512, "loops_per_measurement": 2, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 64, "num_doc_vectors": 32, "dim": 128, "loops_per_measurement": 200, "num_measurements": 50 }, + { "operation": "max_sim", "num_query_vectors": 32, "num_doc_vectors": 32, "dim": 512, "loops_per_measurement": 50, "num_measurements": 50 } ] } } diff --git a/diskann-benchmark-multi-vector/src/lib.rs b/diskann-benchmark-multi-vector/src/lib.rs index 7cadf4f29..ea6a09715 100644 --- a/diskann-benchmark-multi-vector/src/lib.rs +++ b/diskann-benchmark-multi-vector/src/lib.rs @@ -293,25 +293,19 @@ impl std::fmt::Display for CheckResult { //////////////////////////// fn register_benchmarks_impl(dispatcher: &mut diskann_benchmark_runner::registry::Benchmarks) { + macro_rules! register { + ($impl:ident, $t:ty, $tag:literal) => { + dispatcher.register_regression($tag, Kernel::<$impl, $t>::new()); + }; + } + // Optimized (architecture-dispatched QueryComputer). - dispatcher.register_regression( - "multi-vector-op-f32-optimized", - Kernel::::new(), - ); - dispatcher.register_regression( - "multi-vector-op-f16-optimized", - Kernel::::new(), - ); + register!(Optimized, f32, "multi-vector-op-f32-optimized"); + register!(Optimized, f16, "multi-vector-op-f16-optimized"); // Reference (Chamfer / MaxSim fallback path). 
- dispatcher.register_regression( - "multi-vector-op-f32-reference", - Kernel::::new(), - ); - dispatcher.register_regression( - "multi-vector-op-f16-reference", - Kernel::::new(), - ); + register!(Reference, f32, "multi-vector-op-f32-reference"); + register!(Reference, f16, "multi-vector-op-f16-reference"); } ////////////// @@ -340,81 +334,52 @@ impl Kernel { } #[derive(Debug, Error)] -#[error("implementation {0} is not registered for this benchmark")] +#[error("this kernel handles a different implementation than {0}")] pub(crate) struct ImplementationMismatch(Implementation); -impl DispatchRule for Optimized { - type Error = ImplementationMismatch; - - fn try_match(from: &Implementation) -> Result { - if *from == Implementation::Optimized { - Ok(MatchScore(0)) - } else { - Err(FailureScore(1)) - } - } - - fn convert(from: Implementation) -> Result { - if from == Implementation::Optimized { - Ok(Optimized) - } else { - Err(ImplementationMismatch(from)) - } - } +macro_rules! impl_dispatch_rule { + ($marker:ident, $variant:ident, $description:literal) => { + impl DispatchRule for $marker { + type Error = ImplementationMismatch; - fn description( - f: &mut std::fmt::Formatter<'_>, - from: Option<&Implementation>, - ) -> std::fmt::Result { - match from { - None => write!(f, "QueryComputer (architecture-dispatched)"), - Some(impl_) => { - if Self::try_match(impl_).is_ok() { - write!(f, "matched {}", impl_) + fn try_match(from: &Implementation) -> Result { + if *from == Implementation::$variant { + Ok(MatchScore(0)) } else { - write!(f, "expected {}, got {}", Implementation::Optimized, impl_) + Err(FailureScore(1)) } } - } - } -} - -impl DispatchRule for Reference { - type Error = ImplementationMismatch; - fn try_match(from: &Implementation) -> Result { - if *from == Implementation::Reference { - Ok(MatchScore(0)) - } else { - Err(FailureScore(1)) - } - } - - fn convert(from: Implementation) -> Result { - if from == Implementation::Reference { - Ok(Reference) - } else { 
- Err(ImplementationMismatch(from)) - } - } - - fn description( - f: &mut std::fmt::Formatter<'_>, - from: Option<&Implementation>, - ) -> std::fmt::Result { - match from { - None => write!(f, "Chamfer / MaxSim fallback"), - Some(impl_) => { - if Self::try_match(impl_).is_ok() { - write!(f, "matched {}", impl_) + fn convert(from: Implementation) -> Result { + if from == Implementation::$variant { + Ok($marker) } else { - write!(f, "expected {}, got {}", Implementation::Reference, impl_) + Err(ImplementationMismatch(from)) + } + } + + fn description( + f: &mut std::fmt::Formatter<'_>, + from: Option<&Implementation>, + ) -> std::fmt::Result { + match from { + None => write!(f, $description), + Some(impl_) => { + if Self::try_match(impl_).is_ok() { + write!(f, "matched {}", impl_) + } else { + write!(f, "expected {}, got {}", Implementation::$variant, impl_) + } + } } } } - } + }; } +impl_dispatch_rule!(Optimized, Optimized, "QueryComputer (architecture-dispatched)"); +impl_dispatch_rule!(Reference, Reference, "Chamfer / MaxSim fallback"); + impl Benchmark for Kernel where datatype::Type: DispatchRule, @@ -446,7 +411,9 @@ where _: diskann_benchmark_runner::Checkpoint<'_>, mut output: &mut dyn diskann_benchmark_runner::Output, ) -> anyhow::Result { - let _ = I::convert(input.implementation)?; + // The dispatcher only invokes `run` after `try_match` has already accepted + // the input, so a failure here would indicate a dispatcher bug. + I::convert(input.implementation).expect("try_match accepted the input"); writeln!(output, "{}", input)?; let results = self.run_benchmark(input)?; writeln!(output, "\n\n{}", DisplayWrapper(&*results))?; @@ -717,6 +684,9 @@ where let mut results = Vec::with_capacity(input.runs.len()); for run in input.runs.iter() { let data = Data::::new(run); + // `QueryComputer` performs query-side precomputation that is intentionally + // amortized across many `chamfer` / `max_sim` calls; construct it once per + // shape, outside the timed loop. 
let computer = as NewFromMatRef>::new_from(data.query(run)); let doc = data.doc(run); @@ -748,20 +718,23 @@ where let mut results = Vec::with_capacity(input.runs.len()); for run in input.runs.iter() { let data = Data::::new(run); - let query = data.query(run); let doc = data.doc(run); + // Hoist out of the timed loop to mirror the optimized path's + // per-shape precomputation. + let query: diskann_quantization::multi_vector::distance::QueryMatRef<'_, _> = + data.query(run).into(); let result = match run.operation { Operation::Chamfer => run_loops(run, || { - let v = Chamfer::evaluate(query.into(), doc); + let v = Chamfer::evaluate(query, doc); std::hint::black_box(v); }), Operation::MaxSim => { let mut scores = vec![0.0f32; run.num_query_vectors.get()]; + let mut max_sim = MaxSim::new(&mut scores).unwrap(); run_loops(run, || { - let mut max_sim = MaxSim::new(&mut scores).unwrap(); - let _ = max_sim.evaluate(query.into(), doc); - std::hint::black_box(&mut scores); + let _ = max_sim.evaluate(query, doc); + std::hint::black_box(max_sim.scores_mut()); }) } }; @@ -770,47 +743,42 @@ where Ok(results) } -impl RunBenchmark for Kernel { - fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { - run_optimized::(input) - } -} - -impl RunBenchmark for Kernel { - fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { - run_optimized::(input) - } -} - -impl RunBenchmark for Kernel { - fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { - run_reference::(input) - } -} - -impl RunBenchmark for Kernel { - fn run_benchmark(&self, input: &MultiVectorOp) -> Result, anyhow::Error> { - run_reference::(input) - } -} - /// Element-type-erasing constructor for [`QueryComputer`]. trait NewFromMatRef { fn new_from(query: MatRef<'_, Standard>) -> QueryComputer; } -impl NewFromMatRef for QueryComputer { - fn new_from(query: MatRef<'_, Standard>) -> QueryComputer { - QueryComputer::::new(query) - } -} +macro_rules! 
impl_kernel_for { + ($t:ty) => { + impl NewFromMatRef<$t> for QueryComputer<$t> { + fn new_from(query: MatRef<'_, Standard<$t>>) -> QueryComputer<$t> { + QueryComputer::<$t>::new(query) + } + } -impl NewFromMatRef for QueryComputer { - fn new_from(query: MatRef<'_, Standard>) -> QueryComputer { - QueryComputer::::new(query) - } + impl RunBenchmark for Kernel { + fn run_benchmark( + &self, + input: &MultiVectorOp, + ) -> Result, anyhow::Error> { + run_optimized::<$t>(input) + } + } + + impl RunBenchmark for Kernel { + fn run_benchmark( + &self, + input: &MultiVectorOp, + ) -> Result, anyhow::Error> { + run_reference::<$t>(input) + } + } + }; } +impl_kernel_for!(f32); +impl_kernel_for!(f16); + /////////// // Tests // /////////// @@ -963,30 +931,4 @@ mod tests { assert!(matches!(result, PassFail::Fail(_))); } - - /// Sanity-check that the optimized kernel and the reference path produce - /// numerically equivalent Chamfer scores on a small fixture. - #[test] - fn optimized_chamfer_matches_reference_f32() { - let run = Run { - operation: Operation::Chamfer, - num_query_vectors: NonZeroUsize::new(5).unwrap(), - num_doc_vectors: NonZeroUsize::new(7).unwrap(), - dim: NonZeroUsize::new(16).unwrap(), - loops_per_measurement: NonZeroUsize::new(1).unwrap(), - num_measurements: NonZeroUsize::new(1).unwrap(), - }; - - let data = Data::::new(&run); - let query = data.query(&run); - let doc = data.doc(&run); - - let optimized = QueryComputer::::new(query).chamfer(doc); - let reference = Chamfer::evaluate(query.into(), doc); - - assert!( - (optimized - reference).abs() < 1e-4, - "optimized={optimized}, reference={reference}", - ); - } } From f3a5d9fb33cc2dbb0864c88f8bd90bbb65e26dca Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Thu, 7 May 2026 02:38:43 +0530 Subject: [PATCH 3/7] Move some repetetive code to macros and add more benchmark cases --- diskann-benchmark-multi-vector/src/lib.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/diskann-benchmark-multi-vector/src/lib.rs b/diskann-benchmark-multi-vector/src/lib.rs index ea6a09715..df08d93dd 100644 --- a/diskann-benchmark-multi-vector/src/lib.rs +++ b/diskann-benchmark-multi-vector/src/lib.rs @@ -377,7 +377,11 @@ macro_rules! impl_dispatch_rule { }; } -impl_dispatch_rule!(Optimized, Optimized, "QueryComputer (architecture-dispatched)"); +impl_dispatch_rule!( + Optimized, + Optimized, + "QueryComputer (architecture-dispatched)" +); impl_dispatch_rule!(Reference, Reference, "Chamfer / MaxSim fallback"); impl Benchmark for Kernel From 8efdbcd5e79bf48068dfc8e1b4d6a6cdfadd35f0 Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Thu, 7 May 2026 02:39:33 +0530 Subject: [PATCH 4/7] Move some repetetive code to macros and add more benchmark cases --- results.json | 2150 -------------------------------------------------- 1 file changed, 2150 deletions(-) delete mode 100644 results.json diff --git a/results.json b/results.json deleted file mode 100644 index f061f6750..000000000 --- a/results.json +++ /dev/null @@ -1,2150 +0,0 @@ -[ - { - "input": { - "content": { - "element_type": "float32", - "implementation": "optimized", - "runs": [ - { - "dim": 128, - "loops_per_measurement": 500, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "chamfer" - }, - { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "chamfer" - }, - { - "dim": 384, - "loops_per_measurement": 20, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - }, - { - "dim": 256, - "loops_per_measurement": 200, - "num_doc_vectors": 16, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - }, - { - "dim": 264, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "chamfer" - }, - { - "dim": 128, - 
"loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - }, - { - "dim": 512, - "loops_per_measurement": 2, - "num_doc_vectors": 1250, - "num_measurements": 20, - "num_query_vectors": 64, - "operation": "chamfer" - }, - { - "dim": 128, - "loops_per_measurement": 200, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "chamfer" - }, - { - "dim": 512, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - }, - { - "dim": 128, - "loops_per_measurement": 500, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "max_sim" - }, - { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "max_sim" - }, - { - "dim": 384, - "loops_per_measurement": 20, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - }, - { - "dim": 256, - "loops_per_measurement": 200, - "num_doc_vectors": 16, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - }, - { - "dim": 264, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "max_sim" - }, - { - "dim": 128, - "loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - }, - { - "dim": 512, - "loops_per_measurement": 2, - "num_doc_vectors": 1250, - "num_measurements": 20, - "num_query_vectors": 64, - "operation": "max_sim" - }, - { - "dim": 128, - "loops_per_measurement": 200, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "max_sim" - }, - { - "dim": 512, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 
32, - "operation": "max_sim" - } - ] - }, - "type": "multi-vector-op" - }, - "results": [ - { - "latencies": [ - 777, - 777, - 778, - 780, - 780, - 781, - 804, - 838, - 838, - 838, - 838, - 839, - 839, - 839, - 840, - 842, - 845, - 850, - 899, - 926, - 927, - 931, - 932, - 937, - 939, - 956, - 978, - 1034, - 1035, - 1036, - 1053, - 1064, - 1065, - 1147, - 1164, - 1165, - 1165, - 1166, - 1173, - 1221, - 1323, - 1333, - 1350, - 1352, - 1353, - 1353, - 1357, - 1393, - 1529, - 1537 - ], - "percentiles": { - "mean": 1030.32, - "median": 947.5, - "minimum": 777, - "p90": 1353, - "p99": 1537 - }, - "run": { - "dim": 128, - "loops_per_measurement": 500, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "chamfer" - } - }, - { - "latencies": [ - 1029, - 1029, - 1030, - 1030, - 1030, - 1030, - 1030, - 1031, - 1032, - 1034, - 1035, - 1038, - 1050, - 1058, - 1070, - 1112, - 1112, - 1112, - 1112, - 1112, - 1112, - 1112, - 1113, - 1117, - 1119, - 1120, - 1123, - 1145, - 1146, - 1146, - 1146, - 1148, - 1152, - 1167, - 1192, - 1192, - 1192, - 1192, - 1193, - 1207, - 1235, - 1251, - 1254, - 1256, - 1257, - 1261, - 1293, - 1330, - 1330, - 1344 - ], - "percentiles": { - "mean": 1139.22, - "median": 1119.5, - "minimum": 1029, - "p90": 1261, - "p99": 1344 - }, - "run": { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "chamfer" - } - }, - { - "latencies": [ - 1210, - 1210, - 1210, - 1210, - 1210, - 1210, - 1210, - 1210, - 1210, - 1211, - 1212, - 1212, - 1212, - 1212, - 1213, - 1213, - 1213, - 1213, - 1213, - 1214, - 1217, - 1217, - 1220, - 1223, - 1225, - 1226, - 1227, - 1229, - 1231, - 1235, - 1235, - 1239, - 1239, - 1240, - 1244, - 1249, - 1252, - 1259, - 1264, - 1270, - 1281, - 1294, - 1299, - 1306, - 1312, - 1315, - 1332, - 1341, - 1383, - 1484 - ], - "percentiles": { - "mean": 1246.32, - "median": 1225.5, - "minimum": 1210, - "p90": 1315, - "p99": 
1484 - }, - "run": { - "dim": 384, - "loops_per_measurement": 20, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - } - }, - { - "latencies": [ - 958, - 958, - 958, - 958, - 958, - 960, - 960, - 960, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 962, - 962, - 963, - 964, - 964, - 965, - 965, - 965, - 966, - 966, - 973, - 974, - 974, - 981, - 981, - 983, - 985, - 987, - 987, - 987, - 990, - 999, - 999 - ], - "percentiles": { - "mean": 967.42, - "median": 961.0, - "minimum": 958, - "p90": 987, - "p99": 999 - }, - "run": { - "dim": 256, - "loops_per_measurement": 200, - "num_doc_vectors": 16, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - } - }, - { - "latencies": [ - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1018, - 1018, - 1018, - 1018, - 1018, - 1018, - 1018, - 1018, - 1018, - 1019, - 1019, - 1019, - 1019, - 1020, - 1020, - 1020, - 1020, - 1020, - 1020, - 1021, - 1022, - 1023, - 1023, - 1026, - 1029, - 1031, - 1032, - 1033, - 1034, - 1035, - 1036, - 1037, - 1041, - 1044, - 1044, - 1045, - 1046, - 1065 - ], - "percentiles": { - "mean": 1024.58, - "median": 1019.5, - "minimum": 1017, - "p90": 1044, - "p99": 1065 - }, - "run": { - "dim": 264, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "chamfer" - } - }, - { - "latencies": [ - 1854, - 1855, - 1855, - 1855, - 1855, - 1855, - 1856, - 1856, - 1856, - 1857, - 1857, - 1857, - 1857, - 1857, - 1857, - 1858, - 1858, - 1858, - 1858, - 1858, - 1858, - 1858, - 1859, - 1860, - 1861, - 1861, - 1863, - 1866, - 1869, - 1870, - 1871, - 1871, - 1871, - 1872, - 1874, - 1875, - 1881, - 1883, - 1885, - 1885, - 1890, - 1892, - 1892, - 1892, - 1892, - 1899, - 1906, - 1909, - 1909, - 1916 - ], - "percentiles": { - "mean": 1870.38, - 
"median": 1861.0, - "minimum": 1854, - "p90": 1899, - "p99": 1916 - }, - "run": { - "dim": 128, - "loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - } - }, - { - "latencies": [ - 3180, - 3180, - 3180, - 3180, - 3180, - 3181, - 3181, - 3181, - 3181, - 3183, - 3185, - 3187, - 3205, - 3206, - 3207, - 3208, - 3211, - 3218, - 3220, - 3268 - ], - "percentiles": { - "mean": 3196.1, - "median": 3184.0, - "minimum": 3180, - "p90": 3220, - "p99": 3268 - }, - "run": { - "dim": 512, - "loops_per_measurement": 2, - "num_doc_vectors": 1250, - "num_measurements": 20, - "num_query_vectors": 64, - "operation": "chamfer" - } - }, - { - "latencies": [ - 1784, - 1784, - 1784, - 1784, - 1784, - 1784, - 1785, - 1785, - 1790, - 1791, - 1791, - 1792, - 1792, - 1792, - 1792, - 1792, - 1792, - 1792, - 1795, - 1795, - 1796, - 1796, - 1796, - 1796, - 1798, - 1800, - 1803, - 1805, - 1814, - 1815, - 1817, - 1818, - 1821, - 1826, - 1840, - 1845, - 1856, - 1858, - 1878, - 1879, - 1879, - 1884, - 1888, - 1890, - 1893, - 1905, - 1907, - 1912, - 1918, - 1950 - ], - "percentiles": { - "mean": 1825.26, - "median": 1799.0, - "minimum": 1784, - "p90": 1905, - "p99": 1950 - }, - "run": { - "dim": 128, - "loops_per_measurement": 200, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "chamfer" - } - }, - { - "latencies": [ - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1018, - 1018, - 1018, - 1018, - 1018, - 1019, - 1020, - 1020, - 1021, - 1021, - 1022, - 1022, - 1022, - 1023, - 1027, - 1030, - 1030, - 1035, - 1043, - 1043, - 1044, - 1045, - 1049, - 1049, - 1060 - ], - "percentiles": { - "mean": 1023.2, - "median": 1017.5, - "minimum": 1017, - "p90": 1044, - "p99": 1060 - }, - "run": { - "dim": 512, - "loops_per_measurement": 
50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - } - }, - { - "latencies": [ - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 567, - 569, - 569, - 569, - 569, - 569, - 569, - 570, - 570, - 570, - 570, - 570, - 570, - 570, - 570, - 570, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 571, - 574, - 578, - 578, - 594, - 595, - 598 - ], - "percentiles": { - "mean": 571.2, - "median": 570.0, - "minimum": 567, - "p90": 578, - "p99": 598 - }, - "run": { - "dim": 128, - "loops_per_measurement": 500, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "max_sim" - } - }, - { - "latencies": [ - 988, - 988, - 988, - 988, - 988, - 988, - 988, - 988, - 988, - 989, - 989, - 989, - 989, - 989, - 989, - 989, - 989, - 989, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 991, - 992, - 992, - 992, - 992, - 992, - 992, - 992, - 992, - 992, - 996, - 996, - 1004, - 1009, - 1013, - 1018, - 1020, - 1047, - 1057 - ], - "percentiles": { - "mean": 995.1, - "median": 991.0, - "minimum": 988, - "p90": 1013, - "p99": 1057 - }, - "run": { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "max_sim" - } - }, - { - "latencies": [ - 1210, - 1210, - 1210, - 1210, - 1210, - 1210, - 1210, - 1210, - 1211, - 1211, - 1211, - 1212, - 1213, - 1213, - 1213, - 1213, - 1213, - 1213, - 1213, - 1213, - 1213, - 1213, - 1214, - 1214, - 1214, - 1214, - 1214, - 1214, - 1214, - 1214, - 1214, - 1214, - 1216, - 1217, - 1217, - 1217, - 1218, - 1220, - 1222, - 1223, - 1224, - 1224, - 1225, - 1227, - 1238, - 1239, - 1239, - 1241, - 1242, - 1243 - ], - "percentiles": { - "mean": 1217.74, - "median": 1214.0, - "minimum": 1210, - "p90": 1239, - "p99": 1243 - }, - "run": { - "dim": 384, - 
"loops_per_measurement": 20, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - }, - { - "latencies": [ - 953, - 953, - 953, - 953, - 953, - 953, - 954, - 954, - 956, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 957, - 958, - 958, - 958, - 958, - 958, - 958, - 960, - 961, - 961, - 961, - 961, - 961, - 961, - 961, - 962, - 963, - 971, - 976, - 978, - 984, - 984, - 987 - ], - "percentiles": { - "mean": 960.1, - "median": 957.0, - "minimum": 953, - "p90": 976, - "p99": 987 - }, - "run": { - "dim": 256, - "loops_per_measurement": 200, - "num_doc_vectors": 16, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - }, - { - "latencies": [ - 1016, - 1016, - 1016, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1018, - 1018, - 1018, - 1018, - 1018, - 1018, - 1018, - 1018, - 1019, - 1019, - 1019, - 1019, - 1019, - 1019, - 1019, - 1021, - 1021, - 1023, - 1023, - 1025, - 1032, - 1044, - 1045, - 1045, - 1045, - 1047, - 1052, - 1058, - 1061 - ], - "percentiles": { - "mean": 1023.46, - "median": 1018.0, - "minimum": 1016, - "p90": 1045, - "p99": 1061 - }, - "run": { - "dim": 264, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "max_sim" - } - }, - { - "latencies": [ - 1858, - 1858, - 1860, - 1860, - 1860, - 1860, - 1860, - 1860, - 1860, - 1860, - 1860, - 1861, - 1861, - 1861, - 1861, - 1861, - 1861, - 1861, - 1862, - 1863, - 1863, - 1864, - 1865, - 1867, - 1868, - 1872, - 1873, - 1876, - 1878, - 1881, - 1882, - 1883, - 1888, - 1889, - 1889, - 1890, - 1890, - 1890, - 1891, - 1892, - 1905, - 1906, - 1908, - 1934, - 1962, - 1967, - 1974, - 1988, - 2004, - 2014 - ], - "percentiles": { - "mean": 1887.22, - "median": 1870.0, - "minimum": 1858, - 
"p90": 1967, - "p99": 2014 - }, - "run": { - "dim": 128, - "loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - }, - { - "latencies": [ - 3177, - 3177, - 3177, - 3179, - 3192, - 3201, - 3212, - 3222, - 3251, - 3251, - 3255, - 3256, - 3256, - 3321, - 3381, - 3399, - 3400, - 3419, - 3422, - 3445 - ], - "percentiles": { - "mean": 3279.65, - "median": 3253.0, - "minimum": 3177, - "p90": 3422, - "p99": 3445 - }, - "run": { - "dim": 512, - "loops_per_measurement": 2, - "num_doc_vectors": 1250, - "num_measurements": 20, - "num_query_vectors": 64, - "operation": "max_sim" - } - }, - { - "latencies": [ - 1783, - 1784, - 1787, - 1791, - 1791, - 1791, - 1813, - 1838, - 1853, - 1868, - 1871, - 1882, - 1882, - 1884, - 1890, - 1899, - 1899, - 1899, - 1900, - 1901, - 1905, - 1906, - 1908, - 1909, - 1911, - 1911, - 1911, - 1911, - 1914, - 1915, - 1915, - 1916, - 1916, - 1917, - 1919, - 1922, - 1922, - 1923, - 1923, - 1925, - 1927, - 1927, - 1928, - 1929, - 1929, - 1933, - 1937, - 1938, - 1940, - 1983 - ], - "percentiles": { - "mean": 1893.52, - "median": 1911.0, - "minimum": 1783, - "p90": 1933, - "p99": 1983 - }, - "run": { - "dim": 128, - "loops_per_measurement": 200, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 64, - "operation": "max_sim" - } - }, - { - "latencies": [ - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1017, - 1020, - 1023, - 1023, - 1025, - 1028, - 1033, - 1033, - 1034, - 1037, - 1038, - 1040, - 1043, - 1044, - 1052, - 1052, - 1057, - 1060, - 1063, - 1078, - 1088, - 1088, - 1088, - 1088, - 1088, - 1088, - 1088, - 1088, - 1090, - 1090, - 1090, - 1092, - 1093, - 1093, - 1094, - 1094 - ], - "percentiles": { - "mean": 1049.56, - "median": 1039.0, - "minimum": 1017, - "p90": 1092, - "p99": 1094 - }, - "run": { - "dim": 512, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - 
"num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - } - ] - }, - { - "input": { - "content": { - "element_type": "float16", - "implementation": "optimized", - "runs": [ - { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "chamfer" - }, - { - "dim": 128, - "loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - }, - { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "max_sim" - }, - { - "dim": 128, - "loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - ] - }, - "type": "multi-vector-op" - }, - "results": [ - { - "latencies": [ - 1734, - 1734, - 1736, - 1736, - 1737, - 1737, - 1737, - 1738, - 1738, - 1738, - 1738, - 1739, - 1740, - 1740, - 1741, - 1744, - 1744, - 1751, - 1751, - 1753, - 1754, - 1754, - 1756, - 1759, - 1761, - 1764, - 1767, - 1767, - 1767, - 1768, - 1768, - 1769, - 1769, - 1773, - 1774, - 1775, - 1779, - 1787, - 1794, - 1808, - 1822, - 1825, - 1829, - 1829, - 1844, - 1846, - 1852, - 1859, - 1903, - 2194 - ], - "percentiles": { - "mean": 1780.44, - "median": 1762.5, - "minimum": 1734, - "p90": 1846, - "p99": 2194 - }, - "run": { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "chamfer" - } - }, - { - "latencies": [ - 2130, - 2130, - 2130, - 2131, - 2133, - 2133, - 2140, - 2142, - 2149, - 2151, - 2158, - 2160, - 2163, - 2164, - 2166, - 2167, - 2167, - 2168, - 2171, - 2173, - 2174, - 2176, - 2177, - 2178, - 2178, - 2181, - 2184, - 2189, - 2195, - 2195, - 2197, - 2198, - 2198, - 2201, - 2203, - 2207, - 2215, - 2217, - 2220, - 2229, - 2240, - 2242, - 2243, - 2249, - 2250, - 2291, - 2305, - 2438, 
- 2613, - 2643 - ], - "percentiles": { - "mean": 2209.04, - "median": 2179.5, - "minimum": 2130, - "p90": 2291, - "p99": 2643 - }, - "run": { - "dim": 128, - "loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - } - }, - { - "latencies": [ - 1731, - 1733, - 1737, - 1737, - 1737, - 1741, - 1741, - 1745, - 1745, - 1750, - 1750, - 1750, - 1750, - 1751, - 1754, - 1754, - 1755, - 1758, - 1758, - 1759, - 1761, - 1761, - 1766, - 1768, - 1770, - 1771, - 1771, - 1772, - 1773, - 1773, - 1775, - 1776, - 1776, - 1778, - 1785, - 1788, - 1789, - 1791, - 1795, - 1800, - 1804, - 1808, - 1814, - 1822, - 1832, - 1833, - 1834, - 1864, - 1867, - 1869 - ], - "percentiles": { - "mean": 1776.44, - "median": 1770.5, - "minimum": 1731, - "p90": 1833, - "p99": 1869 - }, - "run": { - "dim": 256, - "loops_per_measurement": 100, - "num_doc_vectors": 64, - "num_measurements": 50, - "num_query_vectors": 16, - "operation": "max_sim" - } - }, - { - "latencies": [ - 2127, - 2127, - 2129, - 2130, - 2132, - 2141, - 2142, - 2142, - 2147, - 2148, - 2149, - 2150, - 2154, - 2154, - 2159, - 2162, - 2166, - 2168, - 2170, - 2173, - 2177, - 2180, - 2180, - 2181, - 2181, - 2182, - 2183, - 2187, - 2196, - 2196, - 2199, - 2200, - 2204, - 2211, - 2213, - 2216, - 2224, - 2255, - 2256, - 2271, - 2354, - 2488, - 2493, - 2495, - 2498, - 2505, - 2525, - 2653, - 2657, - 3515 - ], - "percentiles": { - "mean": 2264.9, - "median": 2181.5, - "minimum": 2127, - "p90": 2505, - "p99": 3515 - }, - "run": { - "dim": 128, - "loops_per_measurement": 10, - "num_doc_vectors": 1250, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - } - ] - }, - { - "input": { - "content": { - "element_type": "float32", - "implementation": "reference", - "runs": [ - { - "dim": 128, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "chamfer" - }, - { - "dim": 384, 
- "loops_per_measurement": 2, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - }, - { - "dim": 128, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "max_sim" - }, - { - "dim": 384, - "loops_per_measurement": 2, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - ] - }, - "type": "multi-vector-op" - }, - "results": [ - { - "latencies": [ - 64, - 64, - 64, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 67, - 67, - 67, - 67, - 67, - 67, - 67, - 67, - 67, - 68, - 68, - 69, - 71, - 127 - ], - "percentiles": { - "mean": 67.52, - "median": 66.0, - "minimum": 64, - "p90": 68, - "p99": 127 - }, - "run": { - "dim": 128, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "chamfer" - } - }, - { - "latencies": [ - 130, - 130, - 130, - 130, - 130, - 130, - 130, - 130, - 130, - 130, - 131, - 131, - 131, - 131, - 132, - 132, - 133, - 133, - 135, - 136, - 136, - 137, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 138, - 139, - 139, - 139, - 139, - 139, - 140, - 140, - 140, - 141, - 143, - 147, - 161 - ], - "percentiles": { - "mean": 136.26, - "median": 138.0, - "minimum": 130, - "p90": 140, - "p99": 161 - }, - "run": { - "dim": 384, - "loops_per_measurement": 2, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "chamfer" - } - }, - { - "latencies": [ - 62, - 62, - 62, - 62, - 62, - 63, - 63, - 63, - 63, - 63, - 63, - 63, - 63, - 63, - 63, - 64, - 64, - 65, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 66, - 67, - 
67, - 67, - 67, - 67, - 67, - 67, - 68, - 68, - 69, - 71, - 72, - 78, - 106 - ], - "percentiles": { - "mean": 66.44, - "median": 66.0, - "minimum": 62, - "p90": 69, - "p99": 106 - }, - "run": { - "dim": 128, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "max_sim" - } - }, - { - "latencies": [ - 130, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 131, - 132, - 132, - 132, - 132, - 132, - 133, - 133, - 134, - 134, - 135, - 135, - 135, - 136, - 136, - 137, - 139, - 139, - 140, - 142, - 142, - 143, - 144, - 145, - 145, - 147, - 155, - 158 - ], - "percentiles": { - "mean": 135.18, - "median": 132.0, - "minimum": 130, - "p90": 145, - "p99": 158 - }, - "run": { - "dim": 384, - "loops_per_measurement": 2, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - } - ] - }, - { - "input": { - "content": { - "element_type": "float16", - "implementation": "reference", - "runs": [ - { - "dim": 128, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - "num_measurements": 50, - "num_query_vectors": 8, - "operation": "chamfer" - }, - { - "dim": 384, - "loops_per_measurement": 2, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - ] - }, - "type": "multi-vector-op" - }, - "results": [ - { - "latencies": [ - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 73, - 74, - 74, - 74, - 74, - 74, - 74, - 75, - 75, - 76, - 76, - 76, - 76, - 76, - 76, - 77, - 77, - 77, - 77, - 77, - 77, - 77, - 77, - 77, - 78, - 78, - 78, - 79, - 80, - 80, - 80, - 84, - 87, - 92 - ], - "percentiles": { - "mean": 76.0, - "median": 75.5, - "minimum": 73, - "p90": 80, - "p99": 92 - }, - "run": { - "dim": 128, - "loops_per_measurement": 50, - "num_doc_vectors": 32, - 
"num_measurements": 50, - "num_query_vectors": 8, - "operation": "chamfer" - } - }, - { - "latencies": [ - 135, - 135, - 135, - 135, - 135, - 135, - 135, - 135, - 136, - 136, - 137, - 138, - 140, - 141, - 141, - 141, - 141, - 141, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 142, - 143, - 143, - 143, - 144, - 144, - 145, - 145, - 145, - 147, - 150, - 151, - 151, - 153, - 154, - 158, - 158 - ], - "percentiles": { - "mean": 142.36, - "median": 142.0, - "minimum": 135, - "p90": 151, - "p99": 158 - }, - "run": { - "dim": 384, - "loops_per_measurement": 2, - "num_doc_vectors": 128, - "num_measurements": 50, - "num_query_vectors": 32, - "operation": "max_sim" - } - } - ] - } -] \ No newline at end of file From 3a89c3750bef66322e5e5c3f90e91d43e941a74b Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Thu, 7 May 2026 12:09:00 +0530 Subject: [PATCH 5/7] Add Cargo.lock --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index c7b68684e..e179d3320 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -699,7 +699,7 @@ dependencies = [ [[package]] name = "diskann-benchmark-multi-vector" -version = "0.50.1" +version = "0.51.0" dependencies = [ "anyhow", "diskann-benchmark-runner", From 96d17b30378159ffdfc04b5afbeb0bf225992de2 Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Fri, 8 May 2026 01:48:37 +0530 Subject: [PATCH 6/7] Remove unused scalar benchmark config file --- .../graph_index_scalar_oai_large.json | 115 ------------------ 1 file changed, 115 deletions(-) delete mode 100644 diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json diff --git a/diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json b/diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json deleted file mode 100644 index 09752477a..000000000 --- a/diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json +++ /dev/null @@ -1,115 +0,0 @@ -{ - 
"search_directories": [ - "/mnt/nvme/s" - ], - "jobs": [ - { - "type": "graph-index-build-sq", - "content": { - "build": { - "data_type": "float16", - "data": "SentenceChunk_OAILarge_1M_normalized_1000000.bin", - "distance": "squared_l2", - "max_degree": 32, - "l_build": 100, - "alpha": 1.2, - "backedge_ratio": 1.0, - "num_threads": 8, - "multi_insert":null, - "search_phase": { - "queries": "SentenceChunk_OAILarge_query_normalized_6809.bin", - "groundtruth": "SentenceChunk-1M-gt-6k-recall-at2000", - "reps": 2, - "num_threads": [ - 8 - ], - "runs": [ - { - "search_n": 10, - "search_l": [ - 50 - ], - "recall_k": 10 - } - ] - } - }, - "num_bits": 1, - "standard_deviations": 2, - "use_fp_for_search": true - } - }, - { - "type": "graph-index-build-sq", - "content": { - "build": { - "data_type": "float16", - "data": "SentenceChunk_OAILarge_1M_normalized_1000000.bin", - "distance": "squared_l2", - "max_degree": 32, - "l_build": 100, - "alpha": 1.2, - "backedge_ratio": 1.0, - "num_threads": 8, - "multi_insert":null, - "search_phase": { - "queries": "SentenceChunk_OAILarge_query_normalized_6809.bin", - "groundtruth": "SentenceChunk-1M-gt-6k-recall-at2000", - "reps": 2, - "num_threads": [ - 8 - ], - "runs": [ - { - "search_n": 10, - "search_l": [ - 50 - ], - "recall_k": 10 - } - ] - } - }, - "num_bits": 4, - "standard_deviations": 2, - "use_fp_for_search": true - } - }, - { - "type": "graph-index-build-sq", - "content": { - "build": { - "data_type": "float16", - "data": "SentenceChunk_OAILarge_1M_normalized_1000000.bin", - "distance": "squared_l2", - "max_degree": 32, - "l_build": 100, - "alpha": 1.2, - "backedge_ratio": 1.0, - "num_threads": 8, - "multi_insert":null, - "search_phase": { - "queries": "SentenceChunk_OAILarge_query_normalized_6809.bin", - "groundtruth": "SentenceChunk-1M-gt-6k-recall-at2000", - "reps": 1, - "num_threads": [ - 8 - ], - "runs": [ - { - "search_n": 10, - "search_l": [ - 50 - ], - "recall_k": 10 - } - ] - } - }, - "num_bits": 8, - 
"standard_deviations": 2, - "use_fp_for_search": true - } - } - ] - } \ No newline at end of file From 6b33719c7b082fb6142d0b372c0c227c501fdc8c Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Fri, 8 May 2026 01:51:13 +0530 Subject: [PATCH 7/7] Revert "Remove unused scalar benchmark config file" This reverts commit 96d17b30378159ffdfc04b5afbeb0bf225992de2. --- .../graph_index_scalar_oai_large.json | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json diff --git a/diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json b/diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json new file mode 100644 index 000000000..09752477a --- /dev/null +++ b/diskann-benchmark/perf_test_inputs/graph_index_scalar_oai_large.json @@ -0,0 +1,115 @@ +{ + "search_directories": [ + "/mnt/nvme/s" + ], + "jobs": [ + { + "type": "graph-index-build-sq", + "content": { + "build": { + "data_type": "float16", + "data": "SentenceChunk_OAILarge_1M_normalized_1000000.bin", + "distance": "squared_l2", + "max_degree": 32, + "l_build": 100, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert":null, + "search_phase": { + "queries": "SentenceChunk_OAILarge_query_normalized_6809.bin", + "groundtruth": "SentenceChunk-1M-gt-6k-recall-at2000", + "reps": 2, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 50 + ], + "recall_k": 10 + } + ] + } + }, + "num_bits": 1, + "standard_deviations": 2, + "use_fp_for_search": true + } + }, + { + "type": "graph-index-build-sq", + "content": { + "build": { + "data_type": "float16", + "data": "SentenceChunk_OAILarge_1M_normalized_1000000.bin", + "distance": "squared_l2", + "max_degree": 32, + "l_build": 100, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert":null, + "search_phase": { + "queries": "SentenceChunk_OAILarge_query_normalized_6809.bin", + "groundtruth": 
"SentenceChunk-1M-gt-6k-recall-at2000", + "reps": 2, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 50 + ], + "recall_k": 10 + } + ] + } + }, + "num_bits": 4, + "standard_deviations": 2, + "use_fp_for_search": true + } + }, + { + "type": "graph-index-build-sq", + "content": { + "build": { + "data_type": "float16", + "data": "SentenceChunk_OAILarge_1M_normalized_1000000.bin", + "distance": "squared_l2", + "max_degree": 32, + "l_build": 100, + "alpha": 1.2, + "backedge_ratio": 1.0, + "num_threads": 8, + "multi_insert":null, + "search_phase": { + "queries": "SentenceChunk_OAILarge_query_normalized_6809.bin", + "groundtruth": "SentenceChunk-1M-gt-6k-recall-at2000", + "reps": 1, + "num_threads": [ + 8 + ], + "runs": [ + { + "search_n": 10, + "search_l": [ + 50 + ], + "recall_k": 10 + } + ] + } + }, + "num_bits": 8, + "standard_deviations": 2, + "use_fp_for_search": true + } + } + ] + } \ No newline at end of file