using BenchmarkDotNet.Attributes;
using PureHDF;
using PureHDF.Selections;
using PureHDF.VOL.Native;
using System.Runtime.InteropServices;

namespace Benchmark;

// Exercises Read on a 1-D dataset of variable-length sequences of a
// small blittable struct, under three access patterns:
//
//   - ReadAll      : 1 Read call for the whole dataset
//   - ReadByWindow : 10 Read calls of 60 cells each
//   - ReadPerCell  : 600 Read calls of 1 cell each
//
// Each access pattern measures the same total decode work but a different
// per-Read-call multiplier, so the relative cost of per-Read fixed overhead
// versus per-cell decode work shows up across the three rows.
[MemoryDiagnoser]
public class VariableLengthCompoundRead
{
    /// <summary>Element type stored inside every variable-length cell.</summary>
    [StructLayout(LayoutKind.Sequential, Pack = 1)]
    public struct Sample
    {
        public double X;
        public float Y;
    }

    // Number of variable-length cells in the dataset.
    private const int CellCount = 600;

    // Number of Sample elements written into every cell.
    private const int ElementsPerCell = 200;

    // Number of cells requested per Read call in ReadByWindow.
    private const int WindowSize = 60;

    private string _filePath = default!;
    private NativeFile _file = default!;
    private IH5Dataset _dataset = default!;

    /// <summary>
    /// Writes a temporary HDF5 file holding <c>CellCount</c> cells of
    /// <c>ElementsPerCell</c> samples each, reopens it for reading and
    /// verifies that a full read round-trips the expected shape.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The data read back does not have the expected cell count or cell length.
    /// </exception>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _filePath = Path.Combine(Path.GetTempPath(), $"purehdf-vl-bench-{Guid.NewGuid():N}.h5");

        try
        {
            // Fixed seed keeps the file contents identical between runs.
            var random = new Random(42);
            var data = new Sample[CellCount][];

            for (int i = 0; i < CellCount; i++)
            {
                var cell = new Sample[ElementsPerCell];

                for (int j = 0; j < ElementsPerCell; j++)
                {
                    cell[j] = new Sample { X = random.NextDouble(), Y = (float)random.NextDouble() };
                }

                data[i] = cell;
            }

            var writeFile = new H5File();
            var declared = new H5Dataset<Sample[][]>([(ulong)CellCount]);
            writeFile["dataset"] = declared;

            using (var writer = writeFile.BeginWrite(_filePath))
            {
                writer.Write(declared, data);
            }

            _file = H5File.OpenRead(_filePath);
            _dataset = _file.Dataset("dataset");

            var probe = _dataset.Read<Sample[][]>()!;

            if (probe.Length != CellCount)
            {
                throw new InvalidOperationException(
                    $"setup produced {probe.Length} cells, expected {CellCount}");
            }

            for (int i = 0; i < CellCount; i++)
            {
                if (probe[i] is null || probe[i]!.Length != ElementsPerCell)
                {
                    throw new InvalidOperationException(
                        $"cell {i} has length {probe[i]?.Length ?? -1}, expected {ElementsPerCell}");
                }
            }
        }
        catch
        {
            // Do not rely on the harness running GlobalCleanup after a failed
            // setup: release the file handle and the temp file here, then
            // surface the original error unchanged.
            GlobalCleanup();
            throw;
        }
    }

    /// <summary>
    /// Closes the benchmark file and removes it from the temp directory.
    /// Deleting is best-effort: a locked or protected file is left behind.
    /// </summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        _file?.Dispose();

        if (File.Exists(_filePath))
        {
            try
            {
                File.Delete(_filePath);
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                // Best-effort cleanup of a temp file; nothing useful to do on failure.
            }
        }
    }

    /// <summary>Reads the whole dataset with a single Read call.</summary>
    /// <returns>Total number of decoded elements (keeps the read observable).</returns>
    [Benchmark(Baseline = true)]
    public int ReadAll()
    {
        var result = _dataset.Read<Sample[][]>()!;
        return CountElements(result);
    }

    /// <summary>Reads the dataset in consecutive windows of <c>WindowSize</c> cells.</summary>
    /// <returns>Total number of decoded elements (keeps the read observable).</returns>
    [Benchmark]
    public int ReadByWindow()
    {
        var total = 0;

        for (int start = 0; start < CellCount; start += WindowSize)
        {
            // Clamp the final window so every cell is read even when CellCount
            // is not a multiple of WindowSize; otherwise this row would decode
            // less data than the other two.
            var length = Math.Min(WindowSize, CellCount - start);

            var sel = new HyperslabSelection(start: (ulong)start, block: (ulong)length);
            var window = _dataset.Read<Sample[][]>(
                fileSelection: sel,
                memoryDims: [(ulong)length])!;

            total += CountElements(window);
        }

        return total;
    }

    /// <summary>Reads the dataset one cell per Read call.</summary>
    /// <returns>Total number of decoded elements (keeps the read observable).</returns>
    [Benchmark]
    public int ReadPerCell()
    {
        var total = 0;

        for (int i = 0; i < CellCount; i++)
        {
            var sel = new HyperslabSelection(start: (ulong)i, block: 1);
            var cell = _dataset.Read<Sample[][]>(
                fileSelection: sel,
                memoryDims: [1UL])!;

            total += cell[0]?.Length ?? 0;
        }

        return total;
    }

    // Sums the lengths of all cells, counting a null cell as empty.
    private static int CountElements(Sample[][] cells)
    {
        var total = 0;

        for (int i = 0; i < cells.Length; i++)
        {
            total += cells[i]?.Length ?? 0;
        }

        return total;
    }
}
// Variable-length sequences and strings store a fixed-size header (length +
// global heap id) per cell in the dataset stream; the payload lives in the
// global heap. Decoding cell by cell therefore issues one small ReadDataset
// call per cell. For these two kinds the headers are instead pre-read in
// bulk and the unchanged per-cell decoder is fed from an in-memory stream.
var isVariableLengthHeaderBatchable =
    Class == DatatypeMessageClass.VariableLength &&
    BitField is VariableLengthBitFieldDescription
    {
        Type: InternalVariableLengthType.Sequence or InternalVariableLengthType.String
    };

if (isVariableLengthHeaderBatchable)
{
    // Bytes consumed per cell: a uint, an OffsetsSize-wide field and another uint.
    var cellHeaderSize = sizeof(uint) + (int)context.Superblock.OffsetsSize + sizeof(uint);

    // Cap the scratch buffer. This keeps the byte count far away from int
    // overflow (Length * cellHeaderSize wraps for very large selections) and
    // keeps the rented array size independent of the selection size.
    const int MaxBatchBytes = 1 << 20;
    var maxCellsPerBatch = Math.Max(1, MaxBatchBytes / cellHeaderSize);

    // Decodes target.Length cells from source, reading the cell headers in
    // batches of at most maxCellsPerBatch. Batches are read back to back,
    // relying on the same sequential-read behaviour of source that the
    // per-cell path depends on.
    void decodeBatched(IH5ReadStream source, Span<T> target)
    {
        if (target.IsEmpty)
            return;

        var batchCapacity = Math.Min(target.Length, maxCellsPerBatch);
        var rented = ArrayPool<byte>.Shared.Rent(batchCapacity * cellHeaderSize);

        try
        {
            var remaining = target;

            while (!remaining.IsEmpty)
            {
                var batch = remaining.Slice(0, Math.Min(batchCapacity, remaining.Length));

                // Rent may hand back a larger array; expose only the bytes in use.
                var headers = rented.AsMemory(0, batch.Length * cellHeaderSize);
                source.ReadDataset(headers.Span);

                var localSource = new SystemMemoryStream(headers);

                for (int i = 0; i < batch.Length; i++)
                {
                    batch[i] = (T)elementDecode(localSource)!;
                }

                remaining = remaining.Slice(batch.Length);
            }
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(rented);
        }
    }

    return decodeBatched;
}