From d135e85f9944592ad4d8e2ae1d23fb8e18caf9c0 Mon Sep 17 00:00:00 2001 From: Mark Lambert Date: Wed, 3 Jun 2026 14:13:55 +0100 Subject: [PATCH] Add fast path for blittable variable-length sequence read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Introduce efficient decoding for blittable variable-length sequences in DatatypeMessage, avoiding boxing and allocations by casting global heap bytes directly to typed arrays. The fast path is used when element types are unmanaged and sizes match; otherwise, fall back to the existing logic. - Add a benchmark (VariableLengthSequenceRead) to measure performance improvements. Benchmark before: ``` BenchmarkDotNet v0.14.0, Windows 11 (10.0.26200.8524) 13th Gen Intel Core i9-13900KS, 1 CPU, 24 logical and 24 physical cores .NET SDK 10.0.300 [Host] : .NET 8.0.27 (8.0.2726.22922), X64 RyuJIT AVX2 DefaultJob : .NET 8.0.27 (8.0.2726.22922), X64 RyuJIT AVX2 ``` | Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated | |------------------------ |---------:|--------:|--------:|--------:|--------:|----------:| | ReadVariableLengthPeaks | 563.2 μs | 9.85 μs | 9.21 μs | 45.8984 | 18.5547 | 858.59 KB | Benchmark after: ``` BenchmarkDotNet v0.14.0, Windows 11 (10.0.26200.8524) 13th Gen Intel Core i9-13900KS, 1 CPU, 24 logical and 24 physical cores .NET SDK 10.0.300 [Host] : .NET 8.0.27 (8.0.2726.22922), X64 RyuJIT AVX2 DefaultJob : .NET 8.0.27 (8.0.2726.22922), X64 RyuJIT AVX2 ``` | Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated | |------------------------ |---------:|---------:|---------:|--------:|-------:|----------:| | ReadVariableLengthPeaks | 95.85 μs | 1.885 μs | 1.671 μs | 10.6201 | 4.3945 | 195.91 KB | --- .../VariableLengthSequenceRead.cs | 93 +++++++++++++++++++ .../Datatype/DatatypeMessage.Reading.cs | 68 ++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 benchmarks/PureHDF.Benchmarks/VariableLengthSequenceRead.cs diff --git 
a/benchmarks/PureHDF.Benchmarks/VariableLengthSequenceRead.cs b/benchmarks/PureHDF.Benchmarks/VariableLengthSequenceRead.cs new file mode 100644 index 00000000..ca7974b9 --- /dev/null +++ b/benchmarks/PureHDF.Benchmarks/VariableLengthSequenceRead.cs @@ -0,0 +1,93 @@ +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; +using PureHDF; +using PureHDF.VOL.Native; + +namespace Benchmark; + +[MemoryDiagnoser] +public class VariableLengthSequenceRead +{ + [StructLayout(LayoutKind.Sequential)] + public struct Peak + { + public double Mz; + public double Intensity; + } + + private const int CellCount = 60; + private const int PeaksPerCell = 200; + + private string _filePath = null!; + private NativeFile _file = null!; + private IH5Dataset _dataset = null!; + + [GlobalSetup] + public void GlobalSetup() + { + _filePath = Path.Combine(Path.GetTempPath(), $"purehdf-variable-length-bench-{Guid.NewGuid():N}.h5"); + + var data = new Peak[CellCount][]; + var rng = new Random(Seed: 0); + + for (int c = 0; c < CellCount; c++) + { + var cell = new Peak[PeaksPerCell]; + + for (int i = 0; i < PeaksPerCell; i++) + { + cell[i] = new Peak + { + Mz = 100.0 + rng.NextDouble() * 900.0, + Intensity = rng.NextDouble() * 1_000_000.0 + }; + } + + data[c] = cell; + } + + var file = new H5File + { + ["peaks"] = new H5Dataset([(ulong)CellCount]) + }; + + using (var writer = file.BeginWrite(_filePath)) + { + writer.Write((H5Dataset)file["peaks"], data); + } + + _file = H5File.OpenRead(_filePath); + _dataset = _file.Dataset("peaks"); + + // warm the global-heap cache + _ = _dataset.Read(); + } + + [GlobalCleanup] + public void GlobalCleanup() + { + _file.Dispose(); + + if (File.Exists(_filePath)) + File.Delete(_filePath); + } + + [Benchmark] + public int ReadVariableLengthPeaks() + { + var actual = _dataset.Read(); + + // sanity check + var total = 0; + + for (int c = 0; c < actual.Length; c++) + { + total += actual[c]!.Length; + } + + if (total != CellCount * PeaksPerCell) + 
throw new Exception($"Unexpected peak count: {total}"); + + return total; + } +} diff --git a/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs b/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs index 8bb6611f..a87456e8 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs @@ -16,6 +16,9 @@ DatatypePropertyDescription[] Properties private static readonly MethodInfo _methodInfoGetDecodeInfoForUnmanagedMemory = typeof(DatatypeMessage) .GetMethod(nameof(GetDecodeInfoForUnmanagedMemory), BindingFlags.NonPublic | BindingFlags.Static)!; + private static readonly MethodInfo _methodInfoBuildVariableLengthSequenceUnmanagedDecoder = typeof(DatatypeMessage) + .GetMethod(nameof(BuildVariableLengthSequenceUnmanagedDecoder), BindingFlags.NonPublic | BindingFlags.Static)!; + private byte _version; private DatatypeMessageClass _class; @@ -771,6 +774,23 @@ private ElementDecodeDelegate GetDecodeInfoForOpaqueAsByteArray() var elementType = memoryType?.GetElementType(); (elementType, var elementDecode) = property.BaseType.GetDecodeInfoForScalar(context, elementType); + // Fast path: blittable element type whose in-memory size matches the on-disk size. + // Eliminates per-element boxing and the staging object[] allocation by casting the + // global-heap object bytes directly into a freshly allocated typed array. 
+ if (!DataUtils.IsReferenceOrContainsReferences(elementType) && + !property.BaseType.IsReferenceOrContainsReferences() && + DataUtils.UnmanagedSizeOf(elementType) == (int)property.BaseType.Size) + { + var fastDecode = (ElementDecodeDelegate)_methodInfoBuildVariableLengthSequenceUnmanagedDecoder + .MakeGenericMethod(elementType) + .Invoke(default, [context, (int)property.BaseType.Size])!; + + memoryType ??= Type.GetType($"{elementType}[]") + ?? throw new Exception($"Unable to find array type for element type {elementType}."); + + return (memoryType, fastDecode); + } + object? decode(IH5ReadStream source) { // https://github.com/HDFGroup/hdf5/blob/1d90890a7b38834074169ce56720b7ea7f4b01ae/src/H5Tpublic.h#L1621-L1642 @@ -1010,4 +1030,52 @@ static void decode(IH5ReadStream source, Span target) return decode; } + + private static ElementDecodeDelegate BuildVariableLengthSequenceUnmanagedDecoder( + NativeReadContext context, + int fileTypeSize) + where TElement : unmanaged + { + var lengthSize = sizeof(uint); + var globalHeapIdSize = (int)context.Superblock.OffsetsSize + sizeof(uint); + var headerSize = lengthSize + globalHeapIdSize; + + object? 
decode(IH5ReadStream source) + { + using var memoryOwner = MemoryPool.Shared.Rent(headerSize); + var headerBuffer = memoryOwner.Memory[..headerSize]; + + source.ReadDataset(headerBuffer.Span); + + var sequenceLength = BinaryPrimitives.ReadUInt32LittleEndian(headerBuffer.Span); + var globalHeapId = ReadingGlobalHeapId.Decode( + context.Superblock, + headerBuffer.Span[lengthSize..]); + + if (globalHeapId.Equals(default)) + return default; + + var globalHeapCollection = NativeCache.GetGlobalHeapObject( + context, + globalHeapId.CollectionAddress, + restoreAddress: true); + + if (!globalHeapCollection.GlobalHeapObjects.TryGetValue((int)globalHeapId.ObjectIndex, out var globalHeapObject)) + return default; + + var count = (int)sequenceLength; + var result = GC.AllocateUninitializedArray(count); + + if (count == 0) + return result; + + var byteCount = count * fileTypeSize; + var source2 = MemoryMarshal.Cast(globalHeapObject.ObjectData.AsSpan(0, byteCount)); + source2.CopyTo(result); + + return result; + } + + return decode; + } } \ No newline at end of file