From 5c526f08c75df8c809080e53905b212ac71895e1 Mon Sep 17 00:00:00 2001 From: Mark Lambert Date: Thu, 4 Jun 2026 10:10:40 +0100 Subject: [PATCH] Cache reflection dispatch on hot Read paths and memoise the decode pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `NativeDataset.Read<T>`, `NativeAttribute.Read<T>`, and `DatatypeMessage`'s decode-info construction each previously did `MakeGenericMethod` + `MethodInfo.Invoke` on every call. Repeated reads against the same dataset/attribute paid the full reflection cost every time, including the per-call boxed `object[]` argument array. This commit adds three caches around those sites: - `NativeDataset` / `NativeAttribute`: a static `ConcurrentDictionary<(Type, Type), Delegate>` keyed by `(TResult, TElement)` holding a typed `ReaderDelegate<TResult>` built via `MethodInfo.CreateDelegate`. After the first call for a given type pair, dispatch is a dictionary lookup plus a direct delegate invocation. - `DatatypeMessage.GetDecodeInfo<TElement>`: a per-instance `ConcurrentDictionary<(Type, bool), Delegate>` caching the entire closure tree per `(TElement, isRawMode)`. The datatype message is immutable after the file is decoded, so the closure is safely reusable across reads. - `DatatypeMessage.GetDecodeInfoForUnmanagedElement(Type)`: a static `ConcurrentDictionary<Type, ElementDecodeDelegate>` caching the per-element decoder. Previously the closure called `MethodInfo.Invoke` on every element of every read, allocating a boxed argument array per call. Two test assertions adjusted: `CanRead_Dataspace_Null` on dataset and attribute previously expected `Assert.Throws<TargetInvocationException>` because `MethodInfo.Invoke` wrapped the inner exception. With direct delegate dispatch, exceptions surface unwrapped; tests now expect the inner exception type directly. # Benchmark `benchmarks/PureHDF.Benchmarks/ReflectionDispatch.cs` (new) isolates the per-call dispatch cost. 
Each method issues 10,000 small reads against scalar `int` / scalar 12-byte compound datasets and attributes with all caches warmed before the measured loop. Hardware: 13th Gen Intel Core i9-13900KS, Windows 11, .NET SDK 10.0.300, runtime 8.0.27, BenchmarkDotNet 0.15.8, default tiered compilation. Times normalised to microseconds. ## Before (master) | Method | Mean | Error | StdDev | Gen0 | Allocated | |------------------------------|------------:|----------:|----------:|----------:|----------:| | Dataset_ReadScalarInt | 9,803.0 μs | 190.5 μs | 219.4 μs | 1390.6250 | 25.02 MB | | Dataset_ReadScalarCompound | 11,473.0 μs | 203.8 μs | 190.7 μs | 1531.2500 | 27.85 MB | | Attribute_ReadScalarInt | 5,752.0 μs | 95.7 μs | 89.5 μs | 507.8125 | 9.23 MB | | Attribute_ReadScalarCompound | 7,555.0 μs | 115.2 μs | 102.2 μs | 656.2500 | 12.05 MB | ## After (this commit) | Method | Mean | Error | StdDev | Gen0 | Allocated | |------------------------------|------------:|----------:|----------:|----------:|----------:| | Dataset_ReadScalarInt | 4,048.5 μs | 78.4 μs | 139.4 μs | 945.3125 | 17.09 MB | | Dataset_ReadScalarCompound | 5,298.6 μs | 101.5 μs | 108.6 μs | 1046.8750 | 18.92 MB | | Attribute_ReadScalarInt | 669.5 μs | 9.1 μs | 7.6 μs | 118.1641 | 2.14 MB | | Attribute_ReadScalarCompound | 1,790.0 μs | 17.9 μs | 14.9 μs | 220.7031 | 3.97 MB | ## Summary | Method | Speedup | Alloc ratio | |------------------------------|--------:|------------:| | Dataset_ReadScalarInt | 2.4× | 0.68× | | Dataset_ReadScalarCompound | 2.2× | 0.68× | | Attribute_ReadScalarInt | 8.6× | 0.23× | | Attribute_ReadScalarCompound | 4.2× | 0.33× | --- .../PureHDF.Benchmarks.csproj | 2 +- .../PureHDF.Benchmarks/ReflectionDispatch.cs | 135 ++++++++++++++++++ .../VOL/Native/API.Reading/NativeAttribute.cs | 50 ++++--- .../VOL/Native/API.Reading/NativeDataset.cs | 53 ++++--- .../Datatype/DatatypeMessage.Reading.cs | 57 ++++++-- .../Reading/AttributeTests@dataspace.cs | 5 +- 
.../Reading/DatasetTests@dataspace.cs | 5 +- 7 files changed, 251 insertions(+), 56 deletions(-) create mode 100644 benchmarks/PureHDF.Benchmarks/ReflectionDispatch.cs diff --git a/benchmarks/PureHDF.Benchmarks/PureHDF.Benchmarks.csproj b/benchmarks/PureHDF.Benchmarks/PureHDF.Benchmarks.csproj index 5b80fac0..2c289b41 100644 --- a/benchmarks/PureHDF.Benchmarks/PureHDF.Benchmarks.csproj +++ b/benchmarks/PureHDF.Benchmarks/PureHDF.Benchmarks.csproj @@ -10,7 +10,7 @@ - + diff --git a/benchmarks/PureHDF.Benchmarks/ReflectionDispatch.cs b/benchmarks/PureHDF.Benchmarks/ReflectionDispatch.cs new file mode 100644 index 00000000..8aa4b5bc --- /dev/null +++ b/benchmarks/PureHDF.Benchmarks/ReflectionDispatch.cs @@ -0,0 +1,135 @@ +using BenchmarkDotNet.Attributes; +using PureHDF; +using System.Runtime.InteropServices; + +namespace Benchmark; + +// Isolates the per-call dispatch cost on the three sites where reflection +// caching was added: +// +// 1. NativeAttribute.Read — (TResult, TElement) reader delegate cache +// 2. NativeDataset.Read — same pattern +// 3. DatatypeMessage.GetDecodeInfo — closure-tree cache, plus the inner +// GetDecodeInfoForUnmanagedElement(Type) per-Type delegate cache used while +// building the compound decoder. +// +// The payload on each Read is intentionally tiny (one scalar or one small +// blittable compound), so per-call cost is dominated by the dispatch path +// being measured rather than by the actual decode work. A high iteration +// count inside each [Benchmark] method amplifies the per-call signal. +// +// Compound variants additionally exercise the static +// GetDecodeInfoForUnmanagedElement(Type) cache because the compound branch +// of BuildDecodeInfo routes the known-compound case through the Type-keyed +// overload (DatatypeMessage.Reading.cs:438). +[MemoryDiagnoser] +public class ReflectionDispatch +{ + // Pack = 1 keeps the on-disk size predictable (12 B: double + float, no + // trailing pad). 
Matches the shape used by VariableLengthCompoundRead on + // the perf branch so numbers are comparable. + [StructLayout(LayoutKind.Sequential, Pack = 1)] + public struct Sample + { + public double X; + public float Y; + } + + private const int Iterations = 10_000; + + private string _filePath = default!; + private IDisposable _file = default!; + private IH5Dataset _scalarIntDataset = default!; + private IH5Dataset _scalarSampleDataset = default!; + private IH5Attribute _scalarIntAttribute = default!; + private IH5Attribute _scalarSampleAttribute = default!; + + [GlobalSetup] + public void GlobalSetup() + { + _filePath = Path.Combine( + Path.GetTempPath(), + $"purehdf-reflection-bench-{Guid.NewGuid():N}.h5"); + + var writeFile = new H5File + { + ["scalar_int"] = new H5Dataset(data: 42), + ["scalar_sample"] = new H5Dataset(data: new Sample { X = 1.5, Y = 2.5f }) + }; + + writeFile.Attributes["scalar_int"] = 42; + writeFile.Attributes["scalar_sample"] = new Sample { X = 1.5, Y = 2.5f }; + + writeFile.Write(_filePath); + + var root = H5File.OpenRead(_filePath); + _file = root; + + _scalarIntDataset = root.Dataset("scalar_int"); + _scalarSampleDataset = root.Dataset("scalar_sample"); + _scalarIntAttribute = root.Attribute("scalar_int"); + _scalarSampleAttribute = root.Attribute("scalar_sample"); + + // Warm the per-instance / per-Type caches so the measured loop is + // steady-state cache-hit behaviour, not cold-build. 
+ _ = _scalarIntDataset.Read(); + _ = _scalarSampleDataset.Read(); + _ = _scalarIntAttribute.Read(); + _ = _scalarSampleAttribute.Read(); + } + + [GlobalCleanup] + public void GlobalCleanup() + { + _file?.Dispose(); + + if (File.Exists(_filePath)) + { + try { File.Delete(_filePath); } catch { /* ignore */ } + } + } + + [Benchmark] + public int Dataset_ReadScalarInt() + { + var total = 0; + + for (var i = 0; i < Iterations; i++) + total += _scalarIntDataset.Read(); + + return total; + } + + [Benchmark] + public double Dataset_ReadScalarCompound() + { + var total = 0.0; + + for (var i = 0; i < Iterations; i++) + total += _scalarSampleDataset.Read().X; + + return total; + } + + [Benchmark] + public int Attribute_ReadScalarInt() + { + var total = 0; + + for (var i = 0; i < Iterations; i++) + total += _scalarIntAttribute.Read(); + + return total; + } + + [Benchmark] + public double Attribute_ReadScalarCompound() + { + var total = 0.0; + + for (var i = 0; i < Iterations; i++) + total += _scalarSampleAttribute.Read().X; + + return total; + } +} diff --git a/src/PureHDF/VOL/Native/API.Reading/NativeAttribute.cs b/src/PureHDF/VOL/Native/API.Reading/NativeAttribute.cs index 4f8827cb..fef59438 100644 --- a/src/PureHDF/VOL/Native/API.Reading/NativeAttribute.cs +++ b/src/PureHDF/VOL/Native/API.Reading/NativeAttribute.cs @@ -1,4 +1,5 @@ -using System.Reflection; +using System.Collections.Concurrent; +using System.Reflection; namespace PureHDF.VOL.Native; @@ -12,6 +13,29 @@ public class NativeAttribute : IH5Attribute private static readonly MethodInfo _methodInfoReadCoreLevel1_Generic = typeof(NativeAttribute) .GetMethod(nameof(ReadCoreLevel1_generic), BindingFlags.NonPublic | BindingFlags.Instance)!; + // Delegate type for reads, including an instance parameter. + // Statically cached keyed by (TResult, TElement). + private delegate TResult? ReaderDelegate( + NativeAttribute @this, + TResult? buffer, + IH5ReadStream source, + ulong[]? 
memoryDims); + + private static readonly ConcurrentDictionary<(Type, Type), Delegate> _readerCache = new(); + + private static ReaderDelegate GetReader(Type elementType) + { + return (ReaderDelegate)_readerCache.GetOrAdd( + (typeof(TResult), elementType), + static key => + { + var method = _methodInfoReadCoreLevel1_Generic + .MakeGenericMethod(key.Item1, key.Item2); + var delegateType = typeof(ReaderDelegate<>).MakeGenericType(key.Item1); + return method.CreateDelegate(delegateType); + }); + } + private IH5Dataspace? _space; private IH5DataType? _type; private readonly NativeReadContext _context; @@ -74,19 +98,10 @@ public T Read( ulong[]? memoryDims = null) { var (elementType, _) = WriteUtils.GetElementType(typeof(T)); - - // TODO cache this - var method = _methodInfoReadCoreLevel1_Generic.MakeGenericMethod(typeof(T), elementType); + var reader = GetReader(elementType); var source = new SystemMemoryStream(Message.InputData); - var result = (T)method.Invoke(this, - [ - default /* buffer */, - source, - memoryDims - ])!; - - return result; + return reader(this, buffer: default, source, memoryDims)!; } /// @@ -95,17 +110,10 @@ public void Read( ulong[]? 
memoryDims = null) { var (elementType, _) = WriteUtils.GetElementType(typeof(T)); - - // TODO cache this - var method = _methodInfoReadCoreLevel1_Generic.MakeGenericMethod(typeof(T), elementType); + var reader = GetReader(elementType); var source = new SystemMemoryStream(Message.InputData); - method.Invoke(this, - [ - buffer, - source, - memoryDims - ]); + reader(this, buffer, source, memoryDims); } /* This overload is required because Span is not allowed as generic argument and diff --git a/src/PureHDF/VOL/Native/API.Reading/NativeDataset.cs b/src/PureHDF/VOL/Native/API.Reading/NativeDataset.cs index 152bbb4d..978b30e2 100644 --- a/src/PureHDF/VOL/Native/API.Reading/NativeDataset.cs +++ b/src/PureHDF/VOL/Native/API.Reading/NativeDataset.cs @@ -1,5 +1,6 @@ using PureHDF.Selections; using System.Buffers; +using System.Collections.Concurrent; using System.Reflection; namespace PureHDF.VOL.Native; @@ -14,6 +15,32 @@ public class NativeDataset : NativeObject, IH5Dataset private static readonly MethodInfo _methodInfoReadCoreLevel1_Generic = typeof(NativeDataset) .GetMethod(nameof(ReadCoreLevel1_Generic), BindingFlags.NonPublic | BindingFlags.Instance)!; + // Delegate type for reads, including an instance parameter. + // Statically cached keyed by (TResult, TElement). + private delegate TResult? ReaderDelegate( + NativeDataset @this, + TResult? buffer, + Selection? fileSelection, + Selection? memorySelection, + ulong[]? 
memoryDims, + H5DatasetAccess datasetAccess, + bool skipShuffle); + + private static readonly ConcurrentDictionary<(Type, Type), Delegate> _readerCache = new(); + + private static ReaderDelegate GetReader(Type elementType) + { + return (ReaderDelegate)_readerCache.GetOrAdd( + (typeof(TResult), elementType), + static key => + { + var method = _methodInfoReadCoreLevel1_Generic + .MakeGenericMethod(key.Item1, key.Item2); + var delegateType = typeof(ReaderDelegate<>).MakeGenericType(key.Item1); + return method.CreateDelegate(delegateType); + }); + } + private IH5Dataspace? _space; private IH5DataType? _type; private IH5DataLayout? _layout; @@ -196,21 +223,16 @@ public T Read( ulong[]? memoryDims = default) { var (elementType, _) = WriteUtils.GetElementType(typeof(T)); + var reader = GetReader(elementType); - // TODO cache this - var method = _methodInfoReadCoreLevel1_Generic.MakeGenericMethod(typeof(T), elementType); - - var result = (T)method.Invoke(this, - [ - default /* buffer */, + return reader( + this, + buffer: default, fileSelection, memorySelection, memoryDims, datasetAccess, - /* skip shuffle: */ false - ])!; - - return result; + skipShuffle: false)!; } /// @@ -230,19 +252,16 @@ public void Read( ulong[]? 
memoryDims = default) { var (elementType, _) = WriteUtils.GetElementType(typeof(T)); + var reader = GetReader(elementType); - // TODO cache this - var method = _methodInfoReadCoreLevel1_Generic.MakeGenericMethod(typeof(T), elementType); - - method.Invoke(this, - [ + reader( + this, buffer, fileSelection, memorySelection, memoryDims, datasetAccess, - /* skip shuffle: */ false - ]); + skipShuffle: false); } /* The following two methods are required because Span is not allowed as generic diff --git a/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs b/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs index 8bb6611f..7dcb7e5d 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs @@ -1,5 +1,6 @@ using System.Buffers; using System.Buffers.Binary; +using System.Collections.Concurrent; using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -135,9 +136,33 @@ public bool IsReferenceOrContainsReferences() }; } + // Caches the DecodeDelegate produced for each (TElement, isRawMode) + // pair on this DatatypeMessage instance, so repeated Read calls on the same + // dataset reuse one decoder instead of rebuilding the closure tree (and paying + // its inner MethodInfo.Invoke into GetDecodeInfoForUnmanagedMemory) every time. + // + // The cached closures capture the NativeReadContext seen on first build. That + // is safe because each NativeDataset / NativeAttribute owns its own + // DatatypeMessage, which is only ever used with the single NativeReadContext + // belonging to the file it was decoded from. 
+ private readonly ConcurrentDictionary<(Type, bool), Delegate> _decodeInfoCache = new(); + public DecodeDelegate GetDecodeInfo( NativeReadContext context, bool isRawMode) + { + var key = (typeof(TElement), isRawMode); + + if (_decodeInfoCache.TryGetValue(key, out var cached)) + return (DecodeDelegate)cached; + + var built = BuildDecodeInfo(context, isRawMode); + return (DecodeDelegate)_decodeInfoCache.GetOrAdd(key, built); + } + + private DecodeDelegate BuildDecodeInfo( + NativeReadContext context, + bool isRawMode) { var memoryIsRef = DataUtils.IsReferenceOrContainsReferences(typeof(TElement)); var fileIsRef = IsReferenceOrContainsReferences(); @@ -149,7 +174,6 @@ public DecodeDelegate GetDecodeInfo( var fileTypeSize = Size; // according to type-mismatch-behavior.md - // TODO cache return (memoryIsRef, fileIsRef) switch { (true, _) @@ -299,19 +323,30 @@ private ElementDecodeDelegate GetDecodeInfoForUnmanagedElement() where T : st return decode; } + // Builds and caches one ElementDecodeDelegate per element Type. Previously the + // closure performed MethodInfo.Invoke on every element, which allocated a boxed + // argument array per call and dominated CPU on element-heavy reads. Routing + // through a typed delegate built once per element type pays the reflection + // once at cache-miss time and makes the per-element call a direct invocation + // of the generic GetDecodeInfoForUnmanagedElement. 
+ private static readonly ConcurrentDictionary _unmanagedElementDecoderCache = new(); + + private static readonly MethodInfo _methodInfoGetDecodeInfoForUnmanagedElement = typeof(DatatypeMessage) + .GetMethod( + nameof(GetDecodeInfoForUnmanagedElement), + genericParameterCount: 1, + BindingFlags.NonPublic | BindingFlags.Instance, + binder: null, + types: Type.EmptyTypes, + modifiers: null)!; + private ElementDecodeDelegate GetDecodeInfoForUnmanagedElement(Type type) { - // TODO: cache - var invokeDecodeUnmanagedElement = ReadUtils.MethodInfoDecodeUnmanagedElement.MakeGenericMethod(type); - var parameters = new object[1]; - - object? decode(IH5ReadStream source) + return _unmanagedElementDecoderCache.GetOrAdd(type, t => { - parameters[0] = source; - return invokeDecodeUnmanagedElement.Invoke(default, parameters); - } - - return decode; + var method = _methodInfoGetDecodeInfoForUnmanagedElement.MakeGenericMethod(t); + return (ElementDecodeDelegate)method.Invoke(this, parameters: null)!; + }); } private (Type, ElementDecodeDelegate) GetDecodeInfoForCompound( diff --git a/tests/PureHDF.Tests/Reading/AttributeTests@dataspace.cs b/tests/PureHDF.Tests/Reading/AttributeTests@dataspace.cs index 8ce9e06d..796a34a8 100644 --- a/tests/PureHDF.Tests/Reading/AttributeTests@dataspace.cs +++ b/tests/PureHDF.Tests/Reading/AttributeTests@dataspace.cs @@ -1,5 +1,4 @@ -using System.Reflection; -using Xunit; +using Xunit; namespace PureHDF.Tests.Reading; @@ -42,7 +41,7 @@ public void CanRead_Dataspace_Null() void action() => attribute.Read(); // Assert - Assert.Throws(action); + Assert.Throws(action); }); } } \ No newline at end of file diff --git a/tests/PureHDF.Tests/Reading/DatasetTests@dataspace.cs b/tests/PureHDF.Tests/Reading/DatasetTests@dataspace.cs index b7475d56..8b1c7f2b 100644 --- a/tests/PureHDF.Tests/Reading/DatasetTests@dataspace.cs +++ b/tests/PureHDF.Tests/Reading/DatasetTests@dataspace.cs @@ -1,5 +1,4 @@ -using System.Reflection; -using Xunit; +using Xunit; namespace 
PureHDF.Tests.Reading; @@ -42,7 +41,7 @@ public void CanRead_Dataspace_Null() void action() => dataset.Read(); // Assert - Assert.Throws(action); + Assert.Throws(action); }); } } \ No newline at end of file