Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions benchmarks/PureHDF.Benchmarks/VariableLengthSequenceRead.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
using PureHDF;
using PureHDF.VOL.Native;

namespace Benchmark;

/// <summary>
/// Benchmarks reading a one-dimensional dataset of variable-length sequences
/// (<c>Peak[][]</c>) whose element type is a two-double struct.
/// </summary>
[MemoryDiagnoser]
public class VariableLengthSequenceRead
{
    /// <summary>
    /// Element stored in every variable-length cell: two doubles laid out sequentially.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct Peak
    {
        public double Mz;
        public double Intensity;
    }

    // Number of variable-length cells in the dataset.
    private const int CellCount = 60;

    // Number of Peak elements written into each cell.
    private const int PeaksPerCell = 200;

    private string _filePath = null!;
    private NativeFile _file = null!;
    private IH5Dataset _dataset = null!;

    /// <summary>
    /// Writes a temporary file containing <see cref="CellCount"/> cells of
    /// <see cref="PeaksPerCell"/> pseudo-random peaks each, reopens it for reading and
    /// performs one read up front.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _filePath = Path.Combine(Path.GetTempPath(), $"purehdf-variable-length-bench-{Guid.NewGuid():N}.h5");

        var data = new Peak[CellCount][];

        // Fixed seed so every run writes the same data.
        var rng = new Random(Seed: 0);

        for (int c = 0; c < CellCount; c++)
        {
            var cell = new Peak[PeaksPerCell];

            for (int i = 0; i < PeaksPerCell; i++)
            {
                cell[i] = new Peak
                {
                    Mz = 100.0 + rng.NextDouble() * 900.0,
                    Intensity = rng.NextDouble() * 1_000_000.0
                };
            }

            data[c] = cell;
        }

        var file = new H5File
        {
            ["peaks"] = new H5Dataset<Peak[][]>([(ulong)CellCount])
        };

        using (var writer = file.BeginWrite(_filePath))
        {
            writer.Write((H5Dataset<Peak[][]>)file["peaks"], data);
        }

        _file = H5File.OpenRead(_filePath);
        _dataset = _file.Dataset("peaks");

        // warm the global-heap cache
        _ = _dataset.Read<Peak[][]>();
    }

    /// <summary>
    /// Closes the file opened by <see cref="GlobalSetup"/> and deletes the temporary file.
    /// </summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // _file is still null if GlobalSetup failed before opening the file; do not
        // replace that original failure with a NullReferenceException here.
        _file?.Dispose();

        if (File.Exists(_filePath))
            File.Delete(_filePath);
    }

    /// <summary>
    /// Reads the whole dataset and returns the total number of peaks across all cells.
    /// </summary>
    /// <returns>The summed length of all cells.</returns>
    /// <exception cref="InvalidOperationException">
    /// The summed length differs from <c>CellCount * PeaksPerCell</c>.
    /// </exception>
    [Benchmark]
    public int ReadVariableLengthPeaks()
    {
        var actual = _dataset.Read<Peak[][]>();

        // sanity check
        var total = 0;

        foreach (var cell in actual)
        {
            // A missing cell counts as zero so the mismatch below reports it,
            // instead of failing with a NullReferenceException.
            total += cell?.Length ?? 0;
        }

        if (total != CellCount * PeaksPerCell)
            throw new InvalidOperationException($"Unexpected peak count: {total}");

        return total;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ DatatypePropertyDescription[] Properties
// Reflection handle to the non-public static GetDecodeInfoForUnmanagedMemory method,
// looked up once in a static readonly field. The trailing `!` asserts that the lookup
// succeeds: GetMethod returns null if no non-public static method of that name exists.
private static readonly MethodInfo _methodInfoGetDecodeInfoForUnmanagedMemory = typeof(DatatypeMessage)
.GetMethod(nameof(GetDecodeInfoForUnmanagedMemory), BindingFlags.NonPublic | BindingFlags.Static)!;

// Reflection handle to the non-public static generic method
// BuildVariableLengthSequenceUnmanagedDecoder<TElement>. It is held as an open generic
// MethodInfo because the element type is only known at run time; callers close it with
// MakeGenericMethod(elementType) before invoking it.
private static readonly MethodInfo _methodInfoBuildVariableLengthSequenceUnmanagedDecoder = typeof(DatatypeMessage)
.GetMethod(nameof(BuildVariableLengthSequenceUnmanagedDecoder), BindingFlags.NonPublic | BindingFlags.Static)!;

private byte _version;

private DatatypeMessageClass _class;
Expand Down Expand Up @@ -771,6 +774,23 @@ private ElementDecodeDelegate GetDecodeInfoForOpaqueAsByteArray()
var elementType = memoryType?.GetElementType();
(elementType, var elementDecode) = property.BaseType.GetDecodeInfoForScalar(context, elementType);

// Fast path: blittable element type whose in-memory size matches the on-disk size.
// Eliminates per-element boxing and the staging object[] allocation by casting the
// global-heap object bytes directly into a freshly allocated typed array.
if (!DataUtils.IsReferenceOrContainsReferences(elementType) &&
!property.BaseType.IsReferenceOrContainsReferences() &&
DataUtils.UnmanagedSizeOf(elementType) == (int)property.BaseType.Size)
{
var fastDecode = (ElementDecodeDelegate)_methodInfoBuildVariableLengthSequenceUnmanagedDecoder
.MakeGenericMethod(elementType)
.Invoke(default, [context, (int)property.BaseType.Size])!;

memoryType ??= Type.GetType($"{elementType}[]")
?? throw new Exception($"Unable to find array type for element type {elementType}.");

return (memoryType, fastDecode);
}

object? decode(IH5ReadStream source)
{
// https://github.com/HDFGroup/hdf5/blob/1d90890a7b38834074169ce56720b7ea7f4b01ae/src/H5Tpublic.h#L1621-L1642
Expand Down Expand Up @@ -1010,4 +1030,52 @@ static void decode(IH5ReadStream source, Span<T> target)

return decode;
}

/// <summary>
/// Builds a decoder for one variable-length sequence element whose base type can be
/// copied byte-for-byte into a <typeparamref name="TElement"/> array.
/// </summary>
/// <typeparam name="TElement">Unmanaged element type of the returned array.</typeparam>
/// <param name="context">
/// Read context. Its superblock provides the offset size used to compute the header
/// length, and it is passed to the global-heap lookup.
/// </param>
/// <param name="fileTypeSize">Size in bytes of one element as stored in the file.</param>
/// <returns>
/// A delegate that reads the sequence header (32-bit length followed by a global heap ID)
/// from the stream and returns a <typeparamref name="TElement"/> array, or <c>null</c>
/// when the heap ID is the default value or the referenced object is not present in the
/// heap collection.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="fileTypeSize"/> is not positive.
/// </exception>
private static ElementDecodeDelegate BuildVariableLengthSequenceUnmanagedDecoder<TElement>(
    NativeReadContext context,
    int fileTypeSize)
    where TElement : unmanaged
{
    if (fileTypeSize <= 0)
        throw new ArgumentOutOfRangeException(nameof(fileTypeSize), fileTypeSize, "The file type size must be positive.");

    const int lengthSize = sizeof(uint);
    var globalHeapIdSize = (int)context.Superblock.OffsetsSize + sizeof(uint);
    var headerSize = lengthSize + globalHeapIdSize;

    object? decode(IH5ReadStream source)
    {
        // The header is only a few bytes (length + heap address + object index), so it
        // lives on the stack instead of renting a pooled buffer for every element.
        Span<byte> header = stackalloc byte[headerSize];

        source.ReadDataset(header);

        var sequenceLength = BinaryPrimitives.ReadUInt32LittleEndian(header);
        var globalHeapId = ReadingGlobalHeapId.Decode(
            context.Superblock,
            header[lengthSize..]);

        if (globalHeapId.Equals(default))
            return default;

        var globalHeapCollection = NativeCache.GetGlobalHeapObject(
            context,
            globalHeapId.CollectionAddress,
            restoreAddress: true);

        if (!globalHeapCollection.GlobalHeapObjects.TryGetValue((int)globalHeapId.ObjectIndex, out var globalHeapObject))
            return default;

        if (sequenceLength == 0)
            return Array.Empty<TElement>();

        // Validate the declared length against the bytes actually present BEFORE
        // allocating. The byte count is computed in 64 bits: a 32-bit product can wrap
        // to a small positive value, which would copy fewer elements than allocated and
        // hand uninitialized array contents back to the caller.
        var objectBytes = globalHeapObject.ObjectData.AsSpan();
        var byteCount = (long)sequenceLength * fileTypeSize;

        if (byteCount > objectBytes.Length)
            throw new System.IO.InvalidDataException(
                $"Variable-length sequence declares {sequenceLength} element(s) of {fileTypeSize} byte(s) each, " +
                $"but the global heap object only holds {objectBytes.Length} byte(s).");

        // Safe casts: byteCount <= objectBytes.Length <= int.MaxValue and fileTypeSize >= 1.
        // Every element is overwritten by the copy below, so zeroing the array is unnecessary.
        var result = GC.AllocateUninitializedArray<TElement>((int)sequenceLength);

        // NOTE(review): this reinterprets the stored bytes in host byte order and relies on
        // fileTypeSize == sizeof(TElement) - presumably guaranteed by the caller; confirm.
        MemoryMarshal
            .Cast<byte, TElement>(objectBytes[..(int)byteCount])
            .CopyTo(result);

        return result;
    }

    return decode;
}
}