Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,16 @@ public static ChunkedStoragePropertyDescription4 Decode(NativeReadContext contex

/// <summary>
/// Computes the number of bytes this chunked storage description occupies when encoded.
/// </summary>
/// <returns>
/// The sum of the one-byte header fields, the dimension sizes, the indexing
/// information and the trailing address, truncated to <see cref="ushort"/>.
/// </returns>
public override ushort GetEncodeSize()
{
    // All dimension sizes share one byte width, derived from the largest value.
    // Encode() writes the entries with this same width, so both must agree.
    var encLen = ComputeEncodedLength(DimensionSizes);

    var encodeSize =
        sizeof(byte) + // flags
        sizeof(byte) + // dimensionality (rank)
        sizeof(byte) + // dimension size encoded length
        encLen * Rank + // dimension sizes (variable byte width)
        sizeof(byte) + // chunk indexing type
        IndexingInformation.GetEncodeSize(Flags) +
        sizeof(ulong); // address

    return (ushort)encodeSize;
}
Expand All @@ -218,17 +220,21 @@ public override void Encode(H5DriverBase driver)
// dimensionality
driver.Write(Rank);

// dimension size encoded length
driver.Write((byte)8);

// dimension sizes
for (int i = 0; i < Rank - 1; i++)
// dimension size encoded length: minimum number of bytes needed to encode
// the largest chunk dimension. libhdf5's H5D__chunk_set_sizes() in
// src/H5Dchunk.c strictly enforces (`!=` check) that this value matches its
// own calculation; hardcoding a different value (e.g. 8) produces files h5py /
// HDFView / MATLAB / Imaris reject with "stored chunk dimension encoding
// length does not match value calculated from chunk dimensions".
var encLen = ComputeEncodedLength(DimensionSizes);
driver.Write(encLen);

// dimension sizes (variable byte width per encLen, last entry is element size)
for (int i = 0; i < Rank; i++)
{
driver.Write(DimensionSizes[i]);
WriteUtils.WriteUlongArbitrary(driver, DimensionSizes[i], encLen);
}

driver.Write((ulong)4);

// chunk indexing type
var indexingType = IndexingInformation switch
{
Expand All @@ -250,6 +256,33 @@ public override void Encode(H5DriverBase driver)

IsDirty = false;
}

// Returns the number of bytes needed to hold the largest entry of dimensionSizes,
// i.e. how many times that value must be shifted right by 8 bits before it
// becomes zero. Intended to mirror the byte-counting logic of libhdf5's
// H5D__chunk_set_sizes(). The result is never below 1: an empty array or an
// array of zeros still yields 1 (encoded length must be at least 1 per HDF5 spec).
private static byte ComputeEncodedLength(ulong[] dimensionSizes)
{
    var largest = 0UL;

    foreach (var size in dimensionSizes)
    {
        if (size > largest)
        {
            largest = size;
        }
    }

    byte byteCount = 0;

    // do/while counts at least one byte, which covers the "largest == 0" case
    // without a separate early return.
    do
    {
        byteCount++;
        largest >>= 8;
    }
    while (largest != 0);

    return byteCount;
}
}

internal record class VirtualStoragePropertyDescription(
Expand Down
81 changes: 81 additions & 0 deletions tests/PureHDF.Tests/Writing/DatasetTests@layout_chunked.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Xunit;
using System.Reflection;
using HDF.PInvoke;
using PureHDF.Filters;

namespace PureHDF.Tests.Writing;
Expand Down Expand Up @@ -386,4 +387,84 @@ public void ThrowsForInvalidChunkDimensions()
File.Delete(filePath);
}
}

// Cross-library compatibility test for chunk dimension encoded length.
// Pre-fix: chunked layouts always wrote (byte)8 as the encoded length, which
// libhdf5's H5D__chunk_set_sizes() rejects with
// "stored chunk dimension encoding length does not match value calculated from chunk dimensions"
// because libhdf5 expects the *minimum* number of bytes needed to hold the
// largest chunk dimension. This test writes a chunked file through PureHDF
// and opens it through libhdf5 (via HDF.PInvoke); regression of the
// encoded-length bug surfaces as a negative handle from H5F.open or H5D.open.
[Theory]
[InlineData(new uint[] { 10U })] // 1D, max 10 → 1 byte
[InlineData(new uint[] { 256U })] // 1D, max 256 → 2 bytes
[InlineData(new uint[] { 65536U })] // 1D, max 65536 → 3 bytes
[InlineData(new uint[] { 4U, 4U, 32U, 32U, 16U, 1U })] // 6D real-world (microscopy)
public void ChunkedFile_IsReadableBy_libhdf5(uint[] chunkDims)
{
    // Arrange — build N-D mock data matching the chunk shape (one chunk per dim)
    var totalElements = 1;
    foreach (var d in chunkDims)
        totalElements *= (int)d;
    var rawData = new int[totalElements];
    for (var i = 0; i < totalElements; i++)
        rawData[i] = i;

    Array data;
    if (chunkDims.Length == 1)
    {
        data = rawData;
    }
    else
    {
        // Reshape the flat buffer into an N-D array with the chunk's shape.
        var shape = new int[chunkDims.Length];
        for (var i = 0; i < chunkDims.Length; i++)
            shape[i] = (int)chunkDims[i];
        var nd = Array.CreateInstance(typeof(int), shape);
        Buffer.BlockCopy(rawData, 0, nd, 0, rawData.Length * sizeof(int));
        data = nd;
    }

    var file = new H5File
    {
        ["chunked"] = new H5Dataset(data, chunks: chunkDims)
    };

    // GetTempFileName() already creates the file on disk, so everything from
    // here on runs inside try/finally to guarantee it is removed even when
    // writing fails.
    var filePath = Path.GetTempFileName();

    try
    {
        // Act
        file.Write(filePath);

        // Assert — libhdf5 must accept the file (negative handle = error)
        var fileId = H5F.open(filePath, H5F.ACC_RDONLY);
        try
        {
            Assert.True(fileId >= 0, $"H5F.open rejected PureHDF chunked file (handle={fileId})");

            var datasetId = H5D.open(fileId, "chunked");
            try
            {
                Assert.True(datasetId >= 0, $"H5D.open rejected chunked dataset (handle={datasetId})");
            }
            finally
            {
                // Only close handles that were actually opened.
                if (datasetId >= 0)
                    _ = H5D.close(datasetId);
            }
        }
        finally
        {
            if (fileId >= 0)
                _ = H5F.close(fileId);
        }
    }
    finally
    {
        if (File.Exists(filePath))
            File.Delete(filePath);
    }
}
}