diff --git a/src/PureHDF/VOL/Native/Core.Reading/ReadTypes.cs b/src/PureHDF/VOL/Native/Core.Reading/ReadTypes.cs index 37645bca..fb8484c0 100644 --- a/src/PureHDF/VOL/Native/Core.Reading/ReadTypes.cs +++ b/src/PureHDF/VOL/Native/Core.Reading/ReadTypes.cs @@ -3,6 +3,8 @@ namespace PureHDF.VOL.Native; internal delegate void DecodeDelegate(IH5ReadStream source, Span target); internal delegate object? ElementDecodeDelegate(IH5ReadStream source); +internal delegate object? ElementDecodeDelegateBuffered(IH5ReadStream source, Span buffer); + internal readonly record struct DecodeStep( Action? SetValue, ulong CompoundMemberOffset, diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/GlobalHeap/GlobalHeapCollection.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/GlobalHeap/GlobalHeapCollection.cs index 2eb44f34..2fa9c0d2 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/GlobalHeap/GlobalHeapCollection.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/GlobalHeap/GlobalHeapCollection.cs @@ -59,6 +59,21 @@ public static GlobalHeapCollection Decode(NativeReadContext context) // collection size var collectionSize = superblock.ReadLength(driver); + if (collectionSize > int.MaxValue) + { + throw new NotSupportedException("The collection size is too big."); + } + + var buffer = ArrayPool.Shared.Rent((int)collectionSize); + driver.ReadDataset(buffer.AsSpan()[..(int)collectionSize]); + + var memoryStream = new MemoryStream(buffer); + var subDriver = new H5StreamDriver(memoryStream, false); + var subContext = new NativeReadContext(subDriver, superblock) + { + ReadOptions = context.ReadOptions, + File = context.File, + }; // global heap objects var globalHeapObjects = new Dictionary(); @@ -68,20 +83,22 @@ public static GlobalHeapCollection Decode(NativeReadContext context) while (remaining > headerSize) { - var before = driver.Position; - var globalHeapObject = GlobalHeapObject.Decode(context); + var before = subDriver.Position; + var globalHeapObject = GlobalHeapObject.Decode(subContext); // Global Heap Object 0 (free space) can appear at the end of the collection. if (globalHeapObject.ObjectIndex == 0) break; globalHeapObjects[globalHeapObject.ObjectIndex] = globalHeapObject; - var after = driver.Position; + var after = subDriver.Position; var consumed = (ulong)(after - before); remaining -= consumed; } + ArrayPool.Shared.Return(buffer); + return new GlobalHeapCollection( GlobalHeapObjects: globalHeapObjects ) diff --git a/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs b/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs index 8bb6611f..88ff7e7d 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level2/ObjectHeaderMessages/Datatype/DatatypeMessage.Reading.cs @@ -170,6 +170,20 @@ public DecodeDelegate GetDecodeInfo( }; } + private (Type Type, ElementDecodeDelegateBuffered Decode)? GetDecodeInfoForScalarBuffered(NativeReadContext context, Type? memoryType) + { + return Class switch + { + DatatypeMessageClass.VariableLength when ((VariableLengthBitFieldDescription)BitField).Type == InternalVariableLengthType.String => + memoryType is null || memoryType == typeof(string) + ? (typeof(string), GetDecodeInfoForVariableLengthStringBuffered(context)) + : throw new Exception($"Variable-length string data can only be decoded as string (incompatible type: {memoryType})."), + + /* default */ + _ => null + }; + } + private (Type Type, ElementDecodeDelegate Decode) GetDecodeInfoForScalar( NativeReadContext context, Type? memoryType) @@ -899,6 +913,70 @@ private ElementDecodeDelegate GetDecodeInfoForVariableLengthString( return value; } + else + { + // It would be more correct to just throw an exception + // when the object index is not found in the collection, + // but that would make the following test fail + // - CanRead_Array_nullable_struct. + // + // And it would make the user's life a bit more complicated + // if the library cannot handle missing entries. + return default; + } + } + + return decode; + } + + private ElementDecodeDelegateBuffered GetDecodeInfoForVariableLengthStringBuffered( + NativeReadContext context) + { + object? decode(IH5ReadStream source, Span buffer) + { + /* Padding + * https://support.hdfgroup.org/HDF5/doc/H5.format.html#DatatypeMessage + * Search for "null terminate": null terminate and null padding are essentially + * the same when simply reading them from file. + */ + + /* String is always split after first \0 when writing data to file. + * In other words, padding type only matters when reading data. + */ + + if (BitField is not VariableLengthBitFieldDescription bitField) + throw new Exception("Variable-length bit field description must not be null."); + + // see IV.B. Disk Format: Level 2B - Data Object Data Storage + Func trim = bitField.PaddingType switch + { + PaddingType.NullTerminate => value => value, + PaddingType.NullPad => value => value, + PaddingType.SpacePad => value => value.TrimEnd(' '), + _ => throw new Exception("Unsupported padding type.") + }; + + /* skip the length of the sequence (H5Tvlen.c H5T_vlen_disk_read) */ + buffer = buffer.Slice(sizeof(uint)); + + /* decode global heap IDs and get associated data */ + var globalHeapId = ReadingGlobalHeapId.Decode(context.Superblock, buffer); + + if (globalHeapId.Equals(default)) + return default; + + var globalHeapCollection = NativeCache.GetGlobalHeapObject( + context, + globalHeapId.CollectionAddress, + restoreAddress: true); + + if (globalHeapCollection.GlobalHeapObjects.TryGetValue((int)globalHeapId.ObjectIndex, out var globalHeapObject)) + { + var value = Encoding.UTF8.GetString(globalHeapObject.ObjectData); + value = trim(value); + return value; + } + else { // It would be more correct to just throw an exception @@ -987,6 +1065,26 @@ private DecodeDelegate GetDecodeInfoForReferenceMemory( NativeReadContext context ) { + var elementDecodeBuffered = GetDecodeInfoForScalarBuffered(context, typeof(T))?.Decode; + if (elementDecodeBuffered is not null) + { + void decodeBuffered(IH5ReadStream source, Span target) + { + var totalSize = sizeof(uint) + context.Superblock.OffsetsSize + sizeof(uint); + using var memoryOwner = MemoryPool.Shared.Rent(target.Length * totalSize); + source.ReadDataset(memoryOwner.Memory.Slice(0, target.Length * totalSize).Span); + var targetSpan = target; + + for (int i = 0; i < target.Length; i++) + { + var elementBuffer = memoryOwner.Memory.Slice(i * totalSize, totalSize).Span; + targetSpan[i] = (T)elementDecodeBuffered(source, elementBuffer)!; + } + } + + return decodeBuffered; + } + var elementDecode = GetDecodeInfoForScalar(context, typeof(T)).Decode; void decode(IH5ReadStream source, Span target)