diff --git a/src/PureHDF/VOL/Native/API.Reading/NativeGroup.cs b/src/PureHDF/VOL/Native/API.Reading/NativeGroup.cs index 3f20f959..7946ac35 100644 --- a/src/PureHDF/VOL/Native/API.Reading/NativeGroup.cs +++ b/src/PureHDF/VOL/Native/API.Reading/NativeGroup.cs @@ -131,6 +131,27 @@ public IEnumerable Children(H5LinkAccess linkAccess = default) .Select(reference => reference.Dereference()); } + /// + /// Gets the number of children in this group without dereferencing them. + /// + /// The number of children in this group. + public ulong GetChildCount() + { + var linkInfoMessage = GetLinkInfoMessage(); + + if (!Context.Superblock.IsUndefinedAddress(linkInfoMessage.BTree2NameIndexAddress)) + { + return linkInfoMessage.BTree2NameIndex.RootNodePointer.TotalRecordCount; + } + + if (Context.Superblock.IsUndefinedAddress(linkInfoMessage.FractalHeapAddress)) + { + return (ulong)Header.GetMessages().Count(); + } + + throw new NotSupportedException("The group does not use compact or indexed dense link storage."); + } + private bool InternalLinkExists(string path, H5LinkAccess linkAccess) { if (path == "/") @@ -430,8 +451,16 @@ private IEnumerable EnumerateReferences(H5LinkAccess linkA * A group is storing its links compactly when the fractal heap address * in the Link Info Message is set to the "undefined address" value. */ else + { linkMessages = Header.GetMessages(); + if (lmessage.Flags.HasFlag(CreationOrderFlags.TrackCreationOrder)) + { + linkMessages = linkMessages + .OrderBy(message => message.CreationOrder); + } + } + // build links foreach (var linkMessage in linkMessages) { @@ -451,29 +480,73 @@ private IEnumerable EnumerateReferences(H5LinkAccess linkA #region Link Message private IEnumerable EnumerateLinkMessagesFromLinkInfoMessage(LinkInfoMessage infoMessage) + { + if (infoMessage.Flags.HasFlag(CreationOrderFlags.TrackCreationOrder)) + return EnumerateLinkMessagesByCreationOrder(infoMessage); + + return EnumerateLinkMessagesByName(infoMessage); + } + + private IEnumerable EnumerateLinkMessagesByName(LinkInfoMessage infoMessage) { var fractalHeap = infoMessage.FractalHeap; var btree2NameIndex = infoMessage.BTree2NameIndex; - var records = btree2NameIndex - .EnumerateRecords() - .ToList(); // local cache: indirectly accessed, non-filtered - List? record01Cache = null; + BTree2Header? record01Cache = null; + + foreach (var record in btree2NameIndex.EnumerateRecords()) + { + yield return ReadLinkMessage(fractalHeap, record.HeapId, ref record01Cache); + } + } - foreach (var record in records) + private IEnumerable EnumerateLinkMessagesByCreationOrder(LinkInfoMessage infoMessage) + { + if (Context.Superblock.IsUndefinedAddress(infoMessage.BTree2CreationOrderIndexAddress)) { - using var localDriver = new H5StreamDriver(new MemoryStream(record.HeapId), leaveOpen: false); - var heapId = FractalHeapId.Construct(Context, localDriver, fractalHeap); + return EnumerateLinkMessagesByName(infoMessage) + .OrderBy(message => message.CreationOrder); + } - yield return heapId.Read(driver => - { - var message = LinkMessage.Decode(Context); - return message; - }, ref record01Cache); + return EnumerateLinkMessagesByCreationOrderIndex(infoMessage); + } + + private IEnumerable EnumerateLinkMessagesByCreationOrderIndex(LinkInfoMessage infoMessage) + { + var fractalHeap = infoMessage.FractalHeap; + var btree2CreationOrder = infoMessage.BTree2CreationOrder; + + // local cache: indirectly accessed, non-filtered + BTree2Header? record01Cache = null; + + foreach (var record in btree2CreationOrder.EnumerateRecords()) + { + yield return ReadLinkMessage(fractalHeap, record.HeapId, ref record01Cache); } } + private LinkInfoMessage GetLinkInfoMessage() + { + var linkInfoMessages = Header.GetMessages(); + + if (!linkInfoMessages.Any()) + throw new Exception("No link information found in object header."); + + if (linkInfoMessages.Count() != 1) + throw new Exception("There may be only a single link info message."); + + return linkInfoMessages.First(); + } + + private LinkMessage ReadLinkMessage(FractalHeapHeader fractalHeap, byte[] heapIdBytes, ref BTree2Header? record01Cache) + { + using var localDriver = new H5StreamDriver(new MemoryStream(heapIdBytes), leaveOpen: false); + var heapId = FractalHeapId.Construct(Context, localDriver, fractalHeap); + + return heapId.Read(driver => LinkMessage.Decode(Context), ref record01Cache); + } + private bool TryGetLinkMessageFromLinkInfoMessage(LinkInfoMessage linkInfoMessage, string name, [NotNullWhen(returnValue: true)] out LinkMessage? linkMessage) diff --git a/src/PureHDF/VOL/Native/API.Reading/NativeObject.cs b/src/PureHDF/VOL/Native/API.Reading/NativeObject.cs index 95fa0ef1..c48d895a 100644 --- a/src/PureHDF/VOL/Native/API.Reading/NativeObject.cs +++ b/src/PureHDF/VOL/Native/API.Reading/NativeObject.cs @@ -186,16 +186,13 @@ private IEnumerable EnumerateAttributeMessagesFromAttributeInf AttributeInfoMessage attributeInfoMessage) { var btree2NameIndex = attributeInfoMessage.BTree2NameIndex; - var records = btree2NameIndex - .EnumerateRecords() - .ToList(); var fractalHeap = attributeInfoMessage.FractalHeap; // local cache: indirectly accessed, non-filtered - List? record01Cache = null; + BTree2Header? record01Cache = null; - foreach (var record in records) + foreach (var record in btree2NameIndex.EnumerateRecords()) { // TODO: duplicate1_of_3 using var localDriver = new H5StreamDriver(new MemoryStream(record.HeapId), leaveOpen: false); diff --git a/src/PureHDF/VOL/Native/Core.Reading/NativeCache.cs b/src/PureHDF/VOL/Native/Core.Reading/NativeCache.cs index e3917933..faf154ae 100644 --- a/src/PureHDF/VOL/Native/Core.Reading/NativeCache.cs +++ b/src/PureHDF/VOL/Native/Core.Reading/NativeCache.cs @@ -8,8 +8,8 @@ internal static class NativeCache static NativeCache() { - _globalHeapMap = new ConcurrentDictionary>(); - _fileMap = new ConcurrentDictionary>(); + _globalHeapMap = new ConcurrentDictionary>(); + _fileMap = new ConcurrentDictionary>(); } #endregion @@ -41,7 +41,7 @@ public static void Clear(H5DriverBase driver) #region Global Heap - private static readonly ConcurrentDictionary> _globalHeapMap; + private static readonly ConcurrentDictionary> _globalHeapMap; public static GlobalHeapCollection GetGlobalHeapObject( NativeReadContext context, @@ -50,8 +50,8 @@ public static GlobalHeapCollection GetGlobalHeapObject( { if (!_globalHeapMap.TryGetValue(context.Driver, out var addressToCollectionMap)) { - addressToCollectionMap = new Dictionary(); - _globalHeapMap.AddOrUpdate(context.Driver, addressToCollectionMap, (_, oldAddressToCollectionMap) => addressToCollectionMap); + addressToCollectionMap = new ConcurrentDictionary(); + _globalHeapMap.AddOrUpdate(context.Driver, addressToCollectionMap, (_, oldAddressToCollectionMap) => oldAddressToCollectionMap); } if (!addressToCollectionMap.TryGetValue(address, out var collection)) @@ -74,7 +74,7 @@ public static GlobalHeapCollection GetGlobalHeapObject( #region File Handles - private static readonly ConcurrentDictionary> _fileMap; + private static readonly ConcurrentDictionary> _fileMap; public static NativeFile GetNativeFile(H5DriverBase driver, string absoluteFilePath) { @@ -86,8 +86,8 @@ public static NativeFile GetNativeFile(H5DriverBase driver, string absoluteFileP if (!_fileMap.TryGetValue(driver, out var pathToNativeFileMap)) { - pathToNativeFileMap = new Dictionary(); - _fileMap.AddOrUpdate(driver, pathToNativeFileMap, (_, oldPathToNativeFileMap) => pathToNativeFileMap); + pathToNativeFileMap = new ConcurrentDictionary(); + _fileMap.AddOrUpdate(driver, pathToNativeFileMap, (_, oldPathToNativeFileMap) => oldPathToNativeFileMap); } if (!pathToNativeFileMap.TryGetValue(uri.AbsoluteUri, out var nativeFile)) diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/BTree2/BTree2Header.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/BTree2/BTree2Header.cs index 5f48cfe8..2a64a156 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/BTree2/BTree2Header.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/BTree2/BTree2Header.cs @@ -283,7 +283,7 @@ public IEnumerable EnumerateRecords() return EnumerateRecords(rootNode, Depth); else - return new List(); + return Enumerable.Empty(); } private IEnumerable EnumerateRecords(BTree2Node node, ushort nodeLevel) @@ -296,18 +296,12 @@ private IEnumerable EnumerateRecords(BTree2Node node, ushort nodeLevel) if (internalNode is not null) { - var records = node.Records - .Cast() - .ToList(); + var records = internalNode.Records; var nodePointers = internalNode.NodePointers; for (int i = 0; i < nodePointers.Length; i++) { - // there is one more node pointer than records - if (i < records.Count) - yield return records[i]; - var nodePointer = nodePointers[i]; Context.Driver.SeekRelativeToBaseAddress((long)nodePointer.Address); var childNodeLevel = (ushort)(nodeLevel - 1); @@ -341,6 +335,10 @@ private IEnumerable EnumerateRecords(BTree2Node node, ushort nodeLevel) { yield return record; } + + // there is one more node pointer than records + if (i < records.Length) + yield return records[i]; } } // leaf node diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/FractalHeapId.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/FractalHeapId.cs index 7ad545e8..cac344d9 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/FractalHeapId.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/FractalHeapId.cs @@ -50,11 +50,11 @@ internal static FractalHeapId Construct( public T Read(Func func) { // TODO: Is there a better way? - List? cache = null; + BTree2Header? cache = null; return Read(func, ref cache); } public abstract T Read( Func func, - [AllowNull] ref List record01Cache); + [AllowNull] ref BTree2Header record01Cache); } \ No newline at end of file diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType1.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType1.cs index cb16a654..cd79ef3f 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType1.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType1.cs @@ -23,7 +23,7 @@ internal static HugeObjectsFractalHeapIdSubType1 Decode( public override T Read( Func func, - [AllowNull] ref List record01Cache) + [AllowNull] ref BTree2Header record01Cache) { var driver = Context.Driver; @@ -31,11 +31,14 @@ public override T Read( if (record01Cache is null) { driver.SeekRelativeToBaseAddress((long)HeapHeader.HugeObjectsBTree2Address); - var hugeBtree2 = BTree2Header.Decode(Context, DecodeRecord01); - record01Cache = hugeBtree2.EnumerateRecords().ToList(); + record01Cache = BTree2Header.Decode(Context, DecodeRecord01); } - var hugeRecord = record01Cache.FirstOrDefault(record => record.HugeObjectId == BTree2Key); + var success = record01Cache.TryFindRecord(out var hugeRecord, record => BTree2Key.CompareTo(record.HugeObjectId)); + + if (!success) + throw new Exception("Could not find huge fractal heap object."); + driver.SeekRelativeToBaseAddress((long)hugeRecord.HugeObjectAddress); return func(driver); diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType2.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType2.cs index 58fb2180..49c029f1 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType2.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType2.cs @@ -22,7 +22,7 @@ ulong BTree2Key public override T Read( Func func, - [AllowNull] ref List record01Cache) + [AllowNull] ref BTree2Header record01Cache) { throw new Exception("Filtered data is not yet supported."); } diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType3.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType3.cs index 9876dd24..7df2884e 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType3.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType3.cs @@ -20,7 +20,7 @@ public static HugeObjectsFractalHeapIdSubType3 Decode(NativeReadContext context, } public override T Read( Func func, - [AllowNull] ref List record01Cache) + [AllowNull] ref BTree2Header record01Cache) { Driver.SeekRelativeToBaseAddress((long)Address); return func(Driver); diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType4.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType4.cs index 9683f597..bbd5cb78 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType4.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/HugeObjectsFractalHeapIdSubType4.cs @@ -23,7 +23,7 @@ public static HugeObjectsFractalHeapIdSubType4 Decode( public override T Read( Func func, - [AllowNull] ref List record01Cache) + [AllowNull] ref BTree2Header record01Cache) { throw new Exception("Filtered data is not yet supported."); } diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/ManagedObjectsFractalHeapId.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/ManagedObjectsFractalHeapId.cs index e095f90e..8d5d6e70 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/ManagedObjectsFractalHeapId.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/ManagedObjectsFractalHeapId.cs @@ -26,7 +26,7 @@ public static ManagedObjectsFractalHeapId Decode( public override T Read( Func func, - [AllowNull] ref List record01Cache) + [AllowNull] ref BTree2Header record01Cache) { var address = Header.GetAddress(this); diff --git a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/TinyObjectsFractalHeapIdSubType1.cs b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/TinyObjectsFractalHeapIdSubType1.cs index 8cf3bc9d..179d117b 100644 --- a/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/TinyObjectsFractalHeapIdSubType1.cs +++ b/src/PureHDF/VOL/Native/FileFormat/Level1/FractalHeap/FractalHeapId/TinyObjectsFractalHeapIdSubType1.cs @@ -19,7 +19,7 @@ public static TinyObjectsFractalHeapIdSubType1 Decode( public override T Read( Func func, - [AllowNull] ref List record01Cache) + [AllowNull] ref BTree2Header record01Cache) { using var driver = new H5StreamDriver(new MemoryStream(Data), leaveOpen: false); return func.Invoke(driver); diff --git a/tests/PureHDF.Tests/Reading/LinkTests.cs b/tests/PureHDF.Tests/Reading/LinkTests.cs index f23f30df..2ec579cf 100644 --- a/tests/PureHDF.Tests/Reading/LinkTests.cs +++ b/tests/PureHDF.Tests/Reading/LinkTests.cs @@ -1,5 +1,6 @@ using HDF.PInvoke; using PureHDF.VFD; +using System.Text; using Xunit; namespace PureHDF.Tests.Reading; @@ -100,6 +101,103 @@ public void CanEnumerateLinksMass() }); } + [Fact] + public void CanEnumerateDenseIndexedChildrenInNameIndexOrder() + { + // Arrange + const int childCount = 1000; + var filePath = TestUtils.PrepareTestFile(H5F.libver_t.V110, fileId => TestUtils.AddMassLinks(fileId)); + + // Act + using var root = NativeFile.InternalOpenRead(filePath, deleteOnClose: true); + var group = root.Group("mass_links"); + var actual = group.Children(); + + var expected = Enumerable + .Range(0, childCount) + .Select(index => $"mass_{index:D4}") + .OrderBy(name => ChecksumUtils.JenkinsLookup3(Encoding.UTF8.GetBytes(name))) + .ThenBy(name => name, StringComparer.Ordinal) + .ToArray(); + + // Assert + Assert.Equal(expected, actual.Select(child => child.Name)); + } + + [Fact] + public void CanCountDenseIndexedChildren() + { + // Arrange + const int childCount = 32; + var filePath = TestUtils.PrepareTestFile(H5F.libver_t.V110, fileId => TestUtils.AddCreationOrderIndexedLinks(fileId, childCount)); + + // Act + using var root = NativeFile.InternalOpenRead(filePath, deleteOnClose: true); + var group = (NativeGroup)root.Group("creation_order_links"); + var actual = group.GetChildCount(); + + // Assert + Assert.Equal((ulong)childCount, actual); + } + + [Fact] + public void CanCountCompactChildren() + { + // Arrange + const int childCount = 4; + var filePath = TestUtils.PrepareTestFile(H5F.libver_t.V110, fileId => TestUtils.AddCreationOrderIndexedLinks(fileId, childCount)); + + // Act + using var root = NativeFile.InternalOpenRead(filePath, deleteOnClose: true); + var group = (NativeGroup)root.Group("creation_order_links"); + var actual = group.GetChildCount(); + + // Assert + Assert.Equal((ulong)childCount, actual); + } + + [Fact] + public void CanEnumerateDenseIndexedChildrenInCreationOrder() + { + // Arrange + const int childCount = 128; + var filePath = TestUtils.PrepareTestFile(H5F.libver_t.V110, fileId => TestUtils.AddCreationOrderIndexedLinks(fileId, childCount)); + + // Act + using var root = NativeFile.InternalOpenRead(filePath, deleteOnClose: true); + var group = (NativeGroup)root.Group("creation_order_links"); + var actual = group.Children(); + + var expected = Enumerable + .Range(0, childCount) + .Select(index => $"child_{childCount - index:D4}") + .ToArray(); + + // Assert + Assert.Equal(expected, actual.Select(child => child.Name)); + } + + [Fact] + public void CanEnumerateCompactChildrenInCreationOrder() + { + // Arrange + const int childCount = 4; + var filePath = TestUtils.PrepareTestFile(H5F.libver_t.V110, fileId => TestUtils.AddCreationOrderIndexedLinks(fileId, childCount)); + + // Act + using var root = NativeFile.InternalOpenRead(filePath, deleteOnClose: true); + var group = (NativeGroup)root.Group("creation_order_links"); + var actual = group.Children().Take(2); + + var expected = Enumerable + .Range(0, 2) + .Select(index => $"child_{childCount - index:D4}") + .ToArray(); + + // Assert + Assert.Equal(expected, actual.Select(child => child.Name)); + } + [Fact] public void CanOpenDataset_DataLayoutMessage12() { diff --git a/tests/PureHDF.Tests/TestUtils/TestUtils@links.cs b/tests/PureHDF.Tests/TestUtils/TestUtils@links.cs index 346296c5..7d4e56ac 100644 --- a/tests/PureHDF.Tests/TestUtils/TestUtils@links.cs +++ b/tests/PureHDF.Tests/TestUtils/TestUtils@links.cs @@ -53,6 +53,33 @@ public static unsafe void AddMassLinks(long fileId) _ = H5G.close(groupId); } + public static unsafe void AddCreationOrderIndexedLinks(long fileId, int count = 32) + { + var gcplId = H5P.create(H5P.GROUP_CREATE); + var flags = H5P.CRT_ORDER_TRACKED | H5P.CRT_ORDER_INDEXED; + + if (H5P.set_link_creation_order(gcplId, flags) < 0) + throw new Exception("Could not enable link creation-order tracking and indexing."); + + var groupId = H5G.create(fileId, "creation_order_links", 0, gcplId, 0); + + if (groupId < 0) + throw new Exception("Could not create creation-order indexed group."); + + for (int i = 0; i < count; i++) + { + var linkId = H5G.create(groupId, $"child_{count - i:D4}"); + + if (linkId < 0) + throw new Exception("Could not create child group."); + + _ = H5G.close(linkId); + } + + _ = H5G.close(groupId); + _ = H5P.close(gcplId); + } + public static unsafe void AddLinks(long fileId) { var groupId = H5G.create(fileId, "links");