From 13d49352b5e81da187512cd3fa5b2dd79c7cdd56 Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Thu, 4 Jun 2026 11:14:38 -0700 Subject: [PATCH] Add pack-index object existence checker strategy for prefetch Introduce IObjectExistenceChecker strategy pattern to decouple blob prefetch from libgit2's git_revparse_single, which is extremely slow for missing objects (~2.8ms/op with 14 packs in a large GVFS cache). New PackIndexObjectExistenceChecker reads MIDX and supplemental .idx files directly in managed code via memory-mapped IO (~5us/op), with loose-object File.Exists fallback. Gated on gvfs.prefetch-use-idx git config (default: false). Components: - IObjectExistenceChecker: strategy interface - LibGit2ObjectExistenceChecker: wraps existing LibGit2Repo.ObjectExists - PackIndexObjectExistenceChecker: MIDX + pack idx + loose fallback - MidxReader: memory-mapped MIDX v1 parser with binary search - PackIndexReader: memory-mapped pack index v2 parser with binary search - FindBlobsStage: accepts optional checker factory (backward compatible) - BlobPrefetcher: reads config, creates appropriate checker factory Searches both LocalObjectsRoot and GitObjectsRoot (shared cache), detects supplemental packs not yet in MIDX via PNAM chunk diffing, and safely falls back to revparse on initialization errors. Unit tests cover: MIDX/idx hit and miss, all 256 fanout buckets, supplemental pack detection, loose objects, empty/missing pack dirs, multiple object roots, corrupt file handling, and deduplication. 
Assisted-by: Claude Opus 4.6 Signed-off-by: Tyrie Vella --- GVFS/GVFS.Common/GVFSConstants.cs | 3 + .../Git/IObjectExistenceChecker.cs | 14 + .../Git/LibGit2ObjectExistenceChecker.cs | 27 ++ GVFS/GVFS.Common/Git/MidxReader.cs | 283 +++++++++++++++++ .../Git/PackIndexObjectExistenceChecker.cs | 166 ++++++++++ GVFS/GVFS.Common/Git/PackIndexReader.cs | 161 ++++++++++ GVFS/GVFS.Common/Prefetch/BlobPrefetcher.cs | 89 +++++- .../Prefetch/Pipeline/FindBlobsStage.cs | 13 +- .../Prefetch/MidxReaderTests.cs | 299 ++++++++++++++++++ .../PackIndexObjectExistenceCheckerTests.cs | 216 +++++++++++++ .../Prefetch/PackIndexReaderTests.cs | 171 ++++++++++ 11 files changed, 1438 insertions(+), 4 deletions(-) create mode 100644 GVFS/GVFS.Common/Git/IObjectExistenceChecker.cs create mode 100644 GVFS/GVFS.Common/Git/LibGit2ObjectExistenceChecker.cs create mode 100644 GVFS/GVFS.Common/Git/MidxReader.cs create mode 100644 GVFS/GVFS.Common/Git/PackIndexObjectExistenceChecker.cs create mode 100644 GVFS/GVFS.Common/Git/PackIndexReader.cs create mode 100644 GVFS/GVFS.UnitTests/Prefetch/MidxReaderTests.cs create mode 100644 GVFS/GVFS.UnitTests/Prefetch/PackIndexObjectExistenceCheckerTests.cs create mode 100644 GVFS/GVFS.UnitTests/Prefetch/PackIndexReaderTests.cs diff --git a/GVFS/GVFS.Common/GVFSConstants.cs b/GVFS/GVFS.Common/GVFSConstants.cs index e81ecc635..8f135786a 100644 --- a/GVFS/GVFS.Common/GVFSConstants.cs +++ b/GVFS/GVFS.Common/GVFSConstants.cs @@ -48,6 +48,9 @@ public static class GitConfig public const bool ShowHydrationStatusDefault = false; public const string MaxHttpConnectionsConfig = GVFSPrefix + "max-http-connections"; + + public const string PrefetchUseIdx = GVFSPrefix + "prefetch-use-idx"; + public const bool PrefetchUseIdxDefault = false; } public static class LocalGVFSConfig diff --git a/GVFS/GVFS.Common/Git/IObjectExistenceChecker.cs b/GVFS/GVFS.Common/Git/IObjectExistenceChecker.cs new file mode 100644 index 000000000..46da33c84 --- /dev/null +++ 
b/GVFS/GVFS.Common/Git/IObjectExistenceChecker.cs @@ -0,0 +1,14 @@ +using System; + +namespace GVFS.Common.Git +{ + /// <summary> + /// Strategy interface for checking whether git objects exist locally. + /// Implementations must be safe to call from a single worker thread. + /// Thread-safety across multiple workers depends on the implementation. + /// </summary> + public interface IObjectExistenceChecker : IDisposable + { + bool ObjectExists(string sha); + } +} diff --git a/GVFS/GVFS.Common/Git/LibGit2ObjectExistenceChecker.cs b/GVFS/GVFS.Common/Git/LibGit2ObjectExistenceChecker.cs new file mode 100644 index 000000000..fe73a91f7 --- /dev/null +++ b/GVFS/GVFS.Common/Git/LibGit2ObjectExistenceChecker.cs @@ -0,0 +1,27 @@ +using GVFS.Common.Tracing; + +namespace GVFS.Common.Git +{ + /// <summary> + /// Object existence checker backed by libgit2 — one instance per worker thread. + /// </summary> + public class LibGit2ObjectExistenceChecker : IObjectExistenceChecker + { + private readonly LibGit2Repo repo; + + public LibGit2ObjectExistenceChecker(ITracer tracer, string repoPath) + { + this.repo = new LibGit2Repo(tracer, repoPath); + } + + public bool ObjectExists(string sha) + { + return this.repo.ObjectExists(sha); + } + + public void Dispose() + { + this.repo.Dispose(); + } + } +} diff --git a/GVFS/GVFS.Common/Git/MidxReader.cs b/GVFS/GVFS.Common/Git/MidxReader.cs new file mode 100644 index 000000000..05fb3d22d --- /dev/null +++ b/GVFS/GVFS.Common/Git/MidxReader.cs @@ -0,0 +1,283 @@ +using System; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.IO; +using System.IO.MemoryMappedFiles; +using System.Runtime.CompilerServices; + +namespace GVFS.Common.Git +{ + /// + /// Reads a git multi-pack-index (MIDX) file and performs binary search + /// lookups against the sorted OID table. Pure managed code, thread-safe. 
+ /// + public sealed class MidxReader : IDisposable + { + private const uint MidxMagic = 0x4D494458; // "MIDX" + private const uint ChunkIdPNAM = 0x504E414D; // Pack Names + private const uint ChunkIdOIDF = 0x4F494446; // OID Fanout + private const uint ChunkIdOIDL = 0x4F49444C; // OID Lookup + + private readonly MemoryMappedFile mmf; + private readonly MemoryMappedViewAccessor accessor; + private int hashLen; + private long fanoutOffset; + private long oidLookupOffset; + private int totalObjects; + private HashSet packStems; + + public int TotalObjects => this.totalObjects; + + public MidxReader(string path) + { + long fileLength = new FileInfo(path).Length; + this.mmf = MemoryMappedFile.CreateFromFile(path, FileMode.Open, null, 0, MemoryMappedFileAccess.Read); + try + { + this.accessor = this.mmf.CreateViewAccessor(0, fileLength, MemoryMappedFileAccess.Read); + try + { + this.InitializeFromAccessor(); + } + catch + { + this.accessor.Dispose(); + throw; + } + } + catch + { + this.mmf.Dispose(); + throw; + } + } + + private void InitializeFromAccessor() + { + // Header: MIDX(4) version(1) oidVersion(1) numChunks(1) reserved(1) numPacks(4) + uint magic = this.ReadUInt32BE(0); + if (magic != MidxMagic) + { + throw new InvalidDataException($"Not a MIDX file (magic=0x{magic:X8})"); + } + + byte version = this.ReadByte(4); + if (version != 1) + { + throw new InvalidDataException($"Unsupported MIDX version {version}"); + } + + byte oidVersion = this.ReadByte(5); + this.hashLen = oidVersion == 2 ? 
32 : 20; + int numChunks = this.ReadByte(6); + + // Parse chunk TOC at offset 12 + long tocStart = 12; + long pnamOffset = 0; + long pnamEnd = 0; + this.fanoutOffset = 0; + this.oidLookupOffset = 0; + + // Read all chunk entries + terminator to get chunk boundaries + long[] chunkOffsets = new long[numChunks + 1]; + uint[] chunkIds = new uint[numChunks]; + for (int i = 0; i < numChunks; i++) + { + long entryOff = tocStart + ((long)i * 12); + chunkIds[i] = this.ReadUInt32BE(entryOff); + chunkOffsets[i] = this.ReadInt64BE(entryOff + 4); + } + + // Terminator entry + long terminatorOff = tocStart + ((long)numChunks * 12); + chunkOffsets[numChunks] = this.ReadInt64BE(terminatorOff + 4); + + for (int i = 0; i < numChunks; i++) + { + switch (chunkIds[i]) + { + case ChunkIdPNAM: + pnamOffset = chunkOffsets[i]; + pnamEnd = chunkOffsets[i + 1]; + break; + case ChunkIdOIDF: + this.fanoutOffset = chunkOffsets[i]; + break; + case ChunkIdOIDL: + this.oidLookupOffset = chunkOffsets[i]; + break; + } + } + + if (this.fanoutOffset == 0 || this.oidLookupOffset == 0) + { + throw new InvalidDataException("MIDX missing required OIDF/OIDL chunks"); + } + + // Total objects from fanout[255] + this.totalObjects = (int)this.ReadUInt32BE(this.fanoutOffset + (255 * 4)); + + // Parse pack names from PNAM chunk + this.packStems = new HashSet(StringComparer.OrdinalIgnoreCase); + if (pnamOffset > 0 && pnamEnd > pnamOffset) + { + int pnamLen = (int)(pnamEnd - pnamOffset); + byte[] pnamBuf = new byte[pnamLen]; + this.accessor.ReadArray(pnamOffset, pnamBuf, 0, pnamLen); + string pnamStr = System.Text.Encoding.ASCII.GetString(pnamBuf); + foreach (string name in pnamStr.Split('\0', StringSplitOptions.RemoveEmptyEntries)) + { + // PNAM stores .idx names; strip extension to get stem + string stem = name; + if (stem.EndsWith(".idx", StringComparison.OrdinalIgnoreCase)) + { + stem = stem.Substring(0, stem.Length - 4); + } + + this.packStems.Add(stem); + } + } + } + + /// + /// Returns the set of pack file 
stems (without extension) covered by this MIDX. + /// + public HashSet GetPackStems() + { + return this.packStems; + } + + /// + /// Check if an object with the given SHA-1 hex string exists in the MIDX. + /// Thread-safe. + /// + public bool Exists(string shaHex) + { + if (shaHex == null || shaHex.Length < this.hashLen * 2) + { + return false; + } + + Span oid = stackalloc byte[this.hashLen]; + HexToBytes(shaHex, oid); + return this.Exists(oid); + } + + /// + /// Check if an object with the given binary OID exists in the MIDX. + /// Thread-safe. + /// + public bool Exists(ReadOnlySpan oid) + { + int firstByte = oid[0]; + + uint lo = firstByte == 0 ? 0 : this.ReadUInt32BE(this.fanoutOffset + ((firstByte - 1) * 4)); + uint hi = this.ReadUInt32BE(this.fanoutOffset + (firstByte * 4)); + + if (lo >= hi) + { + return false; + } + + return this.BinarySearchOid(oid, (int)lo, (int)hi - 1); + } + + private bool BinarySearchOid(ReadOnlySpan target, int lo, int hi) + { + while (lo <= hi) + { + int mid = lo + ((hi - lo) / 2); + long offset = this.oidLookupOffset + ((long)mid * this.hashLen); + + int cmp = this.CompareOidAtOffset(target, offset); + if (cmp == 0) + { + return true; + } + else if (cmp < 0) + { + hi = mid - 1; + } + else + { + lo = mid + 1; + } + } + + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int CompareOidAtOffset(ReadOnlySpan target, long fileOffset) + { + for (int i = 0; i < this.hashLen; i++) + { + int diff = target[i] - this.accessor.ReadByte(fileOffset + i); + if (diff != 0) + { + return diff; + } + } + + return 0; + } + + internal static void HexToBytes(string hex, Span output) + { + for (int i = 0; i < output.Length; i++) + { + output[i] = (byte)((HexVal(hex[i * 2]) << 4) | HexVal(hex[(i * 2) + 1])); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int HexVal(char c) + { + if (c >= 'a') + { + return c - 'a' + 10; + } + + if (c >= 'A') + { + return c - 'A' + 10; + } + + return c - '0'; 
+ } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private byte ReadByte(long offset) + { + return this.accessor.ReadByte(offset); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private uint ReadUInt32BE(long offset) + { + byte b0 = this.accessor.ReadByte(offset); + byte b1 = this.accessor.ReadByte(offset + 1); + byte b2 = this.accessor.ReadByte(offset + 2); + byte b3 = this.accessor.ReadByte(offset + 3); + return ((uint)b0 << 24) | ((uint)b1 << 16) | ((uint)b2 << 8) | b3; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private long ReadInt64BE(long offset) + { + Span buf = stackalloc byte[8]; + for (int i = 0; i < 8; i++) + { + buf[i] = this.accessor.ReadByte(offset + i); + } + + return BinaryPrimitives.ReadInt64BigEndian(buf); + } + + public void Dispose() + { + this.accessor.Dispose(); + this.mmf.Dispose(); + } + } +} diff --git a/GVFS/GVFS.Common/Git/PackIndexObjectExistenceChecker.cs b/GVFS/GVFS.Common/Git/PackIndexObjectExistenceChecker.cs new file mode 100644 index 000000000..69a63a8d5 --- /dev/null +++ b/GVFS/GVFS.Common/Git/PackIndexObjectExistenceChecker.cs @@ -0,0 +1,166 @@ +using GVFS.Common.Tracing; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace GVFS.Common.Git +{ + /// + /// Object existence checker that reads MIDX and pack .idx files directly + /// in managed code. Falls back to loose-object file existence checks. + /// Thread-safe — all reads are against read-only memory-mapped files. + /// + public class PackIndexObjectExistenceChecker : IObjectExistenceChecker + { + private readonly MidxReader[] midxReaders; + private readonly PackIndexReader[] supplementalPacks; + private readonly string[] objectRoots; + private readonly ITracer tracer; + + /// + /// Creates a checker that scans packs and loose objects under the given object roots. + /// Multiple roots are supported (e.g. LocalObjectsRoot and GitObjectsRoot) and + /// are de-duplicated by normalized path. 
+ /// + public PackIndexObjectExistenceChecker(ITracer tracer, params string[] objectRoots) + { + this.tracer = tracer; + + // De-duplicate roots (LocalObjectsRoot == GitObjectsRoot in non-cache scenarios) + this.objectRoots = objectRoots + .Where(r => !string.IsNullOrEmpty(r)) + .Select(r => Path.GetFullPath(r)) + .Distinct(StringComparer.OrdinalIgnoreCase) + .ToArray(); + + List<MidxReader> midxList = new List<MidxReader>(); + List<PackIndexReader> supplementalList = new List<PackIndexReader>(); + + foreach (string root in this.objectRoots) + { + string packDir = Path.Combine(root, "pack"); + if (!Directory.Exists(packDir)) + { + continue; + } + + HashSet<string> midxPackStems = new HashSet<string>(StringComparer.OrdinalIgnoreCase); + string midxPath = Path.Combine(packDir, "multi-pack-index"); + + if (File.Exists(midxPath)) + { + try + { + MidxReader reader = new MidxReader(midxPath); + midxList.Add(reader); + midxPackStems = reader.GetPackStems(); + + tracer.RelatedInfo( + "PackIndexChecker: Loaded MIDX from {0} ({1:N0} objects, {2} packs)", + packDir, + reader.TotalObjects, + midxPackStems.Count); + } + catch (Exception ex) when (ex is InvalidDataException || ex is IOException || ex is ArgumentException || ex is UnauthorizedAccessException) // ArgumentException: empty file or out-of-range offsets in a corrupt MIDX + { + tracer.RelatedWarning("PackIndexChecker: Failed to load MIDX at {0}: {1}", midxPath, ex.Message); + } + } + + // Find .idx files not covered by MIDX + try + { + foreach (string idxFile in Directory.GetFiles(packDir, "*.idx")) + { + string stem = Path.GetFileNameWithoutExtension(idxFile); + if (!midxPackStems.Contains(stem)) + { + try + { + PackIndexReader reader = new PackIndexReader(idxFile); + supplementalList.Add(reader); + + tracer.RelatedInfo( + "PackIndexChecker: Loaded supplemental idx {0} ({1:N0} objects)", + Path.GetFileName(idxFile), + reader.TotalObjects); + } + catch (Exception ex) when (ex is InvalidDataException || ex is IOException || ex is ArgumentException || ex is UnauthorizedAccessException) // skip one unreadable idx (e.g. empty or truncated) instead of aborting the whole checker + { + tracer.RelatedWarning( + "PackIndexChecker: Failed to load idx {0}: {1}", + idxFile, + ex.Message); + } + } + } + } + catch (DirectoryNotFoundException) + { + // Pack directory disappeared between check 
and enumeration + } + } + + this.midxReaders = midxList.ToArray(); + this.supplementalPacks = supplementalList.ToArray(); + + tracer.RelatedInfo( + "PackIndexChecker: Initialized with {0} MIDX reader(s), {1} supplemental pack(s), {2} object root(s)", + this.midxReaders.Length, + this.supplementalPacks.Length, + this.objectRoots.Length); + } + + public bool ObjectExists(string sha) + { + // Check MIDX readers first (covers the vast majority of objects) + for (int i = 0; i < this.midxReaders.Length; i++) + { + if (this.midxReaders[i].Exists(sha)) + { + return true; + } + } + + // Check supplemental pack indexes (packs not yet in MIDX) + for (int i = 0; i < this.supplementalPacks.Length; i++) + { + if (this.supplementalPacks[i].Exists(sha)) + { + return true; + } + } + + // Loose object fallback: check objects// file existence + if (sha != null && sha.Length >= GVFSConstants.ShaStringLength) + { + string prefix = sha.Substring(0, 2); + string suffix = sha.Substring(2); + for (int i = 0; i < this.objectRoots.Length; i++) + { + string loosePath = Path.Combine(this.objectRoots[i], prefix, suffix); + if (File.Exists(loosePath)) + { + return true; + } + } + } + + return false; + } + + public void Dispose() + { + foreach (MidxReader reader in this.midxReaders) + { + reader.Dispose(); + } + + foreach (PackIndexReader reader in this.supplementalPacks) + { + reader.Dispose(); + } + } + } +} diff --git a/GVFS/GVFS.Common/Git/PackIndexReader.cs b/GVFS/GVFS.Common/Git/PackIndexReader.cs new file mode 100644 index 000000000..881aa3ac0 --- /dev/null +++ b/GVFS/GVFS.Common/Git/PackIndexReader.cs @@ -0,0 +1,161 @@ +using System; +using System.IO; +using System.IO.MemoryMappedFiles; +using System.Runtime.CompilerServices; + +namespace GVFS.Common.Git +{ + /// + /// Reads a git pack index (.idx) v2 file and performs binary search + /// lookups against the sorted OID table. Pure managed code, thread-safe. 
+ /// + public sealed class PackIndexReader : IDisposable + { + // Pack index v2 magic: 0xff 0x74 0x4f 0x63 + private const uint IdxV2Magic = 0xFF744F63; + private const int FanoutEntries = 256; + private const int FanoutSize = FanoutEntries * 4; + private const int HeaderSize = 8; // magic(4) + version(4) + + private readonly MemoryMappedFile mmf; + private readonly MemoryMappedViewAccessor accessor; + private readonly int totalObjects; + private readonly long fanoutOffset; + private readonly long oidTableOffset; + private readonly int hashLen; + + public int TotalObjects => this.totalObjects; + + public PackIndexReader(string idxPath) + { + long fileLength = new FileInfo(idxPath).Length; + this.mmf = MemoryMappedFile.CreateFromFile(idxPath, FileMode.Open, null, 0, MemoryMappedFileAccess.Read); + try + { + this.accessor = this.mmf.CreateViewAccessor(0, fileLength, MemoryMappedFileAccess.Read); + try + { + uint magic = this.ReadUInt32BE(0); + if (magic != IdxV2Magic) + { + throw new InvalidDataException($"Unsupported pack index format (magic=0x{magic:X8}), expected v2"); + } + + uint version = this.ReadUInt32BE(4); + if (version != 2) + { + throw new InvalidDataException($"Unsupported pack index version {version}"); + } + + this.hashLen = 20; // SHA-1 + this.fanoutOffset = HeaderSize; + this.oidTableOffset = HeaderSize + FanoutSize; + + // Total objects from fanout[255] + this.totalObjects = (int)this.ReadUInt32BE(this.fanoutOffset + (255 * 4)); + } + catch + { + this.accessor.Dispose(); + throw; + } + } + catch + { + this.mmf.Dispose(); + throw; + } + } + + /// + /// Check if an object with the given SHA-1 hex string exists in this pack index. + /// Thread-safe. 
+ /// + public bool Exists(string shaHex) + { + if (shaHex == null || shaHex.Length < this.hashLen * 2) + { + return false; + } + + Span oid = stackalloc byte[this.hashLen]; + MidxReader.HexToBytes(shaHex, oid); + return this.Exists(oid); + } + + /// + /// Check if an object with the given binary OID exists in this pack index. + /// Thread-safe. + /// + public bool Exists(ReadOnlySpan oid) + { + int firstByte = oid[0]; + + uint lo = firstByte == 0 ? 0 : this.ReadUInt32BE(this.fanoutOffset + ((firstByte - 1) * 4)); + uint hi = this.ReadUInt32BE(this.fanoutOffset + (firstByte * 4)); + + if (lo >= hi) + { + return false; + } + + return this.BinarySearchOid(oid, (int)lo, (int)hi - 1); + } + + private bool BinarySearchOid(ReadOnlySpan target, int lo, int hi) + { + while (lo <= hi) + { + int mid = lo + ((hi - lo) / 2); + long offset = this.oidTableOffset + ((long)mid * this.hashLen); + + int cmp = this.CompareOidAtOffset(target, offset); + if (cmp == 0) + { + return true; + } + else if (cmp < 0) + { + hi = mid - 1; + } + else + { + lo = mid + 1; + } + } + + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int CompareOidAtOffset(ReadOnlySpan target, long fileOffset) + { + for (int i = 0; i < this.hashLen; i++) + { + int diff = target[i] - this.accessor.ReadByte(fileOffset + i); + if (diff != 0) + { + return diff; + } + } + + return 0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private uint ReadUInt32BE(long offset) + { + byte b0 = this.accessor.ReadByte(offset); + byte b1 = this.accessor.ReadByte(offset + 1); + byte b2 = this.accessor.ReadByte(offset + 2); + byte b3 = this.accessor.ReadByte(offset + 3); + return ((uint)b0 << 24) | ((uint)b1 << 16) | ((uint)b2 << 8) | b3; + } + + public void Dispose() + { + this.accessor.Dispose(); + this.mmf.Dispose(); + } + } +} diff --git a/GVFS/GVFS.Common/Prefetch/BlobPrefetcher.cs b/GVFS/GVFS.Common/Prefetch/BlobPrefetcher.cs index 29bc4cc67..59ea14ec6 100644 --- 
a/GVFS/GVFS.Common/Prefetch/BlobPrefetcher.cs +++ b/GVFS/GVFS.Common/Prefetch/BlobPrefetcher.cs @@ -293,7 +293,10 @@ public void PrefetchWithStats( // * availableBlobs (out param): Locally available blob ids (shared between `blobFinder`, `downloader`, and `packIndexer`, all add blob ids to the list as they are locally available) // * MissingBlobs (property): Blob ids that are missing and need to be downloaded // * AvailableBlobs (property): Same as availableBlobs - FindBlobsStage blobFinder = new FindBlobsStage(this.SearchThreadCount, diff.RequiredBlobs, availableBlobs, this.Tracer, this.Enlistment); + Func checkerFactory = this.CreateObjectExistenceCheckerFactory(out IDisposable sharedCheckerOwner); + try + { + FindBlobsStage blobFinder = new FindBlobsStage(this.SearchThreadCount, diff.RequiredBlobs, availableBlobs, this.Tracer, this.Enlistment, checkerFactory); // downloader // Inputs: @@ -385,6 +388,90 @@ public void PrefetchWithStats( { this.SavePrefetchArgs(commitToFetch, hydrateFilesAfterDownload); } + } + finally + { + sharedCheckerOwner?.Dispose(); + } + } + + /// + /// Creates a factory for object existence checkers based on git config. + /// When gvfs.prefetch-use-idx is true, returns a factory that shares a single + /// PackIndexObjectExistenceChecker (thread-safe, read-only mmap) across all workers. + /// The shared instance is returned via for + /// the caller to dispose after all workers complete. + /// Otherwise, returns a factory creating per-worker LibGit2ObjectExistenceChecker instances. 
+ /// + private Func CreateObjectExistenceCheckerFactory(out IDisposable sharedCheckerOwner) + { + sharedCheckerOwner = null; + + bool usePackIdx = false; + try + { + GitProcess git = new GitProcess(this.Enlistment); + GitProcess.ConfigResult configResult = git.GetFromLocalConfig(GVFSConstants.GitConfig.PrefetchUseIdx); + if (!configResult.TryParseAsString(out string value, out string _) || + string.IsNullOrEmpty(value) || + !bool.TryParse(value, out usePackIdx)) + { + usePackIdx = GVFSConstants.GitConfig.PrefetchUseIdxDefault; + } + } + catch (Exception ex) + { + this.Tracer.RelatedWarning("Failed to read {0} config: {1}", GVFSConstants.GitConfig.PrefetchUseIdx, ex.Message); + } + + if (usePackIdx) + { + this.Tracer.RelatedInfo("Prefetch: Using pack-index object existence checker"); + try + { + PackIndexObjectExistenceChecker sharedChecker = new PackIndexObjectExistenceChecker( + this.Tracer, + this.Enlistment.LocalObjectsRoot, + this.Enlistment.GitObjectsRoot); + + sharedCheckerOwner = sharedChecker; + return () => new NonDisposingCheckerWrapper(sharedChecker); + } + catch (Exception ex) + { + this.Tracer.RelatedWarning( + "Failed to create pack-index checker, falling back to revparse: {0}", + ex.Message); + } + } + + this.Tracer.RelatedInfo("Prefetch: Using revparse object existence checker"); + return () => new LibGit2ObjectExistenceChecker(this.Tracer, this.Enlistment.WorkingDirectoryBackingRoot); + } + + /// + /// Wrapper that delegates to a shared checker but does not dispose it. + /// Allows shared thread-safe checkers to be used in using-blocks + /// without premature disposal. 
+ /// + private class NonDisposingCheckerWrapper : IObjectExistenceChecker + { + private readonly IObjectExistenceChecker inner; + + public NonDisposingCheckerWrapper(IObjectExistenceChecker inner) + { + this.inner = inner; + } + + public bool ObjectExists(string sha) + { + return this.inner.ObjectExists(sha); + } + + public void Dispose() + { + // No-op: the shared checker is owned by BlobPrefetcher + } } protected bool UpdateRefSpec(ITracer tracer, Enlistment enlistment, string branchOrCommit, GitRefs refs) diff --git a/GVFS/GVFS.Common/Prefetch/Pipeline/FindBlobsStage.cs b/GVFS/GVFS.Common/Prefetch/Pipeline/FindBlobsStage.cs index 95c06b04e..d031e7764 100644 --- a/GVFS/GVFS.Common/Prefetch/Pipeline/FindBlobsStage.cs +++ b/GVFS/GVFS.Common/Prefetch/Pipeline/FindBlobsStage.cs @@ -1,6 +1,7 @@ using GVFS.Common.Git; using GVFS.Common.Prefetch.Git; using GVFS.Common.Tracing; +using System; using System.Collections.Concurrent; using System.Threading; @@ -22,18 +23,22 @@ public class FindBlobsStage : PrefetchPipelineStage private ConcurrentHashSet alreadyFoundBlobIds; + private Func checkerFactory; + public FindBlobsStage( int maxParallel, BlockingCollection requiredBlobs, BlockingCollection availableBlobs, ITracer tracer, - Enlistment enlistment) + Enlistment enlistment, + Func checkerFactory = null) : base(maxParallel) { this.tracer = tracer.StartActivity(AreaPath, EventLevel.Informational, Keywords.Telemetry, metadata: null); this.requiredBlobs = requiredBlobs; this.enlistment = enlistment; this.alreadyFoundBlobIds = new ConcurrentHashSet(); + this.checkerFactory = checkerFactory; this.MissingBlobs = new BlockingCollection(); this.AvailableBlobs = availableBlobs; @@ -55,13 +60,15 @@ public int AvailableBlobCount protected override void DoWork() { string blobId; - using (LibGit2Repo repo = new LibGit2Repo(this.tracer, this.enlistment.WorkingDirectoryBackingRoot)) + using (IObjectExistenceChecker checker = this.checkerFactory != null + ? 
this.checkerFactory() + : new LibGit2ObjectExistenceChecker(this.tracer, this.enlistment.WorkingDirectoryBackingRoot)) { while (this.requiredBlobs.TryTake(out blobId, Timeout.Infinite)) { if (this.alreadyFoundBlobIds.Add(blobId)) { - if (!repo.ObjectExists(blobId)) + if (!checker.ObjectExists(blobId)) { Interlocked.Increment(ref this.missingBlobCount); this.MissingBlobs.Add(blobId); diff --git a/GVFS/GVFS.UnitTests/Prefetch/MidxReaderTests.cs b/GVFS/GVFS.UnitTests/Prefetch/MidxReaderTests.cs new file mode 100644 index 000000000..d3956c11e --- /dev/null +++ b/GVFS/GVFS.UnitTests/Prefetch/MidxReaderTests.cs @@ -0,0 +1,299 @@ +using GVFS.Common.Git; +using GVFS.Tests.Should; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace GVFS.UnitTests.Prefetch +{ + [TestFixture] + public class MidxReaderTests + { + private string tempDir; + + [SetUp] + public void SetUp() + { + this.tempDir = Path.Combine(Path.GetTempPath(), "MidxReaderTests_" + Guid.NewGuid().ToString("N").Substring(0, 8)); + Directory.CreateDirectory(this.tempDir); + } + + [TearDown] + public void TearDown() + { + if (Directory.Exists(this.tempDir)) + { + Directory.Delete(this.tempDir, recursive: true); + } + } + + [Test] + public void FindsExistingObject() + { + string[] oids = GenerateSortedOids(100); + string midxPath = WriteMidxFile(this.tempDir, oids, new[] { "pack-abc123" }); + + using (MidxReader reader = new MidxReader(midxPath)) + { + reader.TotalObjects.ShouldEqual(100); + reader.Exists(oids[0]).ShouldBeTrue("First OID should exist"); + reader.Exists(oids[50]).ShouldBeTrue("Middle OID should exist"); + reader.Exists(oids[99]).ShouldBeTrue("Last OID should exist"); + } + } + + [Test] + public void ReturnsFalseForMissingObject() + { + string[] oids = GenerateSortedOids(100); + string midxPath = WriteMidxFile(this.tempDir, oids, new[] { "pack-abc123" }); + + using (MidxReader reader = new MidxReader(midxPath)) + { + 
reader.Exists("0000000000000000000000000000000000000000").ShouldBeFalse(); + reader.Exists("ffffffffffffffffffffffffffffffffffffffff").ShouldBeFalse(); + reader.Exists("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef").ShouldBeFalse(); + } + } + + [Test] + public void ReturnsFalseForNullOrShortSha() + { + string[] oids = GenerateSortedOids(10); + string midxPath = WriteMidxFile(this.tempDir, oids, new[] { "pack-abc123" }); + + using (MidxReader reader = new MidxReader(midxPath)) + { + reader.Exists((string)null).ShouldBeFalse(); + reader.Exists("abc").ShouldBeFalse(); + } + } + + [Test] + public void ParsesPackNames() + { + string[] oids = GenerateSortedOids(10); + string[] packs = new[] { "pack-aaaa", "pack-bbbb", "prefetch-cccc" }; + string midxPath = WriteMidxFile(this.tempDir, oids, packs); + + using (MidxReader reader = new MidxReader(midxPath)) + { + HashSet stems = reader.GetPackStems(); + stems.Count.ShouldEqual(3); + stems.Contains("pack-aaaa").ShouldBeTrue(); + stems.Contains("pack-bbbb").ShouldBeTrue(); + stems.Contains("prefetch-cccc").ShouldBeTrue(); + } + } + + [Test] + public void HandlesEmptyMidx() + { + string midxPath = WriteMidxFile(this.tempDir, Array.Empty(), new[] { "pack-empty" }); + + using (MidxReader reader = new MidxReader(midxPath)) + { + reader.TotalObjects.ShouldEqual(0); + reader.Exists("0000000000000000000000000000000000000000").ShouldBeFalse(); + } + } + + [Test] + public void ThrowsOnInvalidMagic() + { + string path = Path.Combine(this.tempDir, "bad-midx"); + File.WriteAllBytes(path, new byte[] { 0, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0, 1 }); + + Assert.Throws(() => + { + using (MidxReader _ = new MidxReader(path)) { } + }); + } + + [Test] + public void HandlesAllFanoutBuckets() + { + // Create OIDs that span all 256 fanout buckets + List oids = new List(); + for (int i = 0; i < 256; i++) + { + byte[] raw = new byte[20]; + raw[0] = (byte)i; + raw[1] = 0x42; + oids.Add(BitConverter.ToString(raw).Replace("-", "").ToLowerInvariant()); + } + + 
oids.Sort(StringComparer.Ordinal);
+            string midxPath = WriteMidxFile(this.tempDir, oids.ToArray(), new[] { "pack-full" });
+
+            using (MidxReader reader = new MidxReader(midxPath))
+            {
+                reader.TotalObjects.ShouldEqual(256);
+                foreach (string oid in oids)
+                {
+                    reader.Exists(oid).ShouldBeTrue($"OID {oid} should exist");
+                }
+            }
+        }
+
+        /// <summary>
+        /// Writes a synthetic MIDX v1 file named "multi-pack-index" into <paramref name="dir"/> and returns its path.
+        /// Format: Header(12) + ChunkTOC(numChunks*12 + 12 terminator) + PNAM + OIDF + OIDL + OOFF
+        /// </summary>
+        internal static string WriteMidxFile(string dir, string[] sortedOidHexes, string[] packNames)
+        {
+            int numObjects = sortedOidHexes.Length;
+            int numPacks = packNames.Length;
+
+            // PNAM chunk: null-terminated .idx filenames concatenated
+            List<byte> pnamBytes = new List<byte>();
+            foreach (string name in packNames)
+            {
+                byte[] nameBytes = System.Text.Encoding.ASCII.GetBytes(name + ".idx\0");
+                pnamBytes.AddRange(nameBytes);
+            }
+
+            // Pad PNAM to 4-byte alignment
+            while (pnamBytes.Count % 4 != 0)
+            {
+                pnamBytes.Add(0);
+            }
+
+            // OIDF (fanout): 256 * 4 bytes; first count OIDs per leading byte
+            uint[] fanout = new uint[256];
+            foreach (string hex in sortedOidHexes)
+            {
+                int firstByte = (HexVal(hex[0]) << 4) | HexVal(hex[1]);
+                fanout[firstByte]++;
+            }
+
+            // Make cumulative: fanout[i] = number of OIDs whose first byte is <= i
+            for (int i = 1; i < 256; i++)
+            {
+                fanout[i] += fanout[i - 1];
+            }
+
+            // OIDL: sorted 20-byte OIDs
+            byte[] oidlBytes = new byte[numObjects * 20];
+            for (int i = 0; i < numObjects; i++)
+            {
+                byte[] oid = HexToByteArray(sortedOidHexes[i]);
+                Array.Copy(oid, 0, oidlBytes, i * 20, 20);
+            }
+
+            // OOFF: dummy 8-byte entries per object (pack-id:4 + offset:4)
+            byte[] ooffBytes = new byte[numObjects * 8];
+
+            // Chunk layout: 4 chunks (PNAM, OIDF, OIDL, OOFF) followed by a terminating TOC entry
+            int numChunks = 4; // PNAM, OIDF, OIDL, OOFF
+            int headerSize = 12;
+            int tocSize = (numChunks * 12) + 12; // +12 for terminator
+            long dataStart = headerSize + tocSize;
+
+            long pnamOff = dataStart;
+            long oidfOff = pnamOff + pnamBytes.Count;
+            long oidlOff = oidfOff + (256 * 4);
+            long ooffOff = oidlOff + oidlBytes.Length;
+            long endOff = ooffOff + ooffBytes.Length;
+
+            string path = Path.Combine(dir, "multi-pack-index");
+            using (FileStream fs = File.Create(path))
+            using (BinaryWriter bw = new BinaryWriter(fs))
+            {
+                // Header
+                bw.Write(new byte[] { 0x4D, 0x49, 0x44, 0x58 }); // MIDX
+                bw.Write((byte)1); // version
+                bw.Write((byte)1); // oid version (SHA-1)
+                bw.Write((byte)numChunks);
+                bw.Write((byte)0); // reserved
+                WriteBE32(bw, (uint)numPacks);
+
+                // Chunk TOC
+                WriteTocEntry(bw, 0x504E414D, pnamOff); // PNAM
+                WriteTocEntry(bw, 0x4F494446, oidfOff); // OIDF
+                WriteTocEntry(bw, 0x4F49444C, oidlOff); // OIDL
+                WriteTocEntry(bw, 0x4F4F4646, ooffOff); // OOFF
+                WriteTocEntry(bw, 0x00000000, endOff); // Terminator
+
+                // PNAM
+                bw.Write(pnamBytes.ToArray());
+
+                // OIDF (fanout)
+                for (int i = 0; i < 256; i++)
+                {
+                    WriteBE32(bw, fanout[i]);
+                }
+
+                // OIDL
+                bw.Write(oidlBytes);
+
+                // OOFF
+                bw.Write(ooffBytes);
+            }
+
+            return path;
+        }
+
+        internal static string[] GenerateSortedOids(int count)
+        {
+            Random rng = new Random(42); // deterministic: fixed seed, so every call yields the same sequence
+            HashSet<string> set = new HashSet<string>();
+            while (set.Count < count)
+            {
+                byte[] raw = new byte[20];
+                rng.NextBytes(raw);
+                set.Add(BitConverter.ToString(raw).Replace("-", "").ToLowerInvariant());
+            }
+
+            string[] result = set.ToArray();
+            Array.Sort(result, StringComparer.Ordinal);
+            return result;
+        }
+
+        private static void WriteTocEntry(BinaryWriter bw, uint chunkId, long offset)
+        {
+            WriteBE32(bw, chunkId);
+            WriteBE64(bw, offset);
+        }
+
+        private static void WriteBE32(BinaryWriter bw, uint value)
+        {
+            bw.Write((byte)(value >> 24));
+            bw.Write((byte)(value >> 16));
+            bw.Write((byte)(value >> 8));
+            bw.Write((byte)value);
+        }
+
+        private static void WriteBE64(BinaryWriter bw, long value)
+        {
+            bw.Write((byte)(value >> 56));
+            bw.Write((byte)(value >> 48));
+            bw.Write((byte)(value >> 40));
+            bw.Write((byte)(value >> 32));
+            bw.Write((byte)(value >> 24));
+            bw.Write((byte)(value >> 16));
+            bw.Write((byte)(value >> 8));
+            bw.Write((byte)value);
+        }
+
+        private static byte[] HexToByteArray(string hex)
+        {
+            byte[] result = new byte[hex.Length / 2];
+            for (int i = 0; i < result.Length; i++)
+            {
+                result[i] = (byte)((HexVal(hex[i * 2]) << 4) | HexVal(hex[(i * 2) + 1]));
+            }
+
+            return result;
+        }
+
+        private static int HexVal(char c)
+        {
+            if (c >= 'a') return c - 'a' + 10;
+            if (c >= 'A') return c - 'A' + 10;
+            return c - '0';
+        }
+    }
+}
diff --git a/GVFS/GVFS.UnitTests/Prefetch/PackIndexObjectExistenceCheckerTests.cs b/GVFS/GVFS.UnitTests/Prefetch/PackIndexObjectExistenceCheckerTests.cs
new file mode 100644
index 000000000..78fa3a668
--- /dev/null
+++ b/GVFS/GVFS.UnitTests/Prefetch/PackIndexObjectExistenceCheckerTests.cs
@@ -0,0 +1,216 @@
+using GVFS.Common;
+using GVFS.Common.Git;
+using GVFS.Tests.Should;
+using GVFS.UnitTests.Mock.Common;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Linq;
+
+namespace GVFS.UnitTests.Prefetch
+{
+    [TestFixture]
+    public class PackIndexObjectExistenceCheckerTests
+    {
+        private string tempDir;
+        private string objectsRoot;
+        private string packDir;
+
+        [SetUp]
+        public void SetUp()
+        {
+            this.tempDir = Path.Combine(Path.GetTempPath(), "PackIdxCheckerTests_" + Guid.NewGuid().ToString("N").Substring(0, 8));
+            this.objectsRoot = Path.Combine(this.tempDir, "objects");
+            this.packDir = Path.Combine(this.objectsRoot, "pack");
+            Directory.CreateDirectory(this.packDir);
+        }
+
+        [TearDown]
+        public void TearDown()
+        {
+            if (Directory.Exists(this.tempDir))
+            {
+                Directory.Delete(this.tempDir, recursive: true);
+            }
+        }
+
+        [Test]
+        public void FindsObjectInMidx()
+        {
+            string[] oids = MidxReaderTests.GenerateSortedOids(100);
+            MidxReaderTests.WriteMidxFile(this.packDir, oids, new[] { "pack-abc" });
+
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                this.objectsRoot))
+            {
+                checker.ObjectExists(oids[0]).ShouldBeTrue();
+                checker.ObjectExists(oids[50]).ShouldBeTrue();
+                checker.ObjectExists(oids[99]).ShouldBeTrue();
+            }
+        }
+
+        [Test]
+        public void FindsObjectInSupplementalPack()
+        {
+            // Create MIDX with one set of OIDs
+            string[] midxOids = MidxReaderTests.GenerateSortedOids(50);
+            MidxReaderTests.WriteMidxFile(this.packDir, midxOids, new[] { "pack-inmidx" });
+
+            // Create a supplemental .idx NOT listed in the MIDX.
+            // GenerateSortedOids always seeds with 42, so reusing it would return a subset
+            // of midxOids; use a different seed to get OIDs that are absent from the MIDX.
+            Random rng = new Random(999);
+            string[] extraOids = Enumerable.Range(0, 30)
+                .Select(_ =>
+                {
+                    byte[] raw = new byte[20];
+                    rng.NextBytes(raw);
+                    return BitConverter.ToString(raw).Replace("-", "").ToLowerInvariant();
+                })
+                .Distinct()
+                .OrderBy(x => x, StringComparer.Ordinal)
+                .ToArray();
+
+            PackIndexReaderTests.WritePackIndexV2(this.packDir, "pack-supplemental", extraOids);
+
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                this.objectsRoot))
+            {
+                // MIDX objects should still be found
+                checker.ObjectExists(midxOids[0]).ShouldBeTrue("MIDX object should be found");
+
+                // Supplemental pack objects should be found
+                checker.ObjectExists(extraOids[0]).ShouldBeTrue("Supplemental pack object should be found");
+                checker.ObjectExists(extraOids[extraOids.Length - 1]).ShouldBeTrue("Last supplemental object should be found");
+            }
+        }
+
+        [Test]
+        public void FindsLooseObject()
+        {
+            // No packs at all — just a loose object
+            string sha = "aabbccddee112233445566778899001122334455";
+            string prefix = sha.Substring(0, 2);
+            string suffix = sha.Substring(2);
+            string looseDir = Path.Combine(this.objectsRoot, prefix);
+            Directory.CreateDirectory(looseDir);
+            File.WriteAllBytes(Path.Combine(looseDir, suffix), new byte[] { 0x78, 0x01 }); // zlib header
+
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                this.objectsRoot))
+            {
+                checker.ObjectExists(sha).ShouldBeTrue("Loose object should be found");
+                checker.ObjectExists("0000000000000000000000000000000000000000").ShouldBeFalse("Non-existent loose should not be found");
+            }
+        }
+
+        [Test]
+        public void ReturnsFalseForMissingObject()
+        {
+            string[] oids = MidxReaderTests.GenerateSortedOids(50);
+            MidxReaderTests.WriteMidxFile(this.packDir, oids, new[] { "pack-abc" });
+
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                this.objectsRoot))
+            {
+                checker.ObjectExists("0000000000000000000000000000000000000000").ShouldBeFalse();
+                checker.ObjectExists("ffffffffffffffffffffffffffffffffffffffff").ShouldBeFalse();
+            }
+        }
+
+        [Test]
+        public void HandlesEmptyPackDir()
+        {
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                this.objectsRoot))
+            {
+                checker.ObjectExists("0000000000000000000000000000000000000000").ShouldBeFalse();
+            }
+        }
+
+        [Test]
+        public void HandlesMissingPackDir()
+        {
+            string noPackRoot = Path.Combine(this.tempDir, "nopack");
+            Directory.CreateDirectory(noPackRoot);
+            // No "pack" subdirectory
+
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                noPackRoot))
+            {
+                checker.ObjectExists("0000000000000000000000000000000000000000").ShouldBeFalse();
+            }
+        }
+
+        [Test]
+        public void DeduplicatesIdenticalRoots()
+        {
+            string[] oids = MidxReaderTests.GenerateSortedOids(10);
+            MidxReaderTests.WriteMidxFile(this.packDir, oids, new[] { "pack-dedup" });
+
+            // Pass the same root twice (simulates LocalObjectsRoot == GitObjectsRoot)
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                this.objectsRoot,
+                this.objectsRoot))
+            {
+                checker.ObjectExists(oids[0]).ShouldBeTrue();
+            }
+        }
+
+        [Test]
+        public void SearchesMultipleRoots()
+        {
+            // Root 1 with some objects
+            string root1 = Path.Combine(this.tempDir, "root1");
+            string packDir1 = Path.Combine(root1, "pack");
+            Directory.CreateDirectory(packDir1);
+            string[] oids1 = MidxReaderTests.GenerateSortedOids(20);
+            MidxReaderTests.WriteMidxFile(packDir1, oids1, new[] { "pack-r1" });
+
+            // Root 2 with different objects (different seed than GenerateSortedOids)
+            string root2 = Path.Combine(this.tempDir, "root2");
+            string packDir2 = Path.Combine(root2, "pack");
+            Directory.CreateDirectory(packDir2);
+            Random rng = new Random(12345);
+            string[] oids2 = Enumerable.Range(0, 20)
+                .Select(_ =>
+                {
+                    byte[] raw = new byte[20];
+                    rng.NextBytes(raw);
+                    return BitConverter.ToString(raw).Replace("-", "").ToLowerInvariant();
+                })
+                .Distinct()
+                .OrderBy(x => x, StringComparer.Ordinal)
+                .ToArray();
+            MidxReaderTests.WriteMidxFile(packDir2, oids2, new[] { "pack-r2" });
+
+            using (PackIndexObjectExistenceChecker checker = new PackIndexObjectExistenceChecker(
+                MockTracerProvider.CreateMockTracer(),
+                root1,
+                root2))
+            {
+                checker.ObjectExists(oids1[0]).ShouldBeTrue("Root1 object should be found");
+                checker.ObjectExists(oids2[0]).ShouldBeTrue("Root2 object should be found");
+                checker.ObjectExists("0000000000000000000000000000000000000000").ShouldBeFalse();
+            }
+        }
+    }
+
+    /// <summary>
+    /// Helper to create mock tracers for tests that need ITracer.
+    /// </summary>
+    internal static class MockTracerProvider
+    {
+        public static MockTracer CreateMockTracer()
+        {
+            return new MockTracer();
+        }
+    }
+}
diff --git a/GVFS/GVFS.UnitTests/Prefetch/PackIndexReaderTests.cs b/GVFS/GVFS.UnitTests/Prefetch/PackIndexReaderTests.cs
new file mode 100644
index 000000000..e153d4690
--- /dev/null
+++ b/GVFS/GVFS.UnitTests/Prefetch/PackIndexReaderTests.cs
@@ -0,0 +1,171 @@
+using GVFS.Common.Git;
+using GVFS.Tests.Should;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Linq;
+
+namespace GVFS.UnitTests.Prefetch
+{
+    [TestFixture]
+    public class PackIndexReaderTests
+    {
+        private string tempDir;
+
+        [SetUp]
+        public void SetUp()
+        {
+            this.tempDir = Path.Combine(Path.GetTempPath(), "PackIndexReaderTests_" + Guid.NewGuid().ToString("N").Substring(0, 8));
+            Directory.CreateDirectory(this.tempDir);
+        }
+
+        [TearDown]
+        public void TearDown()
+        {
+            if (Directory.Exists(this.tempDir))
+            {
+                Directory.Delete(this.tempDir, recursive: true);
+            }
+        }
+
+        [Test]
+        public void FindsExistingObject()
+        {
+            string[] oids = MidxReaderTests.GenerateSortedOids(50);
+            string idxPath = WritePackIndexV2(this.tempDir, "pack-test1", oids);
+
+            using (PackIndexReader reader = new PackIndexReader(idxPath))
+            {
+                reader.TotalObjects.ShouldEqual(50);
+                reader.Exists(oids[0]).ShouldBeTrue();
+                reader.Exists(oids[25]).ShouldBeTrue();
+                reader.Exists(oids[49]).ShouldBeTrue();
+            }
+        }
+
+        [Test]
+        public void ReturnsFalseForMissingObject()
+        {
+            string[] oids = MidxReaderTests.GenerateSortedOids(50);
+            string idxPath = WritePackIndexV2(this.tempDir, "pack-test2", oids);
+
+            using (PackIndexReader reader = new PackIndexReader(idxPath))
+            {
+                reader.Exists("0000000000000000000000000000000000000000").ShouldBeFalse();
+                reader.Exists("ffffffffffffffffffffffffffffffffffffffff").ShouldBeFalse();
+            }
+        }
+
+        [Test]
+        public void HandlesSingleObject()
+        {
+            string[] oids = MidxReaderTests.GenerateSortedOids(1);
+            string idxPath = WritePackIndexV2(this.tempDir, "pack-single", oids);
+
+            using (PackIndexReader reader = new PackIndexReader(idxPath))
+            {
+                reader.TotalObjects.ShouldEqual(1);
+                reader.Exists(oids[0]).ShouldBeTrue();
+                reader.Exists("0000000000000000000000000000000000000000").ShouldBeFalse();
+            }
+        }
+
+        [Test]
+        public void ThrowsOnInvalidMagic()
+        {
+            string path = Path.Combine(this.tempDir, "bad.idx");
+            File.WriteAllBytes(path, new byte[] { 0, 0, 0, 0, 0, 0, 0, 2 });
+
+            Assert.Catch(() => // NOTE(review): tighten to Assert.Throws<T> once the reader's exception type is confirmed
+            {
+                using (PackIndexReader _ = new PackIndexReader(path)) { }
+            });
+        }
+
+        /// <summary>
+        /// Writes a synthetic pack index v2 file named <paramref name="packStem"/>.idx into <paramref name="dir"/> and returns its path.
+        /// Format: Magic(4) + Version(4) + Fanout(256*4) + OIDs(N*20) + CRC32(N*4) + Offsets(N*4) + PackSHA(20) + IdxSHA(20)
+        /// </summary>
+        internal static string WritePackIndexV2(string dir, string packStem, string[] sortedOidHexes)
+        {
+            int numObjects = sortedOidHexes.Length;
+
+            // Fanout: count OIDs per leading byte, then make the counts cumulative
+            uint[] fanout = new uint[256];
+            foreach (string hex in sortedOidHexes)
+            {
+                int firstByte = (HexVal(hex[0]) << 4) | HexVal(hex[1]);
+                fanout[firstByte]++;
+            }
+
+            for (int i = 1; i < 256; i++)
+            {
+                fanout[i] += fanout[i - 1];
+            }
+
+            // OID table: 20 raw bytes per object, in the order given by the caller
+            byte[] oidBytes = new byte[numObjects * 20];
+            for (int i = 0; i < numObjects; i++)
+            {
+                byte[] oid = HexToByteArray(sortedOidHexes[i]);
+                Array.Copy(oid, 0, oidBytes, i * 20, 20);
+            }
+
+            string path = Path.Combine(dir, packStem + ".idx");
+            using (FileStream fs = File.Create(path))
+            using (BinaryWriter bw = new BinaryWriter(fs))
+            {
+                // Magic
+                bw.Write(new byte[] { 0xFF, 0x74, 0x4F, 0x63 });
+                // Version
+                WriteBE32(bw, 2);
+
+                // Fanout
+                for (int i = 0; i < 256; i++)
+                {
+                    WriteBE32(bw, fanout[i]);
+                }
+
+                // OID table
+                bw.Write(oidBytes);
+
+                // CRC32 table (dummy)
+                bw.Write(new byte[numObjects * 4]);
+
+                // Offset table (dummy)
+                bw.Write(new byte[numObjects * 4]);
+
+                // Pack SHA + Idx SHA (dummy)
+                bw.Write(new byte[40]);
+            }
+
+            return path;
+        }
+
+        private static void WriteBE32(BinaryWriter bw, uint value)
+        {
+            bw.Write((byte)(value >> 24));
+            bw.Write((byte)(value >> 16));
+            bw.Write((byte)(value >> 8));
+            bw.Write((byte)value);
+        }
+
+        private static byte[] HexToByteArray(string hex)
+        {
+            byte[] result = new byte[hex.Length / 2];
+            for (int i = 0; i < result.Length; i++)
+            {
+                result[i] = (byte)((HexVal(hex[i * 2]) << 4) | HexVal(hex[(i * 2) + 1]));
+            }
+
+            return result;
+        }
+
+        private static int HexVal(char c)
+        {
+            if (c >= 'a') return c - 'a' + 10;
+            if (c >= 'A') return c - 'A' + 10;
+            return c - '0';
+        }
+    }
+}