diff --git a/README.md b/README.md index ed2ba64..3c650c6 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,54 @@ Config file locations: - `skeleton [path]` or `tree [path]` - **(New)** View the structural tree of a project with file headers and symbol definitions directly in your terminal. - `[path]` - Start the MCP server (stdio) for the specified path (defaults to current directory). +### Including paths excluded by the workspace `.gitignore` + +If your workspace `.gitignore` excludes a sub-directory that you still want +indexed (common in monorepos where sub-projects under `repos/`, `packages/`, +or `vendor/` are gitignored at the top level), use `--include` or +`CONTEXTPLUS_EXTRA_ROOTS` to add the paths back. + +**CLI form** (repeatable): + +```bash +bunx contextplus /path/to/workspace \ + --include repos/lacuna \ + --include repos/graphrag-core +``` + +**Environment variable** (read only when no `--include` flag is given — any `--include` overrides it entirely; entries are split on the +system path-list delimiter, Node's `path.delimiter` — `:` on Unix, `;` on Windows): + +```bash +CONTEXTPLUS_EXTRA_ROOTS=repos/lacuna:repos/graphrag-core \ + bunx contextplus /path/to/workspace +``` + +**In `.mcp.json`** the env form is usually more ergonomic (on Windows, separate the entries with `;` instead of `:`): + +```json +{ + "mcpServers": { + "contextplus": { + "command": "bunx", + "args": ["contextplus", "/path/to/workspace"], + "env": { + "CONTEXTPLUS_EXTRA_ROOTS": "repos/lacuna:repos/graphrag-core" + } + } + } +} +``` + +Each path listed is walked **independently** of the workspace root, with a +fresh ignore scope. Each path's own `.gitignore` is respected. Paths are +validated at startup; invalid entries (non-existent, not a directory, +outside the workspace, or the workspace root itself) emit a stderr warning and are skipped. + +Nested `.gitignore` files inside the workspace and inside each extra root +are loaded and merged with inherited rules, matching `git` and `ripgrep` +behavior. 
+ ### From Source ```bash diff --git a/src/core/extra-roots.ts b/src/core/extra-roots.ts new file mode 100644 index 0000000..e09b4d7 --- /dev/null +++ b/src/core/extra-roots.ts @@ -0,0 +1,81 @@ +// CLI/env argument parsing for the extraRoots config. +// Pure module — no side effects, safe to import from tests. + +import { realpathSync, statSync } from "fs"; +import { delimiter, isAbsolute, resolve, sep } from "path"; + +export interface ParseExtraRootsInput { + argv: string[]; + env: NodeJS.ProcessEnv | Record; + rootDir: string; +} + +export interface ParseExtraRootsResult { + accepted: string[]; + warnings: string[]; +} + +function extractIncludeFlags(argv: string[]): string[] { + const out: string[] = []; + for (let i = 0; i < argv.length; i++) { + if (argv[i] === "--include" && i + 1 < argv.length) { + out.push(argv[i + 1]); + i++; + } else if (argv[i].startsWith("--include=")) { + out.push(argv[i].slice("--include=".length)); + } + } + return out; +} + +export function parseExtraRoots(input: ParseExtraRootsInput): ParseExtraRootsResult { + const accepted: string[] = []; + const warnings: string[] = []; + const rootAbs = resolve(input.rootDir); + let rootReal = rootAbs; + try { + rootReal = realpathSync(rootAbs); + } catch { + // rootDir doesn't exist; fall through + } + + const fromCli = extractIncludeFlags(input.argv); + const raw = fromCli.length > 0 + ? fromCli + : (input.env.CONTEXTPLUS_EXTRA_ROOTS ?? "") + .split(delimiter) + .filter((s) => s.length > 0); + + for (const entry of raw) { + const abs = isAbsolute(entry) ? 
entry : resolve(rootAbs, entry); + let real = abs; + try { + real = realpathSync(abs); + } catch { + // doesn't exist - statSync below will catch and warn + } + + if (real === rootReal) { + warnings.push(`contextplus: extraRoot '${entry}' equals the workspace root — skipping`); + continue; + } + if (!real.startsWith(rootReal + sep)) { + warnings.push(`contextplus: extraRoot '${entry}' is outside the workspace root — skipping`); + continue; + } + let stats; + try { + stats = statSync(real); + } catch { + warnings.push(`contextplus: extraRoot '${entry}' does not exist — skipping`); + continue; + } + if (!stats.isDirectory()) { + warnings.push(`contextplus: extraRoot '${entry}' is not a directory — skipping`); + continue; + } + accepted.push(real); + } + + return { accepted, warnings }; +} diff --git a/src/core/walker.ts b/src/core/walker.ts index 8b391d6..d97c6ec 100644 --- a/src/core/walker.ts +++ b/src/core/walker.ts @@ -1,8 +1,8 @@ // Gitignore-aware recursive directory walker with depth control -// Returns filtered file paths respecting project ignore patterns +// Returns filtered file paths respecting project ignore patterns (nested-gitignore-aware) -import { readdir, readFile, stat } from "fs/promises"; -import { join, relative, resolve } from "path"; +import { readdir, readFile, realpath, stat } from "fs/promises"; +import { join, relative, resolve, sep } from "path"; import ignore, { type Ignore } from "ignore"; export interface WalkOptions { @@ -18,6 +18,13 @@ export interface FileEntry { depth: number; } +interface IgnoreScope { + scopeRoot: string; + ig: Ignore; +} + +type IgnoreChain = IgnoreScope[]; + const ALWAYS_IGNORE = new Set([ "node_modules", ".git", @@ -38,20 +45,37 @@ const ALWAYS_IGNORE = new Set([ ".parcel-cache", ]); -async function loadIgnoreRules(rootDir: string): Promise { - const ig = ignore(); +async function loadLocalScope(dir: string): Promise { try { - const content = await readFile(join(rootDir, ".gitignore"), "utf-8"); - 
ig.add(content); + const content = await readFile(join(dir, ".gitignore"), "utf-8"); + return { scopeRoot: dir, ig: ignore().add(content) }; } catch { + return null; } - return ig; +} + +function isIgnoredInChain(absPath: string, isDir: boolean, chain: IgnoreChain): boolean { + // Walk scopes from outermost to innermost. Each scope's patterns are evaluated + // against paths relative to that scope's directory. Later scopes can re-include + // paths that earlier scopes excluded (gitignore negation crosses scope boundaries). + let state: "ignored" | "included" = "included"; + for (const scope of chain) { + let rel = relative(scope.scopeRoot, absPath).replace(/\\/g, "/"); + if (!rel || rel.startsWith("..")) continue; + // Mark directories with a trailing slash so anchored directory patterns + // like `/build/` match the directory itself (and short-circuit descent). + if (isDir) rel += "/"; + const result = scope.ig.test(rel); + if (result.unignored) state = "included"; + else if (result.ignored) state = "ignored"; + } + return state === "ignored"; } async function walkRecursive( dir: string, rootDir: string, - ig: Ignore, + chain: IgnoreChain, depth: number, maxDepth: number, results: FileEntry[], @@ -64,19 +88,22 @@ async function walkRecursive( const fullPath = join(dir, entry.name); const relPath = relative(rootDir, fullPath).replace(/\\/g, "/"); - if (ig.ignores(relPath)) continue; - const isDir = entry.isDirectory(); + if (isIgnoredInChain(fullPath, isDir, chain)) continue; + results.push({ path: fullPath, relativePath: relPath, isDirectory: isDir, depth }); - if (isDir) await walkRecursive(fullPath, rootDir, ig, depth + 1, maxDepth, results); + if (isDir) { + const localScope = await loadLocalScope(fullPath); + const childChain = localScope ? 
[...chain, localScope] : chain; + await walkRecursive(fullPath, rootDir, childChain, depth + 1, maxDepth, results); + } } } export async function walkDirectory(options: WalkOptions): Promise { const rootDir = resolve(options.rootDir); const startDir = options.targetPath ? resolve(rootDir, options.targetPath) : rootDir; - const ig = await loadIgnoreRules(rootDir); const results: FileEntry[] = []; try { @@ -85,7 +112,23 @@ export async function walkDirectory(options: WalkOptions): Promise return results; } - await walkRecursive(startDir, rootDir, ig, 0, options.depthLimit ?? 0, results); + // Build the initial chain from rootDir down to startDir so ancestor scopes apply + // at the start of the walk. + const chain: IgnoreChain = []; + const rootScope = await loadLocalScope(rootDir); + if (rootScope) chain.push(rootScope); + + if (startDir !== rootDir) { + const segments = relative(rootDir, startDir).split(/[\\/]/).filter(Boolean); + let cursor = rootDir; + for (const segment of segments) { + cursor = join(cursor, segment); + const scope = await loadLocalScope(cursor); + if (scope) chain.push(scope); + } + } + + await walkRecursive(startDir, rootDir, chain, 0, options.depthLimit ?? 0, results); return results; } @@ -101,3 +144,77 @@ export function groupByDirectory(entries: FileEntry[]): Map } return groups; } + +let GLOBAL_EXTRA_ROOTS: string[] = []; + +export function setExtraRoots(paths: string[]): void { + GLOBAL_EXTRA_ROOTS = [...paths]; +} + +export function getExtraRoots(): string[] { + return [...GLOBAL_EXTRA_ROOTS]; +} + +export interface WalkRootsOptions { + rootDir: string; + extraRoots?: string[]; + depthLimit?: number; + targetPath?: string; +} + +export async function walkRoots(options: WalkRootsOptions): Promise { + const rootDir = resolve(options.rootDir); + let rootReal = rootDir; + try { + rootReal = await realpath(rootDir); + } catch { + // rootDir doesn't exist; fall through with unresolved value + } + + const extraRoots = options.extraRoots ?? 
GLOBAL_EXTRA_ROOTS; + const seen = new Set(); + const results: FileEntry[] = []; + + const primary = await walkDirectory({ + rootDir, + depthLimit: options.depthLimit, + targetPath: options.targetPath, + }); + for (const entry of primary) { + if (seen.has(entry.path)) continue; + seen.add(entry.path); + results.push(entry); + } + + // targetPath constrains the primary walk only — extraRoots are always walked in full. + for (const extra of extraRoots) { + const extraAbs = resolve(rootDir, extra); + let extraReal = extraAbs; + try { + extraReal = await realpath(extraAbs); + } catch { + // doesn't exist; will fall through to the prefix check on unresolved path + } + if (extraReal !== rootReal && !extraReal.startsWith(rootReal + sep)) { + throw new Error(`walkRoots: extraRoot "${extra}" resolves outside workspace root`); + } + const depthOffset = relative(rootReal, extraReal).split(/[\\/]/).filter(Boolean).length; + const extraEntries = await walkDirectory({ + rootDir: extraReal, + depthLimit: options.depthLimit, + }); + for (const entry of extraEntries) { + if (seen.has(entry.path)) continue; + seen.add(entry.path); + const workspaceRel = relative(rootReal, entry.path).replace(/\\/g, "/"); + results.push({ + path: entry.path, + relativePath: workspaceRel, + isDirectory: entry.isDirectory, + depth: entry.depth + depthOffset, + }); + } + } + + return results; +} diff --git a/src/index.ts b/src/index.ts index 5413421..7dd839a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,7 +10,9 @@ import { dirname, resolve } from "path"; import { fileURLToPath } from "url"; import { z } from "zod"; import { createEmbeddingTrackerController } from "./core/embedding-tracker.js"; +import { parseExtraRoots } from "./core/extra-roots.js"; import { createIdleMonitor, getIdleShutdownMs, getParentPollMs, isBrokenPipeError, runCleanup, startParentMonitor } from "./core/process-lifecycle.js"; +import { setExtraRoots } from "./core/walker.js"; import { getContextTree } from 
"./tools/context-tree.js"; import { getFileSkeleton } from "./tools/file-skeleton.js"; import { ensureMcpDataDir, cancelAllEmbeddings } from "./core/embeddings.js"; @@ -39,6 +41,16 @@ const passthroughArgs = process.argv.slice(2); const ROOT_DIR = passthroughArgs[0] && !SUB_COMMANDS.includes(passthroughArgs[0]) ? resolve(passthroughArgs[0]) : process.cwd(); + +const extraRootsResult = parseExtraRoots({ + argv: passthroughArgs, + env: process.env, + rootDir: ROOT_DIR, +}); +for (const warning of extraRootsResult.warnings) { + process.stderr.write(`${warning}\n`); +} +setExtraRoots(extraRootsResult.accepted); const INSTRUCTIONS_SOURCE_URL = "https://contextplus.vercel.app/api/instructions"; const INSTRUCTIONS_RESOURCE_URI = "contextplus://instructions"; const PACKAGE_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), ".."); diff --git a/src/tools/blast-radius.ts b/src/tools/blast-radius.ts index 706a787..6abfdc4 100644 --- a/src/tools/blast-radius.ts +++ b/src/tools/blast-radius.ts @@ -1,7 +1,7 @@ // Dependency graph analyzer to trace symbol usage across the codebase // Finds every file and line where a function, class, or variable is referenced -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { isSupportedFile } from "../core/parser.js"; import { readFile } from "fs/promises"; @@ -18,7 +18,7 @@ interface SymbolUsage { } export async function getBlastRadius(options: BlastRadiusOptions): Promise { - const entries = await walkDirectory({ rootDir: options.rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir: options.rootDir, depthLimit: 0 }); const files = entries.filter((e) => !e.isDirectory && isSupportedFile(e.path)); const usages: SymbolUsage[] = []; const symbolPattern = new RegExp(`\\b${escapeRegex(options.symbolName)}\\b`, "g"); diff --git a/src/tools/context-tree.ts b/src/tools/context-tree.ts index 9e41339..a2eb9d9 100644 --- a/src/tools/context-tree.ts +++ 
b/src/tools/context-tree.ts @@ -1,7 +1,7 @@ // Structural tree generator with file headers, symbols, and depth control // Dynamic token-aware pruning: Level 0 (files only) to Level 2 (deep context) -import { walkDirectory, type FileEntry } from "../core/walker.js"; +import { walkRoots, type FileEntry } from "../core/walker.js"; import { analyzeFile, formatSymbol, isSupportedFile } from "../core/parser.js"; export interface ContextTreeOptions { @@ -105,7 +105,7 @@ function pruneHeaders(node: TreeNode): void { } export async function getContextTree(options: ContextTreeOptions): Promise { - const entries = await walkDirectory({ + const entries = await walkRoots({ rootDir: options.rootDir, targetPath: options.targetPath, depthLimit: options.depthLimit, diff --git a/src/tools/feature-hub.ts b/src/tools/feature-hub.ts index 7ce6ee2..8530d15 100644 --- a/src/tools/feature-hub.ts +++ b/src/tools/feature-hub.ts @@ -5,7 +5,7 @@ import { resolve, extname } from "path"; import { readFile, stat } from "fs/promises"; import { parseHubFile, discoverHubs, findOrphanedFiles, type HubInfo } from "../core/hub.js"; import { getFileSkeleton } from "./file-skeleton.js"; -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; export interface FeatureHubOptions { rootDir: string; @@ -53,7 +53,7 @@ export async function getFeatureHub(options: FeatureHubOptions): Promise } if (showOrphans) { - const entries = await walkDirectory({ rootDir, depthLimit: 10 }); + const entries = await walkRoots({ rootDir, depthLimit: 10 }); const filePaths = entries.filter((e) => !e.isDirectory).map((e) => e.relativePath); const orphans = await findOrphanedFiles(rootDir, filePaths); if (orphans.length === 0) return "No orphaned files. 
All source files are linked to a hub."; diff --git a/src/tools/semantic-identifiers.ts b/src/tools/semantic-identifiers.ts index c694d59..46b275f 100644 --- a/src/tools/semantic-identifiers.ts +++ b/src/tools/semantic-identifiers.ts @@ -2,7 +2,7 @@ // FEATURE: Symbol intelligence via semantic search over definitions and usages import { readFile } from "fs/promises"; -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { analyzeFile, flattenSymbols, isSupportedFile } from "../core/parser.js"; import { fetchEmbedding, @@ -182,7 +182,7 @@ async function buildIdentifierIndex(rootDir: string): Promise { return cachedIndex; } - const entries = await walkDirectory({ rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir, depthLimit: 0 }); const files = entries.filter((entry) => !entry.isDirectory && isSupportedFile(entry.path)); const docs: IdentifierDoc[] = []; const fileLines = new Map(); diff --git a/src/tools/semantic-navigate.ts b/src/tools/semantic-navigate.ts index b99d74e..866e149 100644 --- a/src/tools/semantic-navigate.ts +++ b/src/tools/semantic-navigate.ts @@ -1,7 +1,7 @@ // Semantic project navigator using spectral clustering and provider-agnostic labeling // Browse codebase by meaning: embeds files, clusters vectors, generates labels -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { analyzeFile, flattenSymbols, isSupportedFile } from "../core/parser.js"; import { fetchEmbedding } from "../core/embeddings.js"; import { readFile } from "fs/promises"; @@ -253,7 +253,7 @@ export async function semanticNavigate(options: SemanticNavigateOptions): Promis const maxClusters = options.maxClusters ?? 20; const maxDepth = options.maxDepth ?? 
3; - const entries = await walkDirectory({ rootDir: options.rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir: options.rootDir, depthLimit: 0 }); const fileEntries = entries.filter((e) => !e.isDirectory && isNavigableSourceCandidate(e.path)); if (fileEntries.length === 0) return "No supported source files found in the project."; diff --git a/src/tools/semantic-search.ts b/src/tools/semantic-search.ts index c511e81..8e8fd68 100644 --- a/src/tools/semantic-search.ts +++ b/src/tools/semantic-search.ts @@ -1,7 +1,7 @@ // Ollama-powered semantic search over file headers and symbol names // Uses vector embeddings with cosine similarity for concept matching -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { analyzeFile, flattenSymbols, isSupportedFile } from "../core/parser.js"; import { fetchEmbedding, @@ -136,7 +136,7 @@ async function buildIndex(rootDir: string): Promise { return cachedIndex; } - const entries = await walkDirectory({ rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir, depthLimit: 0 }); const files = entries.filter((e) => !e.isDirectory); const docs: SearchDocument[] = []; diff --git a/test/demo/walker.demo.mjs b/test/demo/walker.demo.mjs index c59b7d2..2c10e25 100644 --- a/test/demo/walker.demo.mjs +++ b/test/demo/walker.demo.mjs @@ -83,3 +83,40 @@ describe("DEMO: groupByDirectory", () => { console.log("--- END ---\n"); }); }); + +describe("DEMO: tessera-style monorepo (issue #38)", () => { + const ROOT = join(process.cwd(), "test", "_demo_tessera"); + + before(async () => { + await rm(ROOT, { recursive: true, force: true }); + await mkdir(join(ROOT, "docs"), { recursive: true }); + await mkdir(join(ROOT, "repos", "lacuna", "src"), { recursive: true }); + await mkdir(join(ROOT, "repos", "graphrag-core", "src"), { recursive: true }); + await mkdir(join(ROOT, "repos", "lacuna", "build"), { recursive: true }); + await writeFile(join(ROOT, ".gitignore"), 
"repos/\n"); + await writeFile(join(ROOT, "docs", "readme.md"), "docs"); + await writeFile(join(ROOT, "repos", "lacuna", ".gitignore"), "build/\n"); + await writeFile(join(ROOT, "repos", "lacuna", "src", "main.py"), "m"); + await writeFile(join(ROOT, "repos", "lacuna", "build", "junk.py"), "j"); + await writeFile(join(ROOT, "repos", "graphrag-core", "src", "core.py"), "c"); + }); + + after(async () => { + await rm(ROOT, { recursive: true, force: true }); + }); + + it("workspace alone indexes only docs/ (the broken case before this PR)", async () => { + const { walkDirectory } = await import("../../build/core/walker.js"); + const entries = await walkDirectory({ rootDir: ROOT }); + console.log("[demo] without extraRoots: " + entries.map((e) => e.relativePath).sort().join(", ")); + }); + + it("with extraRoots, both sub-repos are indexed and each respects its own .gitignore", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await walkRoots({ + rootDir: ROOT, + extraRoots: ["repos/lacuna", "repos/graphrag-core"], + }); + console.log("[demo] with extraRoots: " + entries.map((e) => e.relativePath).sort().join(", ")); + }); +}); diff --git a/test/main/cli-parsing.test.mjs b/test/main/cli-parsing.test.mjs new file mode 100644 index 0000000..b253ba8 --- /dev/null +++ b/test/main/cli-parsing.test.mjs @@ -0,0 +1,127 @@ +import { describe, it, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { writeFile, mkdir, rm } from "fs/promises"; +import { join, delimiter } from "path"; +import { parseExtraRoots } from "../../build/core/extra-roots.js"; + +const FIX = join(process.cwd(), "test", "_cli_fixtures"); + +describe("parseExtraRoots", () => { + before(async () => { + await rm(FIX, { recursive: true, force: true }); + await mkdir(join(FIX, "a"), { recursive: true }); + await mkdir(join(FIX, "b"), { recursive: true }); + await writeFile(join(FIX, "not-a-dir.txt"), ""); + }); + + after(async () => 
{ + await rm(FIX, { recursive: true, force: true }); + }); + + it("parses repeated --include flags", () => { + const result = parseExtraRoots({ + argv: ["--include", "a", "--include", "b"], + env: {}, + rootDir: FIX, + }); + assert.deepEqual(result.accepted.sort(), [join(FIX, "a"), join(FIX, "b")].sort()); + assert.equal(result.warnings.length, 0); + }); + + it("falls back to env var when no --include flag is present", () => { + const result = parseExtraRoots({ + argv: [], + env: { CONTEXTPLUS_EXTRA_ROOTS: ["a", "b"].join(delimiter) }, + rootDir: FIX, + }); + assert.deepEqual(result.accepted.sort(), [join(FIX, "a"), join(FIX, "b")].sort()); + }); + + it("CLI wins entirely when both --include and env are set", () => { + const result = parseExtraRoots({ + argv: ["--include", "a"], + env: { CONTEXTPLUS_EXTRA_ROOTS: "b" }, + rootDir: FIX, + }); + assert.deepEqual(result.accepted, [join(FIX, "a")]); + }); + + it("warns and drops non-existent paths", () => { + const result = parseExtraRoots({ + argv: ["--include", "missing"], + env: {}, + rootDir: FIX, + }); + assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /missing/); + }); + + it("warns and drops files (not directories)", () => { + const result = parseExtraRoots({ + argv: ["--include", "not-a-dir.txt"], + env: {}, + rootDir: FIX, + }); + assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /not a directory/i); + }); + + it("warns and drops paths outside the workspace root", () => { + const result = parseExtraRoots({ + argv: ["--include", "/tmp"], + env: {}, + rootDir: FIX, + }); + assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /outside/i); + }); + + it("rejects the workspace root itself", () => { + const result = parseExtraRoots({ + argv: ["--include", "."], + env: {}, + rootDir: FIX, + }); + 
assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + }); + + it("skips empty entries in env list", () => { + const result = parseExtraRoots({ + argv: [], + env: { CONTEXTPLUS_EXTRA_ROOTS: `a${delimiter}${delimiter}b` }, + rootDir: FIX, + }); + assert.deepEqual(result.accepted.sort(), [join(FIX, "a"), join(FIX, "b")].sort()); + }); + + it("rejects symlinks that point outside the workspace root", async () => { + const { symlink } = await import("fs/promises"); + const OUTSIDE = join(FIX, "..", "_outside_target"); + await rm(OUTSIDE, { recursive: true, force: true }); + await mkdir(OUTSIDE, { recursive: true }); + try { + await symlink(OUTSIDE, join(FIX, "bad-link")); + } catch { + // If symlink creation fails (e.g., on a filesystem that doesn't support symlinks), + // skip the test gracefully. + return; + } + + const result = parseExtraRoots({ + argv: ["--include", "bad-link"], + env: {}, + rootDir: FIX, + }); + + assert.equal(result.accepted.length, 0, "symlink to outside should be rejected"); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /outside/i); + + await rm(join(FIX, "bad-link")); + await rm(OUTSIDE, { recursive: true, force: true }); + }); +}); diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 7a36126..c0fe314 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -129,6 +129,269 @@ describe("walker", () => { }); }); + describe("nested gitignore", () => { + const NESTED = join(FIXTURE_DIR, "_nested"); + + before(async () => { + await rm(NESTED, { recursive: true, force: true }); + await mkdir(join(NESTED, "child", "cache"), { recursive: true }); + await writeFile(join(NESTED, "child", "cache", "x.txt"), "cached"); + await writeFile(join(NESTED, "child", "keep.txt"), "kept"); + await writeFile(join(NESTED, "child", ".gitignore"), "cache/\n"); + }); + + after(async () => { + await rm(NESTED, { recursive: true, force: true }); + }); + + it("applies a child 
.gitignore rule inside that child", async () => { + const entries = await walkDirectory({ rootDir: NESTED }); + const paths = entries.map((e) => e.relativePath); + assert.ok(paths.includes("child/keep.txt"), "child/keep.txt should be included"); + assert.ok( + !paths.some((p) => p.includes("cache/x.txt")), + "files under child/cache should be ignored by child's .gitignore", + ); + }); + + it("supports negation in a child .gitignore (re-include)", async () => { + // Parent ignores *.log everywhere; child re-includes important.log. + await writeFile(join(NESTED, ".gitignore"), "*.log\n"); + await writeFile(join(NESTED, "root.log"), "noise"); + await writeFile(join(NESTED, "child", "important.log"), "valuable"); + await writeFile(join(NESTED, "child", "noise.log"), "noise"); + await writeFile(join(NESTED, "child", ".gitignore"), "cache/\n!important.log\n"); + + const entries = await walkDirectory({ rootDir: NESTED }); + const paths = entries.map((e) => e.relativePath); + + assert.ok( + !paths.includes("root.log"), + "root.log should be excluded by parent rule", + ); + assert.ok( + !paths.includes("child/noise.log"), + "child/noise.log should still be excluded (parent rule still applies)", + ); + assert.ok( + paths.includes("child/important.log"), + "child/important.log should be re-included by child's negation", + ); + }); + + it("merges .gitignore across three levels of nesting", async () => { + const DEEP = join(FIXTURE_DIR, "_deep"); + await rm(DEEP, { recursive: true, force: true }); + await mkdir(join(DEEP, "a", "b", "c"), { recursive: true }); + await writeFile(join(DEEP, ".gitignore"), "*.tmp\n"); + await writeFile(join(DEEP, "a", ".gitignore"), "*.bak\n"); + await writeFile(join(DEEP, "a", "b", ".gitignore"), "*.old\n"); + await writeFile(join(DEEP, "a", "b", "c", "keep.txt"), "k"); + await writeFile(join(DEEP, "a", "b", "c", "x.tmp"), "1"); + await writeFile(join(DEEP, "a", "b", "c", "x.bak"), "2"); + await writeFile(join(DEEP, "a", "b", "c", "x.old"), "3"); 
+ + const entries = await walkDirectory({ rootDir: DEEP }); + const paths = entries.map((e) => e.relativePath); + + assert.ok(paths.includes("a/b/c/keep.txt")); + assert.ok(!paths.includes("a/b/c/x.tmp"), "level-0 *.tmp rule must reach level 3"); + assert.ok(!paths.includes("a/b/c/x.bak"), "level-1 *.bak rule must reach level 3"); + assert.ok(!paths.includes("a/b/c/x.old"), "level-2 *.old rule must reach level 3"); + + await rm(DEEP, { recursive: true, force: true }); + }); + + it("respects anchored patterns scoped to a nested .gitignore", async () => { + const ANCHOR = join(FIXTURE_DIR, "_anchor"); + await rm(ANCHOR, { recursive: true, force: true }); + await mkdir(join(ANCHOR, "artifacts"), { recursive: true }); + await mkdir(join(ANCHOR, "child", "artifacts"), { recursive: true }); + // Workspace-level artifacts/ should NOT be excluded — only the child has the rule. + await writeFile(join(ANCHOR, "artifacts", "ws.txt"), "ws"); + await writeFile(join(ANCHOR, "child", "artifacts", "junk.txt"), "junk"); + await writeFile(join(ANCHOR, "child", "keep.txt"), "keep"); + // Anchored pattern in child .gitignore should only affect child/artifacts/. 
+ await writeFile(join(ANCHOR, "child", ".gitignore"), "/artifacts/\n"); + + const entries = await walkDirectory({ rootDir: ANCHOR }); + const paths = entries.map((e) => e.relativePath); + + assert.ok(paths.includes("artifacts/ws.txt"), "workspace-level artifacts/ should NOT be ignored"); + assert.ok(paths.includes("child/keep.txt")); + assert.ok( + !paths.some((p) => p.includes("child/artifacts")), + "anchored /artifacts/ in child/.gitignore should ignore only child/artifacts/", + ); + + await rm(ANCHOR, { recursive: true, force: true }); + }); + }); + + describe("walkRoots", () => { + const ROOTS = join(FIXTURE_DIR, "_roots"); + + before(async () => { + await rm(ROOTS, { recursive: true, force: true }); + await mkdir(join(ROOTS, "docs"), { recursive: true }); + await mkdir(join(ROOTS, "repos", "lacuna", "src"), { recursive: true }); + await mkdir(join(ROOTS, "repos", "other"), { recursive: true }); + await writeFile(join(ROOTS, ".gitignore"), "repos/\n"); + await writeFile(join(ROOTS, "docs", "readme.md"), "d"); + await writeFile(join(ROOTS, "repos", "lacuna", "src", "foo.py"), "f"); + await writeFile(join(ROOTS, "repos", "other", "noise.py"), "n"); + }); + + after(async () => { + await rm(ROOTS, { recursive: true, force: true }); + }); + + it("indexes paths listed in extraRoots even when parent .gitignore excludes them", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const paths = entries.map((e) => e.relativePath); + + assert.ok(paths.includes("docs/readme.md"), "workspace files should still be indexed"); + assert.ok( + paths.includes("repos/lacuna/src/foo.py"), + "extraRoot file should be indexed", + ); + assert.ok( + !paths.some((p) => p.startsWith("repos/other")), + "repos/other (not in extraRoots) should remain ignored", + ); + }); + + it("rejects extraRoots that resolve outside the workspace root", async () => { + const { 
walkRoots } = await import("../../build/core/walker.js"); + await assert.rejects( + () => walkRoots({ rootDir: ROOTS, extraRoots: ["../../etc"] }), + /resolves outside workspace root/, + ); + await assert.rejects( + () => walkRoots({ rootDir: ROOTS, extraRoots: ["/etc"] }), + /resolves outside workspace root/, + ); + }); + + it("reports workspace-relative depth for extraRoot entries", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const fooEntry = entries.find((e) => e.relativePath === "repos/lacuna/src/foo.py"); + assert.ok(fooEntry, "expected to find foo.py"); + // repos/lacuna/src/foo.py: workspace depth is 3 (under repos/lacuna/src/). + // walkDirectory called with rootDir=/repos/lacuna gives foo.py depth=1 + // (src/ is depth 0 from lacuna, foo.py is depth 1). With offset 2 → 3. + assert.equal(fooEntry.depth, 3, "depth should be workspace-relative, not extraRoot-relative"); + }); + + it("starts with a fresh ignore scope inside each extraRoot", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + await mkdir(join(ROOTS, "repos", "lacuna", "build"), { recursive: true }); + await writeFile(join(ROOTS, "repos", "lacuna", "build", "junk.py"), "j"); + await writeFile(join(ROOTS, "repos", "lacuna", ".gitignore"), "build/\n"); + + const entries = await walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const paths = entries.map((e) => e.relativePath); + + assert.ok(paths.includes("repos/lacuna/src/foo.py")); + assert.ok( + !paths.some((p) => p.includes("repos/lacuna/build")), + "extraRoot's own .gitignore should apply", + ); + + await rm(join(ROOTS, "repos", "lacuna", "build"), { recursive: true, force: true }); + await rm(join(ROOTS, "repos", "lacuna", ".gitignore")); + }); + + it("deduplicates files reachable from both the workspace and an extraRoot", async () => { + const { walkRoots } = 
await import("../../build/core/walker.js"); + const VEND = join(FIXTURE_DIR, "_vend"); + await rm(VEND, { recursive: true, force: true }); + await mkdir(join(VEND, "vendored"), { recursive: true }); + await writeFile(join(VEND, "vendored", "lib.py"), "l"); + + const entries = await walkRoots({ + rootDir: VEND, + extraRoots: ["vendored"], + }); + const matches = entries.filter((e) => e.relativePath === "vendored/lib.py"); + assert.equal(matches.length, 1, "file should be emitted exactly once after dedupe"); + + await rm(VEND, { recursive: true, force: true }); + }); + + it("reports extraRoot file paths relative to the workspace root", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const fooEntry = entries.find((e) => e.path.endsWith("foo.py")); + assert.ok(fooEntry, "expected to find foo.py in results"); + assert.equal( + fooEntry.relativePath, + "repos/lacuna/src/foo.py", + "relativePath should be rooted at the workspace, not the extraRoot", + ); + }); + + it("rejects symlinked extraRoots that point outside the workspace", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const { symlink, mkdir, rm, writeFile } = await import("fs/promises"); + const SYM = join(FIXTURE_DIR, "_sym"); + const EXTERNAL = join(FIXTURE_DIR, "_external"); + await rm(SYM, { recursive: true, force: true }); + await rm(EXTERNAL, { recursive: true, force: true }); + await mkdir(SYM, { recursive: true }); + await mkdir(EXTERNAL, { recursive: true }); + await writeFile(join(EXTERNAL, "secret.txt"), "secret"); + // Place a symlink INSIDE the workspace pointing OUTSIDE. 
+ await symlink(EXTERNAL, join(SYM, "link")); + + await assert.rejects( + () => walkRoots({ rootDir: SYM, extraRoots: ["link"] }), + /resolves outside workspace root/, + ); + + await rm(SYM, { recursive: true, force: true }); + await rm(EXTERNAL, { recursive: true, force: true }); + }); + + it("follows symlinked extraRoots that point inside the workspace", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const { symlink, mkdir, rm, writeFile } = await import("fs/promises"); + const SYM = join(FIXTURE_DIR, "_sym_inside"); + await rm(SYM, { recursive: true, force: true }); + await mkdir(join(SYM, "real"), { recursive: true }); + await writeFile(join(SYM, "real", "ok.txt"), "ok"); + await writeFile(join(SYM, ".gitignore"), "linked/\n"); // exclude symlink-named path from primary walk so the only way to see ok.txt is via the extraRoot + await symlink(join(SYM, "real"), join(SYM, "linked")); + + const entries = await walkRoots({ rootDir: SYM, extraRoots: ["linked"] }); + const paths = entries.map((e) => e.relativePath); + + // The symlink target is INSIDE the workspace, so it should be walked. + // The file may appear under either the canonical path or the symlink path, + // depending on how realpath rewrites it. Accept either. + assert.ok( + paths.some((p) => p.endsWith("ok.txt")), + "ok.txt should be reachable via the symlinked extraRoot", + ); + + await rm(SYM, { recursive: true, force: true }); + }); + }); + after(async () => { await rm(FIXTURE_DIR, { recursive: true, force: true }); });