From c043cfc897030749114adc3b4a7503c08ed0d885 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 12:16:27 +0200 Subject: [PATCH 01/14] feat(walker): load .gitignore per directory and merge with inherited rules Walker previously loaded only the workspace root's .gitignore. With this change, each recursed directory's .gitignore is loaded and merged with the inherited ignore scope, approximating the nested-.gitignore behavior of git and ripgrep. Merged patterns are still matched against paths relative to the walk root, so slash-anchored patterns in a nested .gitignore are not re-based onto that directory. Refs forloopcodes/contextplus#38 --- src/core/walker.ts | 51 ++++++++++++++++++++++++++++++--------- test/main/walker.test.mjs | 26 ++++++++++++++++++++ 2 files changed, 66 insertions(+), 11 deletions(-) diff --git a/src/core/walker.ts b/src/core/walker.ts index 8b391d6..335b228 100644 --- a/src/core/walker.ts +++ b/src/core/walker.ts @@ -1,5 +1,5 @@ // Gitignore-aware recursive directory walker with depth control -// Returns filtered file paths respecting project ignore patterns +// Returns filtered file paths respecting project ignore patterns (nested-gitignore-aware) import { readdir, readFile, stat } from "fs/promises"; import { join, relative, resolve } from "path"; @@ -18,6 +18,11 @@ export interface FileEntry { depth: number; } +interface IgnoreScope { + ig: Ignore; + patterns: string[]; +} + const ALWAYS_IGNORE = new Set([ "node_modules", ".git", @@ -38,20 +43,28 @@ const ALWAYS_IGNORE = new Set([ ".parcel-cache", ]); -async function loadIgnoreRules(rootDir: string): Promise { - const ig = ignore(); +async function readGitignorePatterns(dir: string): Promise { try { - const content = await readFile(join(rootDir, ".gitignore"), "utf-8"); - ig.add(content); + const content = await readFile(join(dir, ".gitignore"), "utf-8"); + return content.split(/\r?\n/).filter((line) => line.trim() && !line.startsWith("#")); } catch { + return []; } - return ig; +} + +async function loadScopeFor(dir: string, parent: IgnoreScope | null): Promise { + const local = await readGitignorePatterns(dir); + if 
(!parent && local.length === 0) return { ig: ignore(), patterns: [] }; + if (!parent) return { ig: ignore().add(local), patterns: local }; + if (local.length === 0) return parent; + const merged = [...parent.patterns, ...local]; + return { ig: ignore().add(merged), patterns: merged }; } async function walkRecursive( dir: string, rootDir: string, - ig: Ignore, + scope: IgnoreScope, depth: number, maxDepth: number, results: FileEntry[], @@ -64,19 +77,21 @@ async function walkRecursive( const fullPath = join(dir, entry.name); const relPath = relative(rootDir, fullPath).replace(/\\/g, "/"); - if (ig.ignores(relPath)) continue; + if (scope.ig.ignores(relPath)) continue; const isDir = entry.isDirectory(); results.push({ path: fullPath, relativePath: relPath, isDirectory: isDir, depth }); - if (isDir) await walkRecursive(fullPath, rootDir, ig, depth + 1, maxDepth, results); + if (isDir) { + const childScope = await loadScopeFor(fullPath, scope); + await walkRecursive(fullPath, rootDir, childScope, depth + 1, maxDepth, results); + } } } export async function walkDirectory(options: WalkOptions): Promise { const rootDir = resolve(options.rootDir); const startDir = options.targetPath ? resolve(rootDir, options.targetPath) : rootDir; - const ig = await loadIgnoreRules(rootDir); const results: FileEntry[] = []; try { @@ -85,7 +100,21 @@ export async function walkDirectory(options: WalkOptions): Promise return results; } - await walkRecursive(startDir, rootDir, ig, 0, options.depthLimit ?? 0, results); + const rootScope = await loadScopeFor(rootDir, null); + let startScope = rootScope; + if (startDir !== rootDir) { + // Build the scope chain from rootDir down to startDir so inherited rules apply. 
+ const rel = relative(rootDir, startDir).split(/[\\/]/).filter(Boolean); + let cursor = rootDir; + let scope = rootScope; + for (const segment of rel) { + cursor = join(cursor, segment); + scope = await loadScopeFor(cursor, scope); + } + startScope = scope; + } + + await walkRecursive(startDir, rootDir, startScope, 0, options.depthLimit ?? 0, results); return results; } diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 7a36126..481281b 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -129,6 +129,32 @@ describe("walker", () => { }); }); + describe("nested gitignore", () => { + const NESTED = join(FIXTURE_DIR, "_nested"); + + before(async () => { + await rm(NESTED, { recursive: true, force: true }); + await mkdir(join(NESTED, "child", "cache"), { recursive: true }); + await writeFile(join(NESTED, "child", "cache", "x.txt"), "cached"); + await writeFile(join(NESTED, "child", "keep.txt"), "kept"); + await writeFile(join(NESTED, "child", ".gitignore"), "cache/\n"); + }); + + after(async () => { + await rm(NESTED, { recursive: true, force: true }); + }); + + it("applies a child .gitignore rule inside that child", async () => { + const entries = await walkDirectory({ rootDir: NESTED }); + const paths = entries.map((e) => e.relativePath); + assert.ok(paths.includes("child/keep.txt"), "child/keep.txt should be included"); + assert.ok( + !paths.some((p) => p.includes("cache/x.txt")), + "files under child/cache should be ignored by child's .gitignore", + ); + }); + }); + after(async () => { await rm(FIXTURE_DIR, { recursive: true, force: true }); }); From abfe715e7e5c877ffb88d7cf8c9b7275c089bbcb Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 12:30:50 +0200 Subject: [PATCH 02/14] test(walker): cover negation in nested .gitignore --- test/main/walker.test.mjs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/main/walker.test.mjs 
b/test/main/walker.test.mjs index 481281b..1961f8e 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -153,6 +153,31 @@ describe("walker", () => { "files under child/cache should be ignored by child's .gitignore", ); }); + + it("supports negation in a child .gitignore (re-include)", async () => { + // Parent ignores *.log everywhere; child re-includes important.log. + await writeFile(join(NESTED, ".gitignore"), "*.log\n"); + await writeFile(join(NESTED, "root.log"), "noise"); + await writeFile(join(NESTED, "child", "important.log"), "valuable"); + await writeFile(join(NESTED, "child", "noise.log"), "noise"); + await writeFile(join(NESTED, "child", ".gitignore"), "cache/\n!important.log\n"); + + const entries = await walkDirectory({ rootDir: NESTED }); + const paths = entries.map((e) => e.relativePath); + + assert.ok( + !paths.includes("root.log"), + "root.log should be excluded by parent rule", + ); + assert.ok( + !paths.includes("child/noise.log"), + "child/noise.log should still be excluded (parent rule still applies)", + ); + assert.ok( + paths.includes("child/important.log"), + "child/important.log should be re-included by child's negation", + ); + }); }); after(async () => { From c54a0ba65554b2d03f84ed4d830565177a9eddab Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 12:31:57 +0200 Subject: [PATCH 03/14] test(walker): cover .gitignore merging across deep nesting --- test/main/walker.test.mjs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 1961f8e..27c08c8 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -178,6 +178,29 @@ describe("walker", () => { "child/important.log should be re-included by child's negation", ); }); + + it("merges .gitignore across three levels of nesting", async () => { + const DEEP = join(FIXTURE_DIR, "_deep"); + await rm(DEEP, { recursive: true, 
force: true }); + await mkdir(join(DEEP, "a", "b", "c"), { recursive: true }); + await writeFile(join(DEEP, ".gitignore"), "*.tmp\n"); + await writeFile(join(DEEP, "a", ".gitignore"), "*.bak\n"); + await writeFile(join(DEEP, "a", "b", ".gitignore"), "*.old\n"); + await writeFile(join(DEEP, "a", "b", "c", "keep.txt"), "k"); + await writeFile(join(DEEP, "a", "b", "c", "x.tmp"), "1"); + await writeFile(join(DEEP, "a", "b", "c", "x.bak"), "2"); + await writeFile(join(DEEP, "a", "b", "c", "x.old"), "3"); + + const entries = await walkDirectory({ rootDir: DEEP }); + const paths = entries.map((e) => e.relativePath); + + assert.ok(paths.includes("a/b/c/keep.txt")); + assert.ok(!paths.includes("a/b/c/x.tmp"), "level-0 *.tmp rule must reach level 3"); + assert.ok(!paths.includes("a/b/c/x.bak"), "level-1 *.bak rule must reach level 3"); + assert.ok(!paths.includes("a/b/c/x.old"), "level-2 *.old rule must reach level 3"); + + await rm(DEEP, { recursive: true, force: true }); + }); }); after(async () => { From 3ce9dbe59b5c907c9cc66830e02d950d4863a024 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 12:33:37 +0200 Subject: [PATCH 04/14] feat(walker): add walkRoots() with extraRoots support walkRoots walks the workspace root plus a list of extraRoots, each with a fresh ignore scope. Paths are reported relative to the workspace root so downstream tools see a single canonical address per file. Files are deduped by absolute path; the workspace walk wins for collisions. A module-level setExtraRoots() setter lets the server entry-point configure the default list at startup. walkRoots accepts an explicit extraRoots override for tests and callers that want to bypass globals. 
Refs forloopcodes/contextplus#38 --- src/core/walker.ts | 61 +++++++++++++++++++++++++++++++++++++ test/main/walker.test.mjs | 64 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/src/core/walker.ts b/src/core/walker.ts index 335b228..549c636 100644 --- a/src/core/walker.ts +++ b/src/core/walker.ts @@ -130,3 +130,64 @@ export function groupByDirectory(entries: FileEntry[]): Map } return groups; } + +let GLOBAL_EXTRA_ROOTS: string[] = []; + +export function setExtraRoots(paths: string[]): void { + GLOBAL_EXTRA_ROOTS = [...paths]; +} + +export function getExtraRoots(): string[] { + return [...GLOBAL_EXTRA_ROOTS]; +} + +export interface WalkRootsOptions { + rootDir: string; + extraRoots?: string[]; + depthLimit?: number; + targetPath?: string; +} + +export async function walkRoots(options: WalkRootsOptions): Promise { + const rootDir = resolve(options.rootDir); + const extraRoots = options.extraRoots ?? GLOBAL_EXTRA_ROOTS; + const seen = new Set(); + const results: FileEntry[] = []; + + const primary = await walkDirectory({ + rootDir, + depthLimit: options.depthLimit, + targetPath: options.targetPath, + }); + for (const entry of primary) { + if (seen.has(entry.path)) continue; + seen.add(entry.path); + results.push(entry); + } + + // targetPath constrains the primary walk only — extraRoots are always walked in full. 
+ for (const extra of extraRoots) { + const extraAbs = resolve(rootDir, extra); + if (extraAbs !== rootDir && !extraAbs.startsWith(rootDir + "/")) { + throw new Error(`walkRoots: extraRoot "${extra}" resolves outside workspace root`); + } + const depthOffset = relative(rootDir, extraAbs).split("/").filter(Boolean).length; + const extraEntries = await walkDirectory({ + rootDir: extraAbs, + depthLimit: options.depthLimit, + }); + for (const entry of extraEntries) { + if (seen.has(entry.path)) continue; + seen.add(entry.path); + const workspaceRel = relative(rootDir, entry.path).replace(/\\/g, "/"); + results.push({ + path: entry.path, + relativePath: workspaceRel, + isDirectory: entry.isDirectory, + depth: entry.depth + depthOffset, + }); + } + } + + return results; +} diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 27c08c8..2de2b1e 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -203,6 +203,70 @@ describe("walker", () => { }); }); + describe("walkRoots", () => { + const ROOTS = join(FIXTURE_DIR, "_roots"); + + before(async () => { + await rm(ROOTS, { recursive: true, force: true }); + await mkdir(join(ROOTS, "docs"), { recursive: true }); + await mkdir(join(ROOTS, "repos", "lacuna", "src"), { recursive: true }); + await mkdir(join(ROOTS, "repos", "other"), { recursive: true }); + await writeFile(join(ROOTS, ".gitignore"), "repos/\n"); + await writeFile(join(ROOTS, "docs", "readme.md"), "d"); + await writeFile(join(ROOTS, "repos", "lacuna", "src", "foo.py"), "f"); + await writeFile(join(ROOTS, "repos", "other", "noise.py"), "n"); + }); + + after(async () => { + await rm(ROOTS, { recursive: true, force: true }); + }); + + it("indexes paths listed in extraRoots even when parent .gitignore excludes them", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const paths = entries.map((e) => 
e.relativePath); + + assert.ok(paths.includes("docs/readme.md"), "workspace files should still be indexed"); + assert.ok( + paths.includes("repos/lacuna/src/foo.py"), + "extraRoot file should be indexed", + ); + assert.ok( + !paths.some((p) => p.startsWith("repos/other")), + "repos/other (not in extraRoots) should remain ignored", + ); + }); + + it("rejects extraRoots that resolve outside the workspace root", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + await assert.rejects( + () => walkRoots({ rootDir: ROOTS, extraRoots: ["../../etc"] }), + /resolves outside workspace root/, + ); + await assert.rejects( + () => walkRoots({ rootDir: ROOTS, extraRoots: ["/etc"] }), + /resolves outside workspace root/, + ); + }); + + it("reports workspace-relative depth for extraRoot entries", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const fooEntry = entries.find((e) => e.relativePath === "repos/lacuna/src/foo.py"); + assert.ok(fooEntry, "expected to find foo.py"); + // repos/lacuna/src/foo.py: workspace depth is 3 (under repos/lacuna/src/). + // walkDirectory called with rootDir=/repos/lacuna gives foo.py depth=1 + // (src/ is depth 0 from lacuna, foo.py is depth 1). With offset 2 → 3. 
+ assert.equal(fooEntry.depth, 3, "depth should be workspace-relative, not extraRoot-relative"); + }); + }); + after(async () => { await rm(FIXTURE_DIR, { recursive: true, force: true }); }); From 7960a015003b6a12818deb541ca870b03b2bef41 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 12:39:18 +0200 Subject: [PATCH 05/14] test(walker): verify extraRoot walks have a fresh ignore scope --- test/main/walker.test.mjs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 2de2b1e..30b303d 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -265,6 +265,28 @@ describe("walker", () => { // (src/ is depth 0 from lacuna, foo.py is depth 1). With offset 2 → 3. assert.equal(fooEntry.depth, 3, "depth should be workspace-relative, not extraRoot-relative"); }); + + it("starts with a fresh ignore scope inside each extraRoot", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + await mkdir(join(ROOTS, "repos", "lacuna", "build"), { recursive: true }); + await writeFile(join(ROOTS, "repos", "lacuna", "build", "junk.py"), "j"); + await writeFile(join(ROOTS, "repos", "lacuna", ".gitignore"), "build/\n"); + + const entries = await walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const paths = entries.map((e) => e.relativePath); + + assert.ok(paths.includes("repos/lacuna/src/foo.py")); + assert.ok( + !paths.some((p) => p.includes("repos/lacuna/build")), + "extraRoot's own .gitignore should apply", + ); + + await rm(join(ROOTS, "repos", "lacuna", "build"), { recursive: true, force: true }); + await rm(join(ROOTS, "repos", "lacuna", ".gitignore")); + }); }); after(async () => { From f2da8661a155cb0071d8f4fea9ee49562a4faa14 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 12:39:48 +0200 Subject: [PATCH 06/14] test(walker): 
dedupe overlap between workspace walk and extraRoots --- test/main/walker.test.mjs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 30b303d..b4e4539 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -287,6 +287,23 @@ describe("walker", () => { await rm(join(ROOTS, "repos", "lacuna", "build"), { recursive: true, force: true }); await rm(join(ROOTS, "repos", "lacuna", ".gitignore")); }); + + it("deduplicates files reachable from both the workspace and an extraRoot", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const VEND = join(FIXTURE_DIR, "_vend"); + await rm(VEND, { recursive: true, force: true }); + await mkdir(join(VEND, "vendored"), { recursive: true }); + await writeFile(join(VEND, "vendored", "lib.py"), "l"); + + const entries = await walkRoots({ + rootDir: VEND, + extraRoots: ["vendored"], + }); + const matches = entries.filter((e) => e.relativePath === "vendored/lib.py"); + assert.equal(matches.length, 1, "file should be emitted exactly once after dedupe"); + + await rm(VEND, { recursive: true, force: true }); + }); }); after(async () => { From 8eda3ffd761d6be4ea7e2047b60a27ebe08178a0 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 12:40:15 +0200 Subject: [PATCH 07/14] test(walker): extraRoot paths are reported relative to workspace root --- test/main/walker.test.mjs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index b4e4539..25504cd 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -304,6 +304,21 @@ describe("walker", () => { await rm(VEND, { recursive: true, force: true }); }); + + it("reports extraRoot file paths relative to the workspace root", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await 
walkRoots({ + rootDir: ROOTS, + extraRoots: ["repos/lacuna"], + }); + const fooEntry = entries.find((e) => e.path.endsWith("foo.py")); + assert.ok(fooEntry, "expected to find foo.py in results"); + assert.equal( + fooEntry.relativePath, + "repos/lacuna/src/foo.py", + "relativePath should be rooted at the workspace, not the extraRoot", + ); + }); }); after(async () => { From 5de7a82c03f28cbe152ae333890345c0390c01a5 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 13:12:01 +0200 Subject: [PATCH 08/14] feat(cli): parseExtraRoots() validates --include flags and env var Pure function that: - Reads repeated --include flags from argv - Falls back to CONTEXTPLUS_EXTRA_ROOTS (path.delimiter-separated) when no --include is present - Resolves each entry relative to the workspace root - Rejects entries that are missing, non-directories, the workspace root itself, or outside the workspace root (returns warnings, drops them) Refs forloopcodes/contextplus#38 --- src/core/extra-roots.ts | 69 +++++++++++++++++++++++ test/main/cli-parsing.test.mjs | 100 +++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 src/core/extra-roots.ts create mode 100644 test/main/cli-parsing.test.mjs diff --git a/src/core/extra-roots.ts b/src/core/extra-roots.ts new file mode 100644 index 0000000..13b7642 --- /dev/null +++ b/src/core/extra-roots.ts @@ -0,0 +1,69 @@ +// CLI/env argument parsing for the extraRoots config. +// Pure module — no side effects, safe to import from tests. 
+ +import { statSync } from "fs"; +import { delimiter, isAbsolute, resolve, sep } from "path"; + +export interface ParseExtraRootsInput { + argv: string[]; + env: NodeJS.ProcessEnv | Record; + rootDir: string; +} + +export interface ParseExtraRootsResult { + accepted: string[]; + warnings: string[]; +} + +function extractIncludeFlags(argv: string[]): string[] { + const out: string[] = []; + for (let i = 0; i < argv.length; i++) { + if (argv[i] === "--include" && i + 1 < argv.length) { + out.push(argv[i + 1]); + i++; + } else if (argv[i].startsWith("--include=")) { + out.push(argv[i].slice("--include=".length)); + } + } + return out; +} + +export function parseExtraRoots(input: ParseExtraRootsInput): ParseExtraRootsResult { + const accepted: string[] = []; + const warnings: string[] = []; + const rootAbs = resolve(input.rootDir); + + const fromCli = extractIncludeFlags(input.argv); + const raw = fromCli.length > 0 + ? fromCli + : (input.env.CONTEXTPLUS_EXTRA_ROOTS ?? "") + .split(delimiter) + .filter((s) => s.length > 0); + + for (const entry of raw) { + const abs = isAbsolute(entry) ? 
entry : resolve(rootAbs, entry); + + if (abs === rootAbs) { + warnings.push(`contextplus: extraRoot '${entry}' equals the workspace root — skipping`); + continue; + } + if (!abs.startsWith(rootAbs + sep)) { + warnings.push(`contextplus: extraRoot '${entry}' is outside the workspace root — skipping`); + continue; + } + let stats; + try { + stats = statSync(abs); + } catch { + warnings.push(`contextplus: extraRoot '${entry}' does not exist — skipping`); + continue; + } + if (!stats.isDirectory()) { + warnings.push(`contextplus: extraRoot '${entry}' is not a directory — skipping`); + continue; + } + accepted.push(abs); + } + + return { accepted, warnings }; +} diff --git a/test/main/cli-parsing.test.mjs b/test/main/cli-parsing.test.mjs new file mode 100644 index 0000000..832f283 --- /dev/null +++ b/test/main/cli-parsing.test.mjs @@ -0,0 +1,100 @@ +import { describe, it, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { writeFile, mkdir, rm } from "fs/promises"; +import { join, delimiter } from "path"; +import { parseExtraRoots } from "../../build/core/extra-roots.js"; + +const FIX = join(process.cwd(), "test", "_cli_fixtures"); + +describe("parseExtraRoots", () => { + before(async () => { + await rm(FIX, { recursive: true, force: true }); + await mkdir(join(FIX, "a"), { recursive: true }); + await mkdir(join(FIX, "b"), { recursive: true }); + await writeFile(join(FIX, "not-a-dir.txt"), ""); + }); + + after(async () => { + await rm(FIX, { recursive: true, force: true }); + }); + + it("parses repeated --include flags", () => { + const result = parseExtraRoots({ + argv: ["--include", "a", "--include", "b"], + env: {}, + rootDir: FIX, + }); + assert.deepEqual(result.accepted.sort(), [join(FIX, "a"), join(FIX, "b")].sort()); + assert.equal(result.warnings.length, 0); + }); + + it("falls back to env var when no --include flag is present", () => { + const result = parseExtraRoots({ + argv: [], + env: { CONTEXTPLUS_EXTRA_ROOTS: ["a", 
"b"].join(delimiter) }, + rootDir: FIX, + }); + assert.deepEqual(result.accepted.sort(), [join(FIX, "a"), join(FIX, "b")].sort()); + }); + + it("CLI wins entirely when both --include and env are set", () => { + const result = parseExtraRoots({ + argv: ["--include", "a"], + env: { CONTEXTPLUS_EXTRA_ROOTS: "b" }, + rootDir: FIX, + }); + assert.deepEqual(result.accepted, [join(FIX, "a")]); + }); + + it("warns and drops non-existent paths", () => { + const result = parseExtraRoots({ + argv: ["--include", "missing"], + env: {}, + rootDir: FIX, + }); + assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /missing/); + }); + + it("warns and drops files (not directories)", () => { + const result = parseExtraRoots({ + argv: ["--include", "not-a-dir.txt"], + env: {}, + rootDir: FIX, + }); + assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /not a directory/i); + }); + + it("warns and drops paths outside the workspace root", () => { + const result = parseExtraRoots({ + argv: ["--include", "/tmp"], + env: {}, + rootDir: FIX, + }); + assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /outside/i); + }); + + it("rejects the workspace root itself", () => { + const result = parseExtraRoots({ + argv: ["--include", "."], + env: {}, + rootDir: FIX, + }); + assert.equal(result.accepted.length, 0); + assert.equal(result.warnings.length, 1); + }); + + it("skips empty entries in env list", () => { + const result = parseExtraRoots({ + argv: [], + env: { CONTEXTPLUS_EXTRA_ROOTS: `a${delimiter}${delimiter}b` }, + rootDir: FIX, + }); + assert.deepEqual(result.accepted.sort(), [join(FIX, "a"), join(FIX, "b")].sort()); + }); +}); From 020623fb113ff1b2c191410edafad1aa51a1aa0b Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 13:16:31 +0200 
Subject: [PATCH 09/14] feat(cli): wire parseExtraRoots into server startup Reads --include flags and CONTEXTPLUS_EXTRA_ROOTS env at boot, validates each, writes warnings to stderr for invalid entries, and calls setExtraRoots() so the walker uses them as the default for walkRoots(). Refs forloopcodes/contextplus#38 --- src/index.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/index.ts b/src/index.ts index 5413421..7dd839a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,7 +10,9 @@ import { dirname, resolve } from "path"; import { fileURLToPath } from "url"; import { z } from "zod"; import { createEmbeddingTrackerController } from "./core/embedding-tracker.js"; +import { parseExtraRoots } from "./core/extra-roots.js"; import { createIdleMonitor, getIdleShutdownMs, getParentPollMs, isBrokenPipeError, runCleanup, startParentMonitor } from "./core/process-lifecycle.js"; +import { setExtraRoots } from "./core/walker.js"; import { getContextTree } from "./tools/context-tree.js"; import { getFileSkeleton } from "./tools/file-skeleton.js"; import { ensureMcpDataDir, cancelAllEmbeddings } from "./core/embeddings.js"; @@ -39,6 +41,16 @@ const passthroughArgs = process.argv.slice(2); const ROOT_DIR = passthroughArgs[0] && !SUB_COMMANDS.includes(passthroughArgs[0]) ? 
resolve(passthroughArgs[0]) : process.cwd(); + +const extraRootsResult = parseExtraRoots({ + argv: passthroughArgs, + env: process.env, + rootDir: ROOT_DIR, +}); +for (const warning of extraRootsResult.warnings) { + process.stderr.write(`${warning}\n`); +} +setExtraRoots(extraRootsResult.accepted); const INSTRUCTIONS_SOURCE_URL = "https://contextplus.vercel.app/api/instructions"; const INSTRUCTIONS_RESOURCE_URI = "contextplus://instructions"; const PACKAGE_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), ".."); From 13a2feefb3a3388fc2723948f866c047fde80bc2 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 13:19:10 +0200 Subject: [PATCH 10/14] refactor(tools): migrate walkDirectory call sites to walkRoots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every tool that walks the workspace now goes through walkRoots, which reads the extraRoots list set by the server entry-point. Option shape is unchanged — walkRoots accepts the same {rootDir, depthLimit, targetPath} fields that walkDirectory does. Note that targetPath constrains only the primary workspace walk; extraRoots are always walked in full. 
Refs forloopcodes/contextplus#38 --- src/tools/blast-radius.ts | 4 ++-- src/tools/context-tree.ts | 4 ++-- src/tools/feature-hub.ts | 4 ++-- src/tools/semantic-identifiers.ts | 4 ++-- src/tools/semantic-navigate.ts | 4 ++-- src/tools/semantic-search.ts | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/tools/blast-radius.ts b/src/tools/blast-radius.ts index 706a787..6abfdc4 100644 --- a/src/tools/blast-radius.ts +++ b/src/tools/blast-radius.ts @@ -1,7 +1,7 @@ // Dependency graph analyzer to trace symbol usage across the codebase // Finds every file and line where a function, class, or variable is referenced -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { isSupportedFile } from "../core/parser.js"; import { readFile } from "fs/promises"; @@ -18,7 +18,7 @@ interface SymbolUsage { } export async function getBlastRadius(options: BlastRadiusOptions): Promise { - const entries = await walkDirectory({ rootDir: options.rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir: options.rootDir, depthLimit: 0 }); const files = entries.filter((e) => !e.isDirectory && isSupportedFile(e.path)); const usages: SymbolUsage[] = []; const symbolPattern = new RegExp(`\\b${escapeRegex(options.symbolName)}\\b`, "g"); diff --git a/src/tools/context-tree.ts b/src/tools/context-tree.ts index 9e41339..a2eb9d9 100644 --- a/src/tools/context-tree.ts +++ b/src/tools/context-tree.ts @@ -1,7 +1,7 @@ // Structural tree generator with file headers, symbols, and depth control // Dynamic token-aware pruning: Level 0 (files only) to Level 2 (deep context) -import { walkDirectory, type FileEntry } from "../core/walker.js"; +import { walkRoots, type FileEntry } from "../core/walker.js"; import { analyzeFile, formatSymbol, isSupportedFile } from "../core/parser.js"; export interface ContextTreeOptions { @@ -105,7 +105,7 @@ function pruneHeaders(node: TreeNode): void { } export async function 
getContextTree(options: ContextTreeOptions): Promise { - const entries = await walkDirectory({ + const entries = await walkRoots({ rootDir: options.rootDir, targetPath: options.targetPath, depthLimit: options.depthLimit, diff --git a/src/tools/feature-hub.ts b/src/tools/feature-hub.ts index 7ce6ee2..8530d15 100644 --- a/src/tools/feature-hub.ts +++ b/src/tools/feature-hub.ts @@ -5,7 +5,7 @@ import { resolve, extname } from "path"; import { readFile, stat } from "fs/promises"; import { parseHubFile, discoverHubs, findOrphanedFiles, type HubInfo } from "../core/hub.js"; import { getFileSkeleton } from "./file-skeleton.js"; -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; export interface FeatureHubOptions { rootDir: string; @@ -53,7 +53,7 @@ export async function getFeatureHub(options: FeatureHubOptions): Promise } if (showOrphans) { - const entries = await walkDirectory({ rootDir, depthLimit: 10 }); + const entries = await walkRoots({ rootDir, depthLimit: 10 }); const filePaths = entries.filter((e) => !e.isDirectory).map((e) => e.relativePath); const orphans = await findOrphanedFiles(rootDir, filePaths); if (orphans.length === 0) return "No orphaned files. 
All source files are linked to a hub."; diff --git a/src/tools/semantic-identifiers.ts b/src/tools/semantic-identifiers.ts index c694d59..46b275f 100644 --- a/src/tools/semantic-identifiers.ts +++ b/src/tools/semantic-identifiers.ts @@ -2,7 +2,7 @@ // FEATURE: Symbol intelligence via semantic search over definitions and usages import { readFile } from "fs/promises"; -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { analyzeFile, flattenSymbols, isSupportedFile } from "../core/parser.js"; import { fetchEmbedding, @@ -182,7 +182,7 @@ async function buildIdentifierIndex(rootDir: string): Promise { return cachedIndex; } - const entries = await walkDirectory({ rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir, depthLimit: 0 }); const files = entries.filter((entry) => !entry.isDirectory && isSupportedFile(entry.path)); const docs: IdentifierDoc[] = []; const fileLines = new Map(); diff --git a/src/tools/semantic-navigate.ts b/src/tools/semantic-navigate.ts index b99d74e..866e149 100644 --- a/src/tools/semantic-navigate.ts +++ b/src/tools/semantic-navigate.ts @@ -1,7 +1,7 @@ // Semantic project navigator using spectral clustering and provider-agnostic labeling // Browse codebase by meaning: embeds files, clusters vectors, generates labels -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { analyzeFile, flattenSymbols, isSupportedFile } from "../core/parser.js"; import { fetchEmbedding } from "../core/embeddings.js"; import { readFile } from "fs/promises"; @@ -253,7 +253,7 @@ export async function semanticNavigate(options: SemanticNavigateOptions): Promis const maxClusters = options.maxClusters ?? 20; const maxDepth = options.maxDepth ?? 
3; - const entries = await walkDirectory({ rootDir: options.rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir: options.rootDir, depthLimit: 0 }); const fileEntries = entries.filter((e) => !e.isDirectory && isNavigableSourceCandidate(e.path)); if (fileEntries.length === 0) return "No supported source files found in the project."; diff --git a/src/tools/semantic-search.ts b/src/tools/semantic-search.ts index c511e81..8e8fd68 100644 --- a/src/tools/semantic-search.ts +++ b/src/tools/semantic-search.ts @@ -1,7 +1,7 @@ // Ollama-powered semantic search over file headers and symbol names // Uses vector embeddings with cosine similarity for concept matching -import { walkDirectory } from "../core/walker.js"; +import { walkRoots } from "../core/walker.js"; import { analyzeFile, flattenSymbols, isSupportedFile } from "../core/parser.js"; import { fetchEmbedding, @@ -136,7 +136,7 @@ async function buildIndex(rootDir: string): Promise { return cachedIndex; } - const entries = await walkDirectory({ rootDir, depthLimit: 0 }); + const entries = await walkRoots({ rootDir, depthLimit: 0 }); const files = entries.filter((e) => !e.isDirectory); const docs: SearchDocument[] = []; From 35b3ebec86158339e0ff6e07966d381efe80bb6c Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 13:20:45 +0200 Subject: [PATCH 11/14] test(demo): tessera-style monorepo walkthrough for issue #38 --- test/demo/walker.demo.mjs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/test/demo/walker.demo.mjs b/test/demo/walker.demo.mjs index c59b7d2..2c10e25 100644 --- a/test/demo/walker.demo.mjs +++ b/test/demo/walker.demo.mjs @@ -83,3 +83,40 @@ describe("DEMO: groupByDirectory", () => { console.log("--- END ---\n"); }); }); + +describe("DEMO: tessera-style monorepo (issue #38)", () => { + const ROOT = join(process.cwd(), "test", "_demo_tessera"); + + before(async () => { + await rm(ROOT, { recursive: 
true, force: true }); + await mkdir(join(ROOT, "docs"), { recursive: true }); + await mkdir(join(ROOT, "repos", "lacuna", "src"), { recursive: true }); + await mkdir(join(ROOT, "repos", "graphrag-core", "src"), { recursive: true }); + await mkdir(join(ROOT, "repos", "lacuna", "build"), { recursive: true }); + await writeFile(join(ROOT, ".gitignore"), "repos/\n"); + await writeFile(join(ROOT, "docs", "readme.md"), "docs"); + await writeFile(join(ROOT, "repos", "lacuna", ".gitignore"), "build/\n"); + await writeFile(join(ROOT, "repos", "lacuna", "src", "main.py"), "m"); + await writeFile(join(ROOT, "repos", "lacuna", "build", "junk.py"), "j"); + await writeFile(join(ROOT, "repos", "graphrag-core", "src", "core.py"), "c"); + }); + + after(async () => { + await rm(ROOT, { recursive: true, force: true }); + }); + + it("workspace alone indexes only docs/ (the broken case before this PR)", async () => { + const { walkDirectory } = await import("../../build/core/walker.js"); + const entries = await walkDirectory({ rootDir: ROOT }); + console.log("[demo] without extraRoots: " + entries.map((e) => e.relativePath).sort().join(", ")); + }); + + it("with extraRoots, both sub-repos are indexed and each respects its own .gitignore", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const entries = await walkRoots({ + rootDir: ROOT, + extraRoots: ["repos/lacuna", "repos/graphrag-core"], + }); + console.log("[demo] with extraRoots: " + entries.map((e) => e.relativePath).sort().join(", ")); + }); +}); From 0d3249609e89ca5ec12e0e9b4e846c7546dfc65c Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 13:21:11 +0200 Subject: [PATCH 12/14] docs: --include flag and CONTEXTPLUS_EXTRA_ROOTS env var --- README.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/README.md b/README.md index ed2ba64..3c650c6 100644 --- a/README.md +++ b/README.md @@ -127,6 
+127,54 @@ Config file locations: - `skeleton [path]` or `tree [path]` - **(New)** View the structural tree of a project with file headers and symbol definitions directly in your terminal. - `[path]` - Start the MCP server (stdio) for the specified path (defaults to current directory). +### Including paths excluded by the workspace `.gitignore` + +If your workspace `.gitignore` excludes a sub-directory that you still want +indexed (common in monorepos where sub-projects under `repos/`, `packages/`, +or `vendor/` are gitignored at the top level), use `--include` or +`CONTEXTPLUS_EXTRA_ROOTS` to add the paths back. + +**CLI form** (repeatable): + +```bash +bunx contextplus /path/to/workspace \ + --include repos/lacuna \ + --include repos/graphrag-core +``` + +**Environment variable** (fallback when no `--include` flag is set; uses the +platform path-list delimiter — `:` on Unix, `;` on Windows): + +```bash +CONTEXTPLUS_EXTRA_ROOTS=repos/lacuna:repos/graphrag-core \ + bunx contextplus /path/to/workspace +``` + +**In `.mcp.json`** the env form is usually more ergonomic: + +```json +{ + "mcpServers": { + "contextplus": { + "command": "bunx", + "args": ["contextplus", "/path/to/workspace"], + "env": { + "CONTEXTPLUS_EXTRA_ROOTS": "repos/lacuna:repos/graphrag-core" + } + } + } +} +``` + +Each path listed is walked **independently** of the workspace root, with a +fresh ignore scope. Each path's own `.gitignore` is respected. Paths are +validated at startup; invalid entries (non-existent, not a directory, +outside the workspace) emit a stderr warning and are skipped. + +Nested `.gitignore` files inside the workspace and inside each extra root +are loaded and merged with inherited rules, matching `git` and `ripgrep` +behavior. 
+ ### From Source ```bash From 2d43585fc4a04038bbb61bfbf1b723d4127007c2 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 14:05:00 +0200 Subject: [PATCH 13/14] fix(walker): nested .gitignore per-scope evaluation + Windows path separators Per-scope evaluation: nested .gitignore files were previously merged into a single workspace-rooted Ignore instance. Anchored patterns (e.g. `/build/` in child/.gitignore) evaluated against the wrong base and missed their intended targets. Replace the merge with a chain of per-directory Ignore instances; each scope evaluates patterns against paths relative to its own directory. Cross-scope negation is preserved via Ignore.test(): a child scope's unignored result re-includes a path that an ancestor scope excluded. walkRoots' workspace-boundary check now uses path.sep; depth offset counting handles both / and \\ separators so it works on Windows. Addresses capy-ai bot review on PR #39. --- src/core/walker.ts | 70 +++++++++++++++++++++++---------------- test/main/walker.test.mjs | 25 ++++++++++++++ 2 files changed, 67 insertions(+), 28 deletions(-) diff --git a/src/core/walker.ts b/src/core/walker.ts index 549c636..3c444ba 100644 --- a/src/core/walker.ts +++ b/src/core/walker.ts @@ -2,7 +2,7 @@ // Returns filtered file paths respecting project ignore patterns (nested-gitignore-aware) import { readdir, readFile, stat } from "fs/promises"; -import { join, relative, resolve } from "path"; +import { join, relative, resolve, sep } from "path"; import ignore, { type Ignore } from "ignore"; export interface WalkOptions { @@ -19,10 +19,12 @@ export interface FileEntry { } interface IgnoreScope { + scopeRoot: string; ig: Ignore; - patterns: string[]; } +type IgnoreChain = IgnoreScope[]; + const ALWAYS_IGNORE = new Set([ "node_modules", ".git", @@ -43,28 +45,37 @@ const ALWAYS_IGNORE = new Set([ ".parcel-cache", ]); -async function readGitignorePatterns(dir: string): Promise { +async 
function loadLocalScope(dir: string): Promise { try { const content = await readFile(join(dir, ".gitignore"), "utf-8"); - return content.split(/\r?\n/).filter((line) => line.trim() && !line.startsWith("#")); + return { scopeRoot: dir, ig: ignore().add(content) }; } catch { - return []; + return null; } } -async function loadScopeFor(dir: string, parent: IgnoreScope | null): Promise { - const local = await readGitignorePatterns(dir); - if (!parent && local.length === 0) return { ig: ignore(), patterns: [] }; - if (!parent) return { ig: ignore().add(local), patterns: local }; - if (local.length === 0) return parent; - const merged = [...parent.patterns, ...local]; - return { ig: ignore().add(merged), patterns: merged }; +function isIgnoredInChain(absPath: string, isDir: boolean, chain: IgnoreChain): boolean { + // Walk scopes from outermost to innermost. Each scope's patterns are evaluated + // against paths relative to that scope's directory. Later scopes can re-include + // paths that earlier scopes excluded (gitignore negation crosses scope boundaries). + let state: "ignored" | "included" = "included"; + for (const scope of chain) { + let rel = relative(scope.scopeRoot, absPath).replace(/\\/g, "/"); + if (!rel || rel.startsWith("..")) continue; + // Mark directories with a trailing slash so anchored directory patterns + // like `/build/` match the directory itself (and short-circuit descent). 
+ if (isDir) rel += "/"; + const result = scope.ig.test(rel); + if (result.unignored) state = "included"; + else if (result.ignored) state = "ignored"; + } + return state === "ignored"; } async function walkRecursive( dir: string, rootDir: string, - scope: IgnoreScope, + chain: IgnoreChain, depth: number, maxDepth: number, results: FileEntry[], @@ -77,14 +88,15 @@ async function walkRecursive( const fullPath = join(dir, entry.name); const relPath = relative(rootDir, fullPath).replace(/\\/g, "/"); - if (scope.ig.ignores(relPath)) continue; - const isDir = entry.isDirectory(); + if (isIgnoredInChain(fullPath, isDir, chain)) continue; + results.push({ path: fullPath, relativePath: relPath, isDirectory: isDir, depth }); if (isDir) { - const childScope = await loadScopeFor(fullPath, scope); - await walkRecursive(fullPath, rootDir, childScope, depth + 1, maxDepth, results); + const localScope = await loadLocalScope(fullPath); + const childChain = localScope ? [...chain, localScope] : chain; + await walkRecursive(fullPath, rootDir, childChain, depth + 1, maxDepth, results); } } } @@ -100,21 +112,23 @@ export async function walkDirectory(options: WalkOptions): Promise return results; } - const rootScope = await loadScopeFor(rootDir, null); - let startScope = rootScope; + // Build the initial chain from rootDir down to startDir so ancestor scopes apply + // at the start of the walk. + const chain: IgnoreChain = []; + const rootScope = await loadLocalScope(rootDir); + if (rootScope) chain.push(rootScope); + if (startDir !== rootDir) { - // Build the scope chain from rootDir down to startDir so inherited rules apply. 
- const rel = relative(rootDir, startDir).split(/[\\/]/).filter(Boolean); + const segments = relative(rootDir, startDir).split(/[\\/]/).filter(Boolean); let cursor = rootDir; - let scope = rootScope; - for (const segment of rel) { + for (const segment of segments) { cursor = join(cursor, segment); - scope = await loadScopeFor(cursor, scope); + const scope = await loadLocalScope(cursor); + if (scope) chain.push(scope); } - startScope = scope; } - await walkRecursive(startDir, rootDir, startScope, 0, options.depthLimit ?? 0, results); + await walkRecursive(startDir, rootDir, chain, 0, options.depthLimit ?? 0, results); return results; } @@ -168,10 +182,10 @@ export async function walkRoots(options: WalkRootsOptions): Promise // targetPath constrains the primary walk only — extraRoots are always walked in full. for (const extra of extraRoots) { const extraAbs = resolve(rootDir, extra); - if (extraAbs !== rootDir && !extraAbs.startsWith(rootDir + "/")) { + if (extraAbs !== rootDir && !extraAbs.startsWith(rootDir + sep)) { throw new Error(`walkRoots: extraRoot "${extra}" resolves outside workspace root`); } - const depthOffset = relative(rootDir, extraAbs).split("/").filter(Boolean).length; + const depthOffset = relative(rootDir, extraAbs).split(/[\\/]/).filter(Boolean).length; const extraEntries = await walkDirectory({ rootDir: extraAbs, depthLimit: options.depthLimit, diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 25504cd..2777140 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -201,6 +201,31 @@ describe("walker", () => { await rm(DEEP, { recursive: true, force: true }); }); + + it("respects anchored patterns scoped to a nested .gitignore", async () => { + const ANCHOR = join(FIXTURE_DIR, "_anchor"); + await rm(ANCHOR, { recursive: true, force: true }); + await mkdir(join(ANCHOR, "artifacts"), { recursive: true }); + await mkdir(join(ANCHOR, "child", "artifacts"), { recursive: true }); + // Workspace-level artifacts/ 
should NOT be excluded — only the child has the rule. + await writeFile(join(ANCHOR, "artifacts", "ws.txt"), "ws"); + await writeFile(join(ANCHOR, "child", "artifacts", "junk.txt"), "junk"); + await writeFile(join(ANCHOR, "child", "keep.txt"), "keep"); + // Anchored pattern in child .gitignore should only affect child/artifacts/. + await writeFile(join(ANCHOR, "child", ".gitignore"), "/artifacts/\n"); + + const entries = await walkDirectory({ rootDir: ANCHOR }); + const paths = entries.map((e) => e.relativePath); + + assert.ok(paths.includes("artifacts/ws.txt"), "workspace-level artifacts/ should NOT be ignored"); + assert.ok(paths.includes("child/keep.txt")); + assert.ok( + !paths.some((p) => p.includes("child/artifacts")), + "anchored /artifacts/ in child/.gitignore should ignore only child/artifacts/", + ); + + await rm(ANCHOR, { recursive: true, force: true }); + }); }); describe("walkRoots", () => { From 63d567c975bf23857355484a2eb02604c9085b72 Mon Sep 17 00:00:00 2001 From: Dino Celi <126232664+cdel1@users.noreply.github.com> Date: Tue, 12 May 2026 14:14:28 +0200 Subject: [PATCH 14/14] fix(walker): canonicalize extraRoots with realpath to block symlink escapes The previous extraRoot containment check used path-string prefix on the unresolved path. A symlink placed inside the workspace but pointing outside (e.g. workspace/link -> /etc) passed validation, after which readdir followed the link and traversed external files. Both walkRoots() and parseExtraRoots() now realpath the workspace root and each extraRoot before the containment check. Symlinks targeting locations outside the workspace are rejected; symlinks pointing inside the workspace are walked at their canonical target. Adds tests covering both rejection and inside-the-workspace traversal in walker.test.mjs and parseExtraRoots' new symlink rejection case. Addresses capy-ai bot review on PR #39. 
--- src/core/extra-roots.ts | 22 ++++++++++++---- src/core/walker.ts | 23 +++++++++++++---- test/main/cli-parsing.test.mjs | 27 ++++++++++++++++++++ test/main/walker.test.mjs | 46 ++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 10 deletions(-) diff --git a/src/core/extra-roots.ts b/src/core/extra-roots.ts index 13b7642..e09b4d7 100644 --- a/src/core/extra-roots.ts +++ b/src/core/extra-roots.ts @@ -1,7 +1,7 @@ // CLI/env argument parsing for the extraRoots config. // Pure module — no side effects, safe to import from tests. -import { statSync } from "fs"; +import { realpathSync, statSync } from "fs"; import { delimiter, isAbsolute, resolve, sep } from "path"; export interface ParseExtraRootsInput { @@ -32,6 +32,12 @@ export function parseExtraRoots(input: ParseExtraRootsInput): ParseExtraRootsRes const accepted: string[] = []; const warnings: string[] = []; const rootAbs = resolve(input.rootDir); + let rootReal = rootAbs; + try { + rootReal = realpathSync(rootAbs); + } catch { + // rootDir doesn't exist; fall through + } const fromCli = extractIncludeFlags(input.argv); const raw = fromCli.length > 0 @@ -42,18 +48,24 @@ export function parseExtraRoots(input: ParseExtraRootsInput): ParseExtraRootsRes for (const entry of raw) { const abs = isAbsolute(entry) ? 
entry : resolve(rootAbs, entry); + let real = abs; + try { + real = realpathSync(abs); + } catch { + // doesn't exist - statSync below will catch and warn + } - if (abs === rootAbs) { + if (real === rootReal) { warnings.push(`contextplus: extraRoot '${entry}' equals the workspace root — skipping`); continue; } - if (!abs.startsWith(rootAbs + sep)) { + if (!real.startsWith(rootReal + sep)) { warnings.push(`contextplus: extraRoot '${entry}' is outside the workspace root — skipping`); continue; } let stats; try { - stats = statSync(abs); + stats = statSync(real); } catch { warnings.push(`contextplus: extraRoot '${entry}' does not exist — skipping`); continue; @@ -62,7 +74,7 @@ export function parseExtraRoots(input: ParseExtraRootsInput): ParseExtraRootsRes warnings.push(`contextplus: extraRoot '${entry}' is not a directory — skipping`); continue; } - accepted.push(abs); + accepted.push(real); } return { accepted, warnings }; diff --git a/src/core/walker.ts b/src/core/walker.ts index 3c444ba..d97c6ec 100644 --- a/src/core/walker.ts +++ b/src/core/walker.ts @@ -1,7 +1,7 @@ // Gitignore-aware recursive directory walker with depth control // Returns filtered file paths respecting project ignore patterns (nested-gitignore-aware) -import { readdir, readFile, stat } from "fs/promises"; +import { readdir, readFile, realpath, stat } from "fs/promises"; import { join, relative, resolve, sep } from "path"; import ignore, { type Ignore } from "ignore"; @@ -164,6 +164,13 @@ export interface WalkRootsOptions { export async function walkRoots(options: WalkRootsOptions): Promise { const rootDir = resolve(options.rootDir); + let rootReal = rootDir; + try { + rootReal = await realpath(rootDir); + } catch { + // rootDir doesn't exist; fall through with unresolved value + } + const extraRoots = options.extraRoots ?? 
GLOBAL_EXTRA_ROOTS; const seen = new Set(); const results: FileEntry[] = []; @@ -182,18 +189,24 @@ export async function walkRoots(options: WalkRootsOptions): Promise // targetPath constrains the primary walk only — extraRoots are always walked in full. for (const extra of extraRoots) { const extraAbs = resolve(rootDir, extra); - if (extraAbs !== rootDir && !extraAbs.startsWith(rootDir + sep)) { + let extraReal = extraAbs; + try { + extraReal = await realpath(extraAbs); + } catch { + // doesn't exist; will fall through to the prefix check on unresolved path + } + if (extraReal !== rootReal && !extraReal.startsWith(rootReal + sep)) { throw new Error(`walkRoots: extraRoot "${extra}" resolves outside workspace root`); } - const depthOffset = relative(rootDir, extraAbs).split(/[\\/]/).filter(Boolean).length; + const depthOffset = relative(rootReal, extraReal).split(/[\\/]/).filter(Boolean).length; const extraEntries = await walkDirectory({ - rootDir: extraAbs, + rootDir: extraReal, depthLimit: options.depthLimit, }); for (const entry of extraEntries) { if (seen.has(entry.path)) continue; seen.add(entry.path); - const workspaceRel = relative(rootDir, entry.path).replace(/\\/g, "/"); + const workspaceRel = relative(rootReal, entry.path).replace(/\\/g, "/"); results.push({ path: entry.path, relativePath: workspaceRel, diff --git a/test/main/cli-parsing.test.mjs b/test/main/cli-parsing.test.mjs index 832f283..b253ba8 100644 --- a/test/main/cli-parsing.test.mjs +++ b/test/main/cli-parsing.test.mjs @@ -97,4 +97,31 @@ describe("parseExtraRoots", () => { }); assert.deepEqual(result.accepted.sort(), [join(FIX, "a"), join(FIX, "b")].sort()); }); + + it("rejects symlinks that point outside the workspace root", async () => { + const { symlink } = await import("fs/promises"); + const OUTSIDE = join(FIX, "..", "_outside_target"); + await rm(OUTSIDE, { recursive: true, force: true }); + await mkdir(OUTSIDE, { recursive: true }); + try { + await symlink(OUTSIDE, join(FIX, 
"bad-link")); + } catch { + // If symlink creation fails (e.g., on a filesystem that doesn't support symlinks), + // skip the test gracefully. + return; + } + + const result = parseExtraRoots({ + argv: ["--include", "bad-link"], + env: {}, + rootDir: FIX, + }); + + assert.equal(result.accepted.length, 0, "symlink to outside should be rejected"); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0], /outside/i); + + await rm(join(FIX, "bad-link")); + await rm(OUTSIDE, { recursive: true, force: true }); + }); }); diff --git a/test/main/walker.test.mjs b/test/main/walker.test.mjs index 2777140..c0fe314 100644 --- a/test/main/walker.test.mjs +++ b/test/main/walker.test.mjs @@ -344,6 +344,52 @@ describe("walker", () => { "relativePath should be rooted at the workspace, not the extraRoot", ); }); + + it("rejects symlinked extraRoots that point outside the workspace", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const { symlink, mkdir, rm, writeFile } = await import("fs/promises"); + const SYM = join(FIXTURE_DIR, "_sym"); + const EXTERNAL = join(FIXTURE_DIR, "_external"); + await rm(SYM, { recursive: true, force: true }); + await rm(EXTERNAL, { recursive: true, force: true }); + await mkdir(SYM, { recursive: true }); + await mkdir(EXTERNAL, { recursive: true }); + await writeFile(join(EXTERNAL, "secret.txt"), "secret"); + // Place a symlink INSIDE the workspace pointing OUTSIDE. 
+ await symlink(EXTERNAL, join(SYM, "link")); + + await assert.rejects( + () => walkRoots({ rootDir: SYM, extraRoots: ["link"] }), + /resolves outside workspace root/, + ); + + await rm(SYM, { recursive: true, force: true }); + await rm(EXTERNAL, { recursive: true, force: true }); + }); + + it("follows symlinked extraRoots that point inside the workspace", async () => { + const { walkRoots } = await import("../../build/core/walker.js"); + const { symlink, mkdir, rm, writeFile } = await import("fs/promises"); + const SYM = join(FIXTURE_DIR, "_sym_inside"); + await rm(SYM, { recursive: true, force: true }); + await mkdir(join(SYM, "real"), { recursive: true }); + await writeFile(join(SYM, "real", "ok.txt"), "ok"); + await writeFile(join(SYM, ".gitignore"), "linked/\n"); // exclude symlink-named path from primary walk so the only way to see ok.txt is via the extraRoot + await symlink(join(SYM, "real"), join(SYM, "linked")); + + const entries = await walkRoots({ rootDir: SYM, extraRoots: ["linked"] }); + const paths = entries.map((e) => e.relativePath); + + // The symlink target is INSIDE the workspace, so it should be walked. + // The file may appear under either the canonical path or the symlink path, + // depending on how realpath rewrites it. Accept either. + assert.ok( + paths.some((p) => p.endsWith("ok.txt")), + "ok.txt should be reachable via the symlinked extraRoot", + ); + + await rm(SYM, { recursive: true, force: true }); + }); }); after(async () => {