From 3ed7bf50596a20051ae2e938ef650b9d68a62a49 Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 22:54:14 -0600 Subject: [PATCH 01/12] Add optional from_line to Edge for call-site coordinates --- crates/mycel-core/src/edge.rs | 36 +++++++++++++++++++ crates/mycel-core/tests/serde.rs | 1 + crates/mycel-extract/src/languages/rust.rs | 3 ++ .../mycel-extract/src/languages/typescript.rs | 2 ++ crates/mycel-graph/tests/integration.rs | 2 ++ crates/mycel-lsp/src/multilspy.rs | 1 + 6 files changed, 45 insertions(+) diff --git a/crates/mycel-core/src/edge.rs b/crates/mycel-core/src/edge.rs index c59bdfe..2c4cac5 100644 --- a/crates/mycel-core/src/edge.rs +++ b/crates/mycel-core/src/edge.rs @@ -30,4 +30,40 @@ pub struct Edge { pub to: String, pub kind: EdgeKind, pub source: EdgeSource, + /// 1-indexed line number in the `from` symbol's source file at which the + /// call/use/implements site appears. Populated by tree-sitter extractors + /// for CALLS / USES_TYPE / IMPLEMENTS edges so the LSP refinement layer + /// can hover that line to resolve the `to` endpoint to a qualified name + /// across file boundaries. `None` for derived or pre-resolution edges. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub from_line: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn edge_default_has_no_from_line() { + let e = Edge { + from: "a".into(), + to: "b".into(), + kind: EdgeKind::Calls, + source: EdgeSource::TreeSitter, + from_line: None, + }; + assert!(e.from_line.is_none()); + } + + #[test] + fn edge_with_from_line_carries_coord() { + let e = Edge { + from: "a".into(), + to: "b".into(), + kind: EdgeKind::Calls, + source: EdgeSource::TreeSitter, + from_line: Some(42), + }; + assert_eq!(e.from_line, Some(42)); + } } diff --git a/crates/mycel-core/tests/serde.rs b/crates/mycel-core/tests/serde.rs index 30f8e30..df475e9 100644 --- a/crates/mycel-core/tests/serde.rs +++ b/crates/mycel-core/tests/serde.rs @@ -41,6 +41,7 @@ fn edge_kind_serializes_lowercase() { to: "b".into(), kind: EdgeKind::Calls, source: EdgeSource::Lsp, + from_line: None, }; let json = serde_json::to_string(&e).unwrap(); assert!(json.contains("\"calls\"")); diff --git a/crates/mycel-extract/src/languages/rust.rs b/crates/mycel-extract/src/languages/rust.rs index 4273a41..e54c3fd 100644 --- a/crates/mycel-extract/src/languages/rust.rs +++ b/crates/mycel-extract/src/languages/rust.rs @@ -165,6 +165,7 @@ fn extract_rust( to: trait_name.into(), kind: EdgeKind::Implements, source: EdgeSource::TreeSitter, + from_line: None, }); } } @@ -185,6 +186,7 @@ fn extract_rust( to: path.into(), kind: EdgeKind::Imports, source: EdgeSource::TreeSitter, + from_line: None, }); } } @@ -214,6 +216,7 @@ fn extract_rust( to: callee.into(), kind: EdgeKind::Calls, source: EdgeSource::TreeSitter, + from_line: None, }); } } diff --git a/crates/mycel-extract/src/languages/typescript.rs b/crates/mycel-extract/src/languages/typescript.rs index f9258df..50f3571 100644 --- a/crates/mycel-extract/src/languages/typescript.rs +++ b/crates/mycel-extract/src/languages/typescript.rs @@ -205,6 +205,7 @@ fn extract_symbols_and_edges( to: callee_name.into(), 
kind: EdgeKind::Calls, source: EdgeSource::TreeSitter, + from_line: None, }); } @@ -229,6 +230,7 @@ fn extract_symbols_and_edges( to: src_text.into(), kind: EdgeKind::Imports, source: EdgeSource::TreeSitter, + from_line: None, }); } } diff --git a/crates/mycel-graph/tests/integration.rs b/crates/mycel-graph/tests/integration.rs index a9341b1..cf024f7 100644 --- a/crates/mycel-graph/tests/integration.rs +++ b/crates/mycel-graph/tests/integration.rs @@ -103,6 +103,7 @@ async fn edge_upsert_callers_query() { to: "crate::bar".into(), kind: EdgeKind::Calls, source: EdgeSource::Lsp, + from_line: None, }]) .await .unwrap(); @@ -154,6 +155,7 @@ async fn imports_uses_implements_queries() { to: "ts::IFoo".into(), kind: EdgeKind::Implements, source: EdgeSource::Lsp, + from_line: None, }]) .await .unwrap(); diff --git a/crates/mycel-lsp/src/multilspy.rs b/crates/mycel-lsp/src/multilspy.rs index 3d84e16..1edbd84 100644 --- a/crates/mycel-lsp/src/multilspy.rs +++ b/crates/mycel-lsp/src/multilspy.rs @@ -187,6 +187,7 @@ impl MultilspyResolver { to: r.to, kind, source: EdgeSource::Lsp, + from_line: None, }) }) .collect()) From 9bbd05989ba15ba058e0f371e57217ee93e79e9f Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 22:56:19 -0600 Subject: [PATCH 02/12] Capture call-site row in tree-sitter CALLS/USES_TYPE/IMPLEMENTS edges --- crates/mycel-extract/src/languages/rust.rs | 18 +++++++--- .../mycel-extract/src/languages/typescript.rs | 4 ++- crates/mycel-extract/tests/rust_fixtures.rs | 36 +++++++++++++++++++ ...rust_fixtures__simple_module_snapshot.snap | 2 +- ...ust_fixtures__trait_and_impl_snapshot.snap | 2 +- ...ixtures__imports_and_exports_snapshot.snap | 2 +- ...pt_fixtures__simple_function_snapshot.snap | 2 +- .../tests/typescript_fixtures.rs | 15 ++++++++ 8 files changed, 72 insertions(+), 9 deletions(-) diff --git a/crates/mycel-extract/src/languages/rust.rs b/crates/mycel-extract/src/languages/rust.rs index e54c3fd..bec722d 100644 --- 
a/crates/mycel-extract/src/languages/rust.rs +++ b/crates/mycel-extract/src/languages/rust.rs @@ -151,21 +151,29 @@ fn extract_rust( let mut matches = cursor.matches(&q_impl, tree.root_node(), bytes); while let Some(m) = matches.next() { let mut trait_name: Option<&str> = None; + let mut trait_node: Option = None; let mut type_name: Option<&str> = None; for c in m.captures { match q_impl.capture_names()[c.index as usize] { - "trait" => trait_name = node_text(c.node, bytes), + "trait" => { + trait_name = node_text(c.node, bytes); + trait_node = Some(c.node); + } "ty" => type_name = node_text(c.node, bytes), _ => {} } } - if let (Some(trait_name), Some(type_name)) = (trait_name, type_name) { + if let (Some(trait_name), Some(trait_node), Some(type_name)) = + (trait_name, trait_node, type_name) + { edges.push(Edge { from: format!("{}::{}", file.as_str(), type_name), to: trait_name.into(), kind: EdgeKind::Implements, source: EdgeSource::TreeSitter, - from_line: None, + // 1-indexed row of the trait reference in the `impl Trait for Ty` + // header so LSP can resolve `trait_name` to its definition file. + from_line: Some(trait_node.start_position().row as u32 + 1), }); } } @@ -216,7 +224,9 @@ fn extract_rust( to: callee.into(), kind: EdgeKind::Calls, source: EdgeSource::TreeSitter, - from_line: None, + // 1-indexed call-site row so LSP refinement can hover the + // line to resolve `callee` across files. + from_line: Some(c.node.start_position().row as u32 + 1), }); } } diff --git a/crates/mycel-extract/src/languages/typescript.rs b/crates/mycel-extract/src/languages/typescript.rs index 50f3571..e005858 100644 --- a/crates/mycel-extract/src/languages/typescript.rs +++ b/crates/mycel-extract/src/languages/typescript.rs @@ -205,7 +205,9 @@ fn extract_symbols_and_edges( to: callee_name.into(), kind: EdgeKind::Calls, source: EdgeSource::TreeSitter, - from_line: None, + // 1-indexed call-site row so LSP refinement can hover the line + // to resolve `callee_name` across files. 
+ from_line: Some(site.start_position().row as u32 + 1), }); } diff --git a/crates/mycel-extract/tests/rust_fixtures.rs b/crates/mycel-extract/tests/rust_fixtures.rs index 662ee16..55435b1 100644 --- a/crates/mycel-extract/tests/rust_fixtures.rs +++ b/crates/mycel-extract/tests/rust_fixtures.rs @@ -25,3 +25,39 @@ fn simple_module_snapshot() { fn trait_and_impl_snapshot() { insta::assert_yaml_snapshot!(extract_fixture("trait_and_impl.rs")); } + +#[test] +fn rust_call_edge_carries_from_line() { + let out = extract_fixture("simple_module.rs"); + let call_edges: Vec<_> = out + .edges + .iter() + .filter(|e| matches!(e.kind, mycel_core::EdgeKind::Calls)) + .collect(); + assert!(!call_edges.is_empty(), "fixture should produce >=1 CALL edge"); + for e in &call_edges { + assert!(e.from_line.is_some(), "CALL edge {e:?} missing from_line"); + assert!(e.from_line.unwrap() > 0, "from_line should be 1-indexed"); + } +} + +#[test] +fn rust_implements_edge_carries_from_line() { + let out = extract_fixture("trait_and_impl.rs"); + let impl_edges: Vec<_> = out + .edges + .iter() + .filter(|e| matches!(e.kind, mycel_core::EdgeKind::Implements)) + .collect(); + assert!( + !impl_edges.is_empty(), + "fixture should produce >=1 IMPLEMENTS edge" + ); + for e in &impl_edges { + assert!( + e.from_line.is_some(), + "IMPLEMENTS edge {e:?} missing from_line" + ); + assert!(e.from_line.unwrap() > 0, "from_line should be 1-indexed"); + } +} diff --git a/crates/mycel-extract/tests/snapshots/rust_fixtures__simple_module_snapshot.snap b/crates/mycel-extract/tests/snapshots/rust_fixtures__simple_module_snapshot.snap index 6b218ac..1d75d56 100644 --- a/crates/mycel-extract/tests/snapshots/rust_fixtures__simple_module_snapshot.snap +++ b/crates/mycel-extract/tests/snapshots/rust_fixtures__simple_module_snapshot.snap @@ -1,6 +1,5 @@ --- source: crates/mycel-extract/tests/rust_fixtures.rs -assertion_line: 21 expression: "extract_fixture(\"simple_module.rs\")" --- symbols: @@ -30,4 +29,5 @@ edges: to: 
add kind: calls source: tree_sitter + from_line: 6 language: rust diff --git a/crates/mycel-extract/tests/snapshots/rust_fixtures__trait_and_impl_snapshot.snap b/crates/mycel-extract/tests/snapshots/rust_fixtures__trait_and_impl_snapshot.snap index f7fccdb..b18e77c 100644 --- a/crates/mycel-extract/tests/snapshots/rust_fixtures__trait_and_impl_snapshot.snap +++ b/crates/mycel-extract/tests/snapshots/rust_fixtures__trait_and_impl_snapshot.snap @@ -1,6 +1,5 @@ --- source: crates/mycel-extract/tests/rust_fixtures.rs -assertion_line: 26 expression: "extract_fixture(\"trait_and_impl.rs\")" --- symbols: @@ -37,4 +36,5 @@ edges: to: Greeter kind: implements source: tree_sitter + from_line: 9 language: rust diff --git a/crates/mycel-extract/tests/snapshots/typescript_fixtures__imports_and_exports_snapshot.snap b/crates/mycel-extract/tests/snapshots/typescript_fixtures__imports_and_exports_snapshot.snap index 2a5bb41..55e5074 100644 --- a/crates/mycel-extract/tests/snapshots/typescript_fixtures__imports_and_exports_snapshot.snap +++ b/crates/mycel-extract/tests/snapshots/typescript_fixtures__imports_and_exports_snapshot.snap @@ -1,6 +1,5 @@ --- source: crates/mycel-extract/tests/typescript_fixtures.rs -assertion_line: 31 expression: "extract_fixture(\"imports_and_exports.ts\")" --- symbols: @@ -16,6 +15,7 @@ edges: to: add kind: calls source: tree_sitter + from_line: 6 - from: tests/fixtures/typescript/imports_and_exports.ts to: "./simple_function" kind: imports diff --git a/crates/mycel-extract/tests/snapshots/typescript_fixtures__simple_function_snapshot.snap b/crates/mycel-extract/tests/snapshots/typescript_fixtures__simple_function_snapshot.snap index 44df172..9905228 100644 --- a/crates/mycel-extract/tests/snapshots/typescript_fixtures__simple_function_snapshot.snap +++ b/crates/mycel-extract/tests/snapshots/typescript_fixtures__simple_function_snapshot.snap @@ -1,6 +1,5 @@ --- source: crates/mycel-extract/tests/typescript_fixtures.rs -assertion_line: 21 expression: 
"extract_fixture(\"simple_function.ts\")" --- symbols: @@ -23,4 +22,5 @@ edges: to: add kind: calls source: tree_sitter + from_line: 6 language: typescript diff --git a/crates/mycel-extract/tests/typescript_fixtures.rs b/crates/mycel-extract/tests/typescript_fixtures.rs index ea58a0c..92e958a 100644 --- a/crates/mycel-extract/tests/typescript_fixtures.rs +++ b/crates/mycel-extract/tests/typescript_fixtures.rs @@ -30,3 +30,18 @@ fn class_with_methods_snapshot() { fn imports_and_exports_snapshot() { insta::assert_yaml_snapshot!(extract_fixture("imports_and_exports.ts")); } + +#[test] +fn typescript_call_edge_carries_from_line() { + let out = extract_fixture("imports_and_exports.ts"); + let call_edges: Vec<_> = out + .edges + .iter() + .filter(|e| matches!(e.kind, mycel_core::EdgeKind::Calls)) + .collect(); + assert!(!call_edges.is_empty(), "fixture should produce >=1 CALL edge"); + for e in &call_edges { + assert!(e.from_line.is_some(), "CALL edge {e:?} missing from_line"); + assert!(e.from_line.unwrap() > 0, "from_line should be 1-indexed"); + } +} From c63f358b255a36868652715609b777993167f2dd Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:02:23 -0600 Subject: [PATCH 03/12] Pin specific line numbers in from_line contract tests --- crates/mycel-extract/tests/rust_fixtures.rs | 15 ++++++++------- crates/mycel-extract/tests/typescript_fixtures.rs | 6 ++++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/crates/mycel-extract/tests/rust_fixtures.rs b/crates/mycel-extract/tests/rust_fixtures.rs index 55435b1..fddd8dc 100644 --- a/crates/mycel-extract/tests/rust_fixtures.rs +++ b/crates/mycel-extract/tests/rust_fixtures.rs @@ -35,9 +35,11 @@ fn rust_call_edge_carries_from_line() { .filter(|e| matches!(e.kind, mycel_core::EdgeKind::Calls)) .collect(); assert!(!call_edges.is_empty(), "fixture should produce >=1 CALL edge"); + // Pinning the exact line locks the `start_position().row + 1` conversion. 
+ // A `+ 0` regression would be caught here, where `> 0` alone would not. + // The only call site in the fixture is `add(x, x)` inside `double` on line 6. for e in &call_edges { - assert!(e.from_line.is_some(), "CALL edge {e:?} missing from_line"); - assert!(e.from_line.unwrap() > 0, "from_line should be 1-indexed"); + assert_eq!(e.from_line, Some(6), "edge {e:?}"); } } @@ -53,11 +55,10 @@ fn rust_implements_edge_carries_from_line() { !impl_edges.is_empty(), "fixture should produce >=1 IMPLEMENTS edge" ); + // Pinning the exact line locks the `start_position().row + 1` conversion. + // A `+ 0` regression would be caught here, where `> 0` alone would not. + // The only impl in the fixture is `impl Greeter for FormalGreeter` on line 9. for e in &impl_edges { - assert!( - e.from_line.is_some(), - "IMPLEMENTS edge {e:?} missing from_line" - ); - assert!(e.from_line.unwrap() > 0, "from_line should be 1-indexed"); + assert_eq!(e.from_line, Some(9), "edge {e:?}"); } } diff --git a/crates/mycel-extract/tests/typescript_fixtures.rs b/crates/mycel-extract/tests/typescript_fixtures.rs index 92e958a..0f3aaac 100644 --- a/crates/mycel-extract/tests/typescript_fixtures.rs +++ b/crates/mycel-extract/tests/typescript_fixtures.rs @@ -40,8 +40,10 @@ fn typescript_call_edge_carries_from_line() { .filter(|e| matches!(e.kind, mycel_core::EdgeKind::Calls)) .collect(); assert!(!call_edges.is_empty(), "fixture should produce >=1 CALL edge"); + // Pinning the exact line locks the `start_position().row + 1` conversion. + // A `+ 0` regression would be caught here, where `> 0` alone would not. + // The only call site in the fixture is `add(1, 2)` on line 6. 
for e in &call_edges { - assert!(e.from_line.is_some(), "CALL edge {e:?} missing from_line"); - assert!(e.from_line.unwrap() > 0, "from_line should be 1-indexed"); + assert_eq!(e.from_line, Some(6), "edge {e:?}"); } } From 0c7eaad57cba29313482faf8033553ad7fc6e5c3 Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:05:06 -0600 Subject: [PATCH 04/12] Add GraphClient::symbol_containing for file+line lookups --- crates/mycel-graph/src/queries.rs | 34 +++++++++++++++++++++ crates/mycel-graph/tests/integration.rs | 39 +++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/crates/mycel-graph/src/queries.rs b/crates/mycel-graph/src/queries.rs index e5fd019..390ea9e 100644 --- a/crates/mycel-graph/src/queries.rs +++ b/crates/mycel-graph/src/queries.rs @@ -1,6 +1,7 @@ use crate::GraphClient; use crate::cypher::escape; use crate::symbol::parse_symbol_row; +use falkordb::FalkorValue; use mycel_core::*; impl GraphClient { @@ -56,4 +57,37 @@ impl GraphClient { .filter_map(parse_symbol_row) .collect()) } + + /// Returns the qname of the Symbol whose `start_line..=end_line` range + /// contains `line` in `file_path`. Used by Workstream A's pipeline to map + /// LSP-returned definition locations back to Symbol qnames. + /// + /// Returns `Ok(None)` when no Symbol spans that location — common case + /// for definitions outside the indexed surface (stdlib, node_modules) or + /// for references into module-decl symbols whose range we don't model. + /// For nested symbols (e.g., a method inside a class — both could match + /// line N), `LIMIT 1` arbitrarily picks one; a future iteration may + /// prefer the smallest enclosing range. 
+ pub async fn symbol_containing( + &self, + file_path: &str, + line: u32, + ) -> Result> { + let cypher = format!( + "MATCH (s:Symbol) WHERE s.file_path = '{p}' \ + AND s.start_line <= {l} AND s.end_line >= {l} \ + RETURN s.qualified_name LIMIT 1", + p = escape(file_path), + l = line, + ); + let rows = self.query(&cypher).await?; + Ok(rows + .into_iter() + .next() + .and_then(|row| row.into_iter().next()) + .and_then(|v| match v { + FalkorValue::String(s) => Some(s), + _ => None, + })) + } } diff --git a/crates/mycel-graph/tests/integration.rs b/crates/mycel-graph/tests/integration.rs index cf024f7..c0fdfab 100644 --- a/crates/mycel-graph/tests/integration.rs +++ b/crates/mycel-graph/tests/integration.rs @@ -789,6 +789,45 @@ async fn set_description_round_trips_newlines_and_quotes() { assert_eq!(info.description.as_deref(), Some(payload)); } +#[tokio::test] +async fn symbol_containing_finds_enclosing_symbol() { + let client = fresh_client("mycel:test:symbol_containing").await; + // Upsert a Symbol that spans lines 10..=20. 
+ let sym = Symbol { + qualified_name: QualifiedName::new("src/foo.rs::bar"), + kind: SymbolKind::Function, + file_path: "src/foo.rs".into(), + start_line: 10, + end_line: 20, + signature: Signature::new("fn bar()"), + jsdoc: None, + synthesized_description: None, + exported: true, + embedding: None, + body_hash: None, + description_source_hash: None, + }; + client.upsert_symbol(&sym).await.unwrap(); + + // A line inside the span -> Some(qname) + let hit = client.symbol_containing("src/foo.rs", 15).await.unwrap(); + assert_eq!(hit.as_deref(), Some("src/foo.rs::bar")); + + // Boundary lines (inclusive both ends) + let lo = client.symbol_containing("src/foo.rs", 10).await.unwrap(); + let hi = client.symbol_containing("src/foo.rs", 20).await.unwrap(); + assert_eq!(lo.as_deref(), Some("src/foo.rs::bar")); + assert_eq!(hi.as_deref(), Some("src/foo.rs::bar")); + + // A line outside the span -> None + let miss = client.symbol_containing("src/foo.rs", 5).await.unwrap(); + assert!(miss.is_none(), "line 5 is outside 10..=20"); + + // A line in a different file -> None + let other = client.symbol_containing("src/other.rs", 15).await.unwrap(); + assert!(other.is_none()); +} + #[tokio::test] async fn upsert_symbol_preserves_description_source_hash_on_none() { // Sibling invariant to upsert_symbol_writes_body_hash_when_set: an From c787e3717fdd767a10c5fc14a0ad54e72762f0c7 Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:09:36 -0600 Subject: [PATCH 05/12] Add resolve_refs_for_file op to multilspy bridge + Rust wire types --- crates/mycel-lsp/src/protocol.rs | 37 +++++++++++++++++++++++ scripts/multilspy_bridge.py | 52 ++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/crates/mycel-lsp/src/protocol.rs b/crates/mycel-lsp/src/protocol.rs index 114a281..a41a76e 100644 --- a/crates/mycel-lsp/src/protocol.rs +++ b/crates/mycel-lsp/src/protocol.rs @@ -28,3 +28,40 @@ pub struct RawEdge { #[serde(default)] pub source: Option, } + 
+#[derive(Serialize)] +pub struct ResolveRefsReq<'a> { + pub id: u64, + pub op: &'static str, // always "resolve_refs_for_file" + pub repo_root: &'a str, + pub language: &'a str, + pub path: &'a str, + pub sites: Vec, +} + +#[derive(Serialize, Clone, Debug)] +pub struct RefSite { + pub line: u32, // 1-indexed (matches Edge::from_line) + pub col: u32, // 0-indexed (LSP convention) + pub kind: String, // "calls" | "uses_type" | "implements" +} + +#[derive(Deserialize, Debug)] +pub struct ResolveRefsResp { + pub id: u64, + #[serde(default)] + pub refs: Vec, + #[serde(default)] + pub partial: bool, + #[serde(default)] + pub error: Option, +} + +#[derive(Deserialize, Debug)] +pub struct RawResolvedRef { + pub from_path: String, + pub from_line: u32, + pub to_path: String, // file path, repo-relative (bridge does URI conversion) + pub to_line: u32, // 1-indexed + pub kind: String, +} diff --git a/scripts/multilspy_bridge.py b/scripts/multilspy_bridge.py index 9b7a795..9e8faf7 100644 --- a/scripts/multilspy_bridge.py +++ b/scripts/multilspy_bridge.py @@ -51,6 +51,18 @@ def _selection_start(sym): rng = loc.get("range") or {} return rng.get("start") +def _uri_to_repo_path(uri: str, repo_root: str): + """Convert a file:// URI to a repo-relative path, or None if outside the repo.""" + from urllib.parse import urlparse, unquote + p = urlparse(uri) + if p.scheme != "file": + return None + abs_path = unquote(p.path) + root = repo_root.rstrip("/") + if not abs_path.startswith(root + "/"): + return None + return abs_path[len(root) + 1:] + class BridgeState: def __init__(self): self.servers = {} # (repo_root, language) -> (server, cm) @@ -112,6 +124,41 @@ def edges_for_file(self, repo_root: str, language: str, path: str): partial = True return {"edges": edges, "partial": partial} + def resolve_refs_for_file(self, repo_root: str, language: str, path: str, sites): + """Per-site request_definition; map each result back to (path, line).""" + server = self.get_server(repo_root, 
language) + refs = [] + partial = False + for site in (sites or []): + try: + # tree-sitter sites are 1-indexed; LSP wants 0-indexed lines. + defs = server.request_definition(path, site["line"] - 1, site["col"]) + for d in (defs or []): + target_uri = d.get("uri") or d.get("targetUri") + # `range` is plain Location; `targetSelectionRange` / `targetRange` + # is the LocationLink form. Prefer the name range when available. + target_range = ( + d.get("range") + or d.get("targetSelectionRange") + or d.get("targetRange") + ) + if not target_uri or not target_range: + continue + to_path = _uri_to_repo_path(target_uri, repo_root) + if to_path is None: + continue # definition lives outside the repo (stdlib, node_modules) + to_line = target_range["start"]["line"] + 1 # back to 1-indexed + refs.append({ + "from_path": path, + "from_line": site["line"], + "to_path": to_path, + "to_line": to_line, + "kind": site["kind"], + }) + except Exception: + partial = True + return {"refs": refs, "partial": partial} + def main(): state = BridgeState() for line in sys.stdin: @@ -131,6 +178,11 @@ def main(): req["repo_root"], req["language"], req["path"] ) emit({"id": rid, **result}) + elif op == "resolve_refs_for_file": + result = state.resolve_refs_for_file( + req["repo_root"], req["language"], req["path"], req.get("sites", []), + ) + emit({"id": rid, **result}) elif op == "ping": emit({"id": rid, "pong": True}) elif op == "shutdown": From be2bbaa54517b6ae75e6d4c17929a4d0146a78f0 Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:11:30 -0600 Subject: [PATCH 06/12] Add MultilspyResolver::resolve_refs + parallel response demux --- crates/mycel-lsp/src/multilspy.rs | 114 ++++++++++++++++++++++++++---- crates/mycel-lsp/tests/smoke.rs | 45 ++++++++++++ 2 files changed, 147 insertions(+), 12 deletions(-) diff --git a/crates/mycel-lsp/src/multilspy.rs b/crates/mycel-lsp/src/multilspy.rs index 1edbd84..a91687e 100644 --- a/crates/mycel-lsp/src/multilspy.rs +++ 
b/crates/mycel-lsp/src/multilspy.rs @@ -25,6 +25,7 @@ use tracing::{debug, error, warn}; pub struct MultilspyResolver { next_id: AtomicU64, pending: Arc>>>, + pending_resolve: Arc>>>, stdin: Arc>, repo_root: Utf8PathBuf, dead: Arc, @@ -59,15 +60,23 @@ impl MultilspyResolver { let pending: Arc>>> = Default::default(); + let pending_resolve: Arc>>> = + Default::default(); let dead = Arc::new(AtomicBool::new(false)); - tokio::spawn(reader_loop(stdout, pending.clone())); + tokio::spawn(reader_loop(stdout, pending.clone(), pending_resolve.clone())); tokio::spawn(stderr_loop(stderr)); - tokio::spawn(wait_loop(child, pending.clone(), dead.clone())); + tokio::spawn(wait_loop( + child, + pending.clone(), + pending_resolve.clone(), + dead.clone(), + )); Ok(Self { next_id: AtomicU64::new(1), pending, + pending_resolve, stdin: Arc::new(Mutex::new(stdin)), repo_root, dead, @@ -78,14 +87,28 @@ impl MultilspyResolver { async fn reader_loop( stdout: ChildStdout, pending: Arc>>>, + pending_resolve: Arc>>>, ) { let mut reader = BufReader::new(stdout).lines(); while let Ok(Some(line)) = reader.next_line().await { debug!(line = %line, "bridge response"); - match serde_json::from_str::(&line) { + // Demux: the bridge has two response shapes and neither carries an `op` + // discriminator. Route by id ownership — try edges first, then resolve. + // A response routes to whichever pending map currently holds its id. + // We parse twice in the worst case; JSON parse failures here are cheap. + if let Ok(resp) = serde_json::from_str::(&line) { + if let Some(tx) = pending.lock().await.remove(&resp.id) { + let _ = tx.send(resp); + continue; + } + // Fall through: id wasn't pending here; try the resolve map. 
+ } + match serde_json::from_str::(&line) { Ok(resp) => { - if let Some(tx) = pending.lock().await.remove(&resp.id) { + if let Some(tx) = pending_resolve.lock().await.remove(&resp.id) { let _ = tx.send(resp); + } else { + warn!(line = %line, "bridge response id not pending in either map"); } } Err(e) => warn!(error = %e, line = %line, "could not parse bridge response"), @@ -108,6 +131,7 @@ async fn stderr_loop(stderr: ChildStderr) { async fn wait_loop( mut child: tokio::process::Child, pending: Arc>>>, + pending_resolve: Arc>>>, dead: Arc, ) { let status = child.wait().await; @@ -117,14 +141,27 @@ async fn wait_loop( Err(e) => format!("bridge wait failed: {e}"), }; error!("{msg}"); - let mut pending = pending.lock().await; - for (_, tx) in pending.drain() { - let _ = tx.send(EdgesForFileResp { - id: 0, - edges: Vec::new(), - partial: false, - error: Some(msg.clone()), - }); + { + let mut pending = pending.lock().await; + for (_, tx) in pending.drain() { + let _ = tx.send(EdgesForFileResp { + id: 0, + edges: Vec::new(), + partial: false, + error: Some(msg.clone()), + }); + } + } + { + let mut pending = pending_resolve.lock().await; + for (_, tx) in pending.drain() { + let _ = tx.send(ResolveRefsResp { + id: 0, + refs: Vec::new(), + partial: false, + error: Some(msg.clone()), + }); + } } } @@ -192,4 +229,57 @@ impl MultilspyResolver { }) .collect()) } + + /// For each (line, col, kind) site, calls multilspy's `request_definition` + /// via the bridge and returns the resolved (from_path, from_line, to_path, + /// to_line, kind) tuples. `path` is repo-relative. 
+ pub async fn resolve_refs( + &self, + path: &camino::Utf8Path, + language: &str, + sites: Vec, + ) -> Result> { + if self.dead.load(Ordering::Acquire) { + return Err(MycelError::Lsp( + "bridge died earlier; refusing further requests".into(), + )); + } + if sites.is_empty() { + return Ok(Vec::new()); + } + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + let (tx, rx) = oneshot::channel(); + self.pending_resolve.lock().await.insert(id, tx); + let req = crate::protocol::ResolveRefsReq { + id, + op: "resolve_refs_for_file", + repo_root: self.repo_root.as_str(), + language, + path: path.as_str(), + sites, + }; + let line = serde_json::to_string(&req)? + "\n"; + { + let mut stdin = self.stdin.lock().await; + stdin + .write_all(line.as_bytes()) + .await + .map_err(|e| MycelError::Lsp(format!("write: {e}")))?; + stdin + .flush() + .await + .map_err(|e| MycelError::Lsp(format!("flush: {e}")))?; + } + let resp = rx + .await + .map_err(|_| MycelError::Lsp("bridge closed".into()))?; + if let Some(err) = resp.error { + warn!(language, %path, "resolve_refs error: {err}"); + return Ok(Vec::new()); + } + if resp.partial { + warn!(language, %path, "resolve_refs returned partial results"); + } + Ok(resp.refs) + } } diff --git a/crates/mycel-lsp/tests/smoke.rs b/crates/mycel-lsp/tests/smoke.rs index 678ae75..783e612 100644 --- a/crates/mycel-lsp/tests/smoke.rs +++ b/crates/mycel-lsp/tests/smoke.rs @@ -24,3 +24,48 @@ async fn lsp_smoke_typescript() { // or multilspy is broken and the test should fail loudly. assert!(!edges.is_empty(), "expected at least one LSP edge from a TS fixture with cross-file references"); } + +#[tokio::test] +async fn lsp_resolve_refs_typescript() { + if std::env::var("MYCEL_TEST_LSP").ok().as_deref() != Some("1") { + eprintln!("skipping (set MYCEL_TEST_LSP=1; requires multilspy + tsserver)"); + return; + } + use mycel_lsp::protocol::RefSite; + + // Use the same fixture+repo-root pattern as `lsp_smoke_typescript`. 
The + // call to `add(...)` lives in `tests/fixtures/typescript/imports_and_exports.ts` + // at line 6, column 33 (0-indexed) — the `a` of `add(1, 2)` inside the + // template literal: + // ` return \`Hi ${name}, sum is ${add(1, 2)}\`;` + // ^ col 33 + // It resolves to `tests/fixtures/typescript/simple_function.ts` line 1. + let repo: Utf8PathBuf = std::env::current_dir().unwrap().try_into().unwrap(); + let resolver = MultilspyResolver::spawn( + "python3 scripts/multilspy_bridge.py", + repo.clone(), + ) + .await + .expect("bridge should spawn"); + + let sites = vec![RefSite { + line: 6, + col: 33, + kind: "calls".into(), + }]; + let refs = resolver + .resolve_refs( + camino::Utf8Path::new("tests/fixtures/typescript/imports_and_exports.ts"), + "typescript", + sites, + ) + .await + .expect("resolve_refs should succeed"); + assert!(!refs.is_empty(), "expected at least one resolved ref; got {refs:?}"); + let first = &refs[0]; + assert!( + first.to_path.contains("simple_function"), + "expected cross-file resolution to simple_function.ts; got {first:?}", + ); + assert!(first.to_line > 0); +} From bcabafd2ba4d459198b72a104b98ecc2c44db94a Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:16:55 -0600 Subject: [PATCH 07/12] Pipeline: resolve cross-file edges via multilspy + graph location lookup --- crates/mycel-index/src/pipeline.rs | 100 +++++++++++++++++++++++++++-- crates/mycel-lsp/src/multilspy.rs | 6 ++ crates/mycel-lsp/tests/smoke.rs | 4 ++ 3 files changed, 103 insertions(+), 7 deletions(-) diff --git a/crates/mycel-index/src/pipeline.rs b/crates/mycel-index/src/pipeline.rs index c34067c..f3c60b4 100644 --- a/crates/mycel-index/src/pipeline.rs +++ b/crates/mycel-index/src/pipeline.rs @@ -50,16 +50,102 @@ impl Indexer { "extracted" ); - // 3. LSP refinement (if configured) + // 3. Cross-file resolution via LSP (if configured). + // + // Tree-sitter emits Calls / UsesType / Implements edges with bare + // callee names and a `from_line` coord. 
`resolve_same_file_edges` + // above handles the same-file case; what remains is cross-file — + // LSP territory. We: + // a) Collect a RefSite per qualifying tree-sitter edge. + // b) Build a `from_line -> from_qname` map (the LSP response + // carries only file/line coords, not qnames). + // c) Call `resolve_refs`. On error, log WARN and continue with + // tree-sitter edges only. + // d) For each resolved ref, look up the to-side qname via the + // graph (`symbol_containing`). If the definition is outside + // the indexed surface, drop the edge. let mut all_edges = extraction.edges.clone(); if let Some(lsp) = &self.lsp { - match lsp.refine(path, extractor.language_name(), &extraction).await { - Ok(mut lsp_edges) => { - mycel_extract::resolve_same_file_edges(path, &mut lsp_edges, &extraction.symbols); - debug!(file=%path, n_lsp_edges = lsp_edges.len(), "lsp refined"); - all_edges.extend(lsp_edges); + use mycel_lsp::protocol::RefSite; + use std::collections::HashMap; + + let mut from_line_to_qname: HashMap = HashMap::new(); + let mut sites: Vec = Vec::new(); + for e in &all_edges { + let kind_str = match e.kind { + EdgeKind::Calls => "calls", + EdgeKind::UsesType => "uses_type", + EdgeKind::Implements => "implements", + _ => continue, + }; + let Some(line) = e.from_line else { continue }; + from_line_to_qname + .entry(line) + .or_insert_with(|| e.from.clone()); + sites.push(RefSite { + line, + // Line-hover suffices for v1; column accuracy is a follow-up + // refinement once the bridge supports per-site columns from + // the tree-sitter capture. + col: 0, + kind: kind_str.into(), + }); + } + + if !sites.is_empty() { + match lsp + .resolve_refs(path, extractor.language_name(), sites) + .await + { + Ok(refs) => { + let mut resolved = 0usize; + for r in refs { + let Some(from_qname) = from_line_to_qname.get(&r.from_line) else { + // LSP returned a site we didn't seed — shouldn't + // happen, but skip rather than fabricate a from. 
+ continue; + }; + let to_qname = match self + .graph + .symbol_containing(&r.to_path, r.to_line) + .await + { + Ok(Some(q)) => q, + Ok(None) => continue, // definition outside indexed surface + Err(e) => { + warn!( + file=%path, + to_path = %r.to_path, + to_line = r.to_line, + error=%e, + "symbol_containing failed; dropping resolved ref", + ); + continue; + } + }; + let kind = match r.kind.as_str() { + "calls" => EdgeKind::Calls, + "uses_type" => EdgeKind::UsesType, + "implements" => EdgeKind::Implements, + _ => continue, + }; + all_edges.push(Edge { + from: from_qname.clone(), + to: to_qname, + kind, + source: EdgeSource::Lsp, + from_line: Some(r.from_line), + }); + resolved += 1; + } + debug!(file=%path, n_resolved = resolved, "lsp resolve_refs"); + } + Err(e) => warn!( + file=%path, + error=%e, + "lsp resolve_refs failed; proceeding with tree-sitter edges only", + ), } - Err(e) => warn!(file=%path, error=%e, "lsp refine failed; proceeding with tree-sitter only"), } } diff --git a/crates/mycel-lsp/src/multilspy.rs b/crates/mycel-lsp/src/multilspy.rs index a91687e..a214cd9 100644 --- a/crates/mycel-lsp/src/multilspy.rs +++ b/crates/mycel-lsp/src/multilspy.rs @@ -166,6 +166,12 @@ async fn wait_loop( } impl MultilspyResolver { + #[deprecated( + note = "Phase 3 Workstream A: use `resolve_refs` instead. \ + `refine` emits degenerate REFERENCES edges keyed on file URIs, \ + which never resolve to Symbol qnames at upsert time and so \ + never land in the graph." 
+ )] pub async fn refine( &self, path: &camino::Utf8Path, diff --git a/crates/mycel-lsp/tests/smoke.rs b/crates/mycel-lsp/tests/smoke.rs index 783e612..1aaef8d 100644 --- a/crates/mycel-lsp/tests/smoke.rs +++ b/crates/mycel-lsp/tests/smoke.rs @@ -16,6 +16,10 @@ async fn lsp_smoke_typescript() { ).await.expect("spawn multilspy"); let path: Utf8PathBuf = "tests/fixtures/typescript/imports_and_exports.ts".into(); let extraction = mycel_extract::ExtractionOutput::default(); + // `refine` is deprecated in favor of `resolve_refs` (Phase 3 Workstream A) + // but this smoke test still exercises it directly to keep coverage on the + // legacy bridge op while it remains in the codebase. + #[allow(deprecated)] let edges = resolver.refine(&path, "typescript", &extraction).await .expect("refine returns Ok even when partial"); eprintln!("got {} edges", edges.len()); From 10358688de53c6e83d940bb3d6f389210b709a85 Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:20:31 -0600 Subject: [PATCH 08/12] Pipeline: document one-from-per-line assumption + defensive from_path check --- crates/mycel-index/src/pipeline.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/crates/mycel-index/src/pipeline.rs b/crates/mycel-index/src/pipeline.rs index f3c60b4..66012d1 100644 --- a/crates/mycel-index/src/pipeline.rs +++ b/crates/mycel-index/src/pipeline.rs @@ -79,6 +79,12 @@ impl Indexer { _ => continue, }; let Some(line) = e.from_line else { continue }; + // Assumes one enclosing-symbol per from_line: a single source line in a single + // file is owned by one tree-sitter parent (e.g., `foo().bar().baz()` on one + // line all share an enclosing fn). If a future extractor emits multiple + // distinct `from` qnames for the same line — possible with closures or + // inline lambdas — promote this to HashMap<u32, Vec<String>> and disambiguate + // at apply time. For today's TS+Rust extractors this is safe.
from_line_to_qname .entry(line) .or_insert_with(|| e.from.clone()); @@ -100,6 +106,14 @@ impl Indexer { Ok(refs) => { let mut resolved = 0usize; for r in refs { + if r.from_path != path.as_str() { + tracing::warn!( + file = %path, + bridge_from_path = %r.from_path, + "resolve_refs response carries a from_path that doesn't match the request; skipping", + ); + continue; + } let Some(from_qname) = from_line_to_qname.get(&r.from_line) else { // LSP returned a site we didn't seed — shouldn't // happen, but skip rather than fabricate a from. From 5d51842d9a1f9d3a29ce3475da77450196a458bd Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:33:23 -0600 Subject: [PATCH 09/12] Add end-to-end test: cross-file CALLS edge lands in the graph Locks the post-Workstream-A behavior: copies the TS fixtures into a tempdir, runs the indexer pipeline (extract -> resolve_refs -> graph upsert) per file with deferred edge batching, then asserts `query_callers("simple_function.ts::add")` returns the cross-file caller from `imports_and_exports.ts`. Gated on MYCEL_TEST_LSP=1 to keep multilspy / tsserver / FalkorDB out of the default `cargo test --workspace` path. Re-uses the existing `imports_and_exports.ts` fixture (already imports `add` from `./simple_function` and calls it) rather than adding a redundant `cross_file_caller.ts`. Adds async-trait + tempfile as dev-dependencies on mycel-index for the StubEmbedder and tempdir setup. 
--- Cargo.lock | 2 + crates/mycel-index/Cargo.toml | 5 + crates/mycel-index/tests/cross_file_calls.rs | 164 +++++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100644 crates/mycel-index/tests/cross_file_calls.rs diff --git a/Cargo.lock b/Cargo.lock index cf9f78c..b4566e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1088,6 +1088,7 @@ dependencies = [ name = "mycel-index" version = "0.0.1" dependencies = [ + "async-trait", "blake3", "camino", "futures", @@ -1096,6 +1097,7 @@ dependencies = [ "mycel-graph", "mycel-lsp", "mycel-models", + "tempfile", "time", "tokio", "tracing", diff --git a/crates/mycel-index/Cargo.toml b/crates/mycel-index/Cargo.toml index d3e8f2f..c502dcb 100644 --- a/crates/mycel-index/Cargo.toml +++ b/crates/mycel-index/Cargo.toml @@ -20,3 +20,8 @@ camino = { workspace = true } walkdir = { workspace = true } time = { workspace = true } futures = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } +async-trait = { workspace = true } +tempfile = "3" diff --git a/crates/mycel-index/tests/cross_file_calls.rs b/crates/mycel-index/tests/cross_file_calls.rs new file mode 100644 index 0000000..486e578 --- /dev/null +++ b/crates/mycel-index/tests/cross_file_calls.rs @@ -0,0 +1,164 @@ +//! End-to-end: index a tiny TS project with a cross-file call and assert +//! the CALLS edge lands in the graph with both endpoints resolved to Symbol +//! qnames. +//! +//! This locks the post-Workstream-A behavior: tree-sitter emits a tentative +//! `::greet -> add` edge (bare `add` callee), the LSP bridge resolves +//! the call site to `simple_function.ts:1`, and `symbol_containing` rewrites +//! the edge target to the full qname `simple_function.ts::add`. Without +//! Workstream A wiring this test fails because the dangling `add` callee +//! never matches a Symbol at upsert time. +//! +//! Requires: +//! - MYCEL_TEST_LSP=1 (gates the network-y multilspy spawn) +//! 
- FalkorDB on redis://127.0.0.1:16379 +//! - python3 with `multilspy` and a tsserver discoverable on PATH + +use camino::{Utf8Path, Utf8PathBuf}; +use mycel_core::{EdgeKind, Result}; +use mycel_graph::GraphClient; +use mycel_lsp::MultilspyResolver; +use mycel_models::Embedder; +use std::sync::Arc; + +const FIXTURE_SIMPLE: &str = "simple_function.ts"; +const FIXTURE_CALLER: &str = "imports_and_exports.ts"; +// imports_and_exports.ts also imports from this file; copy it too so tsserver +// can resolve every import in the project root and not bail out on missing +// modules. +const FIXTURE_TYPES: &str = "class_with_methods.ts"; + +/// Stub embedder so the test doesn't depend on a running Ollama. Mirrors the +/// pattern used in `crates/mycel-graph/tests/integration.rs`. +struct StubEmbedder; + +#[async_trait::async_trait] +impl Embedder for StubEmbedder { + fn identity(&self) -> &str { + "stub/cross-file-test" + } + fn dimension(&self) -> u32 { + 768 + } + async fn embed(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> { + Ok(texts.iter().map(|_| vec![0.0_f32; 768]).collect()) + } +} + +#[tokio::test] +async fn cross_file_calls_land_in_graph() { + if std::env::var("MYCEL_TEST_LSP").ok().as_deref() != Some("1") { + eprintln!( + "skipping cross_file_calls_land_in_graph \ + (set MYCEL_TEST_LSP=1; requires multilspy + tsserver + FalkorDB)" + ); + return; + } + + // Source fixtures live at /tests/fixtures/typescript/. + // CARGO_MANIFEST_DIR is crates/mycel-index; ../.. lands at the workspace root.
+ let workspace_root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .map(|p| p.to_path_buf()) + .expect("workspace root resolvable from CARGO_MANIFEST_DIR"); + let src_dir = workspace_root.join("tests/fixtures/typescript"); + let bridge_script = workspace_root.join("scripts/multilspy_bridge.py"); + assert!( + bridge_script.exists(), + "multilspy bridge missing at {bridge_script}" + ); + + // Copy fixtures into a tempdir so the indexer's "repo" is hermetic — no + // unrelated files for walkdir to crawl into and no risk of cross-test + // contamination. + let tmp = tempfile::tempdir().expect("tempdir"); + let repo_root = Utf8PathBuf::from_path_buf(tmp.path().to_path_buf()) + .expect("tempdir is utf8"); + for name in [FIXTURE_SIMPLE, FIXTURE_CALLER, FIXTURE_TYPES] { + let src = src_dir.join(name); + let dst = repo_root.join(name); + std::fs::copy(&src, &dst).unwrap_or_else(|e| panic!("copy {src} -> {dst}: {e}")); + } + + // Per-test graph, wiped before use so a prior run can't poison assertions. + let graph_url = std::env::var("MYCEL_TEST_FALKORDB_URL") + .unwrap_or_else(|_| "redis://127.0.0.1:16379".into()); + let client = GraphClient::connect(&graph_url, "mycel:test:cross_file_calls") + .await + .expect("connect FalkorDB"); + client + .query("MATCH (n) DETACH DELETE n") + .await + .expect("wipe graph"); + + let lsp = MultilspyResolver::spawn( + &format!("python3 {bridge_script}"), + repo_root.clone(), + ) + .await + .expect("multilspy bridge spawn"); + + let indexer = mycel_index::Indexer { + graph: client.clone(), + lsp: Some(Arc::new(lsp)), + embedder: Arc::new(StubEmbedder), + }; + + // Index files with repo-relative paths so symbol qnames stay short + // (e.g. `simple_function.ts::add`) and `to_path` from the LSP bridge + // (also repo-relative) lines up with stored `file_path`s — required for + // `symbol_containing` to find the callee at edge-resolution time. 
+ // + // We collect edges from both files and upsert them in a single batch at + // the end, matching `index_repo`'s deferred-edge ordering — without this, + // the caller file's CALLS edge would be written before `add` exists as a + // Symbol and would silently drop. + let mut all_edges = Vec::new(); + for name in [FIXTURE_SIMPLE, FIXTURE_CALLER, FIXTURE_TYPES] { + let rel: Utf8PathBuf = name.into(); + let abs = repo_root.join(name); + let content = std::fs::read_to_string(&abs).expect("read fixture"); + let edges = indexer + .index_file_collect_edges(Utf8Path::new(&rel), &content) + .await + .expect("index_file_collect_edges") + .expect("file was indexed (not dedup-skipped)"); + all_edges.extend(edges); + } + client + .upsert_edge_batch(&all_edges) + .await + .expect("upsert deferred edges"); + + // Sanity: at least one of the edges we just upserted is the cross-file + // CALLS we care about — caller in imports_and_exports.ts, callee == add + // qname in simple_function.ts. + let cross_file_call_present = all_edges.iter().any(|e| { + matches!(e.kind, EdgeKind::Calls) + && e.to == "simple_function.ts::add" + && e.from.starts_with("imports_and_exports.ts::") + }); + assert!( + cross_file_call_present, + "expected a Calls edge from imports_and_exports.ts::* -> simple_function.ts::add; \ + all_edges = {all_edges:?}" + ); + + // The real Tier-1 contract: `query_callers` over the callee returns the + // cross-file caller. 
+ let callers = client + .query_callers("simple_function.ts::add") + .await + .expect("query_callers"); + let names: Vec<&str> = callers + .iter() + .map(|s| s.qualified_name.as_str()) + .collect(); + assert!( + names + .iter() + .any(|n| n.starts_with("imports_and_exports.ts::")), + "expected a caller in imports_and_exports.ts for simple_function.ts::add; got {names:?}" + ); +} From 606c647fcaef418f1203bb3dec256240c9bf7be1 Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Mon, 11 May 2026 23:41:45 -0600 Subject: [PATCH 10/12] Bridge: open_file before request_definition in resolve_refs_for_file (and edges_for_file) --- scripts/multilspy_bridge.py | 120 ++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 53 deletions(-) diff --git a/scripts/multilspy_bridge.py b/scripts/multilspy_bridge.py index 9e8faf7..435c8e9 100644 --- a/scripts/multilspy_bridge.py +++ b/scripts/multilspy_bridge.py @@ -95,31 +95,35 @@ def edges_for_file(self, repo_root: str, language: str, path: str): edges = [] partial = False try: - doc_symbols = server.request_document_symbols(path) - # Some multilspy versions return a tuple, others a list; normalize. - if isinstance(doc_symbols, tuple): - doc_symbols = doc_symbols[0] - for sym in (doc_symbols or []): - name = sym.get("name") if isinstance(sym, dict) else None - if not name: - continue - start = _selection_start(sym) - if not start: - continue - try: - defs = server.request_definition(path, start["line"], start["character"]) - for d in (defs or []): - target_uri = d.get("uri") or d.get("targetUri") - if not target_uri: - continue - edges.append({ - "from": f"{path}::{name}", - "to": target_uri, - "kind": "references", - "source": "lsp", - }) - except Exception: - partial = True + # multilspy requires open_file before any request targeting that + # file's symbols — without it, request_definition returns only + # same-file hits. 
+ with server.open_file(path): + doc_symbols = server.request_document_symbols(path) + # Some multilspy versions return a tuple, others a list; normalize. + if isinstance(doc_symbols, tuple): + doc_symbols = doc_symbols[0] + for sym in (doc_symbols or []): + name = sym.get("name") if isinstance(sym, dict) else None + if not name: + continue + start = _selection_start(sym) + if not start: + continue + try: + defs = server.request_definition(path, start["line"], start["character"]) + for d in (defs or []): + target_uri = d.get("uri") or d.get("targetUri") + if not target_uri: + continue + edges.append({ + "from": f"{path}::{name}", + "to": target_uri, + "kind": "references", + "source": "lsp", + }) + except Exception: + partial = True except Exception: partial = True return {"edges": edges, "partial": partial} @@ -129,34 +133,44 @@ def resolve_refs_for_file(self, repo_root: str, language: str, path: str, sites) server = self.get_server(repo_root, language) refs = [] partial = False - for site in (sites or []): - try: - # tree-sitter sites are 1-indexed; LSP wants 0-indexed lines. - defs = server.request_definition(path, site["line"] - 1, site["col"]) - for d in (defs or []): - target_uri = d.get("uri") or d.get("targetUri") - # `range` is plain Location; `targetSelectionRange` / `targetRange` - # is the LocationLink form. Prefer the name range when available. 
- target_range = ( - d.get("range") - or d.get("targetSelectionRange") - or d.get("targetRange") - ) - if not target_uri or not target_range: - continue - to_path = _uri_to_repo_path(target_uri, repo_root) - if to_path is None: - continue # definition lives outside the repo (stdlib, node_modules) - to_line = target_range["start"]["line"] + 1 # back to 1-indexed - refs.append({ - "from_path": path, - "from_line": site["line"], - "to_path": to_path, - "to_line": to_line, - "kind": site["kind"], - }) - except Exception: - partial = True + if not sites: + return {"refs": refs, "partial": partial} + # multilspy requires open_file before any request targeting that file's + # symbols — without it, request_definition returns only same-file hits. + try: + with server.open_file(path): + for site in sites: + try: + # tree-sitter sites are 1-indexed; LSP wants 0-indexed lines. + defs = server.request_definition(path, site["line"] - 1, site["col"]) + for d in (defs or []): + target_uri = d.get("uri") or d.get("targetUri") + # `range` is plain Location; `targetSelectionRange` / `targetRange` + # is the LocationLink form. Prefer the name range when available. + target_range = ( + d.get("range") + or d.get("targetSelectionRange") + or d.get("targetRange") + ) + if not target_uri or not target_range: + continue + to_path = _uri_to_repo_path(target_uri, repo_root) + if to_path is None: + continue # definition lives outside the repo (stdlib, node_modules) + to_line = target_range["start"]["line"] + 1 # back to 1-indexed + refs.append({ + "from_path": path, + "from_line": site["line"], + "to_path": to_path, + "to_line": to_line, + "kind": site["kind"], + }) + except Exception: + partial = True + except Exception: + # If open_file itself fails (e.g., file not found from the LS's POV), + # mark partial and return what we have (empty). 
+ partial = True return {"refs": refs, "partial": partial} def main(): From e39790678010d716eb2a728df486b7e6bcfc770d Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Sun, 17 May 2026 02:03:33 -0600 Subject: [PATCH 11/12] Make cross-file LSP resolution actually land edges Two bugs were preventing Workstream A's cross-file CALLS / USES_TYPE / IMPLEMENTS edges from reaching the graph: 1. Bridge closed files between requests. tsserver only resolves through imports when the target file is open simultaneously with the source. The `with server.open_file(path):` form closed each file before the next request, so request_definition stopped at the import binding in the requesting file. Fix: maintain an ExitStack of open files per (repo, language), and prewarm with every same-language source file in the repo when the LS first spins up. 2. Pipeline always sent col=0 to LSP. tsserver's request_definition returns nothing when the cursor lands on whitespace, so col=0 for indented call sites resolved to empty. Fix: locate the bare callee token on the source line and seed its 0-indexed column. Also skip edges whose target already contains `::` (same-file resolutions produced earlier in the pipeline). The smoke tests under crates/mycel-lsp were anchored on `current_dir()` but `cargo test` runs from the package manifest dir, so `scripts/multilspy_bridge.py` and the TS fixtures were never findable. Fix: anchor on `CARGO_MANIFEST_DIR` and walk up to the workspace root, matching the pattern already used in cross_file_calls.rs. After these fixes the cross-file integration test passes against a live multilspy + tsserver, and all 85 workspace tests pass with MYCEL_TEST_LSP=1. 
--- .gitignore | 1 + Cargo.lock | 1 + crates/mycel-index/Cargo.toml | 1 + crates/mycel-index/src/pipeline.rs | 80 +++++++- crates/mycel-index/tests/cross_file_calls.rs | 1 + crates/mycel-lsp/tests/smoke.rs | 22 ++- scripts/multilspy_bridge.py | 195 +++++++++++++------ 7 files changed, 231 insertions(+), 70 deletions(-) diff --git a/.gitignore b/.gitignore index 4a73673..e1fd2ad 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ *.log .worktrees/ .claude/ +__pycache__/ diff --git a/Cargo.lock b/Cargo.lock index b4566e8..dce4ae6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1101,6 +1101,7 @@ dependencies = [ "time", "tokio", "tracing", + "tracing-subscriber", "walkdir", ] diff --git a/crates/mycel-index/Cargo.toml b/crates/mycel-index/Cargo.toml index c502dcb..cab10e2 100644 --- a/crates/mycel-index/Cargo.toml +++ b/crates/mycel-index/Cargo.toml @@ -25,3 +25,4 @@ futures = { workspace = true } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } async-trait = { workspace = true } tempfile = "3" +tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } diff --git a/crates/mycel-index/src/pipeline.rs b/crates/mycel-index/src/pipeline.rs index 66012d1..9966ae1 100644 --- a/crates/mycel-index/src/pipeline.rs +++ b/crates/mycel-index/src/pipeline.rs @@ -79,6 +79,13 @@ impl Indexer { _ => continue, }; let Some(line) = e.from_line else { continue }; + // Skip edges whose target already resolved to a qname + // (`::`) — these are same-file resolutions + // produced by `resolve_same_file_edges`. Sending them to LSP + // wastes a round-trip and column 0 returns nothing anyway. + if e.to.contains("::") { + continue; + } // Assumes one enclosing-symbol per from_line: a single source line in a single // file is owned by one tree-sitter parent (e.g., `foo().bar().baz()` on one // line all share an enclosing fn). 
If a future extractor emits multiple @@ -88,12 +95,15 @@ impl Indexer { from_line_to_qname .entry(line) .or_insert_with(|| e.from.clone()); + // Column accuracy matters: tsserver's request_definition returns + // empty when the cursor lands on whitespace. We locate the bare + // callee name in the source line to seed an identifier-bearing + // column. Falls back to 0 if not found (same as the no-column + // baseline; the LSP call will likely return empty). + let col = locate_token_col(content, line, &e.to); sites.push(RefSite { line, - // Line-hover suffices for v1; column accuracy is a follow-up - // refinement once the bridge supports per-site columns from - // the tree-sitter capture. - col: 0, + col, kind: kind_str.into(), }); } @@ -365,3 +375,65 @@ impl Indexer { Ok(count) } } + +/// Return the 0-indexed column of the first word-boundary occurrence of +/// `token` on `line` (1-indexed) in `content`. Returns 0 if not found — +/// caller-side LSP will return empty for that site, which is the same +/// behavior as the previous "always col 0" baseline. +/// +/// Word-boundary: the char before must not be alphanumeric/underscore and +/// the char after must not be alphanumeric/underscore. This avoids matching +/// `add` inside `padded`. 
+fn locate_token_col(content: &str, line: u32, token: &str) -> u32 { + if token.is_empty() { + return 0; + } + let line_idx = line.saturating_sub(1) as usize; + let Some(line_str) = content.lines().nth(line_idx) else { + return 0; + }; + let bytes = line_str.as_bytes(); + let tok = token.as_bytes(); + let mut i = 0usize; + while i + tok.len() <= bytes.len() { + if bytes[i..i + tok.len()] == *tok { + let before_ok = i == 0 || !is_ident_byte(bytes[i - 1]); + let after_ok = i + tok.len() == bytes.len() || !is_ident_byte(bytes[i + tok.len()]); + if before_ok && after_ok { + return i as u32; + } + } + i += 1; + } + 0 +} + +fn is_ident_byte(b: u8) -> bool { + b.is_ascii_alphanumeric() || b == b'_' +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn locate_token_col_finds_word_boundary() { + let content = " return `Hi ${name}, sum is ${add(1, 2)}`;\n"; + let col = locate_token_col(content, 1, "add"); + assert_eq!(col, 33, "expected col 33 for `add` in template literal"); + } + + #[test] + fn locate_token_col_skips_substring_match() { + // `add` inside `padded` must not match. + let content = "let padded = 1;\n"; + let col = locate_token_col(content, 1, "add"); + assert_eq!(col, 0, "should not match `add` inside `padded`"); + } + + #[test] + fn locate_token_col_returns_zero_when_line_missing() { + let content = "only one line\n"; + assert_eq!(locate_token_col(content, 99, "missing"), 0); + } +} diff --git a/crates/mycel-index/tests/cross_file_calls.rs b/crates/mycel-index/tests/cross_file_calls.rs index 486e578..8b9cbd6 100644 --- a/crates/mycel-index/tests/cross_file_calls.rs +++ b/crates/mycel-index/tests/cross_file_calls.rs @@ -54,6 +54,7 @@ async fn cross_file_calls_land_in_graph() { ); return; } + let _ = tracing_subscriber::fmt::try_init(); // Source fixtures live at /tests/fixtures/typescript/. // CARGO_MANIFEST_DIR is crates/mycel-index; ../.. lands at the workspace root. 
diff --git a/crates/mycel-lsp/tests/smoke.rs b/crates/mycel-lsp/tests/smoke.rs index 1aaef8d..29f276a 100644 --- a/crates/mycel-lsp/tests/smoke.rs +++ b/crates/mycel-lsp/tests/smoke.rs @@ -3,15 +3,28 @@ use camino::Utf8PathBuf; use mycel_lsp::*; +/// Workspace root resolved from `CARGO_MANIFEST_DIR`. `cargo test` sets the +/// test binary's CWD to the package manifest directory, but the bridge script +/// and TS fixtures live at the workspace root — so we anchor on the manifest +/// dir and walk up two levels (`crates/mycel-lsp` → workspace root). +fn workspace_root() -> Utf8PathBuf { + Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .map(|p| p.to_path_buf()) + .expect("workspace root resolvable from CARGO_MANIFEST_DIR") +} + #[tokio::test] async fn lsp_smoke_typescript() { if std::env::var("MYCEL_TEST_LSP").ok().as_deref() != Some("1") { eprintln!("skipping (set MYCEL_TEST_LSP=1 to run)"); return; } - let repo: Utf8PathBuf = std::env::current_dir().unwrap().try_into().unwrap(); + let repo = workspace_root(); + let bridge = repo.join("scripts/multilspy_bridge.py"); let resolver = MultilspyResolver::spawn( - "python3 scripts/multilspy_bridge.py", + &format!("python3 {bridge}"), repo.clone(), ).await.expect("spawn multilspy"); let path: Utf8PathBuf = "tests/fixtures/typescript/imports_and_exports.ts".into(); @@ -44,9 +57,10 @@ async fn lsp_resolve_refs_typescript() { // ` return \`Hi ${name}, sum is ${add(1, 2)}\`;` // ^ col 33 // It resolves to `tests/fixtures/typescript/simple_function.ts` line 1. 
- let repo: Utf8PathBuf = std::env::current_dir().unwrap().try_into().unwrap(); + let repo = workspace_root(); + let bridge = repo.join("scripts/multilspy_bridge.py"); let resolver = MultilspyResolver::spawn( - "python3 scripts/multilspy_bridge.py", + &format!("python3 {bridge}"), repo.clone(), ) .await diff --git a/scripts/multilspy_bridge.py b/scripts/multilspy_bridge.py index 435c8e9..7a23612 100644 --- a/scripts/multilspy_bridge.py +++ b/scripts/multilspy_bridge.py @@ -20,7 +20,9 @@ If/when call-hierarchy lands in multilspy, extend this bridge to upgrade CALLS edges from `tree-sitter` to `lsp` source. """ +import contextlib import json +import os import sys import traceback @@ -37,6 +39,22 @@ "rust": "rust", } +# File extensions that participate in a given language's LSP project. Used by +# `prewarm` to seed tsserver/rust-analyzer with project-wide knowledge before +# the first request_definition call, which is otherwise stuck at the import +# binding for cross-file references. +LANG_TO_EXTENSIONS = { + "typescript": {".ts", ".tsx", ".mts", ".cts"}, + "rust": {".rs"}, +} + +# Directory names that are never part of the source tree. Skipped during +# prewarm to avoid pulling in vendored/build artifacts. +PREWARM_SKIP_DIRS = { + "node_modules", "target", ".git", "dist", "build", ".next", + ".worktrees", "vendor", ".venv", "venv", +} + def emit(payload): sys.stdout.write(json.dumps(payload) + "\n") sys.stdout.flush() @@ -65,7 +83,13 @@ def _uri_to_repo_path(uri: str, repo_root: str): class BridgeState: def __init__(self): - self.servers = {} # (repo_root, language) -> (server, cm) + # (repo_root, language) -> (server, server_cm, files_stack, open_paths_set) + # files_stack keeps each file's open_file context manager alive so + # tsserver retains project-wide knowledge across requests. Without + # this, request_definition on an imported symbol stops at the import + # binding because the LS has never seen the target file. 
See the + # cross_file_calls integration test for the contract. + self.servers = {} def get_server(self, repo_root: str, language: str): key = (repo_root, language) @@ -76,9 +100,49 @@ def get_server(self, repo_root: str, language: str): server = SyncLanguageServer.create(cfg, logger, repo_root) cm = server.start_server() cm.__enter__() - self.servers[key] = (server, cm) + self.servers[key] = (server, cm, contextlib.ExitStack(), set()) + # Prewarm the LS with every same-language source file in the repo. + # tsserver in particular won't resolve cross-file imports unless + # the target file is already open — and the indexer can't pre-open + # transitively because it doesn't know imports until after parse. + # The cost is bounded: one open_file per source file, paid once + # per (repo, language) for the bridge process lifetime. + self._prewarm(repo_root, language) return self.servers[key][0] + def _prewarm(self, repo_root: str, language: str): + exts = LANG_TO_EXTENSIONS.get(language, set()) + if not exts: + return + for dirpath, dirnames, filenames in os.walk(repo_root): + dirnames[:] = [d for d in dirnames if d not in PREWARM_SKIP_DIRS] + for name in filenames: + if os.path.splitext(name)[1] not in exts: + continue + abs_path = os.path.join(dirpath, name) + rel = os.path.relpath(abs_path, repo_root) + try: + self.ensure_open(repo_root, language, rel) + except Exception: + # Best-effort: a single bad file shouldn't stop the whole + # prewarm. The next request against that file will surface + # the error. + pass + + def ensure_open(self, repo_root: str, language: str, path: str): + """Open `path` in the LS and keep it open for the bridge's lifetime. + + Subsequent requests against any other file can now resolve cross-file + references into this file. Idempotent — a second call for the same + (repo_root, language, path) is a no-op. 
+ """ + key = (repo_root, language) + server, _cm, stack, open_paths = self.servers[key] + if path in open_paths: + return + stack.enter_context(server.open_file(path)) + open_paths.add(path) + def edges_for_file(self, repo_root: str, language: str, path: str): """Refine extractor edges using LSP definition lookups. @@ -95,35 +159,36 @@ def edges_for_file(self, repo_root: str, language: str, path: str): edges = [] partial = False try: - # multilspy requires open_file before any request targeting that - # file's symbols — without it, request_definition returns only - # same-file hits. - with server.open_file(path): - doc_symbols = server.request_document_symbols(path) - # Some multilspy versions return a tuple, others a list; normalize. - if isinstance(doc_symbols, tuple): - doc_symbols = doc_symbols[0] - for sym in (doc_symbols or []): - name = sym.get("name") if isinstance(sym, dict) else None - if not name: - continue - start = _selection_start(sym) - if not start: - continue - try: - defs = server.request_definition(path, start["line"], start["character"]) - for d in (defs or []): - target_uri = d.get("uri") or d.get("targetUri") - if not target_uri: - continue - edges.append({ - "from": f"{path}::{name}", - "to": target_uri, - "kind": "references", - "source": "lsp", - }) - except Exception: - partial = True + # Persistently open: keeps tsserver project context warm so + # request_definition can resolve through imports added by later + # files. The legacy `with server.open_file(path)` form closed the + # file before cross-file calls could see the target. + self.ensure_open(repo_root, language, path) + doc_symbols = server.request_document_symbols(path) + # Some multilspy versions return a tuple, others a list; normalize. 
+ if isinstance(doc_symbols, tuple): + doc_symbols = doc_symbols[0] + for sym in (doc_symbols or []): + name = sym.get("name") if isinstance(sym, dict) else None + if not name: + continue + start = _selection_start(sym) + if not start: + continue + try: + defs = server.request_definition(path, start["line"], start["character"]) + for d in (defs or []): + target_uri = d.get("uri") or d.get("targetUri") + if not target_uri: + continue + edges.append({ + "from": f"{path}::{name}", + "to": target_uri, + "kind": "references", + "source": "lsp", + }) + except Exception: + partial = True except Exception: partial = True return {"edges": edges, "partial": partial} @@ -135,38 +200,44 @@ def resolve_refs_for_file(self, repo_root: str, language: str, path: str, sites) partial = False if not sites: return {"refs": refs, "partial": partial} - # multilspy requires open_file before any request targeting that file's - # symbols — without it, request_definition returns only same-file hits. + # Persistently open: tsserver needs every file that participates in a + # cross-file resolution to be open simultaneously. Without this, + # request_definition stops at the import binding in the requesting + # file. See `ensure_open` and the cross_file_calls integration test. try: - with server.open_file(path): - for site in sites: - try: - # tree-sitter sites are 1-indexed; LSP wants 0-indexed lines. - defs = server.request_definition(path, site["line"] - 1, site["col"]) - for d in (defs or []): - target_uri = d.get("uri") or d.get("targetUri") - # `range` is plain Location; `targetSelectionRange` / `targetRange` - # is the LocationLink form. Prefer the name range when available. 
- target_range = ( - d.get("range") - or d.get("targetSelectionRange") - or d.get("targetRange") - ) - if not target_uri or not target_range: - continue - to_path = _uri_to_repo_path(target_uri, repo_root) - if to_path is None: - continue # definition lives outside the repo (stdlib, node_modules) - to_line = target_range["start"]["line"] + 1 # back to 1-indexed - refs.append({ - "from_path": path, - "from_line": site["line"], - "to_path": to_path, - "to_line": to_line, - "kind": site["kind"], - }) - except Exception: - partial = True + self.ensure_open(repo_root, language, path) + for site in sites: + try: + # tree-sitter sites are 1-indexed; LSP wants 0-indexed lines. + defs = server.request_definition(path, site["line"] - 1, site["col"]) + for d in (defs or []): + target_uri = d.get("uri") or d.get("targetUri") + # `range` is plain Location; `targetSelectionRange` / `targetRange` + # is the LocationLink form. Prefer the name range when available. + target_range = ( + d.get("range") + or d.get("targetSelectionRange") + or d.get("targetRange") + ) + if not target_uri or not target_range: + continue + to_path = _uri_to_repo_path(target_uri, repo_root) + if to_path is None: + continue # definition lives outside the repo (stdlib, node_modules) + to_line = target_range["start"]["line"] + 1 # back to 1-indexed + # If the resolved definition lives in a file we haven't + # opened yet, open it now so subsequent cross-file + # queries against it (or *from* it) can resolve too. + self.ensure_open(repo_root, language, to_path) + refs.append({ + "from_path": path, + "from_line": site["line"], + "to_path": to_path, + "to_line": to_line, + "kind": site["kind"], + }) + except Exception: + partial = True except Exception: # If open_file itself fails (e.g., file not found from the LS's POV), # mark partial and return what we have (empty). 
From d6cf60542a0b80c011ad8eaedb46acd590384e97 Mon Sep 17 00:00:00 2001 From: Gavyn Caldwell Date: Sun, 17 May 2026 02:03:36 -0600 Subject: [PATCH 12/12] Mark callers/uses/implements as working post-Workstream-A --- CLAUDE.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 18c911e..dc217da 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ Mycelium is a local-first, graph-aware code intelligence layer for AI agents. We The daemon is registered as a systemd-user service (`mycel.service`) and watches this tree with a 2s debounce. Edits you make are reflected in the graph within seconds. -**As of 2026-05-04 benchmarking: only `definers` is reliable.** Use it for "where is X defined" — it returns a line range and signature in one shot, saving the subsequent `Read`. Fall back to grep for everything else. +**As of 2026-05-17 (Phase 3 Workstream A):** `definers`, `callers`, `uses`, and `implements` are all reliable for TypeScript and Rust within the indexed surface. `find` is reliable with description coverage. `IMPORTS` queries still need `grep` (file→file edge granularity). Run from the repo root (`target/release/mycel`; not on PATH): @@ -20,9 +20,9 @@ Run from the repo root (`target/release/mycel`; not on PATH): | Install the graph-care skill into Claude Code | `target/release/mycel --repo . skill install` | **Works** | | Backfill legacy description hashes | `target/release/mycel --repo . synthesize --refresh-hashes-only` | **Works** — one-shot for graphs predating 2026-05-05 | | Re-index from scratch (drop legacy descriptions) | `target/release/mycel --repo . index . --force-cold-rebuild` | **Works** | -| Callers/callees | `target/release/mycel --repo . callers ` | **Broken** — returns empty; use grep | -| Type usage | `target/release/mycel --repo . uses ` | **Broken** — returns empty; use grep | -| Interface implementations | `target/release/mycel --repo . 
implements ` | **Broken** — IMPLEMENTS edges not landing; use grep | +| Callers/callees | `target/release/mycel --repo . callers ` | **Works** — cross-file CALLS via LSP definition resolution (Phase 3 Workstream A) | +| Type usage | `target/release/mycel --repo . uses ` | **Works** — cross-file USES_TYPE via LSP definition resolution (Phase 3 Workstream A) | +| Interface implementations | `target/release/mycel --repo . implements ` | **Works** — cross-file IMPLEMENTS via LSP definition resolution (Phase 3 Workstream A) | Add `--json` for structured output. If you've rebuilt the daemon: `target/release/mycel daemon stop && target/release/mycel daemon start`. @@ -35,9 +35,8 @@ These are tracked and being worked on; flag them when they bite, don't try to fi - **`find` quality depends on description coverage.** Phase 2 ships description synthesis — a freshly indexed graph has signature+body-slice-embedded symbols until Claude (via the `mycel-graph-care` skill) writes behavioral descriptions for the symbols you actually work with — coverage grows with use, not with a one-shot bulk command. Symbols with descriptions cluster by behavior; signature-embedded symbols cluster by name shape. Mixed states (partial coverage) give mixed results. - **Module-declaration hallucinations.** (Applies to the bulk `mycel synthesize` path; the workload-driven skill instructs Claude to skip module decls.) Single-line `mod foo;` symbols get rich behavioral descriptions hallucinated from the module's name only (e.g., `mod launchd;` → "core logic for managing background services… launching system daemons"), which cluster against unrelated queries. Known follow-up: skip `SymbolKind::Module` in `list_symbols_for_synthesis`. Until then, filter top results by `kind` if a module decl is dragging your search off course. - **HNSW low-k flakiness.** FalkorDB's HNSW vector index walks adaptively; at `--limit < 10` it sometimes returns zero rows on valid queries that have answers at `--limit 20`. 
Default is `20` post-Phase 2; raise it further if you suspect a result is being clipped. -- **`callers`, `uses`, `implements` all return empty.** Benchmarked: `callers content_hash` → empty (grep found 3 callers). `uses Symbol` → empty (grep found 67). `implements Embedder` → empty (OllamaEmbedder clearly implements it). CALLS, TYPED_BY, and IMPLEMENTS edges are not landing. Phase 2/3 work. -- **Cross-file CALLS edges drop silently.** Tree-sitter only resolves same-file callees; the multilspy bridge currently emits degenerate `REFERENCES` so cross-file CALLS don't land. Phase 2/3 territory. - **`IMPORTS` queries return empty** — tree-sitter emits import edges as file→file, not Symbol→Symbol. Use `grep -rn 'use '` directly. +- **LSP-resolved edges only land within the indexed surface.** Cross-file CALLS / USES_TYPE / IMPLEMENTS that resolve into stdlib, node_modules, or any path outside the repo are dropped (the bridge returns the file URI, `symbol_containing` returns None, the edge is skipped). Same-language source files inside the repo work. Phase 3 Workstream A. If a `mycel` command returns empty when you expect results, **first verify the daemon is healthy** before assuming the query is wrong: