Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions libs/openant-core/parsers/python/parse_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from utilities.file_io import read_json, write_json, open_utf8


def generate_analyzer_output(extractor_result: dict) -> dict:
def generate_analyzer_output(extractor_result: dict, call_graph_result: dict | None = None) -> dict:
"""
Generate analyzer_output.json format for Stage 2 verification.

Expand All @@ -65,9 +65,13 @@ def generate_analyzer_output(extractor_result: dict) -> dict:

Args:
extractor_result: Output from FunctionExtractor
call_graph_result: Optional CallGraphBuilder.export() result. When provided, the
top-level callGraph / reverseCallGraph are surfaced (matching the analyzer_output
schema consumed by dependency resolvers); omitted when None (back-compat).

Returns:
Dict in analyzer_output.json format:
Dict in analyzer_output.json format (callGraph/reverseCallGraph included when
call_graph_result is provided):
{
"functions": {
"file.py:func_name": {
Expand Down Expand Up @@ -99,7 +103,16 @@ def generate_analyzer_output(extractor_result: dict) -> dict:
if class_name:
functions[func_id]["className"] = class_name

return {"functions": functions}
output = {"functions": functions}

# Surface the call graph top-level: the analyzer_output schema carries callGraph /
# reverseCallGraph (func_id -> [func_ids]), already computed in call_graph_result.
# (filePath is derived from the func_id key by consumers, so it is not a per-function field.)
if call_graph_result is not None:
output["callGraph"] = call_graph_result.get("call_graph", {})
output["reverseCallGraph"] = call_graph_result.get("reverse_call_graph", {})

return output


def parse_repository(repo_path: str, options: dict = None) -> tuple:
Expand Down Expand Up @@ -191,8 +204,9 @@ def parse_repository(repo_path: str, options: dict = None) -> tuple:
for unit_type, count in sorted(stats['by_type'].items()):
print(f" {unit_type}: {count}", file=sys.stderr)

# Generate analyzer output for Stage 2 verification
analyzer_output = generate_analyzer_output(extractor_result)
# Generate analyzer output for Stage 2 verification (pass the call graph so the output
# surfaces top-level callGraph / reverseCallGraph, matching the analyzer_output schema).
analyzer_output = generate_analyzer_output(extractor_result, call_graph_result)
print(f"\n[Stage 2 Support] Generated analyzer output: {len(analyzer_output['functions'])} functions", file=sys.stderr)

if output_dir:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Regression test — python parse_repository analyzer_output omits the call graph.

generate_analyzer_output() emitted only {"functions": {...}}, dropping the top-level callGraph /
reverseCallGraph that the analyzer_output schema carries (per the JS reference dependency_resolver +
PARSER_UPGRADE_PLAN) and that are already computed in call_graph_result. (The indirect_calls part of the
original filing is phantom and is excluded; filePath is derived from the func_id key by consumers
(RepositoryIndex / dependency_resolver do funcId.split(':')[0]), so it is not a per-function field.)
Fix: pass call_graph_result in and emit top-level callGraph / reverseCallGraph.
"""
import sys
from pathlib import Path

CORE = Path(__file__).resolve().parents[1] # libs/openant-core
sys.path.insert(0, str(CORE))
sys.path.insert(0, str(CORE / "parsers" / "python")) # parse_repository's bare imports

import parse_repository # noqa: E402

# Minimal stand-in for the extractor result handed to generate_analyzer_output():
# two functions in f.py, keyed by "file.py:func_name" ids, where foo's body calls bar.
EXTRACTOR = {"functions": {
"f.py:foo": {"name": "foo", "code": "def foo():\n bar()", "unit_type": "function", "start_line": 1, "end_line": 2},
"f.py:bar": {"name": "bar", "code": "def bar():\n pass", "unit_type": "function", "start_line": 4, "end_line": 5},
}}
# Stand-in for the call graph result passed as the second argument. The tests below
# only check that "call_graph" / "reverse_call_graph" (func_id -> [func_ids]) are
# surfaced; the remaining keys are presumably modelled on the CallGraphBuilder.export()
# shape -- TODO confirm against the builder.
# Note that "functions" is the very same dict object as EXTRACTOR["functions"], not a copy.
CALL_GRAPH = {
"repository": "/x", "functions": EXTRACTOR["functions"], "classes": {}, "imports": {},
"call_graph": {"f.py:foo": ["f.py:bar"]},
"reverse_call_graph": {"f.py:bar": ["f.py:foo"]},
"statistics": {},
}


def test_analyzer_output_includes_call_graph():
    """Check that a supplied call graph is surfaced at the top level of the output.

    With CALL_GRAPH passed as the second argument, the result must expose its
    call_graph / reverse_call_graph data under the callGraph / reverseCallGraph
    keys. Before the fix the function accepted only the extractor result, so no
    call graph could reach the output.
    """
    result = parse_repository.generate_analyzer_output(EXTRACTOR, CALL_GRAPH)
    forward_edges = result.get("callGraph")
    reverse_edges = result.get("reverseCallGraph")

    # The per-function section must still be produced alongside the graph data.
    assert result["functions"]["f.py:foo"]["name"] == "foo"
    assert forward_edges == {"f.py:foo": ["f.py:bar"]}, f"callGraph missing/empty: {forward_edges!r}"
    assert reverse_edges == {"f.py:bar": ["f.py:foo"]}, f"reverseCallGraph missing: {reverse_edges!r}"


def test_analyzer_output_backward_compatible_without_call_graph():
    """Guard the one-argument call: functions are returned and call-graph keys are omitted.

    Callers that pass only the extractor result (no call graph) must keep getting
    the "functions" mapping, and the output must not contain callGraph /
    reverseCallGraph at all -- omitted, rather than present but empty.
    """
    out = parse_repository.generate_analyzer_output(EXTRACTOR)

    assert "functions" in out
    assert out["functions"]["f.py:bar"]["name"] == "bar"
    # Pin the "omits the keys" half of the contract explicitly; without these checks
    # the test would still pass if empty graphs were emitted for 1-arg callers.
    assert "callGraph" not in out, f"unexpected callGraph: {out.get('callGraph')!r}"
    assert "reverseCallGraph" not in out, f"unexpected reverseCallGraph: {out.get('reverseCallGraph')!r}"
Loading