diff --git a/libs/openant-core/parsers/python/parse_repository.py b/libs/openant-core/parsers/python/parse_repository.py index 18a61b7..6f33ec0 100644 --- a/libs/openant-core/parsers/python/parse_repository.py +++ b/libs/openant-core/parsers/python/parse_repository.py @@ -55,7 +55,7 @@ from utilities.file_io import read_json, write_json, open_utf8 -def generate_analyzer_output(extractor_result: dict) -> dict: +def generate_analyzer_output(extractor_result: dict, call_graph_result: dict | None = None) -> dict: """ Generate analyzer_output.json format for Stage 2 verification. @@ -65,9 +65,13 @@ def generate_analyzer_output(extractor_result: dict) -> dict: Args: extractor_result: Output from FunctionExtractor + call_graph_result: Optional CallGraphBuilder.export() result. When provided, the + top-level callGraph / reverseCallGraph are surfaced (matching the analyzer_output + schema consumed by dependency resolvers); omitted when None (back-compat). Returns: - Dict in analyzer_output.json format: + Dict in analyzer_output.json format (callGraph/reverseCallGraph included when + call_graph_result is provided): { "functions": { "file.py:func_name": { @@ -99,7 +103,16 @@ def generate_analyzer_output(extractor_result: dict) -> dict: if class_name: functions[func_id]["className"] = class_name - return {"functions": functions} + output = {"functions": functions} + + # Surface the call graph top-level: the analyzer_output schema carries callGraph / + # reverseCallGraph (func_id -> [func_ids]), already computed in call_graph_result. + # (filePath is derived from the func_id key by consumers, so it is not a per-function field.) 
+ if call_graph_result is not None: + output["callGraph"] = call_graph_result.get("call_graph", {}) + output["reverseCallGraph"] = call_graph_result.get("reverse_call_graph", {}) + + return output def parse_repository(repo_path: str, options: dict = None) -> tuple: @@ -191,8 +204,9 @@ def parse_repository(repo_path: str, options: dict = None) -> tuple: for unit_type, count in sorted(stats['by_type'].items()): print(f" {unit_type}: {count}", file=sys.stderr) - # Generate analyzer output for Stage 2 verification - analyzer_output = generate_analyzer_output(extractor_result) + # Generate analyzer output for Stage 2 verification (pass the call graph so the output + # surfaces top-level callGraph / reverseCallGraph, matching the analyzer_output schema). + analyzer_output = generate_analyzer_output(extractor_result, call_graph_result) print(f"\n[Stage 2 Support] Generated analyzer output: {len(analyzer_output['functions'])} functions", file=sys.stderr) if output_dir: diff --git a/libs/openant-core/tests/test_parse_repository_analyzer_schema.py b/libs/openant-core/tests/test_parse_repository_analyzer_schema.py new file mode 100644 index 0000000..25d4bb0 --- /dev/null +++ b/libs/openant-core/tests/test_parse_repository_analyzer_schema.py @@ -0,0 +1,44 @@ +"""Regression test — python parse_repository analyzer_output omits the call graph. + +generate_analyzer_output() emitted only {"functions": {...}}, dropping the top-level callGraph / +reverseCallGraph that the analyzer_output schema carries (per the JS reference dependency_resolver + +PARSER_UPGRADE_PLAN) and that are already computed in call_graph_result. (The indirect_calls part of the +original filing is phantom and is excluded; filePath is derived from the func_id key by consumers +(RepositoryIndex / dependency_resolver do funcId.split(':')[0]), so it is not a per-function field.) +Fix: pass call_graph_result in and emit top-level callGraph / reverseCallGraph. 
+"""
+import sys
+from pathlib import Path
+
+CORE = Path(__file__).resolve().parents[1]  # libs/openant-core
+sys.path.insert(0, str(CORE))
+sys.path.insert(0, str(CORE / "parsers" / "python"))  # parse_repository's bare imports
+
+import parse_repository  # noqa: E402
+
+EXTRACTOR = {"functions": {  # minimal extractor-style fixture: foo's code calls bar
+    "f.py:foo": {"name": "foo", "code": "def foo():\n bar()", "unit_type": "function", "start_line": 1, "end_line": 2},
+    "f.py:bar": {"name": "bar", "code": "def bar():\n pass", "unit_type": "function", "start_line": 4, "end_line": 5},
+}}
+CALL_GRAPH = {  # fixture passed as call_graph_result; only call_graph / reverse_call_graph are asserted on
+    "repository": "/x", "functions": EXTRACTOR["functions"], "classes": {}, "imports": {},
+    "call_graph": {"f.py:foo": ["f.py:bar"]},
+    "reverse_call_graph": {"f.py:bar": ["f.py:foo"]},
+    "statistics": {},
+}
+
+
+def test_analyzer_output_includes_call_graph():
+    """Post-fix: the analyzer output surfaces the top-level callGraph / reverseCallGraph from
+    call_graph_result. Pre-fix generate_analyzer_output took only the extractor result (no call graph)."""
+    out = parse_repository.generate_analyzer_output(EXTRACTOR, CALL_GRAPH)
+    assert out["functions"]["f.py:foo"]["name"] == "foo"  # functions unchanged
+    assert out.get("callGraph") == {"f.py:foo": ["f.py:bar"]}, f"callGraph missing/empty: {out.get('callGraph')!r}"
+    assert out.get("reverseCallGraph") == {"f.py:bar": ["f.py:foo"]}, f"reverseCallGraph missing: {out.get('reverseCallGraph')!r}"
+
+
+def test_analyzer_output_backward_compatible_without_call_graph():
+    """Guard: a 1-arg call (no call graph) still returns functions and omits the call-graph keys."""
+    out = parse_repository.generate_analyzer_output(EXTRACTOR)
+    assert "functions" in out and out["functions"]["f.py:bar"]["name"] == "bar"
+    assert "callGraph" not in out and "reverseCallGraph" not in out, f"unexpected call-graph keys: {sorted(out)!r}"