Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions src/sgraph/analyzers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
Analyzers for modeling various sources into SGraph structures.

This module provides tools for analyzing source code, databases and other
structures into hierarchic graph models.

Usage examples:

# Simple Python analysis
>>> from sgraph.analyzers import analyze_python
>>> result = analyze_python("./src")
>>> result.graph.to_xml("model.xml")

# Finer control
>>> from sgraph.analyzers import AnalyzerConfig, AnalysisLevel
>>> from sgraph.analyzers.code.python import analyze_python_project
>>> config = AnalyzerConfig(
... root_path="./src",
... level=AnalysisLevel.FULL,
... exclude_patterns=("**/test/**",),
... )
>>> result = analyze_python_project(config)
"""
from sgraph.analyzers.base import (
AnalyzerConfig,
AnalysisResult,
AnalysisError,
AnalysisLevel,
DependencyKind,
SourceLocation,
)


# Lazy import to avoid circular dependencies during package init
def analyze_python(
    path: str,
    level: "AnalysisLevel" = AnalysisLevel.FUNCTIONS,
    **kwargs,
) -> "AnalysisResult":
    """
    Build an SGraph model from a Python project on disk.

    Convenience entry point that forwards its arguments to
    ``sgraph.analyzers.code.python.python_analyzer.analyze_python``.

    Args:
        path: Root directory of the project
        level: Analysis detail level
        **kwargs: Other AnalyzerConfig parameters, forwarded unchanged

    Returns:
        AnalysisResult containing the graph, errors and statistics

    Example:
        >>> result = analyze_python("./src/sgraph")
        >>> print(result.graph.rootNode.getNodeCount())
    """
    # Deferred to call time: importing the analyzer while this package is
    # still initialising could create a circular import.
    from sgraph.analyzers.code.python.python_analyzer import (
        analyze_python as run_python_analysis,
    )

    return run_python_analysis(path, level, **kwargs)


__all__ = [
# Main functions
"analyze_python",
# Configuration
"AnalyzerConfig",
"AnalysisLevel",
# Results
"AnalysisResult",
"AnalysisError",
# Types
"DependencyKind",
"SourceLocation",
]
134 changes: 134 additions & 0 deletions src/sgraph/analyzers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Shared types and helper functions for the analyzer architecture."""
from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum, auto
from pathlib import Path
from typing import TYPE_CHECKING
from collections.abc import Sequence

if TYPE_CHECKING:
from sgraph import SGraph


class AnalysisLevel(Enum):
    """
    How deep an analysis descends into the source.

    Members are listed from coarsest to finest; each level adds the elements
    named in its comment to those of the level above it.
    """
    PACKAGES_ONLY = 1  # packages/directories only
    FILES = 2          # + files
    CLASSES = 3        # + classes
    FUNCTIONS = 4      # + functions/methods
    FULL = 5           # + attributes, parameters, decorators


class DependencyKind(Enum):
    """
    Kinds of dependency that can be recorded between elements.

    Each member's value is its lower-case string identifier.
    """
    # Import statements
    IMPORT = "import"
    FROM_IMPORT = "from_import"

    # Class relationships
    INHERITS = "inherits"
    IMPLEMENTS = "implements"

    # Usage references
    CALLS = "calls"
    TYPE_REF = "type_ref"


@dataclass(frozen=True, slots=True)
class SourceLocation:
"""Source code reference."""
file: Path
line: int
column: int = 0
end_line: int | None = None
end_column: int | None = None


@dataclass
class AnalyzerConfig:
    """
    Analyzer configuration.

    ``root_path`` may be given as a ``Path``, a string or any other
    ``os.PathLike``; it is always stored as a ``Path``. A pattern option given
    as a single bare string is stored as a one-element tuple.

    Attributes:
        root_path: Root directory of the project to analyze
        level: Analysis detail level
        include_patterns: Glob patterns for files to include
        exclude_patterns: Glob patterns for files/directories to skip
        follow_external_imports: Whether to follow external dependencies
        include_stdlib: Whether to include standard-library modules
    """
    root_path: Path
    level: AnalysisLevel = AnalysisLevel.FUNCTIONS
    include_patterns: Sequence[str] = ("**/*.py",)
    exclude_patterns: Sequence[str] = (
        "**/__pycache__/**",
        "**/.*",
        "**/venv/**",
        "**/.venv/**",
        "**/env/**",
        "**/node_modules/**",
        "**/*.egg-info/**",
        "**/build/**",
        "**/dist/**",
    )
    follow_external_imports: bool = False
    include_stdlib: bool = False

    def __post_init__(self) -> None:
        """Normalize loosely-typed constructor arguments."""
        # The dataclass is not frozen, so plain assignment is sufficient.
        # Path() accepts str and os.PathLike as well as an existing Path.
        self.root_path = Path(self.root_path)

        # A str is itself a Sequence[str]; left alone it would be iterated
        # one character at a time, so wrap it as a single pattern.
        if isinstance(self.include_patterns, str):
            self.include_patterns = (self.include_patterns,)
        if isinstance(self.exclude_patterns, str):
            self.exclude_patterns = (self.exclude_patterns,)


@dataclass
class AnalysisError:
    """
    A single error during analysis.

    Attributes:
        file: File the error relates to
        message: Human-readable description of the problem
        line: Line number of the problem, or None when not known
        exception: The underlying exception, if one was captured
    """
    file: Path
    message: str
    line: int | None = None
    exception: Exception | None = None

    def __str__(self) -> str:
        """Format as ``file[:line]: message``."""
        # Test against None explicitly: a reported line of 0 is still a
        # location and must not be dropped the way a missing line is.
        loc = f":{self.line}" if self.line is not None else ""
        return f"{self.file}{loc}: {self.message}"


@dataclass
class AnalysisResult:
    """
    Outcome of one analyzer run.

    Attributes:
        graph: The produced SGraph model
        config: The configuration that was used
        errors: List of errors encountered during analysis
        stats: Statistics (files analyzed, elements, etc.)
    """
    graph: "SGraph"
    config: AnalyzerConfig
    errors: list[AnalysisError] = field(default_factory=list)
    stats: dict[str, int] = field(default_factory=dict)

    @property
    def success(self) -> bool:
        """True when the graph's root node reports a node count above zero."""
        # NOTE(review): if getNodeCount() counts the root node itself, this
        # is always True — confirm against the SGraph element API.
        node_total = self.graph.rootNode.getNodeCount()
        return node_total > 0

    @property
    def file_count(self) -> int:
        """Value of the ``files_analyzed`` statistic (0 when absent)."""
        return self.stats.get("files_analyzed", 0)

    @property
    def error_count(self) -> int:
        """How many errors were recorded."""
        return len(self.errors)

    def summary(self) -> str:
        """Return a multi-line, human-readable overview of the statistics."""
        counters = self.stats
        return "\n".join((
            f"Files analyzed: {self.file_count}",
            f"Packages: {counters.get('packages', 0)}",
            f"Modules: {counters.get('modules', 0)}",
            f"Classes: {counters.get('classes', 0)}",
            f"Functions: {counters.get('functions', 0)}",
            f"Dependencies: {counters.get('dependencies', 0)}",
            f"Errors: {self.error_count}",
        ))
6 changes: 6 additions & 0 deletions src/sgraph/analyzers/code/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Code analyzers for various programming languages."""
from sgraph.analyzers.code.python import analyze_python_project

__all__ = [
"analyze_python_project",
]
113 changes: 113 additions & 0 deletions src/sgraph/analyzers/code/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Shared structures for code analysis."""
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Iterator
import fnmatch


@dataclass(frozen=True, slots=True)
class SourceFile:
"""
Metadata for a source file.

Attributes:
path: Absolute path to the file
relative_path: Path relative to the root directory
content: File contents (loaded separately)
"""
path: Path
relative_path: Path
content: str | None = None

@property
def module_path(self) -> str:
"""
Convert a file path into a Python module path.

E.g. src/sgraph/analyzers/__init__.py -> src.sgraph.analyzers
"""
parts = list(self.relative_path.with_suffix('').parts)
if parts and parts[-1] == '__init__':
parts = parts[:-1]
return '.'.join(parts)

@property
def is_package_init(self) -> bool:
"""Whether the file is a package __init__.py."""
return self.relative_path.name == '__init__.py'


def discover_source_files(
    root: Path,
    include_patterns: tuple[str, ...],
    exclude_patterns: tuple[str, ...],
) -> Iterator[SourceFile]:
    """
    Find source files in a directory.

    Each include pattern is expanded with ``Path.glob`` relative to the
    resolved ``root``. A regular file is yielded unless an exclude pattern
    rejects its root-relative path:

    * ``**/name/**`` and ``**/name/*`` reject any path that has a component
      equal to ``name``;
    * every pattern is also matched against the whole relative path with
      ``fnmatch``, both as written and, for patterns starting with ``**/``,
      without that prefix. The second form lets the pattern apply directly
      under ``root`` (zero leading directories) as well as deeper down.

    Exclude patterns consisting only of ``*`` and ``/`` are ignored. A file
    matched by more than one include pattern is yielded only once.

    Args:
        root: Root directory
        include_patterns: Glob patterns for files to include
        exclude_patterns: Glob patterns for files to skip

    Yields:
        SourceFile objects (content not loaded) for the files found
    """
    root = root.resolve()

    # Parse the exclude patterns once instead of once per candidate file.
    excluded_names: list[str] = []   # literal path components to reject
    path_patterns: list[str] = []    # fnmatch patterns for the whole path
    for pat in exclude_patterns:
        name = pat.strip("*").strip("/")
        if not name:
            continue

        # "**/__pycache__/**" -> reject any path containing "__pycache__".
        if pat.startswith("**/") and pat.endswith(("/**", "/*")):
            excluded_names.append(name.rstrip("/*"))

        path_patterns.append(pat)
        if pat.startswith("**/"):
            # fnmatch has no "**" support: "**/" demands a literal "/" and
            # so never matches directly under root. Also try the pattern
            # without the prefix to cover that zero-directory case.
            path_patterns.append(pat[3:])

    def is_excluded(rel_path: Path) -> bool:
        """Return True when any exclude pattern rejects ``rel_path``."""
        parts = rel_path.parts
        if any(name in parts for name in excluded_names):
            return True
        rel_str = str(rel_path)
        return any(fnmatch.fnmatch(rel_str, pat) for pat in path_patterns)

    seen: set[Path] = set()
    for pattern in include_patterns:
        for file_path in root.glob(pattern):
            # Overlapping include patterns must not yield a file twice.
            if file_path in seen:
                continue
            seen.add(file_path)

            if not file_path.is_file():
                continue
            relative = file_path.relative_to(root)
            if is_excluded(relative):
                continue
            yield SourceFile(path=file_path, relative_path=relative)


def read_source_file(source: SourceFile, encoding: str = 'utf-8') -> SourceFile:
    """
    Load a file from disk and return a SourceFile carrying its text.

    The file at ``source.path`` is decoded with ``encoding``; if that raises
    ``UnicodeDecodeError`` it is read again as latin-1. The input object is
    left untouched.

    Args:
        source: SourceFile that is missing its content
        encoding: Character encoding (default: utf-8)

    Returns:
        A new SourceFile with content filled in
    """
    file_path = source.path
    try:
        text = file_path.read_text(encoding=encoding)
    except UnicodeDecodeError:
        # Second attempt with latin-1 when the requested encoding fails.
        text = file_path.read_text(encoding='latin-1')

    return SourceFile(file_path, source.relative_path, text)
10 changes: 10 additions & 0 deletions src/sgraph/analyzers/code/python/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Python code analyzer."""
from sgraph.analyzers.code.python.python_analyzer import (
analyze_python_project,
analyze_python,
)

__all__ = [
"analyze_python_project",
"analyze_python",
]
Loading
Loading