softagram · villelaitila · Jun 1, 2026
diff --git a/src/sgraph/analyzers/__init__.py b/src/sgraph/analyzers/__init__.py
@@ -0,0 +1,71 @@
+"""
+Analyzers for modeling various sources into SGraph structures.
+
+This module provides tools for analyzing source code, databases and other
+structures into hierarchical graph models.
+
+Usage examples:
+
+    # Simple Python analysis
+    >>> from sgraph.analyzers import analyze_python
+    >>> result = analyze_python("./src")
+    >>> result.graph.to_xml("model.xml")
+
+    # Finer control
+    >>> from sgraph.analyzers import AnalyzerConfig, AnalysisLevel
+    >>> from sgraph.analyzers.code.python import analyze_python_project
+    >>> config = AnalyzerConfig(
+    ...     root_path="./src",
+    ...     level=AnalysisLevel.FULL,
+    ...     exclude_patterns=("**/test/**",),
+    ... )
+    >>> result = analyze_python_project(config)
+"""
+from sgraph.analyzers.base import (
+    AnalyzerConfig,
+    AnalysisResult,
+    AnalysisError,
+    AnalysisLevel,
+    DependencyKind,
+    SourceLocation,
+)
+
+
+# Lazy import to avoid circular dependencies during package init
+def analyze_python(
+    path: str,
+    level: "AnalysisLevel" = AnalysisLevel.FUNCTIONS,
+    **kwargs,
+) -> "AnalysisResult":
+    """
+    Build an SGraph model from the Python project rooted at ``path``.
+
+    Args:
+        path: Root directory of the project
+        level: How much detail the analysis should capture
+        **kwargs: Forwarded unchanged to the underlying analyzer function
+
+    Returns:
+        The AnalysisResult produced by the underlying analyzer function
+
+    Example:
+        >>> result = analyze_python("./src/sgraph")
+        >>> print(result.graph.rootNode.getNodeCount())
+    """
+    from sgraph.analyzers.code.python import python_analyzer
+    return python_analyzer.analyze_python(path, level, **kwargs)
+
+
+__all__ = [
+    # Convenience entry point defined in this module
+    "analyze_python",
+    # Configuration types re-exported from sgraph.analyzers.base
+    "AnalyzerConfig",
+    "AnalysisLevel",
+    # Result and error types re-exported from sgraph.analyzers.base
+    "AnalysisResult",
+    "AnalysisError",
+    # Supporting value types re-exported from sgraph.analyzers.base
+    "DependencyKind",
+    "SourceLocation",
+]
diff --git a/src/sgraph/analyzers/base.py b/src/sgraph/analyzers/base.py
@@ -0,0 +1,134 @@
+"""Shared types and helper functions for the analyzer architecture."""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from enum import Enum, auto
+from pathlib import Path
+from typing import TYPE_CHECKING
+from collections.abc import Sequence
+
+if TYPE_CHECKING:
+    from sgraph import SGraph
+
+
+class AnalysisLevel(Enum):
+    """Analysis detail level; each member adds detail on top of the one before it."""
+    PACKAGES_ONLY = auto()  # Packages/directories only
+    FILES = auto()          # + files
+    CLASSES = auto()        # + classes
+    FUNCTIONS = auto()      # + functions/methods
+    FULL = auto()           # + attributes, parameters, decorators
+
+
+class DependencyKind(Enum):
+    """Dependency types; each member's value is its lowercase string label."""
+    IMPORT = "import"
+    FROM_IMPORT = "from_import"
+    INHERITS = "inherits"
+    IMPLEMENTS = "implements"
+    CALLS = "calls"
+    TYPE_REF = "type_ref"
+
+
+@dataclass(frozen=True, slots=True)
+class SourceLocation:
+    """Immutable reference to a position in a source file, optionally with an end."""
+    file: Path
+    line: int
+    column: int = 0  # 0 when no column is given
+    end_line: int | None = None  # None when no end line is given
+    end_column: int | None = None  # None when no end column is given
+
+
+@dataclass
+class AnalyzerConfig:
+    """
+    Settings that control one analyzer run.
+
+    Attributes:
+        root_path: Root directory of the project; a str is converted to Path
+        level: How much detail the analysis captures
+        include_patterns: Glob patterns selecting the files to analyze
+        exclude_patterns: Glob patterns for files/directories to leave out
+        follow_external_imports: Whether external dependencies are followed
+        include_stdlib: Whether standard-library modules are included
+    """
+    root_path: Path
+    level: AnalysisLevel = AnalysisLevel.FUNCTIONS
+    include_patterns: Sequence[str] = ("**/*.py",)
+    exclude_patterns: Sequence[str] = (
+        "**/__pycache__/**",
+        "**/.*",
+        "**/venv/**",
+        "**/.venv/**",
+        "**/env/**",
+        "**/node_modules/**",
+        "**/*.egg-info/**",
+        "**/build/**",
+        "**/dist/**",
+    )
+    follow_external_imports: bool = False
+    include_stdlib: bool = False
+
+    def __post_init__(self):
+        # Callers may pass a plain string; normalise it to a Path.
+        if isinstance(self.root_path, str):
+            self.root_path = Path(self.root_path)
+
+
+@dataclass
+class AnalysisError:
+    """A single error during analysis; ``str()`` renders ``file[:line]: message``."""
+    file: Path
+    message: str
+    line: int | None = None  # left out of str() only when it is None
+    exception: Exception | None = None  # optional exception object attached to the error
+
+    def __str__(self) -> str:
+        loc = "" if self.line is None else f":{self.line}"  # an explicit 0 is kept
+        return f"{self.file}{loc}: {self.message}"
+
+
+@dataclass
+class AnalysisResult:
+    """
+    Outcome of a single analyzer run.
+
+    Attributes:
+        graph: SGraph model built by the analyzer
+        config: Configuration the run was started with
+        errors: Errors collected while analyzing
+        stats: Integer counters keyed by name (e.g. "files_analyzed")
+    """
+    graph: "SGraph"
+    config: AnalyzerConfig
+    errors: list[AnalysisError] = field(default_factory=list)
+    stats: dict[str, int] = field(default_factory=dict)
+
+    @property
+    def success(self) -> bool:
+        """True when the graph's root node reports a node count above zero."""
+        node_total = self.graph.rootNode.getNodeCount()
+        return node_total > 0
+
+    @property
+    def file_count(self) -> int:
+        """Value of the "files_analyzed" counter, or 0 when it is absent."""
+        return self.stats.get("files_analyzed", 0)
+
+    @property
+    def error_count(self) -> int:
+        """How many errors were collected."""
+        return len(self.errors)
+
+    def summary(self) -> str:
+        """Return a multi-line, human-readable summary of the counters."""
+        # Counters missing from ``stats`` are reported as 0.
+        labelled_keys = (
+            ("Packages", "packages"), ("Modules", "modules"), ("Classes", "classes"),
+            ("Functions", "functions"), ("Dependencies", "dependencies"),
+        )
+        rows = [f"Files analyzed: {self.file_count}"]
+        rows.extend(f"{label}: {self.stats.get(key, 0)}" for label, key in labelled_keys)
+        rows.append(f"Errors: {self.error_count}")
+        return "\n".join(rows)
diff --git a/src/sgraph/analyzers/code/__init__.py b/src/sgraph/analyzers/code/__init__.py
@@ -0,0 +1,6 @@
+"""Code analyzers for various programming languages."""
+from sgraph.analyzers.code.python import analyze_python_project
+
+__all__ = [  # public API of this package
+    "analyze_python_project",
+]
diff --git a/src/sgraph/analyzers/code/base.py b/src/sgraph/analyzers/code/base.py
@@ -0,0 +1,113 @@
+"""Shared structures for code analysis."""
+from __future__ import annotations
+
+import fnmatch
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterator, Sequence
+
+
+@dataclass(frozen=True, slots=True)
+class SourceFile:
+    """
+    Immutable description of one source file.
+
+    Attributes:
+        path: Absolute path to the file
+        relative_path: The same file expressed relative to the root directory
+        content: Text of the file, or None while it has not been loaded
+    """
+    path: Path
+    relative_path: Path
+    content: str | None = None
+
+    @property
+    def module_path(self) -> str:
+        """
+        Dotted module name: suffix dropped and a trailing ``__init__`` removed.
+        E.g. src/sgraph/analyzers/__init__.py -> src.sgraph.analyzers
+        """
+        components = self.relative_path.with_suffix('').parts
+        if components[-1:] == ('__init__',):
+            components = components[:-1]
+        return '.'.join(components)
+
+    @property
+    def is_package_init(self) -> bool:
+        """True when the file name is exactly ``__init__.py``."""
+        file_name = self.relative_path.name
+        return file_name == '__init__.py'
+
+
+def discover_source_files(
+    root: Path,
+    include_patterns: Sequence[str],
+    exclude_patterns: Sequence[str],
+) -> Iterator[SourceFile]:
+    """
+    Find source files under a directory.
+
+    Args:
+        root: Root directory; resolved before globbing
+        include_patterns: Glob patterns (``Path.glob`` syntax) for files to include
+        exclude_patterns: Patterns to skip. ``**/name/**`` or ``**/name/*`` drops
+            everything below a directory matching ``name``; a leading ``**/`` also
+            matches at the root; the rest is fnmatch-ed against the relative path
+
+    Yields:
+        One SourceFile (content not loaded) per matching file, at most once each
+    """
+    root = root.resolve()
+
+    def is_excluded(rel_path: Path) -> bool:
+        rel_str = rel_path.as_posix()
+        dir_parts = rel_path.parts[:-1]
+        for pat in exclude_patterns:
+            inner = pat.strip("*").strip("/")
+            if not inner:
+                continue  # wildcard-only pattern names nothing specific: ignored
+            if pat.startswith("**/"):
+                # "**/name/**": test each directory component ("*.egg-info" works)
+                if pat.endswith(("/**", "/*")) and any(
+                    fnmatch.fnmatch(part, inner) for part in dir_parts
+                ):
+                    return True
+                # "**/" may stand for zero directories: retry without it.
+                if fnmatch.fnmatch(rel_str, pat[3:]):
+                    return True
+            if fnmatch.fnmatch(rel_str, pat):
+                return True
+        return False
+
+    seen: set[Path] = set()
+    for pattern in include_patterns:
+        for file_path in root.glob(pattern):
+            if file_path in seen or not file_path.is_file():
+                continue
+            seen.add(file_path)
+            rel_path = file_path.relative_to(root)
+            if not is_excluded(rel_path):
+                yield SourceFile(path=file_path, relative_path=rel_path)
+
+
+def read_source_file(source: SourceFile, encoding: str = 'utf-8') -> SourceFile:
+    """
+    Return a copy of ``source`` whose ``content`` holds the file's text.
+
+    If decoding with ``encoding`` fails, the file is re-read as latin-1.
+
+    Args:
+        source: SourceFile whose ``path`` is read
+        encoding: Encoding tried first (default: utf-8)
+
+    Returns:
+        A new SourceFile with the same paths and ``content`` filled in
+    """
+    try:
+        text = source.path.read_text(encoding=encoding)
+    except UnicodeDecodeError:
+        # The requested encoding could not decode the file; retry as latin-1.
+        text = source.path.read_text(encoding='latin-1')
+    return SourceFile(
+        path=source.path, relative_path=source.relative_path, content=text,
+    )
diff --git a/src/sgraph/analyzers/code/python/__init__.py b/src/sgraph/analyzers/code/python/__init__.py
@@ -0,0 +1,10 @@
+"""Python code analyzer."""
+from sgraph.analyzers.code.python.python_analyzer import (
+    analyze_python_project,
+    analyze_python,
+)
+
+__all__ = [  # public API of this package
+    "analyze_python_project",
+    "analyze_python",
+]