From 11e8c5b6608a3c76d37b807ffd1ecd353ab61386 Mon Sep 17 00:00:00 2001 From: Caleb Evans Date: Thu, 28 May 2026 10:35:06 -0600 Subject: [PATCH 1/3] fix: resolve GPU embedding performance bottleneck in TransformerEmbedder - Pass device directly to SentenceTransformer constructor instead of using .to(), which left _target_device out of sync and caused the model to be moved back to CPU before each forward pass on CUDA - Replace manual per-batch encode loop with a single model.encode() call to eliminate repeated DataLoader/tokenization overhead and enable length-based sorting - Measured ~15% embedding throughput improvement on CPU; GPU improvement to be measured soon --- src/cordon/embedding/transformer.py | 47 ++++++++++++++--------------- tests/test_transformer.py | 9 ++++-- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/src/cordon/embedding/transformer.py b/src/cordon/embedding/transformer.py index a6e865e..28e3b00 100644 --- a/src/cordon/embedding/transformer.py +++ b/src/cordon/embedding/transformer.py @@ -7,7 +7,6 @@ import numpy.typing as npt import torch from sentence_transformers import SentenceTransformer -from tqdm import tqdm from cordon.core.config import AnalysisConfig from cordon.core.device import detect_device @@ -25,6 +24,11 @@ class TransformerEmbedder: def __init__(self, config: AnalysisConfig) -> None: """Initialize the embedder with a sentence-transformer model. + The model is initialized directly on the target device via the + ``device`` constructor parameter. This ensures the internal + ``_target_device`` attribute is set correctly so that ``encode()`` + places input tensors on the same device as the model parameters. + Args: config: Analysis configuration specifying model and device. @@ -35,7 +39,7 @@ def __init__(self, config: AnalysisConfig) -> None: self.device = detect_device(self.config.device) try: - self.model = SentenceTransformer(config.model_name) + self.model = SentenceTransformer(config.model_name, device=str(self.device)) except Exception as error: raise RuntimeError( f"Failed to load sentence-transformer model '{config.model_name}'. " @@ -43,7 +47,6 @@ def __init__(self, config: AnalysisConfig) -> None: f"for first-time downloads. Error: {error}" ) from error - self.model.to(self.device) self._truncation_warned = False def embed_windows( @@ -51,6 +54,11 @@ def embed_windows( ) -> Iterator[tuple[TextWindow, npt.NDArray[np.floating[Any]]]]: """Embed text windows into vector representations. + Encodes all windows in a single ``model.encode()`` call, delegating + batching, length-based sorting, and padding to sentence-transformers. + This avoids per-batch overhead from repeated DataLoader creation and + tokenization and allows optimal GPU utilization. + Args: windows: Iterable of text windows to embed. @@ -69,28 +77,17 @@ def embed_windows( if torch.cuda.is_available(): torch.cuda.empty_cache() - batch_size = self.config.batch_size - total_batches = (len(window_list) + batch_size - 1) // batch_size - - for batch_start_idx in tqdm( - range(0, len(window_list), batch_size), - desc="Generating embeddings", - total=total_batches, - unit="batch", - disable=not self.config.show_progress, - ): - batch = window_list[batch_start_idx : batch_start_idx + batch_size] - texts = [window.content for window in batch] - - embeddings = self.model.encode( - texts, - batch_size=len(batch), - show_progress_bar=False, - convert_to_numpy=True, - normalize_embeddings=True, - ) - - yield from zip(batch, embeddings, strict=False) + texts = [window.content for window in window_list] + + all_embeddings: npt.NDArray[np.floating[Any]] = self.model.encode( + texts, + batch_size=self.config.batch_size, + show_progress_bar=self.config.show_progress, + convert_to_numpy=True, + normalize_embeddings=True, + ) + + yield from zip(window_list, all_embeddings, strict=False) def _check_truncation_warning(self, windows: list[TextWindow]) -> None: """Check if windows are likely to be truncated and warn user. diff --git a/tests/test_transformer.py b/tests/test_transformer.py index 3c02a06..1970aa3 100644 --- a/tests/test_transformer.py +++ b/tests/test_transformer.py @@ -215,8 +215,13 @@ def embedder(self, mock_st: MagicMock) -> TransformerEmbedder: mock_model = MagicMock() rng = np.random.default_rng(0) - raw = rng.standard_normal((1, 384)).astype(np.float32) - mock_model.encode.return_value = raw / np.linalg.norm(raw, axis=1, keepdims=True) + + def _fake_encode(texts: list[str], **kwargs: object) -> np.ndarray: # type: ignore[type-arg] + n = len(texts) if isinstance(texts, list) else 1 + raw = rng.standard_normal((n, 384)).astype(np.float32) + return raw / np.linalg.norm(raw, axis=1, keepdims=True) + + mock_model.encode.side_effect = _fake_encode mock_st.return_value = mock_model config = AnalysisConfig(device="cpu", batch_size=2) From d26dcf582a7e9c68025ce9fb9c36b0bf974570fe Mon Sep 17 00:00:00 2001 From: Caleb Evans Date: Thu, 28 May 2026 11:00:04 -0600 Subject: [PATCH 2/3] release: v1.1.1 --- pyproject.toml | 2 +- src/cordon/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c5ba78b..85a5342 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cordon" -version = "1.1.0" +version = "1.1.1" description = "Semantic anomaly detection for system log files" readme = "README.md" requires-python = ">=3.10,<3.15" diff --git a/src/cordon/__init__.py b/src/cordon/__init__.py index d767de2..b79981b 100644 --- a/src/cordon/__init__.py +++ b/src/cordon/__init__.py @@ -2,7 +2,7 @@ from cordon.core.types import AnalysisResult, MergedBlock, ScoredWindow, TextWindow from cordon.pipeline import SemanticLogAnalyzer -__version__ = "1.1.0" +__version__ = "1.1.1" __all__ = [ "SemanticLogAnalyzer", From 8c6b44c09d40a4a3b86813bc19ca489d40612019 Mon Sep 17 00:00:00 2001 From: Caleb Evans Date: Thu, 28 May 2026 11:04:13 -0600 Subject: [PATCH 3/3] fix: add strict validation for embedding output length mismatch - Add explicit length check between window_list and encode() output before yielding results, raising ValueError with a descriptive message if they differ - Switch zip from strict=False to strict=True as a secondary safeguard --- src/cordon/embedding/transformer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cordon/embedding/transformer.py b/src/cordon/embedding/transformer.py index 28e3b00..2b6aec4 100644 --- a/src/cordon/embedding/transformer.py +++ b/src/cordon/embedding/transformer.py @@ -87,7 +87,14 @@ def embed_windows( normalize_embeddings=True, ) - yield from zip(window_list, all_embeddings, strict=False) + if len(all_embeddings) != len(window_list): + raise ValueError( + f"model.encode() returned {len(all_embeddings)} embeddings " + f"for {len(window_list)} input windows. This indicates a " + f"sentence-transformers internal error." + ) + + yield from zip(window_list, all_embeddings, strict=True) def _check_truncation_warning(self, windows: list[TextWindow]) -> None: """Check if windows are likely to be truncated and warn user.