From 11e8c5b6608a3c76d37b807ffd1ecd353ab61386 Mon Sep 17 00:00:00 2001
From: Caleb Evans <caevans@redhat.com>
Date: Thu, 28 May 2026 10:35:06 -0600
Subject: [PATCH 1/3] fix: resolve GPU embedding performance bottleneck in
 TransformerEmbedder

- Pass device directly to SentenceTransformer constructor instead of
  using .to(), which left _target_device out of sync and caused the
  model to be moved back to CPU before each forward pass on CUDA
- Replace manual per-batch encode loop with a single model.encode() call
  to eliminate repeated DataLoader/tokenization overhead and enable
  length-based sorting
- Measured ~15% embedding throughput improvement on CPU; GPU improvement
  has not been measured yet
---
 src/cordon/embedding/transformer.py | 47 ++++++++++++++---------------
 tests/test_transformer.py           |  9 ++++--
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/src/cordon/embedding/transformer.py b/src/cordon/embedding/transformer.py
index a6e865e..28e3b00 100644
--- a/src/cordon/embedding/transformer.py
+++ b/src/cordon/embedding/transformer.py
@@ -7,7 +7,6 @@
 import numpy.typing as npt
 import torch
 from sentence_transformers import SentenceTransformer
-from tqdm import tqdm
 
 from cordon.core.config import AnalysisConfig
 from cordon.core.device import detect_device
@@ -25,6 +24,11 @@ class TransformerEmbedder:
     def __init__(self, config: AnalysisConfig) -> None:
         """Initialize the embedder with a sentence-transformer model.
 
+        The model is initialized directly on the target device via the
+        ``device`` constructor parameter. This ensures the internal
+        ``_target_device`` attribute is set correctly so that ``encode()``
+        places input tensors on the same device as the model parameters.
+
         Args:
             config: Analysis configuration specifying model and device.
 
@@ -35,7 +39,7 @@ def __init__(self, config: AnalysisConfig) -> None:
         self.device = detect_device(self.config.device)
 
         try:
-            self.model = SentenceTransformer(config.model_name)
+            self.model = SentenceTransformer(config.model_name, device=str(self.device))
         except Exception as error:
             raise RuntimeError(
                 f"Failed to load sentence-transformer model '{config.model_name}'. "
@@ -43,7 +47,6 @@ def __init__(self, config: AnalysisConfig) -> None:
                 f"for first-time downloads. Error: {error}"
             ) from error
 
-        self.model.to(self.device)
         self._truncation_warned = False
 
     def embed_windows(
@@ -51,6 +54,11 @@ def embed_windows(
     ) -> Iterator[tuple[TextWindow, npt.NDArray[np.floating[Any]]]]:
         """Embed text windows into vector representations.
 
+        Encodes all windows in a single ``model.encode()`` call, delegating
+        batching, length-based sorting, and padding to sentence-transformers.
+        This avoids per-batch overhead from repeated DataLoader creation and
+        tokenization and allows optimal GPU utilization.
+
         Args:
             windows: Iterable of text windows to embed.
 
@@ -69,28 +77,17 @@ def embed_windows(
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
-        batch_size = self.config.batch_size
-        total_batches = (len(window_list) + batch_size - 1) // batch_size
-
-        for batch_start_idx in tqdm(
-            range(0, len(window_list), batch_size),
-            desc="Generating embeddings",
-            total=total_batches,
-            unit="batch",
-            disable=not self.config.show_progress,
-        ):
-            batch = window_list[batch_start_idx : batch_start_idx + batch_size]
-            texts = [window.content for window in batch]
-
-            embeddings = self.model.encode(
-                texts,
-                batch_size=len(batch),
-                show_progress_bar=False,
-                convert_to_numpy=True,
-                normalize_embeddings=True,
-            )
-
-            yield from zip(batch, embeddings, strict=False)
+        texts = [window.content for window in window_list]
+
+        all_embeddings: npt.NDArray[np.floating[Any]] = self.model.encode(
+            texts,
+            batch_size=self.config.batch_size,
+            show_progress_bar=self.config.show_progress,
+            convert_to_numpy=True,
+            normalize_embeddings=True,
+        )
+
+        yield from zip(window_list, all_embeddings, strict=False)
 
     def _check_truncation_warning(self, windows: list[TextWindow]) -> None:
         """Check if windows are likely to be truncated and warn user.
diff --git a/tests/test_transformer.py b/tests/test_transformer.py
index 3c02a06..1970aa3 100644
--- a/tests/test_transformer.py
+++ b/tests/test_transformer.py
@@ -215,8 +215,13 @@ def embedder(self, mock_st: MagicMock) -> TransformerEmbedder:
 
         mock_model = MagicMock()
         rng = np.random.default_rng(0)
-        raw = rng.standard_normal((1, 384)).astype(np.float32)
-        mock_model.encode.return_value = raw / np.linalg.norm(raw, axis=1, keepdims=True)
+
+        def _fake_encode(texts: list[str], **kwargs: object) -> np.ndarray:  # type: ignore[type-arg]
+            n = len(texts) if isinstance(texts, list) else 1
+            raw = rng.standard_normal((n, 384)).astype(np.float32)
+            return raw / np.linalg.norm(raw, axis=1, keepdims=True)
+
+        mock_model.encode.side_effect = _fake_encode
         mock_st.return_value = mock_model
 
         config = AnalysisConfig(device="cpu", batch_size=2)

From d26dcf582a7e9c68025ce9fb9c36b0bf974570fe Mon Sep 17 00:00:00 2001
From: Caleb Evans <caevans@redhat.com>
Date: Thu, 28 May 2026 11:00:04 -0600
Subject: [PATCH 2/3] release: v1.1.1

---
 pyproject.toml         | 2 +-
 src/cordon/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c5ba78b..85a5342 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "cordon"
-version = "1.1.0"
+version = "1.1.1"
 description = "Semantic anomaly detection for system log files"
 readme = "README.md"
 requires-python = ">=3.10,<3.15"
diff --git a/src/cordon/__init__.py b/src/cordon/__init__.py
index d767de2..b79981b 100644
--- a/src/cordon/__init__.py
+++ b/src/cordon/__init__.py
@@ -2,7 +2,7 @@
 from cordon.core.types import AnalysisResult, MergedBlock, ScoredWindow, TextWindow
 from cordon.pipeline import SemanticLogAnalyzer
 
-__version__ = "1.1.0"
+__version__ = "1.1.1"
 
 __all__ = [
     "SemanticLogAnalyzer",

From 8c6b44c09d40a4a3b86813bc19ca489d40612019 Mon Sep 17 00:00:00 2001
From: Caleb Evans <caevans@redhat.com>
Date: Thu, 28 May 2026 11:04:13 -0600
Subject: [PATCH 3/3] fix: add strict validation for embedding output length
 mismatch

- Add explicit length check between window_list and encode() output
  before yielding results, raising ValueError with a descriptive message
  if they differ
- Switch zip from strict=False to strict=True as a secondary safeguard
---
 src/cordon/embedding/transformer.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/cordon/embedding/transformer.py b/src/cordon/embedding/transformer.py
index 28e3b00..2b6aec4 100644
--- a/src/cordon/embedding/transformer.py
+++ b/src/cordon/embedding/transformer.py
@@ -87,7 +87,14 @@ def embed_windows(
             normalize_embeddings=True,
         )
 
-        yield from zip(window_list, all_embeddings, strict=False)
+        if len(all_embeddings) != len(window_list):
+            raise ValueError(
+                f"model.encode() returned {len(all_embeddings)} embeddings "
+                f"for {len(window_list)} input windows. This indicates a "
+                f"sentence-transformers internal error."
+            )
+
+        yield from zip(window_list, all_embeddings, strict=True)
 
     def _check_truncation_warning(self, windows: list[TextWindow]) -> None:
         """Check if windows are likely to be truncated and warn user.