diff --git a/pyproject.toml b/pyproject.toml
index 60f1d39..884554f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,11 +51,14 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-embeddings = ["sqlite-vec>=0.1.6"]
+embeddings = ["sqlite-vec>=0.1.6", "sentence-transformers>=3.0"]
+# sqlite-vec is a tiny wheel; including it in dev lets CI exercise the
+# vector-store tests without pulling in torch via sentence-transformers.
 dev = [
     "pytest>=8.0",
     "pytest-asyncio>=0.23",
     "ruff>=0.5",
+    "sqlite-vec>=0.1.6",
 ]
 # Heavyweight, optional. Required only by `benchmarks/faithfulness/` which
 # downloads the HaluBench subsample and self-hosts HHEM-2.1-Open for the
diff --git a/src/orc/cli_commands/ingest.py b/src/orc/cli_commands/ingest.py
index 063f6a4..d43eeca 100644
--- a/src/orc/cli_commands/ingest.py
+++ b/src/orc/cli_commands/ingest.py
@@ -35,6 +35,8 @@ def ingest_command(source: str, workspace: str | None, no_recursive: bool) -> No
     console.print(
         f"[green]Ingested[/green] {len(ids)} evidence item(s) into [bold]{ws.name}[/bold]"
     )
+    if ws.has_embeddings:
+        console.print(f"  embeddings: {ws.embedding_model}")
     for eid in ids[:10]:
         console.print(f"  [dim]{eid}[/dim]")
     if len(ids) > 10:
diff --git a/src/orc/cli_commands/search.py b/src/orc/cli_commands/search.py
index c85363f..e680487 100644
--- a/src/orc/cli_commands/search.py
+++ b/src/orc/cli_commands/search.py
@@ -50,7 +50,7 @@ def search_command(query: str, workspace: str | None, k: int, as_json: bool) ->
         console.print("[yellow]No chunks matched[/yellow]")
         return
 
-    table = Table(title=f"BM25 results for '{query}'")
+    table = Table(title=f"Retrieval results for '{query}'")
     table.add_column("Rank", justify="right")
     table.add_column("Score", justify="right")
     table.add_column("Title")
diff --git a/src/orc/cli_commands/workspace.py b/src/orc/cli_commands/workspace.py
index 4ea5798..9be64c9 100644
--- a/src/orc/cli_commands/workspace.py
+++ b/src/orc/cli_commands/workspace.py
@@ -1,16 +1,33 @@
-"""`orc workspace ...` commands."""
+"""`orc workspace ...` commands.
+
+The embedding model is pinned in the workspace row — there is deliberately no
+env var to override it at retrieval time, because the column is the
+replay-pinned truth for which model embedded the corpus.
+"""
 
 from __future__ import annotations
 
 import click
 from rich.console import Console
+from rich.markup import escape
 from rich.table import Table
 
-from orc.errors import WorkspaceExistsError
+from orc.errors import EmbeddingsUnavailableError, WorkspaceExistsError, WorkspaceNotFoundError
+from orc.paths import workspace_db_path
+from orc.retrieval.embedder import DEFAULT_EMBEDDING_MODEL, embedder_available, get_embedder
 from orc.storage import workspace as ws_module
+from orc.storage.db import open_connection, transaction
+from orc.storage.embeddings_store import (
+    backfill_embeddings,
+    ensure_chunk_vec,
+    load_vec_extension,
+    vec_extension_available,
+)
 
 console = Console()
 
+_INSTALL_HINT = 'pip install "orc-ai[embeddings]"'
+
 
 @click.group("workspace")
 def workspace() -> None:
@@ -19,10 +36,34 @@ def workspace() -> None:
 
 @workspace.command("create")
 @click.argument("name")
-def create_command(name: str) -> None:
+@click.option(
+    "--embeddings",
+    "embeddings",
+    is_flag=True,
+    help="Enable hybrid (BM25 + vector) retrieval for this workspace.",
+)
+@click.option(
+    "--embedding-model",
+    "embedding_model",
+    default=None,
+    help=f"Embedding model id (default: {DEFAULT_EMBEDDING_MODEL}). Requires --embeddings.",
+)
+def create_command(name: str, embeddings: bool, embedding_model: str | None) -> None:
     """Create a new workspace."""
+    if embedding_model is not None and not embeddings:
+        raise click.UsageError("--embedding-model requires --embeddings")
+    model = (embedding_model or DEFAULT_EMBEDDING_MODEL) if embeddings else None
+
+    # Warn-but-create: the flag records intent in the workspace row; the user
+    # can install the extra and run `orc workspace embed` later.
+    if model is not None and not (embedder_available() and vec_extension_available()):
+        console.print(
+            "[yellow]Warning:[/yellow] embedding dependencies are not installed; "
+            f"ingest will fail until you run: {escape(_INSTALL_HINT)}"
+        )
+
     try:
-        ws = ws_module.create(name)
+        ws = ws_module.create(name, embedding_model=model)
     except WorkspaceExistsError as exc:
         raise click.ClickException(str(exc)) from exc
     except ValueError as exc:
@@ -30,6 +71,58 @@ def create_command(name: str) -> None:
     console.print(f"[green]Created workspace[/green] [bold]{ws.name}[/bold]")
     console.print(f"  schema_version = {ws.schema_version}")
     console.print(f"  created_at     = {ws.created_at}")
+    if ws.has_embeddings:
+        console.print(f"  embeddings     = {ws.embedding_model}")
+
+
+@workspace.command("embed")
+@click.argument("name")
+@click.option(
+    "--model",
+    default=None,
+    help="Embedding model id. Only needed when the workspace has none set yet.",
+)
+def embed_command(name: str, model: str | None) -> None:
+    """Backfill vector embeddings for all unembedded chunks in a workspace."""
+    try:
+        ws = ws_module.resolve(name)
+    except WorkspaceNotFoundError as exc:
+        raise click.ClickException(str(exc)) from exc
+
+    if ws.embedding_model is None:
+        effective_model = model or DEFAULT_EMBEDDING_MODEL
+    elif model is not None and model != ws.embedding_model:
+        raise click.ClickException(
+            f"Workspace {ws.name!r} is pinned to embedding model "
+            f"{ws.embedding_model!r}; refusing to embed with {model!r}. "
+            "Vectors from different models cannot be mixed."
+        )
+    else:
+        effective_model = ws.embedding_model
+
+    if not vec_extension_available():
+        raise click.ClickException(
+            f"The sqlite-vec extension is unavailable; run: {_INSTALL_HINT}"
+        )
+    try:
+        embedder = get_embedder(effective_model)
+    except EmbeddingsUnavailableError as exc:
+        raise click.ClickException(str(exc)) from exc
+
+    with open_connection(workspace_db_path(ws.name)) as conn:
+        load_vec_extension(conn)
+        try:
+            ensure_chunk_vec(conn, embedder.dim)
+        except ValueError as exc:
+            raise click.ClickException(str(exc)) from exc
+        if ws.embedding_model is None:
+            with transaction(conn):
+                conn.execute(
+                    "UPDATE workspace SET embedding_model = ? WHERE name = ?",
+                    (effective_model, ws.name),
+                )
+        count = backfill_embeddings(conn, embedder)
+    console.print(f"[green]Embedded[/green] {count} chunk(s) with [bold]{effective_model}[/bold]")
 
 
 @workspace.command("list")
diff --git a/src/orc/directives/research/skills/research_topic.py b/src/orc/directives/research/skills/research_topic.py
index 85eb91d..be7f5d9 100644
--- a/src/orc/directives/research/skills/research_topic.py
+++ b/src/orc/directives/research/skills/research_topic.py
@@ -10,7 +10,7 @@
 from orc.llm.cache import build_verify_messages, format_corpus
 from orc.llm.client import get_client, messages_create, resolve_model_for_provider
 from orc.llm.models import resolve_research_model
-from orc.retrieval import bm25_search
+from orc.retrieval import retrieve
 from orc.runs.runner import Run
 from orc.storage.workspace import Workspace
 
@@ -73,11 +73,13 @@ def run(
             raise ValueError("topic must be a non-empty string")
 
         resolved_model = resolve_research_model(model)
-        pool = bm25_search(
-            run.conn, topic, limit=retrieval_pool, corpus_version=corpus_version
+        res = retrieve(
+            run.conn, topic, workspace=workspace, limit=retrieval_pool, corpus_version=corpus_version
+        )
+        candidates = res.chunks[:k]
+        run.record_retrieval(
+            candidates, method=res.method, candidates_considered=res.candidates_considered
         )
-        candidates = pool[:k]
-        run.record_retrieval(candidates, method="bm25", candidates_considered=len(pool))
 
         if not candidates:
             return {
diff --git a/src/orc/directives/research/skills/search_evidence.py b/src/orc/directives/research/skills/search_evidence.py
index f013274..ebf02e0 100644
--- a/src/orc/directives/research/skills/search_evidence.py
+++ b/src/orc/directives/research/skills/search_evidence.py
@@ -4,7 +4,7 @@
 
 from typing import Any
 
-from orc.retrieval import bm25_search
+from orc.retrieval import retrieve
 from orc.runs.runner import Run
 from orc.storage.workspace import Workspace
 
@@ -22,8 +22,11 @@ def run(
         corpus_version: int | None = None,
         **_unused: Any,
     ) -> dict[str, Any]:
-        chunks = bm25_search(run.conn, query, limit=k, corpus_version=corpus_version)
-        run.record_retrieval(chunks, method="bm25", candidates_considered=len(chunks))
+        res = retrieve(run.conn, query, workspace=workspace, limit=k, corpus_version=corpus_version)
+        chunks = res.chunks
+        run.record_retrieval(
+            chunks, method=res.method, candidates_considered=res.candidates_considered
+        )
         return {
             "query": query,
             "k": k,
diff --git a/src/orc/directives/research/skills/verify_claim.py b/src/orc/directives/research/skills/verify_claim.py
index 985f566..8345e3f 100644
--- a/src/orc/directives/research/skills/verify_claim.py
+++ b/src/orc/directives/research/skills/verify_claim.py
@@ -19,7 +19,7 @@
 from orc.llm.cache import build_verify_messages, format_corpus
 from orc.llm.client import get_client, messages_create, resolve_model_for_provider
 from orc.llm.models import resolve_verify_model
-from orc.retrieval import bm25_search
+from orc.retrieval import retrieve
 from orc.runs.runner import Run
 from orc.storage.workspace import Workspace
 
@@ -372,11 +372,17 @@ def run(
                 candidates, method=f"{mode}_all", candidates_considered=len(candidates)
             )
         else:
-            pool = bm25_search(
-                run.conn, claim, limit=retrieval_pool, corpus_version=corpus_version
+            res = retrieve(
+                run.conn,
+                claim,
+                workspace=workspace,
+                limit=retrieval_pool,
+                corpus_version=corpus_version,
+            )
+            candidates = res.chunks[:k]
+            run.record_retrieval(
+                candidates, method=res.method, candidates_considered=res.candidates_considered
             )
-            candidates = pool[:k]
-            run.record_retrieval(candidates, method="bm25", candidates_considered=len(pool))
 
         if not candidates:
             return _make_not_found(claim=claim, model=resolved_model, run=run)
diff --git a/src/orc/errors.py b/src/orc/errors.py
index a9b8a7f..7cd9fba 100644
--- a/src/orc/errors.py
+++ b/src/orc/errors.py
@@ -28,3 +28,7 @@ class TraceNotFoundError(OrcError):
 
 class IngestError(OrcError):
     pass
+
+
+class EmbeddingsUnavailableError(OrcError):
+    """Embeddings were requested but the optional dependencies are missing."""
diff --git a/src/orc/ingest/pipeline.py b/src/orc/ingest/pipeline.py
index de827ff..b9647a4 100644
--- a/src/orc/ingest/pipeline.py
+++ b/src/orc/ingest/pipeline.py
@@ -80,6 +80,11 @@ def _ingest_one(workspace: Workspace, doc: LoadedDoc) -> list[str]:
         # Chunk before any disk write so a chunker failure leaves nothing behind.
         chunks = chunk_text(doc.text)
 
+        # Embed BEFORE the write transaction: model inference can be slow and
+        # must not hold the BEGIN IMMEDIATE write lock. The vectors are then
+        # inserted in the same transaction as the chunk rows (atomic).
+        embeddings = _embed_chunks_for_ingest(conn, workspace=workspace, chunks=chunks)
+
         # Stage the evidence bytes to a temp file and only promote it into place
         # once the DB transaction commits. A failure anywhere leaves neither an
         # orphaned file nor a dangling row — the corpus stays consistent.
@@ -95,6 +100,7 @@ def _ingest_one(workspace: Workspace, doc: LoadedDoc) -> list[str]:
                 sha=sha,
                 doc=doc,
                 chunks=chunks,
+                embeddings=embeddings,
             )
         except BaseException:
             tmp_path.unlink(missing_ok=True)
@@ -103,6 +109,49 @@ def _ingest_one(workspace: Workspace, doc: LoadedDoc) -> list[str]:
         return [evidence_id]
 
 
+def _embed_chunks_for_ingest(
+    conn: Any,
+    *,
+    workspace: Workspace,
+    chunks: list,
+) -> list[list[float]] | None:
+    """Embed chunk texts when the workspace opts into embeddings.
+
+    Fail-loud by design: a workspace with embedding_model set has promised
+    hybrid retrieval, so silently ingesting unembedded chunks would corrupt
+    that promise. Missing deps surface as IngestError with an install hint.
+    Also prepares chunk_vec (extension + table) before the write transaction.
+    """
+    if workspace.embedding_model is None or not chunks:
+        return None
+
+    from orc.errors import EmbeddingsUnavailableError
+    from orc.retrieval.embedder import get_embedder
+    from orc.storage.embeddings_store import (
+        ensure_chunk_vec,
+        load_vec_extension,
+        vec_extension_available,
+    )
+
+    try:
+        if not vec_extension_available():
+            raise EmbeddingsUnavailableError(
+                "the sqlite-vec extension is unavailable; "
+                'run: pip install "orc-ai[embeddings]"'
+            )
+        embedder = get_embedder(workspace.embedding_model)
+    except EmbeddingsUnavailableError as exc:
+        raise IngestError(
+            f"Workspace {workspace.name!r} requires embeddings "
+            f"(embedding_model={workspace.embedding_model!r}) but they are "
+            f"unavailable: {exc}"
+        ) from exc
+
+    load_vec_extension(conn)
+    ensure_chunk_vec(conn, embedder.dim)
+    return embedder.embed_texts([c.text for c in chunks])
+
+
 def _commit_evidence(
     conn: Any,
     *,
@@ -112,6 +161,7 @@ def _commit_evidence(
     sha: str,
     doc: LoadedDoc,
     chunks: list,
+    embeddings: list[list[float]] | None = None,
 ) -> None:
     with transaction(conn):
         conn.execute(
@@ -136,12 +186,13 @@ def _commit_evidence(
                 new_corpus_version,
             ),
         )
-        for c in chunks:
+        chunk_ids = [new_chunk_id() for _ in chunks]
+        for chunk_id, c in zip(chunk_ids, chunks, strict=True):
             conn.execute(
                 "INSERT INTO chunk(chunk_id, evidence_id, seq, text, token_count, "
                 "headings_path, start_offset, end_offset) VALUES (?,?,?,?,?,?,?,?)",
                 (
-                    new_chunk_id(),
+                    chunk_id,
                     evidence_id,
                     c.seq,
                     c.text,
@@ -151,6 +202,16 @@ def _commit_evidence(
                     c.end_offset,
                 ),
             )
+        if embeddings is not None:
+            from orc.storage.embeddings_store import store_chunk_embeddings
+
+            store_chunk_embeddings(
+                conn,
+                [
+                    (chunk_id, new_corpus_version, vector)
+                    for chunk_id, vector in zip(chunk_ids, embeddings, strict=True)
+                ],
+            )
 
 
 def _iter_files(root: Path, *, recursive: bool) -> Iterator[Path]:
diff --git a/src/orc/retrieval/__init__.py b/src/orc/retrieval/__init__.py
index 13db9f2..946c484 100644
--- a/src/orc/retrieval/__init__.py
+++ b/src/orc/retrieval/__init__.py
@@ -1,5 +1,13 @@
 """Retrieval primitives. Pure functions over a sqlite connection."""
 
 from orc.retrieval.bm25 import RetrievedChunk, bm25_search
+from orc.retrieval.hybrid import RetrievalResult, retrieve, rrf_fuse, vector_search
 
-__all__ = ["RetrievedChunk", "bm25_search"]
+__all__ = [
+    "RetrievalResult",
+    "RetrievedChunk",
+    "bm25_search",
+    "retrieve",
+    "rrf_fuse",
+    "vector_search",
+]
diff --git a/src/orc/retrieval/embedder.py b/src/orc/retrieval/embedder.py
new file mode 100644
index 0000000..5940b45
--- /dev/null
+++ b/src/orc/retrieval/embedder.py
@@ -0,0 +1,103 @@
+"""Embedding model access for hybrid retrieval.
+
+The model is pinned per workspace (workspace.embedding_model column) — there is
+deliberately NO env var override at retrieval time, because the workspace column
+is the replay-pinned truth: a frozen replay must embed with the same model the
+original run used.
+
+sentence-transformers (and its torch dependency) is heavyweight, so the import
+is lazy and everything that only needs the dimension consults the module-level
+registry instead of loading the model.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from importlib.util import find_spec
+from typing import Protocol
+
+from orc.errors import EmbeddingsUnavailableError
+
+DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+
+# Known model dims, so callers can size chunk_vec without loading torch.
+_MODEL_DIMS: dict[str, int] = {
+    "sentence-transformers/all-MiniLM-L6-v2": 384,
+}
+
+_INSTALL_HINT = 'pip install "orc-ai[embeddings]"'
+
+
+class Embedder(Protocol):
+    model_id: str
+    dim: int
+
+    def embed_texts(self, texts: list[str]) -> list[list[float]]: ...
+
+
+class SentenceTransformerEmbedder:
+    """Real embedder. Lazy-imports sentence_transformers so `orc` stays light
+    for users who never opt into embeddings."""
+
+    def __init__(self, model_id: str = DEFAULT_EMBEDDING_MODEL) -> None:
+        try:
+            import sentence_transformers
+        except ImportError as exc:
+            raise EmbeddingsUnavailableError(
+                f"sentence-transformers is not installed; run: {_INSTALL_HINT}"
+            ) from exc
+        self.model_id = model_id
+        self._model = sentence_transformers.SentenceTransformer(model_id)
+        # Renamed in sentence-transformers 5.x; support both spellings. Look up
+        # the legacy name lazily: a getattr() default is evaluated eagerly and
+        # would raise AttributeError once the old method is removed.
+        get_dim = getattr(self._model, "get_embedding_dimension", None)
+        if get_dim is None:
+            get_dim = self._model.get_sentence_embedding_dimension
+        self.dim = int(get_dim())
+
+    def embed_texts(self, texts: list[str]) -> list[list[float]]:
+        # Normalized embeddings make L2 distance rank-equivalent to cosine.
+        return self._model.encode(texts, normalize_embeddings=True).tolist()
+
+
+_factory: Callable[[str], Embedder] | None = None
+_cache: dict[str, Embedder] = {}
+
+
+def model_dim(model_id: str) -> int | None:
+    """Dimension for a known model id, without loading the model."""
+    return _MODEL_DIMS.get(model_id)
+
+
+def embedder_available() -> bool:
+    return find_spec("sentence_transformers") is not None
+
+
+def get_embedder(model_id: str) -> Embedder:
+    """Return a (cached) embedder for the model id.
+
+    Raises EmbeddingsUnavailableError with an install hint when the optional
+    dependency is missing, so callers can decide between fail-loud (ingest)
+    and warn-and-fall-back (retrieval).
+    """
+    if model_id in _cache:
+        return _cache[model_id]
+    if _factory is not None:
+        embedder = _factory(model_id)
+    elif not embedder_available():
+        raise EmbeddingsUnavailableError(
+            f"Embedding model {model_id!r} requested but sentence-transformers "
+            f"is not installed; run: {_INSTALL_HINT}"
+        )
+    else:
+        embedder = SentenceTransformerEmbedder(model_id)
+    _cache[model_id] = embedder
+    return embedder
+
+
+def set_embedder_factory(factory: Callable[[str], Embedder] | None) -> None:
+    """Test hook. Pass None to clear. Clears the cache either way."""
+    global _factory
+    _factory = factory
+    _cache.clear()
diff --git a/src/orc/retrieval/hybrid.py b/src/orc/retrieval/hybrid.py
new file mode 100644
index 0000000..a45404c
--- /dev/null
+++ b/src/orc/retrieval/hybrid.py
@@ -0,0 +1,182 @@
+"""Hybrid retrieval: BM25 + dense vectors fused with Reciprocal Rank Fusion.
+
+Opt-in per workspace via the embedding_model column — workspaces without it
+take the plain BM25 path and produce byte-identical results to before.
+
+Residual replay nondeterminism (documented, accepted):
+- The QUERY embedding is recomputed at replay time. chunk_vec rows are pinned
+  by corpus_version, but torch/SIMD/BLAS variance across machines or library
+  versions can perturb the query vector in the last few ulps, which can flip
+  near-tie KNN orderings. Frozen replay is therefore best-effort for the
+  vector leg; the trace records the method actually used.
+- If embedding deps are absent at replay time, retrieve() falls back to BM25
+  and records method="bm25" honestly rather than failing the replay. The
+  replay engine warns when the method differs from the original trace.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import sqlite3
+import warnings
+from dataclasses import dataclass
+
+from orc.errors import EmbeddingsUnavailableError
+from orc.retrieval.bm25 import RetrievedChunk, bm25_search
+from orc.retrieval.embedder import get_embedder
+from orc.storage.embeddings_store import (
+    knn_chunk_ids,
+    load_vec_extension,
+    vec_extension_available,
+)
+from orc.storage.workspace import Workspace
+
+
+@dataclass(frozen=True)
+class RetrievalResult:
+    chunks: list[RetrievedChunk]
+    method: str
+    candidates_considered: int
+
+
+# Same column set as bm25._SELECT minus the FTS score: vector hits hydrate into
+# the same RetrievedChunk shape so downstream consumers can't tell legs apart.
+_HYDRATE_SELECT = """
+SELECT
+    chunk.chunk_id      AS chunk_id,
+    chunk.evidence_id   AS evidence_id,
+    chunk.seq           AS seq,
+    chunk.text          AS text,
+    chunk.headings_path AS headings_path,
+    chunk.token_count   AS token_count,
+    evidence.title      AS evidence_title,
+    evidence.source_path AS evidence_source_path
+FROM chunk
+JOIN evidence ON evidence.evidence_id = chunk.evidence_id
+WHERE chunk.chunk_id IN ({placeholders})
+"""
+
+
+def vector_search(
+    conn: sqlite3.Connection,
+    query_embedding: list[float],
+    *,
+    limit: int,
+    corpus_version: int | None,
+) -> list[RetrievedChunk]:
+    """KNN over chunk_vec, hydrated to RetrievedChunk in KNN (nearest-first) order.
+
+    bm25_score is 0.0 for vector hits: the field carries the FTS score and a
+    vector distance is not comparable, so we keep the sentinel explicit.
+    """
+    hits = knn_chunk_ids(conn, query_embedding, limit=limit, corpus_version=corpus_version)
+    if not hits:
+        return []
+    ids = [chunk_id for chunk_id, _ in hits]
+    placeholders = ", ".join("?" for _ in ids)
+    rows = conn.execute(_HYDRATE_SELECT.format(placeholders=placeholders), ids).fetchall()
+    by_id = {row["chunk_id"]: row for row in rows}
+    out: list[RetrievedChunk] = []
+    for i, chunk_id in enumerate(cid for cid in ids if cid in by_id):
+        row = by_id[chunk_id]
+        out.append(
+            RetrievedChunk(
+                chunk_id=row["chunk_id"],
+                evidence_id=row["evidence_id"],
+                seq=row["seq"],
+                text=row["text"],
+                headings_path=row["headings_path"],
+                token_count=row["token_count"],
+                rank=i,
+                bm25_score=0.0,
+                evidence_title=row["evidence_title"],
+                evidence_source_path=row["evidence_source_path"],
+            )
+        )
+    return out
+
+
+def rrf_fuse(
+    bm25_results: list[RetrievedChunk],
+    vector_results: list[RetrievedChunk],
+    *,
+    k: int = 60,
+    limit: int,
+) -> list[RetrievedChunk]:
+    """Reciprocal Rank Fusion over the two legs, rank-only.
+
+    score(chunk) = sum over lists containing it of 1 / (k + rank + 1), with
+    0-based ranks. Rank-only fusion sidesteps the incomparability of BM25
+    scores and vector distances. For overlapping chunk_ids the BM25 instance
+    is kept so the real bm25_score survives into the trace. Ties sort by
+    chunk_id for deterministic, replayable output.
+    """
+    scores: dict[str, float] = {}
+    instances: dict[str, RetrievedChunk] = {}
+    for rank, chunk in enumerate(vector_results):
+        scores[chunk.chunk_id] = scores.get(chunk.chunk_id, 0.0) + 1.0 / (k + rank + 1)
+        instances[chunk.chunk_id] = chunk
+    for rank, chunk in enumerate(bm25_results):
+        scores[chunk.chunk_id] = scores.get(chunk.chunk_id, 0.0) + 1.0 / (k + rank + 1)
+        instances[chunk.chunk_id] = chunk  # BM25 instance wins on overlap
+    ordered = sorted(scores, key=lambda cid: (-scores[cid], cid))[:limit]
+    return [dataclasses.replace(instances[cid], rank=i) for i, cid in enumerate(ordered)]
+
+
+def retrieve(
+    conn: sqlite3.Connection,
+    query: str,
+    *,
+    workspace: Workspace,
+    limit: int = 50,
+    corpus_version: int | None = None,
+) -> RetrievalResult:
+    """Retrieve chunks for a query, hybrid when the workspace opts in.
+
+    The embedding model comes ONLY from workspace.embedding_model — no env var
+    override — because that column is the replay-pinned truth. When the model
+    is set but the vector leg can't run (deps or chunk_vec missing), retrieval
+    degrades to BM25 with a warning instead of failing: a read path must not
+    hard-fail on an optional acceleration.
+    """
+    model = workspace.embedding_model
+    if model is None:
+        chunks = bm25_search(conn, query, limit=limit, corpus_version=corpus_version)
+        return RetrievalResult(chunks=chunks, method="bm25", candidates_considered=len(chunks))
+
+    reason = _vector_leg_unavailable_reason(conn, model)
+    if reason is not None:
+        warnings.warn(
+            f"Workspace {workspace.name!r} has embedding_model={model!r} but {reason}; "
+            "falling back to BM25. Run `orc workspace embed` after installing "
+            'the embeddings extra (pip install "orc-ai[embeddings]").',
+            RuntimeWarning,
+            stacklevel=2,
+        )
+        chunks = bm25_search(conn, query, limit=limit, corpus_version=corpus_version)
+        return RetrievalResult(chunks=chunks, method="bm25", candidates_considered=len(chunks))
+
+    embedder = get_embedder(model)
+    [query_embedding] = embedder.embed_texts([query])
+    bm25_leg = bm25_search(conn, query, limit=limit, corpus_version=corpus_version)
+    vector_leg = vector_search(conn, query_embedding, limit=limit, corpus_version=corpus_version)
+    fused = rrf_fuse(bm25_leg, vector_leg, limit=limit)
+    union = {c.chunk_id for c in bm25_leg} | {c.chunk_id for c in vector_leg}
+    return RetrievalResult(chunks=fused, method="hybrid_rrf", candidates_considered=len(union))
+
+
+def _vector_leg_unavailable_reason(conn: sqlite3.Connection, model: str) -> str | None:
+    """None when the vector leg can run; otherwise a short human-readable reason."""
+    if not vec_extension_available():
+        return "the sqlite-vec extension is unavailable"
+    try:
+        get_embedder(model)
+        load_vec_extension(conn)
+    except EmbeddingsUnavailableError:
+        return "the embedding model dependencies are not installed"
+    except sqlite3.Error:  # load failure must degrade to BM25, not fail the read
+        return "the sqlite-vec extension could not be loaded"
+    row = conn.execute(
+        "SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'chunk_vec'"
+    ).fetchone()
+    return "the chunk_vec table does not exist yet" if row is None else None
diff --git a/src/orc/runs/replay.py b/src/orc/runs/replay.py
index 40e6154..cbf017f 100644
--- a/src/orc/runs/replay.py
+++ b/src/orc/runs/replay.py
@@ -19,6 +19,7 @@
 
 from __future__ import annotations
 
+import warnings
 from typing import Any
 
 from orc import directives
@@ -70,6 +71,9 @@ def replay(run_id: str, *, live: bool = False) -> dict[str, Any]:
         result = skill.run(workspace=ws, run=run, **skill_kwargs)
         run.close(output=result)
 
+    if not live:
+        _warn_on_retrieval_method_drift(original_trace=trace, new_retrieval=run.retrieval)
+
     return {
         "original_run_id": run_id,
         "new_run_id": run.run_id,
@@ -82,6 +86,27 @@ def replay(run_id: str, *, live: bool = False) -> dict[str, Any]:
     }
 
 
+def _warn_on_retrieval_method_drift(
+    *,
+    original_trace: dict[str, Any],
+    new_retrieval: dict[str, Any] | None,
+) -> None:
+    """Frozen replay promises reproduction; a retrieval method change (e.g.
+    hybrid_rrf -> bm25 because embedding deps are absent at replay time) means
+    the chunk pool may differ even with corpus_version pinned. Surface it
+    rather than letting the drift pass silently."""
+    original_method = (original_trace.get("retrieval") or {}).get("method")
+    new_method = (new_retrieval or {}).get("method")
+    if original_method and new_method and original_method != new_method:
+        warnings.warn(
+            f"Frozen replay used a different retrieval method than the original "
+            f"run: {original_method!r} -> {new_method!r}. Retrieved chunks may "
+            "differ; check embedding dependencies and chunk_vec state.",
+            RuntimeWarning,
+            stacklevel=3,
+        )
+
+
 def _resolve_replay_kwargs(
     *,
     spec: Any,
diff --git a/src/orc/storage/embeddings_store.py b/src/orc/storage/embeddings_store.py
new file mode 100644
index 0000000..6a3e9fe
--- /dev/null
+++ b/src/orc/storage/embeddings_store.py
@@ -0,0 +1,179 @@
+"""chunk_vec vector store backed by the sqlite-vec extension.
+
+The table is created lazily (only workspaces that opt into embeddings pay for
+it), and its dimension is stamped into schema_meta so a later open with a
+different embedding model fails loudly instead of silently mixing vector
+spaces.
+"""
+
+from __future__ import annotations
+
+import sqlite3
+from importlib.util import find_spec
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from orc.retrieval.embedder import Embedder
+
+_DIM_META_KEY = "chunk_vec_dim"
+
+
+def vec_extension_available() -> bool:
+    """Report whether sqlite-vec looks loadable in this interpreter.
+
+    Requires both an importable `sqlite_vec` module and a sqlite3 build whose
+    Connection type exposes `enable_load_extension`.
+    """
+    if find_spec("sqlite_vec") is None:
+        return False
+    return hasattr(sqlite3.Connection, "enable_load_extension")
+
+
+def load_vec_extension(conn: sqlite3.Connection) -> None:
+    """Ensure sqlite-vec is loaded on `conn`; no-op when `vec_version()` works."""
+    try:
+        conn.execute("SELECT vec_version()")
+    except sqlite3.OperationalError:
+        pass  # probe failed: fall through and load the extension
+    else:
+        return
+    import sqlite_vec
+
+    conn.enable_load_extension(True)
+    try:
+        sqlite_vec.load(conn)
+    finally:
+        # Re-disable even if load() raised: this connection also runs retrieval queries.
+        conn.enable_load_extension(False)
+
+
+def ensure_chunk_vec(conn: sqlite3.Connection, dim: int) -> None:
+    """Make sure chunk_vec exists for `dim`-wide vectors and record that width.
+
+    Raises ValueError when schema_meta already records a different width, since
+    vectors produced by different embedding models must not share one table.
+    """
+    existing_dim = _stamped_dim(conn)
+    needs_stamp = existing_dim is None
+    if not needs_stamp and existing_dim != dim:
+        raise ValueError(
+            f"chunk_vec dim mismatch: table was created with dim={existing_dim}, "
+            f"requested dim={dim}. Re-embed the workspace with one model."
+        )
+    create_sql = (
+        "CREATE VIRTUAL TABLE IF NOT EXISTS chunk_vec USING vec0("
+        f"chunk_id TEXT PRIMARY KEY, embedding FLOAT[{dim}], corpus_version INTEGER)"
+    )
+    conn.execute(create_sql)
+    if needs_stamp:
+        stamp_sql = "INSERT OR REPLACE INTO schema_meta(key, value) VALUES (?, ?)"
+        conn.execute(stamp_sql, (_DIM_META_KEY, str(dim)))
+
+
+def store_chunk_embeddings(
+    conn: sqlite3.Connection,
+    items: list[tuple[str, int, list[float]]],
+) -> None:
+    """Write one chunk_vec row per (chunk_id, corpus_version, vector) item.
+
+    Runs inside whatever transaction the caller has open; nothing is committed
+    here. When a dim is stamped, every vector length is checked against it
+    before the first INSERT, and a mismatch raises ValueError.
+    """
+    import sqlite_vec
+
+    expected = _stamped_dim(conn)
+    if expected is not None:
+        for chunk_id, _, vector in items:
+            if len(vector) != expected:
+                raise ValueError(
+                    f"embedding for chunk {chunk_id!r} has dim {len(vector)}, expected {expected}"
+                )
+    rows = [
+        (chunk_id, sqlite_vec.serialize_float32(vector), corpus_version)
+        for chunk_id, corpus_version, vector in items
+    ]
+    sql = "INSERT INTO chunk_vec(chunk_id, embedding, corpus_version) VALUES (?, ?, ?)"
+    conn.executemany(sql, rows)
+
+
+def knn_chunk_ids(
+    conn: sqlite3.Connection,
+    query_vec: list[float],
+    *,
+    limit: int,
+    corpus_version: int | None = None,
+) -> list[tuple[str, float]]:
+    """Return up to `limit` (chunk_id, distance) pairs closest to `query_vec`.
+
+    Results are sorted by distance, then chunk_id, so equal distances come
+    back in a fixed order. A non-None `corpus_version` restricts the scan to
+    rows whose corpus_version is <= that value.
+    """
+    import sqlite_vec
+
+    clauses = ["embedding MATCH ?", "k = ?"]
+    bindings: list = [sqlite_vec.serialize_float32(query_vec), limit]
+    if corpus_version is not None:
+        clauses.append("corpus_version <= ?")
+        bindings.append(corpus_version)
+    knn_sql = "SELECT chunk_id, distance FROM chunk_vec WHERE " + " AND ".join(clauses)
+    # MATERIALIZED CTE: the chunk_id tie-break is applied outside the vec0 KNN scan.
+    hits = conn.execute(
+        f"WITH knn AS MATERIALIZED ({knn_sql}) "
+        "SELECT chunk_id, distance FROM knn ORDER BY distance, chunk_id",
+        bindings,
+    ).fetchall()
+    return [(hit["chunk_id"], float(hit["distance"])) for hit in hits]
+
+
+def chunks_missing_embeddings(conn: sqlite3.Connection) -> list[sqlite3.Row]:
+    """List chunks that have no chunk_vec row yet, ordered by chunk_id.
+
+    Rows expose chunk_id, text, and the owning evidence's corpus_version.
+    """
+    query = (
+        "SELECT chunk.chunk_id AS chunk_id, chunk.text AS text, "
+        "evidence.corpus_version AS corpus_version "
+        "FROM chunk JOIN evidence ON evidence.evidence_id = chunk.evidence_id "
+        "WHERE chunk.chunk_id NOT IN (SELECT chunk_id FROM chunk_vec) "
+        "ORDER BY chunk.chunk_id"
+    )
+    return conn.execute(query).fetchall()
+
+
+def backfill_embeddings(
+    conn: sqlite3.Connection, embedder: Embedder, batch_size: int = 64
+) -> int:
+    """Embed every chunk that has no chunk_vec row yet. Returns rows written.
+
+    Idempotent: only chunks without a vector are touched, so re-running after
+    a crash is safe. Each vector is stamped with its evidence's ORIGINAL
+    corpus_version so frozen replay filters stay truthful. Batches commit
+    independently; a mid-run failure keeps completed batches. Raises
+    ValueError when batch_size < 1 (a negative step would skip every batch).
+    """
+    if batch_size < 1:
+        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+    from orc.storage.db import transaction
+
+    missing = chunks_missing_embeddings(conn)
+    written = 0
+    for start in range(0, len(missing), batch_size):
+        batch = missing[start : start + batch_size]
+        vectors = embedder.embed_texts([row["text"] for row in batch])
+        rows = [
+            (row["chunk_id"], row["corpus_version"], vector)
+            for row, vector in zip(batch, vectors, strict=True)
+        ]
+        with transaction(conn):
+            store_chunk_embeddings(conn, rows)
+        written += len(batch)
+    return written
+
+
+def _stamped_dim(conn: sqlite3.Connection) -> int | None:
+    """Read the chunk_vec dimension recorded in schema_meta, or None if unset."""
+    lookup = "SELECT value FROM schema_meta WHERE key = ?"
+    found = conn.execute(lookup, (_DIM_META_KEY,)).fetchone()
+    return None if found is None else int(found["value"])
diff --git a/src/orc/storage/schema.sql b/src/orc/storage/schema.sql
index 9f8b879..e58d626 100644
--- a/src/orc/storage/schema.sql
+++ b/src/orc/storage/schema.sql
@@ -55,9 +55,11 @@ CREATE TRIGGER IF NOT EXISTS chunk_au AFTER UPDATE ON chunk BEGIN
     INSERT INTO chunk_fts(rowid, text) VALUES (new.rowid, new.text);
 END;
 
--- chunk_vec is created lazily by storage/embeddings_store.py when embeddings are
--- enabled for a workspace. Schema:
---   CREATE VIRTUAL TABLE chunk_vec USING vec0(chunk_id TEXT PRIMARY KEY, embedding FLOAT[N]);
+-- chunk_vec is created lazily by storage/embeddings_store.py (ensure_chunk_vec)
+-- when embeddings are enabled for a workspace. Requires the sqlite-vec extension.
+-- The vector dimension N is stamped in schema_meta under 'chunk_vec_dim'. Schema:
+--   CREATE VIRTUAL TABLE chunk_vec USING vec0(
+--       chunk_id TEXT PRIMARY KEY, embedding FLOAT[N], corpus_version INTEGER);
 
 CREATE TABLE IF NOT EXISTS run (
     run_id               TEXT PRIMARY KEY,
diff --git a/tests/_fake_embedder.py b/tests/_fake_embedder.py
new file mode 100644
index 0000000..258019c
--- /dev/null
+++ b/tests/_fake_embedder.py
@@ -0,0 +1,44 @@
+"""Fake embedder for tests. Deterministic keyword -> one-hot mapping.
+
+Scripting semantic hits: pass a vocabulary mapping keyword -> dimension index.
+Any text containing a vocabulary keyword embeds with 1.0 at that index, so a
+query sharing the keyword lands at L2 distance 0 from the chunk. Texts with no
+vocabulary hit fall back to 0.5 at a CRC32 bucket (stable across processes,
+unlike Python's randomized str hash); the 0.5 magnitude guarantees a fallback
+vector never equals a scripted one-hot even when the buckets collide.
+"""
+
+from __future__ import annotations
+
+import re
+import zlib
+
+
+class FakeEmbedder:
+    """Deterministic test embedder; vocabulary keywords match case-insensitively."""
+
+    def __init__(
+        self,
+        dim: int = 8,
+        *,
+        model_id: str = "fake-embedder",
+        vocabulary: dict[str, int] | None = None,
+    ) -> None:
+        self.model_id = model_id
+        self.dim = dim
+        self.vocabulary = dict(vocabulary or {})  # copied; callers may update it later
+        self.calls: list[list[str]] = []  # one entry per embed_texts() batch
+
+    def embed_texts(self, texts: list[str]) -> list[list[float]]:
+        self.calls.append(list(texts))  # record the batch before embedding it
+        return [self._embed(t) for t in texts]
+
+    def _embed(self, text: str) -> list[float]:
+        vec = [0.0] * self.dim
+        words = set(re.findall(r"\w+", text.lower()))  # whole words, lowercased
+        hits = [i for kw, i in self.vocabulary.items() if kw.lower() in words]
+        for index in hits:
+            vec[index % self.dim] = 1.0
+        if not hits:  # no scripted keyword: 0.5 at a stable CRC32 bucket
+            vec[zlib.crc32(text.encode("utf-8")) % self.dim] = 0.5
+        return vec
diff --git a/tests/conftest.py b/tests/conftest.py
index 722bef4..9d6af61 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,9 +8,13 @@
 import os
 from collections.abc import Iterator
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 import pytest
 
+if TYPE_CHECKING:
+    from tests._fake_embedder import FakeEmbedder
+
 # Every env var that lets orc.llm.client.get_client() construct a live provider.
 # get_client() PREFERS OPENROUTER_API_KEY over ANTHROPIC_API_KEY, and ORC_PROVIDER
 # can force either path, so stripping only the Anthropic key is not enough.
@@ -31,3 +35,19 @@ def _no_live_llm_env(monkeypatch: pytest.MonkeyPatch) -> None:
     if not os.environ.get("ORC_TEST_ALLOW_LIVE_LLM"):
         for var in _LIVE_LLM_ENV_VARS:
             monkeypatch.delenv(var, raising=False)
+
+
+@pytest.fixture
+def fake_embedder() -> Iterator[FakeEmbedder]:
+    """Yield an 8-dim FakeEmbedder registered as the embedder factory's result.
+
+    Every model_id resolves to the same instance while the test runs; the
+    factory is set back to None once the test finishes.
+    """
+    from orc.retrieval.embedder import set_embedder_factory
+    from tests._fake_embedder import FakeEmbedder
+
+    shared = FakeEmbedder(dim=8)
+    set_embedder_factory(lambda model_id: shared)
+    yield shared
+    set_embedder_factory(None)
diff --git a/tests/unit/test_embedder.py b/tests/unit/test_embedder.py
new file mode 100644
index 0000000..3bd608d
--- /dev/null
+++ b/tests/unit/test_embedder.py
@@ -0,0 +1,66 @@
+"""Embedder protocol tests: registry, factory hook, availability, errors."""
+
+from __future__ import annotations
+
+import pytest
+
+from orc.errors import EmbeddingsUnavailableError, OrcError
+from orc.retrieval import embedder as embedder_module
+from orc.retrieval.embedder import (
+    DEFAULT_EMBEDDING_MODEL,
+    embedder_available,
+    get_embedder,
+    model_dim,
+    set_embedder_factory,
+)
+from tests._fake_embedder import FakeEmbedder
+
+
+def test_registry_knows_default_model_dim_without_loading() -> None:
+    assert model_dim(DEFAULT_EMBEDDING_MODEL) == 384  # only model_dim() is called here
+
+
+def test_registry_returns_none_for_unknown_model() -> None:
+    assert model_dim("not/a-model") is None  # unknown id yields None, not an error
+
+
+def test_get_embedder_uses_factory_and_caches() -> None:
+    stub = FakeEmbedder(dim=8)
+    set_embedder_factory(lambda model_id: stub)
+    try:
+        # Two lookups of the same id must both hand back the factory's object.
+        lookups = [get_embedder("any-model"), get_embedder("any-model")]
+        assert lookups[0] is stub
+        assert lookups[1] is stub
+    finally:
+        set_embedder_factory(None)  # undo the hook even if an assert fails
+
+
+def test_get_embedder_raises_with_install_hint_when_deps_missing(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    set_embedder_factory(None)  # clear any installed factory hook
+    monkeypatch.setattr(embedder_module, "find_spec", lambda name: None)  # no spec is ever found
+    with pytest.raises(EmbeddingsUnavailableError, match=r'pip install "orc-ai\[embeddings\]"'):
+        get_embedder(DEFAULT_EMBEDDING_MODEL)
+
+
+def test_embeddings_unavailable_error_is_orc_error() -> None:
+    assert issubclass(EmbeddingsUnavailableError, OrcError)  # catchable as OrcError
+
+
+def test_embedder_available_false_when_find_spec_fails(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(embedder_module, "find_spec", lambda name: None)  # no spec is ever found
+    assert embedder_available() is False
+
+
+def test_fake_embedder_is_deterministic_and_scriptable() -> None:
+    embedder = FakeEmbedder(dim=8, vocabulary={"skills": 2})
+    (lower_hit,) = embedder.embed_texts(["the skills api"])
+    (upper_hit,) = embedder.embed_texts(["SKILLS everywhere"])
+    assert lower_hit == upper_hit
+    assert lower_hit[2] == 1.0
+    (off_vocab,) = embedder.embed_texts(["kubernetes pods"])
+    assert off_vocab != lower_hit
diff --git a/tests/unit/test_embeddings_store.py b/tests/unit/test_embeddings_store.py
new file mode 100644
index 0000000..f1b4245
--- /dev/null
+++ b/tests/unit/test_embeddings_store.py
@@ -0,0 +1,107 @@
+"""Embeddings store tests: chunk_vec lifecycle, KNN, and availability probing."""
+
+from __future__ import annotations
+
+import sqlite3
+
+import pytest
+
+from orc.storage import embeddings_store
+from orc.storage.db import bootstrap_schema
+
+
+def _connect() -> sqlite3.Connection:
+    db = sqlite3.connect(":memory:", isolation_level=None)  # in-memory, autocommit
+    db.row_factory = sqlite3.Row  # tests read columns by name
+    bootstrap_schema(db)
+    return db
+
+
+def _vec_conn(dim: int) -> sqlite3.Connection:
+    pytest.importorskip("sqlite_vec")  # skips the calling test when the wheel is absent
+    db = _connect()
+    embeddings_store.load_vec_extension(db)
+    embeddings_store.ensure_chunk_vec(db, dim)
+    return db
+
+
+def test_store_and_knn_roundtrip() -> None:
+    db = _vec_conn(4)
+    x_axis = [1.0, 0.0, 0.0, 0.0]
+    y_axis = [0.0, 1.0, 0.0, 0.0]
+    embeddings_store.store_chunk_embeddings(
+        db, [("c1", 1, x_axis), ("c2", 1, y_axis)]
+    )
+    # Query with c1's own vector: c1 is an exact match, c2 is strictly farther.
+    ranked = embeddings_store.knn_chunk_ids(db, x_axis, limit=2)
+    assert [chunk_id for chunk_id, _ in ranked] == ["c1", "c2"]
+    nearest, runner_up = ranked[0][1], ranked[1][1]
+    assert nearest == pytest.approx(0.0)
+    assert nearest < runner_up
+
+
+def test_knn_corpus_version_filter() -> None:
+    db = _vec_conn(4)
+    same_vec = [1.0, 0.0, 0.0, 0.0]
+    # Identical vectors, different corpus versions: only the filter separates them.
+    embeddings_store.store_chunk_embeddings(
+        db,
+        [("c1", 1, same_vec), ("c2", 2, same_vec)],
+    )
+    found = embeddings_store.knn_chunk_ids(db, same_vec, limit=5, corpus_version=1)
+    returned_ids = [chunk_id for chunk_id, _ in found]
+    assert returned_ids == ["c1"]
+
+
+def test_knn_equal_distances_tie_break_on_chunk_id() -> None:
+    db = _vec_conn(4)
+    shared_vec = [0.0, 1.0, 0.0, 0.0]
+    # c2 is written before c1, so insertion order cannot explain a c1-first result.
+    embeddings_store.store_chunk_embeddings(
+        db,
+        [("c2", 1, shared_vec), ("c1", 1, shared_vec)],
+    )
+    tied = embeddings_store.knn_chunk_ids(db, shared_vec, limit=2)
+    ordered_ids = [chunk_id for chunk_id, _ in tied]
+    # Both rows hold the query vector itself, so their distances are equal.
+    assert ordered_ids == ["c1", "c2"]
+
+
+def test_ensure_chunk_vec_dim_mismatch_raises() -> None:
+    conn = _vec_conn(4)  # chunk_vec already ensured at dim=4
+    with pytest.raises(ValueError, match="dim"):
+        embeddings_store.ensure_chunk_vec(conn, 8)  # second call asks for dim=8
+
+
+def test_store_rejects_wrong_length_vector() -> None:
+    conn = _vec_conn(4)  # stamped dim is 4
+    with pytest.raises(ValueError, match="dim"):
+        embeddings_store.store_chunk_embeddings(conn, [("c1", 1, [1.0, 0.0])])  # length 2
+
+
+def test_vec_extension_available_false_when_find_spec_fails(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(embeddings_store, "find_spec", lambda name: None)  # wheel looks absent
+    assert embeddings_store.vec_extension_available() is False
+
+
+def test_chunks_missing_embeddings_lists_unembedded_chunks() -> None:
+    db = _vec_conn(4)
+    db.execute(
+        "INSERT INTO evidence(evidence_id, source_path, stored_path, sha256, mime_type, "
+        "ingested_at, corpus_version) VALUES (?,?,?,?,?,?,?)",
+        ("ev1", "/x", "/y", "deadbeef", "text/plain", "2026-06-12T00:00:00Z", 3),
+    )
+    chunk_row = ("c1", "ev1", 0, "hello world", 2, None, 0, 11)
+    db.execute(
+        "INSERT INTO chunk(chunk_id, evidence_id, seq, text, token_count, headings_path, "
+        "start_offset, end_offset) VALUES (?,?,?,?,?,?,?,?)",
+        chunk_row,
+    )
+    pending = embeddings_store.chunks_missing_embeddings(db)
+    assert [(p["chunk_id"], p["text"], p["corpus_version"]) for p in pending] == [
+        ("c1", "hello world", 3)
+    ]
+    embeddings_store.store_chunk_embeddings(db, [("c1", 3, [1.0, 0.0, 0.0, 0.0])])
+    assert embeddings_store.chunks_missing_embeddings(db) == []
diff --git a/tests/unit/test_hybrid.py b/tests/unit/test_hybrid.py
new file mode 100644
index 0000000..d0ed4fb
--- /dev/null
+++ b/tests/unit/test_hybrid.py
@@ -0,0 +1,238 @@
+"""Hybrid retrieval tests: RRF fusion math, vector hydration, fallbacks."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from orc.retrieval import retrieve, rrf_fuse, vector_search
+from orc.retrieval.bm25 import RetrievedChunk, bm25_search
+from orc.retrieval.embedder import set_embedder_factory
+from orc.storage import embeddings_store
+from orc.storage import workspace as ws_module
+from orc.storage.db import open_connection
+from tests._fake_embedder import FakeEmbedder
+
+
+def _chunk(chunk_id: str, *, rank: int, bm25_score: float = 0.0) -> RetrievedChunk:
+    """Build a RetrievedChunk stub; only id, rank and bm25_score vary per call."""
+    # Constant filler fields.
+    fixed = {"seq": 0, "headings_path": None, "token_count": 3, "evidence_title": None}
+    return RetrievedChunk(
+        chunk_id=chunk_id,
+        evidence_id=f"ev-{chunk_id}",
+        text=f"text {chunk_id}",
+        rank=rank,
+        bm25_score=bm25_score,
+        evidence_source_path="/x",
+        **fixed,
+    )
+
+
+def test_rrf_fuse_hand_computed_scores() -> None:
+    # With k=60 and 0-based ranks each list contributes 1/(61 + rank):
+    #   A = 1/61            (lexical rank 0 only)
+    #   B = 1/62 + 1/61     (lexical rank 1, vector rank 0)
+    #   C = 1/62            (vector rank 1 only)
+    # Expected order: B, A, C.
+    lexical = [_chunk("A", rank=0, bm25_score=-5.0), _chunk("B", rank=1, bm25_score=-4.0)]
+    semantic = [_chunk("B", rank=0), _chunk("C", rank=1)]
+    merged = rrf_fuse(lexical, semantic, k=60, limit=10)
+    assert [hit.chunk_id for hit in merged] == ["B", "A", "C"]
+    assert [hit.rank for hit in merged] == [0, 1, 2]
+
+
+def test_rrf_fuse_overlap_keeps_real_bm25_score() -> None:
+    lexical_hit = _chunk("A", rank=0, bm25_score=-7.5)
+    vector_hit = _chunk("A", rank=0)  # same chunk, default bm25_score of 0.0
+    (only,) = rrf_fuse([lexical_hit], [vector_hit], k=60, limit=10)
+    assert only.bm25_score == -7.5
+
+
+def test_rrf_fuse_vector_only_chunk_has_zero_bm25_score() -> None:
+    merged = rrf_fuse([], [_chunk("V", rank=0)], k=60, limit=10)  # empty lexical list
+    assert [hit.chunk_id for hit in merged] == ["V"]
+    assert merged[0].bm25_score == 0.0
+
+
+def test_rrf_fuse_ties_order_by_chunk_id() -> None:
+    # B is only in the first (bm25) list and A only in the second (vector) list,
+    # both at rank 0, so their scores tie; chunk_id order must put A first.
+    lexical, semantic = [_chunk("B", rank=0)], [_chunk("A", rank=0)]
+    assert [hit.chunk_id for hit in rrf_fuse(lexical, semantic, k=60, limit=10)] == ["A", "B"]
+
+
+def test_rrf_fuse_respects_limit() -> None:
+    three_hits = [_chunk(cid, rank=pos) for pos, cid in enumerate("ABC")]
+    capped = rrf_fuse(three_hits, [], k=60, limit=2)
+    assert len(capped) == 2  # three candidates in, limit=2 out
+
+
+def _setup_embedded_corpus(tmp_path: Path, fake: FakeEmbedder) -> ws_module.Workspace:
+    """Create workspace "demo" and ingest two markdown docs into it.
+
+    Chunks still missing from chunk_vec afterwards are embedded with `fake`.
+    Returns the workspace as re-resolved by name.
+    """
+    from orc.ingest.pipeline import ingest as do_ingest
+    from orc.paths import workspace_db_path
+
+    fake.vocabulary.update({"caching": 0, "skills": 1})
+    ws = ws_module.create("demo", embedding_model=fake.model_id)
+    caching_md = "# Prompt caching\n\nPrompt caching has a 5-minute ephemeral TTL by default.\n"
+    skills_md = "# Skills API\n\nThe Skills API ships versioned auditable capabilities.\n"
+    corpus = tmp_path / "corpus"
+    corpus.mkdir()
+    (corpus / "caching.md").write_text(caching_md)
+    (corpus / "skills.md").write_text(skills_md)
+    do_ingest(ws, str(corpus))
+
+    with open_connection(workspace_db_path(ws.name)) as conn:
+        embeddings_store.load_vec_extension(conn)
+        embeddings_store.ensure_chunk_vec(conn, fake.dim)
+        pending = embeddings_store.chunks_missing_embeddings(conn)
+        if pending:
+            vectors = fake.embed_texts([row["text"] for row in pending])
+            rows = [
+                (row["chunk_id"], row["corpus_version"], vector)
+                for row, vector in zip(pending, vectors, strict=True)
+            ]
+            embeddings_store.store_chunk_embeddings(conn, rows)
+    return ws_module.resolve(ws.name)
+
+
+def test_vector_search_hydrates_in_knn_order(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    from orc.paths import workspace_db_path
+
+    workspace = _setup_embedded_corpus(tmp_path, fake_embedder)
+    # "skills" is scripted onto the skills doc, which must therefore rank first.
+    (probe,) = fake_embedder.embed_texts(["skills"])
+    with open_connection(workspace_db_path(workspace.name)) as conn:
+        embeddings_store.load_vec_extension(conn)
+        hits = vector_search(conn, probe, limit=5, corpus_version=None)
+    assert hits[0].evidence_title == "Skills API"
+    assert hits[0].text.startswith("# Skills API")
+    assert [hit.rank for hit in hits] == list(range(len(hits)))
+    assert all(hit.bm25_score == 0.0 for hit in hits)
+
+
+def test_retrieve_uses_bm25_when_no_embedding_model(
+    orc_home: Path, tmp_path: Path
+) -> None:
+    from orc.ingest.pipeline import ingest as do_ingest
+    from orc.paths import workspace_db_path
+
+    plain_ws = ws_module.create("plain")  # no embedding_model argument
+    corpus_dir = tmp_path / "corpus"
+    corpus_dir.mkdir()
+    (corpus_dir / "a.md").write_text("# Doc\n\nPrompt caching has a 5-minute TTL.\n")
+    do_ingest(plain_ws, str(corpus_dir))
+
+    with open_connection(workspace_db_path(plain_ws.name)) as conn:
+        outcome = retrieve(conn, "prompt caching", workspace=plain_ws, limit=5)
+        baseline_ids = [hit.chunk_id for hit in bm25_search(conn, "prompt caching", limit=5)]
+    assert outcome.method == "bm25"
+    assert [hit.chunk_id for hit in outcome.chunks] == baseline_ids
+    assert outcome.candidates_considered == len(baseline_ids)
+
+
+def test_retrieve_falls_back_when_vec_extension_missing(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    from orc.paths import workspace_db_path
+    from orc.retrieval import hybrid as hybrid_module
+
+    workspace = _setup_embedded_corpus(tmp_path, fake_embedder)
+    monkeypatch.setattr(hybrid_module, "vec_extension_available", lambda: False)
+    with open_connection(workspace_db_path(workspace.name)) as conn:  # noqa: SIM117
+        with pytest.warns(RuntimeWarning, match="orc workspace embed"):
+            outcome = retrieve(conn, "skills", workspace=workspace, limit=5)
+    assert outcome.method == "bm25"  # degraded to lexical-only retrieval
+
+
+def test_retrieve_falls_back_when_embedder_missing(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    from orc.paths import workspace_db_path
+    from orc.retrieval import embedder as embedder_module
+
+    workspace = _setup_embedded_corpus(tmp_path, fake_embedder)
+    # Vectors are stored, but no embedder can be built for the query side.
+    set_embedder_factory(None)
+    monkeypatch.setattr(embedder_module, "find_spec", lambda name: None)
+    try:
+        with open_connection(workspace_db_path(workspace.name)) as conn:  # noqa: SIM117
+            with pytest.warns(RuntimeWarning, match="orc workspace embed"):
+                outcome = retrieve(conn, "skills", workspace=workspace, limit=5)
+    finally:
+        set_embedder_factory(lambda model_id: fake_embedder)  # put the fake back
+    assert outcome.method == "bm25"
+
+
+def test_retrieve_falls_back_when_chunk_vec_absent(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    from orc.ingest.pipeline import ingest as do_ingest
+    from orc.paths import workspace_db_path
+
+    # The workspace is created and ingested with no embedding model, so no
+    # chunk_vec table exists. Setting the model afterwards must still leave
+    # retrieval usable until `orc workspace embed` is run.
+    ws = ws_module.create("novec")
+    corpus_dir = tmp_path / "corpus"
+    corpus_dir.mkdir()
+    (corpus_dir / "a.md").write_text("# Doc\n\nSkills are versioned capabilities.\n")
+    do_ingest(ws, str(corpus_dir))
+    with open_connection(workspace_db_path(ws.name)) as conn:
+        enable_sql = "UPDATE workspace SET embedding_model = ? WHERE name = ?"
+        conn.execute(enable_sql, (fake_embedder.model_id, ws.name))
+        # The workspace is re-resolved inside the warns block, after the
+        # UPDATE, exactly where the retrieve call is made.
+        with pytest.warns(RuntimeWarning, match="orc workspace embed"):
+            found = retrieve(conn, "skills", workspace=ws_module.resolve(ws.name), limit=5)
+    assert found.method == "bm25"
+
+
+def test_search_evidence_skill_records_hybrid_method(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    from orc.directives.research.skills.search_evidence import search_evidence
+    from orc.runs import open_run
+    from orc.storage.trace_store import load_trace
+
+    workspace = _setup_embedded_corpus(tmp_path, fake_embedder)
+    with open_run(workspace, directive="research", skill="search_evidence", inputs={}) as run:
+        output = search_evidence.run(workspace=workspace, run=run, query="skills", k=5)
+        run.close(output=output)
+
+    recorded = load_trace(run.run_id)["retrieval"]
+    assert recorded["method"] == "hybrid_rrf"
+    assert recorded["candidates_considered"] >= 1
+    assert output["chunks"], "expected fused hits"
+
+
+def test_retrieve_hybrid_fuses_and_reports_union(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    from orc.paths import workspace_db_path
+
+    workspace = _setup_embedded_corpus(tmp_path, fake_embedder)
+    with open_connection(workspace_db_path(workspace.name)) as conn:
+        outcome = retrieve(conn, "skills", workspace=workspace, limit=5)
+        lexical_ids = {hit.chunk_id for hit in bm25_search(conn, "skills", limit=5)}
+        (probe,) = fake_embedder.embed_texts(["skills"])
+        vector_hits = vector_search(conn, probe, limit=5, corpus_version=None)
+    union_ids = lexical_ids | {hit.chunk_id for hit in vector_hits}
+    assert outcome.method == "hybrid_rrf"
+    assert outcome.candidates_considered == len(union_ids)
+    assert any(hit.evidence_title == "Skills API" for hit in outcome.chunks)  # scripted doc
+    assert [hit.rank for hit in outcome.chunks] == list(range(len(outcome.chunks)))
diff --git a/tests/unit/test_ingest_embeddings.py b/tests/unit/test_ingest_embeddings.py
new file mode 100644
index 0000000..4f9865d
--- /dev/null
+++ b/tests/unit/test_ingest_embeddings.py
@@ -0,0 +1,154 @@
+"""Embed-at-ingest and backfill tests."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from orc.errors import IngestError
+from orc.ingest.pipeline import ingest as do_ingest
+from orc.paths import workspace_db_path
+from orc.retrieval.embedder import set_embedder_factory
+from orc.storage import embeddings_store
+from orc.storage import workspace as ws_module
+from orc.storage.db import open_connection
+from tests._fake_embedder import FakeEmbedder
+
+
+def _write_doc(tmp_path: Path, name: str, text: str) -> Path:
+    """Write `text` as UTF-8 to tmp_path/name and return the resulting path."""
+    (doc := tmp_path / name).write_text(text, encoding="utf-8")
+    return doc
+
+
+def test_ingest_embeds_chunks_atomically(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    workspace = ws_module.create("demo", embedding_model=fake_embedder.model_id)
+    source = _write_doc(tmp_path, "a.md", "# Doc A\n\nThe Skills API ships in October 2025.\n")
+    do_ingest(workspace, str(source))
+
+    with open_connection(workspace_db_path(workspace.name)) as conn:
+        embeddings_store.load_vec_extension(conn)
+        n_chunks = conn.execute("SELECT COUNT(*) AS n FROM chunk").fetchone()["n"]
+        vec_sql = "SELECT chunk_id, corpus_version FROM chunk_vec ORDER BY chunk_id"
+        stored = conn.execute(vec_sql).fetchall()
+        evidence_row = conn.execute("SELECT corpus_version FROM evidence").fetchone()
+        ingested_cv = evidence_row["corpus_version"]
+    # One vector per chunk, each carrying the evidence row's corpus_version.
+    vector_versions = [row["corpus_version"] for row in stored]
+    assert n_chunks >= 1
+    assert len(stored) == n_chunks
+    assert all(version == ingested_cv for version in vector_versions)
+
+
+def test_ingest_rolls_back_when_embedding_fails(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+
+    class _BackendDown(RuntimeError):
+        """Sentinel so the test only catches the failure it injected."""
+
+    def _always_fail(texts: list[str]) -> list[list[float]]:
+        raise _BackendDown("embedding backend down")
+
+    fake_embedder.embed_texts = _always_fail  # type: ignore[method-assign]
+    workspace = ws_module.create("demo", embedding_model=fake_embedder.model_id)
+    source = _write_doc(tmp_path, "a.md", "# Doc A\n\nSome content.\n")
+    with pytest.raises(_BackendDown):
+        do_ingest(workspace, str(source))
+
+    with open_connection(workspace_db_path(workspace.name)) as conn:  # nothing may persist
+        assert conn.execute("SELECT COUNT(*) AS n FROM evidence").fetchone()["n"] == 0
+        assert conn.execute("SELECT COUNT(*) AS n FROM chunk").fetchone()["n"] == 0
+
+
+def test_ingest_fails_loud_when_model_set_but_embedder_missing(
+    orc_home: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    from orc.retrieval import embedder as embedder_module
+
+    set_embedder_factory(None)  # clear any installed factory hook
+    monkeypatch.setattr(embedder_module, "find_spec", lambda name: None)
+    workspace = ws_module.create("demo", embedding_model="some-model")
+    source = _write_doc(tmp_path, "a.md", "# Doc A\n\nSome content.\n")
+    with pytest.raises(IngestError, match=r'pip install "orc-ai\[embeddings\]"'):
+        do_ingest(workspace, str(source))
+
+
+def _seed_two_versions_unembedded(tmp_path: Path) -> ws_module.Workspace:
+    """Seed "demo" (no embeddings) with two ingests -> corpus_version 1 and 2."""
+    plain_ws = ws_module.create("demo")
+    do_ingest(plain_ws, str(_write_doc(tmp_path, "a.md", "# Doc A\n\nFirst document body.\n")))
+    do_ingest(plain_ws, str(_write_doc(tmp_path, "b.md", "# Doc B\n\nSecond document body.\n")))
+    return ws_module.resolve(plain_ws.name)
+
+
+def test_backfill_preserves_original_corpus_versions(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    workspace = _seed_two_versions_unembedded(tmp_path)
+    with open_connection(workspace_db_path(workspace.name)) as conn:
+        embeddings_store.load_vec_extension(conn)
+        embeddings_store.ensure_chunk_vec(conn, fake_embedder.dim)
+        written = embeddings_store.backfill_embeddings(conn, fake_embedder)
+        version_pairs = conn.execute(
+            "SELECT chunk_vec.corpus_version AS vec_cv, evidence.corpus_version AS ev_cv "
+            "FROM chunk_vec "
+            "JOIN chunk ON chunk.chunk_id = chunk_vec.chunk_id "
+            "JOIN evidence ON evidence.evidence_id = chunk.evidence_id"
+        ).fetchall()
+    assert written == len(version_pairs) >= 2
+    assert all(pair["vec_cv"] == pair["ev_cv"] for pair in version_pairs)
+    assert {pair["ev_cv"] for pair in version_pairs} == {1, 2}
+
+
+def test_backfill_is_idempotent(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    workspace = _seed_two_versions_unembedded(tmp_path)
+    with open_connection(workspace_db_path(workspace.name)) as conn:
+        embeddings_store.load_vec_extension(conn)
+        embeddings_store.ensure_chunk_vec(conn, fake_embedder.dim)
+        initial_run = embeddings_store.backfill_embeddings(conn, fake_embedder)
+        rerun = embeddings_store.backfill_embeddings(conn, fake_embedder)
+        stored = conn.execute("SELECT COUNT(*) AS n FROM chunk_vec").fetchone()["n"]
+    assert initial_run >= 2
+    assert rerun == 0  # nothing left to embed the second time
+    assert stored == initial_run
+
+
+def test_cli_ingest_prints_embeddings_line_when_active(
+    orc_home: Path, tmp_path: Path, fake_embedder: FakeEmbedder
+) -> None:
+    pytest.importorskip("sqlite_vec")
+    from click.testing import CliRunner
+
+    from orc.cli import main
+
+    ws_module.create("demo", embedding_model=fake_embedder.model_id)
+    source = _write_doc(tmp_path, "a.md", "# Doc A\n\nSome content.\n")
+    cli_args = ["ingest", str(source), "--workspace", "demo"]
+    outcome = CliRunner().invoke(main, cli_args)
+    assert outcome.exit_code == 0, outcome.output
+    assert f"embeddings: {fake_embedder.model_id}" in outcome.output
+
+
+def test_cli_ingest_no_embeddings_line_for_plain_workspace(
+    orc_home: Path, tmp_path: Path
+) -> None:
+    from click.testing import CliRunner
+
+    from orc.cli import main
+
+    ws_module.create("demo")  # no embedding_model argument
+    source = _write_doc(tmp_path, "a.md", "# Doc A\n\nSome content.\n")
+    cli_args = ["ingest", str(source), "--workspace", "demo"]
+    outcome = CliRunner().invoke(main, cli_args)
+    assert outcome.exit_code == 0, outcome.output
+    assert "embeddings:" not in outcome.output
diff --git a/tests/unit/test_replay.py b/tests/unit/test_replay.py
index 8d7fa37..2d8b376 100644
--- a/tests/unit/test_replay.py
+++ b/tests/unit/test_replay.py
@@ -153,6 +153,69 @@ def test_replay_works_for_extract_claims_runs(
     assert new_trace["output"]["claims"], "extract_claims should have produced claims on replay"
 
 
+def _seed_embedded_corpus(tmp_path: Path, fake) -> str:
+    """Seed an embeddings-enabled workspace (ingest embeds via the fake); return its name."""
+    fake.vocabulary.update({"skills": 1})
+    workspace = ws_module.create("demo", embedding_model=fake.model_id)
+    v1_dir = tmp_path / "v1"
+    v1_dir.mkdir()
+    (v1_dir / "a.md").write_text("# Doc A\n\nThe Skills API ships in October 2025.\n")
+    do_ingest(workspace, str(v1_dir))
+    return workspace.name
+
+
+def test_replay_frozen_hybrid_pins_corpus_version(
+    orc_home: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, fake_embedder
+) -> None:
+    """A frozen replay after the corpus grows still retrieves the original chunk ids."""
+    pytest.importorskip("sqlite_vec")
+    ws_name = _seed_embedded_corpus(tmp_path, fake_embedder)
+    run_id = _verify_once(ws_name, "skills api", monkeypatch)
+    before = load_trace(run_id)["retrieval"]
+    assert before["method"] == "hybrid_rrf"
+
+    # Ingest a second document afterwards (also auto-embedded), bumping corpus_version.
+    v2_dir = tmp_path / "v2"
+    v2_dir.mkdir()
+    (v2_dir / "b.md").write_text("# Doc B\n\nMore skills content arriving later.\n")
+    do_ingest(ws_module.resolve(ws_name), str(v2_dir))
+
+    canned = [make_verdict_response(label="not_found", confidence=0.5)]
+    monkeypatch.setattr(client_module, "_client", FakeAnthropic(responses=canned))
+    replayed = replay(run_id)
+    assert replayed["mode"] == "frozen"
+    after = load_trace(replayed["new_run_id"])["retrieval"]
+    assert after["method"] == "hybrid_rrf"
+    chunk_ids_after = {hit["chunk_id"] for hit in after["returned"]}
+    chunk_ids_before = {hit["chunk_id"] for hit in before["returned"]}
+    assert chunk_ids_after == chunk_ids_before
+
+
+def test_replay_frozen_warns_on_retrieval_method_mismatch(
+    orc_home: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, fake_embedder
+) -> None:
+    """With embedding deps gone, replaying a hybrid run warns and falls back to bm25."""
+    pytest.importorskip("sqlite_vec")
+    from orc.retrieval import embedder as embedder_module
+    from orc.retrieval.embedder import set_embedder_factory
+
+    ws_name = _seed_embedded_corpus(tmp_path, fake_embedder)
+    run_id = _verify_once(ws_name, "skills api", monkeypatch)
+    assert load_trace(run_id)["retrieval"]["method"] == "hybrid_rrf"
+
+    monkeypatch.setattr(embedder_module, "find_spec", lambda name: None)
+    canned = [make_verdict_response(label="not_found", confidence=0.5)]
+    monkeypatch.setattr(client_module, "_client", FakeAnthropic(responses=canned))
+    try:
+        # Clear the factory inside the guard so every failure path restores the fake.
+        set_embedder_factory(None)
+        with pytest.warns(RuntimeWarning, match="retrieval method"):
+            replayed = replay(run_id)
+    finally:
+        set_embedder_factory(lambda model_id: fake_embedder)
+    assert load_trace(replayed["new_run_id"])["retrieval"]["method"] == "bm25"
+
+
 def test_replay_records_lineage_in_inputs(
     orc_home: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
diff --git a/tests/unit/test_workspace.py b/tests/unit/test_workspace.py
index 69dbd76..2d98f9f 100644
--- a/tests/unit/test_workspace.py
+++ b/tests/unit/test_workspace.py
@@ -145,3 +145,96 @@ def test_cli_workspace_list_after_create(orc_home: Path) -> None:
     result = runner.invoke(main, ["workspace", "list"])
     assert result.exit_code == 0
     assert "demo" in result.output
+
+
+def test_cli_workspace_create_embeddings_sets_default_model(orc_home: Path) -> None:
+    """`workspace create --embeddings` alone stores DEFAULT_EMBEDDING_MODEL on the workspace."""
+    from orc.retrieval.embedder import DEFAULT_EMBEDDING_MODEL
+
+    outcome = CliRunner().invoke(main, ["workspace", "create", "demo", "--embeddings"])
+    assert outcome.exit_code == 0, outcome.output
+    assert ws_module.resolve("demo").embedding_model == DEFAULT_EMBEDDING_MODEL
+
+
+def test_cli_workspace_create_embeddings_custom_model(orc_home: Path) -> None:
+    """`--embedding-model` given with `--embeddings` is stored verbatim on the workspace."""
+    argv = ["workspace", "create", "demo", "--embeddings", "--embedding-model", "my/model"]
+    outcome = CliRunner().invoke(main, argv)
+    assert outcome.exit_code == 0, outcome.output
+
+    stored_model = ws_module.resolve("demo").embedding_model
+    assert stored_model == "my/model"
+
+
+def test_cli_workspace_create_embeddings_warns_but_creates_when_deps_missing(
+    orc_home: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Missing embedding deps only warn: output mentions the extra, the model is still set."""
+    from orc.cli_commands import workspace as workspace_cli
+
+    monkeypatch.setattr(workspace_cli, "embedder_available", lambda: False)
+    outcome = CliRunner().invoke(main, ["workspace", "create", "demo", "--embeddings"])
+    assert outcome.exit_code == 0, outcome.output
+    assert "orc-ai[embeddings]" in outcome.output
+    assert ws_module.resolve("demo").embedding_model is not None
+
+
+def test_cli_workspace_create_embedding_model_requires_embeddings_flag(
+    orc_home: Path,
+) -> None:
+    """`--embedding-model` without `--embeddings` exits non-zero and names the missing flag."""
+    argv = ["workspace", "create", "demo", "--embedding-model", "my/model"]
+    outcome = CliRunner().invoke(main, argv)
+
+    assert outcome.exit_code != 0
+    assert "--embeddings" in outcome.output
+
+
+def test_cli_workspace_embed_backfills_and_sets_model(orc_home: Path, tmp_path: Path) -> None:
+    """`workspace embed --model` on a plain workspace fills chunk_vec and records the model."""
+    pytest.importorskip("sqlite_vec")
+    from orc.ingest.pipeline import ingest as do_ingest
+    from orc.retrieval.embedder import set_embedder_factory
+    from tests._fake_embedder import FakeEmbedder
+
+    stub = FakeEmbedder(dim=8)
+    set_embedder_factory(lambda model_id: stub)
+    try:
+        plain_ws = ws_module.create("demo")
+        source = tmp_path / "a.md"
+        source.write_text("# Doc A\n\nSome content to embed.\n")
+        do_ingest(plain_ws, str(source))
+
+        argv = ["workspace", "embed", "demo", "--model", stub.model_id]
+        outcome = CliRunner().invoke(main, argv)
+        assert outcome.exit_code == 0, outcome.output
+        assert f"chunk(s) with {stub.model_id}" in outcome.output
+        assert "Embedded" in outcome.output
+        assert ws_module.resolve("demo").embedding_model == stub.model_id
+
+        from orc.storage.embeddings_store import load_vec_extension
+
+        with open_connection(workspace_db_path("demo")) as db:
+            load_vec_extension(db)
+            row = db.execute("SELECT COUNT(*) AS n FROM chunk_vec").fetchone()
+            vector_rows = row["n"]
+        assert vector_rows >= 1
+    finally:
+        set_embedder_factory(None)
+
+
+def test_cli_workspace_embed_conflicting_model_errors(orc_home: Path) -> None:
+    """`workspace embed --model` differing from the stored model fails and names the stored one."""
+    pytest.importorskip("sqlite_vec")
+    from orc.retrieval.embedder import set_embedder_factory
+    from tests._fake_embedder import FakeEmbedder
+
+    stub = FakeEmbedder(dim=8)
+    set_embedder_factory(lambda model_id: stub)
+    try:
+        ws_module.create("demo", embedding_model="model-a")
+        outcome = CliRunner().invoke(main, ["workspace", "embed", "demo", "--model", "model-b"])
+        assert outcome.exit_code != 0
+        assert "model-a" in outcome.output
+    finally:
+        set_embedder_factory(None)
diff --git a/uv.lock b/uv.lock
index 9b48075..2634679 100644
--- a/uv.lock
+++ b/uv.lock
@@ -908,6 +908,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/da/e9/1f9ada30cef7b05e74bb06f52127e7a724976c225f46adb65c37b1dadfb6/jiter-0.14.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67f00d94b281174144d6532a04b66a12cb866cbdc47c3af3bfe2973677f9861a", size = 349613 },
 ]
 
+[[package]]
+name = "joblib"
+version = "1.5.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071 },
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.26.0"
@@ -1201,6 +1210,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477 },
 ]
 
+[[package]]
+name = "narwhals"
+version = "2.22.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/62/3c/c4ef2164a71c1a63d7f1ae411c4082c5fa872405106db60a4b7114989ad7/narwhals-2.22.1.tar.gz", hash = "sha256:d62920805a0a43b7ff8b54b0c0d3142d796f8a9301836ada37e573d6a33cbcd9", size = 647493 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/48/ca/36339329c4604adbcc99c899b7eb1ce1a555c499b6a6860757dc9bfed36d/narwhals-2.22.1-py3-none-any.whl", hash = "sha256:60567d774edf77db53906f89d9fbd164e66e56d66d388e1e6990f17ac33cfb53", size = 454815 },
+]
+
 [[package]]
 name = "networkx"
 version = "3.6.1"
@@ -1470,8 +1488,10 @@ dev = [
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "ruff" },
+    { name = "sqlite-vec" },
 ]
 embeddings = [
+    { name = "sentence-transformers" },
     { name = "sqlite-vec" },
 ]
 
@@ -1492,6 +1512,8 @@ requires-dist = [
     { name = "pyyaml", specifier = ">=6.0" },
     { name = "rich", specifier = ">=13.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.5" },
+    { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=3.0" },
+    { name = "sqlite-vec", marker = "extra == 'dev'", specifier = ">=0.1.6" },
     { name = "sqlite-vec", marker = "extra == 'embeddings'", specifier = ">=0.1.6" },
     { name = "tiktoken", specifier = ">=0.7" },
     { name = "torch", marker = "extra == 'benchmarks'", specifier = ">=2.2" },
@@ -2352,6 +2374,141 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380 },
 ]
 
+[[package]]
+name = "scikit-learn"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "joblib" },
+    { name = "narwhals" },
+    { name = "numpy" },
+    { name = "scipy" },
+    { name = "threadpoolctl" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fa/6f/37092bdb25f712817231799fc5674d8e704066a8a70c1d2d40517e18b4ab/scikit_learn-1.9.0.tar.gz", hash = "sha256:8833266989d3a5110178a9fae30783675460724d0e1efb13b14901d2c660c557", size = 7750767 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f5/be/e844fd9586e66540a15b71924d17a6cbc1bb749e81ddd0a796bcdba4c055/scikit_learn-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9db6f4d34e68c8899e4cab27fdf8eafe6ed21f2ba52ceb25ea250cd237f8e47b", size = 8789686 },
+    { url = "https://files.pythonhosted.org/packages/42/e2/ff880f62677a17d035817d543cb0fc8727d01eccbee81c5f7fc733a9d856/scikit_learn-1.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f401448645a3e7bc115aa3c094097865155b34bff1cba8101857d9104e99074c", size = 8256782 },
+    { url = "https://files.pythonhosted.org/packages/25/64/eb40435e1a508ab1b4e284ce43ae80f6a162e5be5e38ed5a6fab467a9ea4/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd3a8ef0c758555a3b23c03adaa858af32f7736785ded50ad5991f59c4ed03fa", size = 8992419 },
+    { url = "https://files.pythonhosted.org/packages/8d/da/4810a28e473185429e45a57eebcc91fc991b33d889cc0676063e671db03d/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7e254636164090da847715a27f8e5478feb98c40a9e0ee90cbd277de9e5ceb8", size = 9281411 },
+    { url = "https://files.pythonhosted.org/packages/3b/67/be3d369f40d8178ba3bd86635d132e08cb5329b023e4669d9426d84bc007/scikit_learn-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:5dc1818c77575d149e25fce9ef82dd7b7263ae372f03494158668ad632a69759", size = 8272736 },
+    { url = "https://files.pythonhosted.org/packages/37/79/a733f02dc2118da7e77a134b34f39f40201a353311b011d20859d2db3556/scikit_learn-1.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:366652351f092b219c248f1e72821e841960a63d8f358f1dcfd54dc1cbdbbc28", size = 7919564 },
+    { url = "https://files.pythonhosted.org/packages/ac/20/75f915ff375d6249e6550ac740fdbbd66159a068fd3af1400ff62036b07a/scikit_learn-1.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2bd41b0d201bc81575531b96b713d3eb5e5f50fb0b82101ff0f92294fdc236ac", size = 8741122 },
+    { url = "https://files.pythonhosted.org/packages/cc/d5/2b5148f2279196775e1db2aeb85d14b70ac80e7e32b3b28e7ebeafb0901d/scikit_learn-1.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5be45aa4a42a68a533913a6ed736cf309de2226411c79ef8d609a5456f1939b1", size = 8261512 },
+    { url = "https://files.pythonhosted.org/packages/a0/ee/5adbc77656b71f9456a2f5a7a9fdb4bcf9207a6b962889f1c2f9323afa4e/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e50ed4da51974e86e940690e9a3d82e729b62b5a49f7c9bac534d515d39d86f", size = 8837603 },
+    { url = "https://files.pythonhosted.org/packages/6c/c2/63fdda36c56437eeb44aaf9493c8bcd62ce230ab1598924fc626ffbfa943/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:056c92bb67ad4c28463c2f2653d9701449201e7e7a9e94e321be0f71c4fef2b8", size = 9132097 },
+    { url = "https://files.pythonhosted.org/packages/83/a4/c8e67227c680e2259c8864ae72ff48b06e16a6f51253a22167aa02a8aa4e/scikit_learn-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4306775fad04cc4b472a1b15af1ae9cede1540fbfcc17fbce3767cd8dc7ae283", size = 8211173 },
+    { url = "https://files.pythonhosted.org/packages/cf/fd/3c0863792e98e67e9184aa4029288a175935eb65443afcd30d4f143450cf/scikit_learn-1.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:26e22435f63bcdcf396b574273f29f13dd531f5ea035801f5be10ba1540a4e60", size = 7867451 },
+    { url = "https://files.pythonhosted.org/packages/3c/01/cf3310626b6d48d3e9be69a1223f9180360b5e6edb045f50fade723ce494/scikit_learn-1.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:80746d63bd4b6eaca54d36fe5feaf4d28bb38dc6f9470f81c7cad7c40155f119", size = 8705188 },
+    { url = "https://files.pythonhosted.org/packages/3e/04/5acd7ae280c5f93b6ac5ef6cdec14eef4c8d1cd91d85b3292989c94d96b1/scikit_learn-1.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5b934c45c252844a91d69fda3a34cff5e7307e1db10d77cb10a3980312c74713", size = 8228299 },
+    { url = "https://files.pythonhosted.org/packages/0c/39/ffe829a5b8ecb40a518724a997794657fdc354ada5e8fe8e64d998c0bac9/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38c3dcb9a1ffb85505ec53d54c7b4aea0cff70050425a7760c2af661ac85df05", size = 8789690 },
+    { url = "https://files.pythonhosted.org/packages/1f/88/8dab5de10c638c083772a6be83a3d8106ced492f74a928c8693638e5bb50/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da76d09304a4706db7cc1e3ebaa3b6b98a67365cc11d2996c4f1e58ba47df714", size = 9087723 },
+    { url = "https://files.pythonhosted.org/packages/20/3f/7917ca72464038f6240ec70c29f94862d08a34a74291ae4d4ec5eb8186a0/scikit_learn-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5808d98f15c6bf6d9d96d2348c1997392a5888ce7097e664105f930c4bca1277", size = 8184330 },
+    { url = "https://files.pythonhosted.org/packages/78/c7/15739eb2f61fda3c54639e9942414e5a19ad8a8d1f5a3266afad7cb7df80/scikit_learn-1.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:d77f54c017633791bc0225a43e2f8d03745fdcfe4880268fcc4df15f505dec2e", size = 7840653 },
+    { url = "https://files.pythonhosted.org/packages/f4/7d/c9a35cf59b20a86fec24d306f1547b78dec194b08d367ce2a3e4854169d9/scikit_learn-1.9.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9656acd4e93f74e0b66c8a36c88830a99252dfa900044d36bc2212ae89a47162", size = 8713289 },
+    { url = "https://files.pythonhosted.org/packages/3c/a7/552a7821597c632b907f7bfe8f36f9f572777af8ef8a48353041cf8e091a/scikit_learn-1.9.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:24360002ae845e7866522b0a5bbf690802e7bc388cac8663502e78aa98598aa2", size = 8245141 },
+    { url = "https://files.pythonhosted.org/packages/7d/79/f4a0c4fe9711154cddabf913471153af79056382ddc612cfe5ee0ff4b72e/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5162ad10a418c8a282dde04c9aa06965de3e9a65f33c1440c0ae69bb1a09d913", size = 8847671 },
+    { url = "https://files.pythonhosted.org/packages/f0/af/4d72d9e475ac83719160c662619e4bf7b95c19507cd582e7d0167a3c3dae/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fea2cc5677ab49d6f5bade978c866da44957b712d92e9635e8b4f723013c3cb", size = 9118104 },
+    { url = "https://files.pythonhosted.org/packages/a2/d5/6a58eea2cb9abbb9b3f2bb8b2cfb3243d1152d69f442d256c7af71304769/scikit_learn-1.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:64fa347efc1c839c487433e40c5144d38c336e8a2b59c81aa8660373945c2673", size = 8290674 },
+    { url = "https://files.pythonhosted.org/packages/65/5b/d4c879cf358f1187141cf90ced473f087183489090244f50c124a2ee478b/scikit_learn-1.9.0-cp314-cp314-win_arm64.whl", hash = "sha256:1b944b6db288f6b926e3650026ddafb988929de95d11fc2cc5fa117773c9ba42", size = 7978807 },
+    { url = "https://files.pythonhosted.org/packages/8a/43/bfae3121ec67ae09150d453c442c7c1cc166e9aefe056e6ab3b7728a5cfc/scikit_learn-1.9.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4ccacf04ca5f4b492158a5f28afe0ace43f81b2571e4b9a66d34848b46128949", size = 9031941 },
+    { url = "https://files.pythonhosted.org/packages/75/b0/20a4546eb17f3b25d3c66df15810411c14ed5065bcfab50b53c96fb627b2/scikit_learn-1.9.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ee1a8db2c18c08e34c7412d4b10be1cac214cd4ea7dc9715a6a327eb49a37c96", size = 8613528 },
+    { url = "https://files.pythonhosted.org/packages/18/3c/e440e039bb82cd19004edaaad00acbde0fb9b461083c3ecf37941c557312/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:147e9329ef0e39f75d4cffa02b2aa48d827832684926cd5210d9a2cb5c57246b", size = 8855050 },
+    { url = "https://files.pythonhosted.org/packages/43/26/b341b8dab5998da6270a3a42c2152c578501354d36f944b5856757035ef8/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bad8f8b9950321b54c965fdcbac6c6c55e79e16646b49977bcf3668d3870a1a", size = 9097190 },
+    { url = "https://files.pythonhosted.org/packages/fb/de/b650b4d69b84468cfa2e28a3ff7b8103743029e6446ce1a97fe060ef688c/scikit_learn-1.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:78fc56eafd4edb9575d2d8950d1dd152061abb573341a1cb7e099fc40f6c6666", size = 8963204 },
+    { url = "https://files.pythonhosted.org/packages/ee/f3/ff83d76d7418112e5a61326443cdda87be3545dd8d6599c95b2481a4419e/scikit_learn-1.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:051075bda8b7aab87b1906ab3d4740a1e1224a19d7b3781a576736edc94e76aa", size = 8222661 },
+]
+
+[[package]]
+name = "scipy"
+version = "1.17.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675 },
+    { url = "https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057 },
+    { url = "https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032 },
+    { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533 },
+    { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057 },
+    { url = "https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300 },
+    { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333 },
+    { url = "https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314 },
+    { url = "https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512 },
+    { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248 },
+    { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954 },
+    { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662 },
+    { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366 },
+    { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017 },
+    { url = "https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842 },
+    { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890 },
+    { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557 },
+    { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856 },
+    { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682 },
+    { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340 },
+    { url = "https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199 },
+    { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001 },
+    { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719 },
+    { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595 },
+    { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429 },
+    { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952 },
+    { url = "https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063 },
+    { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449 },
+    { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943 },
+    { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621 },
+    { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708 },
+    { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135 },
+    { url = "https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977 },
+    { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601 },
+    { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667 },
+    { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159 },
+    { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771 },
+    { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910 },
+    { url = "https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980 },
+    { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543 },
+    { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510 },
+    { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131 },
+    { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032 },
+    { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766 },
+    { url = "https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007 },
+    { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333 },
+    { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066 },
+    { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763 },
+    { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984 },
+    { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877 },
+    { url = "https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750 },
+    { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858 },
+    { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723 },
+    { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098 },
+    { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397 },
+    { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163 },
+    { url = "https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291 },
+    { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317 },
+    { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327 },
+    { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165 },
+]
+
+[[package]]
+name = "sentence-transformers"
+version = "5.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "huggingface-hub" },
+    { name = "numpy" },
+    { name = "scikit-learn" },
+    { name = "scipy" },
+    { name = "torch" },
+    { name = "tqdm" },
+    { name = "transformers" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cf/d4/7ef93157485e978c016f49da05363c1e4e7237beb5343b64b5631101f0f1/sentence_transformers-5.5.1.tar.gz", hash = "sha256:02b7740dfc60bdbbcb6061625f5d97a5c1a4e2d3baac5f9391b912bb5eae2290", size = 445161 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/bf/03/ee99a6b030e7a2e056547729f8a4709dd93e13d9c6f07590f74c395c4017/sentence_transformers-5.5.1-py3-none-any.whl", hash = "sha256:4fe11d433badc5282d32f7fc08bc714216b7a5aca426f9df77a45a554756deb7", size = 588887 },
+]
+
 [[package]]
 name = "setuptools"
 version = "81.0.0"
@@ -2438,6 +2595,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353 },
 ]
 
+[[package]]
+name = "threadpoolctl"
+version = "3.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638 },
+]
+
 [[package]]
 name = "tiktoken"
 version = "0.12.0"