Thormatt · Thormatt · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,11 +51,14 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-embeddings = ["sqlite-vec>=0.1.6"]
+embeddings = ["sqlite-vec>=0.1.6", "sentence-transformers>=3.0"]
+# sqlite-vec is a tiny wheel; including it in dev lets CI exercise the
+# vector-store tests without pulling in torch via sentence-transformers.
 dev = [
     "pytest>=8.0",
     "pytest-asyncio>=0.23",
     "ruff>=0.5",
+    "sqlite-vec>=0.1.6",
 ]
 # Heavyweight, optional. Required only by `benchmarks/faithfulness/` which
 # downloads the HaluBench subsample and self-hosts HHEM-2.1-Open for the

diff --git a/src/orc/cli_commands/ingest.py b/src/orc/cli_commands/ingest.py
@@ -35,6 +35,8 @@ def ingest_command(source: str, workspace: str | None, no_recursive: bool) -> No
     console.print(
         f"[green]Ingested[/green] {len(ids)} evidence item(s) into [bold]{ws.name}[/bold]"
     )
+    if ws.has_embeddings:
+        console.print(f"  embeddings: {ws.embedding_model}")
     for eid in ids[:10]:
         console.print(f"  [dim]{eid}[/dim]")
     if len(ids) > 10:

diff --git a/src/orc/cli_commands/search.py b/src/orc/cli_commands/search.py
@@ -50,7 +50,7 @@ def search_command(query: str, workspace: str | None, k: int, as_json: bool) ->
         console.print("[yellow]No chunks matched[/yellow]")
         return
 
-    table = Table(title=f"BM25 results for '{query}'")
+    table = Table(title=f"Retrieval results for '{query}'")
     table.add_column("Rank", justify="right")
     table.add_column("Score", justify="right")
     table.add_column("Title")

diff --git a/src/orc/cli_commands/workspace.py b/src/orc/cli_commands/workspace.py
@@ -1,16 +1,33 @@
-"""`orc workspace ...` commands."""
+"""`orc workspace ...` commands.
+
+The embedding model is pinned in the workspace row — there is deliberately no
+env var to override it at retrieval time, because the column is the
+replay-pinned truth for which model embedded the corpus.
+"""
 
 from __future__ import annotations
 
 import click
 from rich.console import Console
+from rich.markup import escape
 from rich.table import Table
 
-from orc.errors import WorkspaceExistsError
+from orc.errors import EmbeddingsUnavailableError, WorkspaceExistsError, WorkspaceNotFoundError
+from orc.paths import workspace_db_path
+from orc.retrieval.embedder import DEFAULT_EMBEDDING_MODEL, embedder_available, get_embedder
 from orc.storage import workspace as ws_module
+from orc.storage.db import open_connection, transaction
+from orc.storage.embeddings_store import (
+    backfill_embeddings,
+    ensure_chunk_vec,
+    load_vec_extension,
+    vec_extension_available,
+)
 
 console = Console()
 
+_INSTALL_HINT = 'pip install "orc-ai[embeddings]"'
+
 
 @click.group("workspace")
 def workspace() -> None:
@@ -19,17 +36,93 @@ def workspace() -> None:
 
 @workspace.command("create")
 @click.argument("name")
-def create_command(name: str) -> None:
+@click.option(
+    "--embeddings",
+    "embeddings",
+    is_flag=True,
+    help="Enable hybrid (BM25 + vector) retrieval for this workspace.",
+)
+@click.option(
+    "--embedding-model",
+    "embedding_model",
+    default=None,
+    help=f"Embedding model id (default: {DEFAULT_EMBEDDING_MODEL}). Requires --embeddings.",
+)
+def create_command(name: str, embeddings: bool, embedding_model: str | None) -> None:
     """Create a new workspace."""
+    if embedding_model is not None and not embeddings:
+        raise click.UsageError("--embedding-model requires --embeddings")
+    model = (embedding_model or DEFAULT_EMBEDDING_MODEL) if embeddings else None
+
+    # Warn-but-create: the flag records intent in the workspace row; the user
+    # can install the extra and run `orc workspace embed` later.
+    if model is not None and not (embedder_available() and vec_extension_available()):
+        console.print(
+            "[yellow]Warning:[/yellow] embedding dependencies are not installed; "
+            f"ingest will fail until you run: {escape(_INSTALL_HINT)}"
+        )
+
     try:
-        ws = ws_module.create(name)
+        ws = ws_module.create(name, embedding_model=model)
     except WorkspaceExistsError as exc:
         raise click.ClickException(str(exc)) from exc
     except ValueError as exc:
         raise click.ClickException(str(exc)) from exc
     console.print(f"[green]Created workspace[/green] [bold]{ws.name}[/bold]")
     console.print(f"  schema_version = {ws.schema_version}")
     console.print(f"  created_at     = {ws.created_at}")
+    if ws.has_embeddings:
+        console.print(f"  embeddings     = {ws.embedding_model}")
+
+
+@workspace.command("embed")
+@click.argument("name")
+@click.option(
+    "--model",
+    default=None,
+    help="Embedding model id. Only needed when the workspace has none set yet.",
+)
+def embed_command(name: str, model: str | None) -> None:
+    """Backfill vector embeddings for all unembedded chunks in a workspace."""
+    try:
+        ws = ws_module.resolve(name)
+    except WorkspaceNotFoundError as exc:
+        raise click.ClickException(str(exc)) from exc
+
+    if ws.embedding_model is None:
+        effective_model = model or DEFAULT_EMBEDDING_MODEL
+    elif model is not None and model != ws.embedding_model:
+        raise click.ClickException(
+            f"Workspace {ws.name!r} is pinned to embedding model "
+            f"{ws.embedding_model!r}; refusing to embed with {model!r}. "
+            "Vectors from different models cannot be mixed."
+        )
+    else:
+        effective_model = ws.embedding_model
+
+    if not vec_extension_available():
+        raise click.ClickException(
+            f"The sqlite-vec extension is unavailable; run: {_INSTALL_HINT}"
+        )
+    try:
+        embedder = get_embedder(effective_model)
+    except EmbeddingsUnavailableError as exc:
+        raise click.ClickException(str(exc)) from exc
+
+    with open_connection(workspace_db_path(ws.name)) as conn:
+        load_vec_extension(conn)
+        try:
+            ensure_chunk_vec(conn, embedder.dim)
+        except ValueError as exc:
+            raise click.ClickException(str(exc)) from exc
+        if ws.embedding_model is None:
+            with transaction(conn):
+                conn.execute(
+                    "UPDATE workspace SET embedding_model = ? WHERE name = ?",
+                    (effective_model, ws.name),
+                )
+        count = backfill_embeddings(conn, embedder)
+    console.print(f"[green]Embedded[/green] {count} chunk(s) with [bold]{effective_model}[/bold]")
 
 
 @workspace.command("list")

diff --git a/src/orc/directives/research/skills/research_topic.py b/src/orc/directives/research/skills/research_topic.py
@@ -10,7 +10,7 @@
 from orc.llm.cache import build_verify_messages, format_corpus
 from orc.llm.client import get_client, messages_create, resolve_model_for_provider
 from orc.llm.models import resolve_research_model
-from orc.retrieval import bm25_search
+from orc.retrieval import retrieve
 from orc.runs.runner import Run
 from orc.storage.workspace import Workspace
 
@@ -73,11 +73,13 @@ def run(
             raise ValueError("topic must be a non-empty string")
 
         resolved_model = resolve_research_model(model)
-        pool = bm25_search(
-            run.conn, topic, limit=retrieval_pool, corpus_version=corpus_version
+        res = retrieve(
+            run.conn, topic, workspace=workspace, limit=retrieval_pool, corpus_version=corpus_version
+        )
+        candidates = res.chunks[:k]
+        run.record_retrieval(
+            candidates, method=res.method, candidates_considered=res.candidates_considered
         )
-        candidates = pool[:k]
-        run.record_retrieval(candidates, method="bm25", candidates_considered=len(pool))
 
         if not candidates:
             return {

diff --git a/src/orc/directives/research/skills/search_evidence.py b/src/orc/directives/research/skills/search_evidence.py
@@ -4,7 +4,7 @@
 
 from typing import Any
 
-from orc.retrieval import bm25_search
+from orc.retrieval import retrieve
 from orc.runs.runner import Run
 from orc.storage.workspace import Workspace
 
@@ -22,8 +22,11 @@ def run(
         corpus_version: int | None = None,
         **_unused: Any,
     ) -> dict[str, Any]:
-        chunks = bm25_search(run.conn, query, limit=k, corpus_version=corpus_version)
-        run.record_retrieval(chunks, method="bm25", candidates_considered=len(chunks))
+        res = retrieve(run.conn, query, workspace=workspace, limit=k, corpus_version=corpus_version)
+        chunks = res.chunks
+        run.record_retrieval(
+            chunks, method=res.method, candidates_considered=res.candidates_considered
+        )
         return {
             "query": query,
             "k": k,

diff --git a/src/orc/directives/research/skills/verify_claim.py b/src/orc/directives/research/skills/verify_claim.py
@@ -19,7 +19,7 @@
 from orc.llm.cache import build_verify_messages, format_corpus
 from orc.llm.client import get_client, messages_create, resolve_model_for_provider
 from orc.llm.models import resolve_verify_model
-from orc.retrieval import bm25_search
+from orc.retrieval import retrieve
 from orc.runs.runner import Run
 from orc.storage.workspace import Workspace
 
@@ -372,11 +372,17 @@ def run(
                 candidates, method=f"{mode}_all", candidates_considered=len(candidates)
             )
         else:
-            pool = bm25_search(
-                run.conn, claim, limit=retrieval_pool, corpus_version=corpus_version
+            res = retrieve(
+                run.conn,
+                claim,
+                workspace=workspace,
+                limit=retrieval_pool,
+                corpus_version=corpus_version,
+            )
+            candidates = res.chunks[:k]
+            run.record_retrieval(
+                candidates, method=res.method, candidates_considered=res.candidates_considered
             )
-            candidates = pool[:k]
-            run.record_retrieval(candidates, method="bm25", candidates_considered=len(pool))
 
         if not candidates:
             return _make_not_found(claim=claim, model=resolved_model, run=run)

diff --git a/src/orc/errors.py b/src/orc/errors.py
@@ -28,3 +28,7 @@ class TraceNotFoundError(OrcError):
 
 class IngestError(OrcError):
     pass
+
+
+class EmbeddingsUnavailableError(OrcError):
+    """Embeddings were requested but the optional dependencies are missing."""
diff --git a/src/orc/ingest/pipeline.py b/src/orc/ingest/pipeline.py
@@ -80,6 +80,11 @@ def _ingest_one(workspace: Workspace, doc: LoadedDoc) -> list[str]:
         # Chunk before any disk write so a chunker failure leaves nothing behind.
         chunks = chunk_text(doc.text)
 
+        # Embed BEFORE the write transaction: model inference can be slow and
+        # must not hold the BEGIN IMMEDIATE write lock. The vectors are then
+        # inserted in the same transaction as the chunk rows (atomic).
+        embeddings = _embed_chunks_for_ingest(conn, workspace=workspace, chunks=chunks)
+
         # Stage the evidence bytes to a temp file and only promote it into place
         # once the DB transaction commits. A failure anywhere leaves neither an
         # orphaned file nor a dangling row — the corpus stays consistent.
@@ -95,6 +100,7 @@ def _ingest_one(workspace: Workspace, doc: LoadedDoc) -> list[str]:
                 sha=sha,
                 doc=doc,
                 chunks=chunks,
+                embeddings=embeddings,
             )
         except BaseException:
             tmp_path.unlink(missing_ok=True)
@@ -103,6 +109,49 @@ def _ingest_one(workspace: Workspace, doc: LoadedDoc) -> list[str]:
         return [evidence_id]
 
 
+def _embed_chunks_for_ingest(
+    conn: Any,
+    *,
+    workspace: Workspace,
+    chunks: list,
+) -> list[list[float]] | None:
+    """Embed chunk texts when the workspace opts into embeddings.
+
+    Fail-loud by design: a workspace with embedding_model set has promised
+    hybrid retrieval, so silently ingesting unembedded chunks would corrupt
+    that promise. Missing deps surface as IngestError with an install hint.
+    Also prepares chunk_vec (extension + table) before the write transaction.
+    """
+    if workspace.embedding_model is None or not chunks:
+        return None
+
+    from orc.errors import EmbeddingsUnavailableError
+    from orc.retrieval.embedder import get_embedder
+    from orc.storage.embeddings_store import (
+        ensure_chunk_vec,
+        load_vec_extension,
+        vec_extension_available,
+    )
+
+    try:
+        if not vec_extension_available():
+            raise EmbeddingsUnavailableError(
+                "the sqlite-vec extension is unavailable; "
+                'run: pip install "orc-ai[embeddings]"'
+            )
+        embedder = get_embedder(workspace.embedding_model)
+    except EmbeddingsUnavailableError as exc:
+        raise IngestError(
+            f"Workspace {workspace.name!r} requires embeddings "
+            f"(embedding_model={workspace.embedding_model!r}) but they are "
+            f"unavailable: {exc}"
+        ) from exc
+
+    load_vec_extension(conn)
+    ensure_chunk_vec(conn, embedder.dim)
+    return embedder.embed_texts([c.text for c in chunks])
+
+
 def _commit_evidence(
     conn: Any,
     *,
@@ -112,6 +161,7 @@ def _commit_evidence(
     sha: str,
     doc: LoadedDoc,
     chunks: list,
+    embeddings: list[list[float]] | None = None,
 ) -> None:
     with transaction(conn):
         conn.execute(
@@ -136,12 +186,13 @@ def _commit_evidence(
                 new_corpus_version,
             ),
         )
-        for c in chunks:
+        chunk_ids = [new_chunk_id() for _ in chunks]
+        for chunk_id, c in zip(chunk_ids, chunks, strict=True):
             conn.execute(
                 "INSERT INTO chunk(chunk_id, evidence_id, seq, text, token_count, "
                 "headings_path, start_offset, end_offset) VALUES (?,?,?,?,?,?,?,?)",
                 (
-                    new_chunk_id(),
+                    chunk_id,
                     evidence_id,
                     c.seq,
                     c.text,
@@ -151,6 +202,16 @@ def _commit_evidence(
                     c.end_offset,
                 ),
             )
+        if embeddings is not None:
+            from orc.storage.embeddings_store import store_chunk_embeddings
+
+            store_chunk_embeddings(
+                conn,
+                [
+                    (chunk_id, new_corpus_version, vector)
+                    for chunk_id, vector in zip(chunk_ids, embeddings, strict=True)
+                ],
+            )
 
 
 def _iter_files(root: Path, *, recursive: bool) -> Iterator[Path]:

diff --git a/src/orc/retrieval/__init__.py b/src/orc/retrieval/__init__.py
@@ -1,5 +1,13 @@
 """Retrieval primitives. Pure functions over a sqlite connection."""
 
 from orc.retrieval.bm25 import RetrievedChunk, bm25_search
+from orc.retrieval.hybrid import RetrievalResult, retrieve, rrf_fuse, vector_search
 
-__all__ = ["RetrievedChunk", "bm25_search"]
+__all__ = [
+    "RetrievalResult",
+    "RetrievedChunk",
+    "bm25_search",
+    "retrieve",
+    "rrf_fuse",
+    "vector_search",
+]