From 8f8da4e1fc1eca2f9a0e1c966ba6642de4fd8ee0 Mon Sep 17 00:00:00 2001
From: Elie Gambache <elyahou.hadass@gmail.com>
Date: Thu, 25 Jun 2026 20:43:45 +0300
Subject: [PATCH] feat(search): dense semantic search with v5 embedding model

- Add SeforimEmbedder (ONNX v5 int8) + HebrewV5Normalizer (final-letter folding)
- HybridSearchEngine (BM25 + dense, RRF) + VectorSearcher over a fused Lucene index
- BuildVectorIndex / fused KnnFloatVectorField indexing in the generator
- Bundle + fetch the v5 model (PackageArtifacts, DownloadEmbedModel -> v5-int8)
- CI: free disk space on the runner before the build
---
 generator/packaging/build.gradle.kts          |  27 ++-
 .../packaging/DownloadEmbedModel.kt           |  84 ++++++++
 .../packaging/PackageArtifacts.kt             |  25 ++-
 generator/searchindex/build.gradle.kts        |   7 +-
 .../searchindex/BuildLuceneIndex.kt           |  37 +++-
 .../lucene/LuceneTextIndexWriter.kt           |  12 +-
 gradle/libs.versions.toml                     |   4 +
 search/build.gradle.kts                       |   5 +
 .../search/HebrewV5Normalizer.kt              |  27 +++
 .../search/HybridSearchEngine.kt              | 187 ++++++++++++++++++
 .../seforimlibrary/search/SeforimEmbedder.kt  | 146 ++++++++++++++
 .../seforimlibrary/search/VectorSearcher.kt   |  59 ++++++
 .../search/SeforimEmbedderTest.kt             |  39 ++++
 13 files changed, 653 insertions(+), 6 deletions(-)
 create mode 100644 generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/DownloadEmbedModel.kt
 create mode 100644 search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HebrewV5Normalizer.kt
 create mode 100644 search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HybridSearchEngine.kt
 create mode 100644 search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedder.kt
 create mode 100644 search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/VectorSearcher.kt
 create mode 100644 search/src/jvmTest/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedderTest.kt
diff --git a/generator/packaging/build.gradle.kts b/generator/packaging/build.gradle.kts
index 698c0fe8..e32d2481 100644
--- a/generator/packaging/build.gradle.kts
+++ b/generator/packaging/build.gradle.kts
@@ -80,12 +80,37 @@ tasks.register<JavaExec>("downloadLexicalDb") {
     jvmArgs = listOf("-Xmx512m")
 }
 
+// Download the dense embedding model (int8 ONNX + tokenizer) from the private
+// SeforimEmbedding v5-int8 release next to seforim.db, so it gets bundled.
+// Needs GITHUB_TOKEN/GH_TOKEN; fails soft (bundle without model) if unavailable.
+// Usage:
+//   GITHUB_TOKEN=… ./gradlew :packaging:downloadEmbedModel
+tasks.register<JavaExec>("downloadEmbedModel") {
+    group = "application"
+    description = "Download int8 embedding model + tokenizer from the private v5-int8 release next to seforim.db."
+
+    dependsOn("jvmJar")
+    mainClass.set("io.github.kdroidfilter.seforimlibrary.packaging.DownloadEmbedModelKt")
+    classpath = files(tasks.named("jvmJar")) + configurations.getByName("jvmRuntimeClasspath")
+
+    if (project.hasProperty("seforimDb")) { // DB location precedence: -PseforimDb, then SEFORIM_DB, else <root build dir>/seforim.db
+        systemProperty("seforimDb", project.property("seforimDb") as String)
+    } else if (System.getenv("SEFORIM_DB") != null) {
+        systemProperty("seforimDb", System.getenv("SEFORIM_DB"))
+    } else {
+        val defaultDbPath = rootProject.layout.buildDirectory.file("seforim.db").get().asFile.absolutePath
+        systemProperty("seforimDb", defaultDbPath)
+    }
+
+    jvmArgs = listOf("-Xmx256m")
+}
+
 // Package DB + Lucene indexes into single tar.zst and split
 tasks.register<JavaExec>("packageArtifacts") {
     group = "application"
     description = "Create seforim_bundle.tar.zst (DB + indexes + release info) with zstd and split into ~1.9GiB parts."
 
-    dependsOn("jvmJar", "writeReleaseInfo", "downloadLexicalDb")
+    dependsOn("jvmJar", "writeReleaseInfo", "downloadLexicalDb", "downloadEmbedModel")
     mainClass.set("io.github.kdroidfilter.seforimlibrary.packaging.PackageArtifactsKt")
     classpath = files(tasks.named("jvmJar")) + configurations.getByName("jvmRuntimeClasspath")
 
diff --git a/generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/DownloadEmbedModel.kt b/generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/DownloadEmbedModel.kt
new file mode 100644
index 00000000..008a2d5b
--- /dev/null
+++ b/generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/DownloadEmbedModel.kt
@@ -0,0 +1,84 @@
+package io.github.kdroidfilter.seforimlibrary.packaging
+
+import co.touchlab.kermit.Logger
+import co.touchlab.kermit.Severity
+import io.github.kdroidfilter.seforimlibrary.common.OptimizedHttpClient
+import java.nio.file.Files
+import java.nio.file.Path
+import java.nio.file.Paths
+
+private const val RELEASE_API =
+    "https://api.github.com/repos/kdroidFilter/SeforimEmbedding/releases/tags/v5-int8" // release looked up by its tag
+private const val USER_AGENT = "SeforimLibrary-DownloadEmbedModel/1.0"
+
+// Asset names pulled from the private SeforimEmbedding release; PackageArtifacts bundles the same file names.
+private val ASSETS = listOf("seforim-embed-v5-int8.onnx", "tokenizer.json")
+
+/**
+ * Download the int8 embedding model + tokenizer from the private `v5-int8` release
+ * and place them next to `seforim.db` so [PackageArtifacts] bundles them.
+ *
+ * Requires a token with read access to the private repo via `GITHUB_TOKEN` / `GH_TOKEN`
+ * (consumed by [OptimizedHttpClient]). On any failure (no token, network, missing
+ * asset) it logs a warning and exits 0 so packaging proceeds WITHOUT the model
+ * (the app then degrades to lexical-only search).
+ *
+ * Usage:
+ *   ./gradlew :packaging:downloadEmbedModel
+ *   ./gradlew :packaging:downloadEmbedModel -PseforimDb=/path/to/seforim.db
+ */
+fun main(args: Array<String>) {
+    Logger.setMinSeverity(Severity.Info)
+    val logger = Logger.withTag("DownloadEmbedModel")
+
+    val dbPath = resolveDbPath(args)
+
+    val present = ASSETS.all { name ->
+        val p = dbPath.resolveSibling(name)
+        Files.exists(p) && Files.isRegularFile(p) && Files.size(p) > 0
+    }
+    if (present) {
+        logger.i { "Embedding model already present next to ${dbPath.fileName}; skipping download" }
+        return
+    }
+
+    runCatching {
+        val json = OptimizedHttpClient.fetchJson(RELEASE_API, USER_AGENT, logger)
+        for (name in ASSETS) {
+            val out = dbPath.resolveSibling(name)
+            if (Files.exists(out) && Files.size(out) > 0) {
+                logger.i { "Using existing $name" }
+                continue
+            }
+            val url = assetApiUrl(json, name) // release tag in the message is derived from RELEASE_API so it cannot go stale
+                ?: throw IllegalStateException("Asset '$name' not found in ${RELEASE_API.substringAfterLast('/')} release")
+            Files.createDirectories(out.parent)
+            val tmp = out.resolveSibling("${out.fileName}.part")
+            Files.deleteIfExists(tmp)
+            // Asset API url + Accept: octet-stream + token -> works for private repos.
+            OptimizedHttpClient.downloadFile(url, tmp, USER_AGENT, logger, "Downloading $name")
+            Files.deleteIfExists(out)
+            Files.move(tmp, out)
+            logger.i { "Downloaded $name -> ${out.toAbsolutePath()}" }
+        }
+    }.onFailure {
+        logger.w(it) { "Could not download the embedding model; bundle will omit it (dense search disabled)" }
+    }
+}
+
+/** Picks the seforim.db location: first CLI arg, then -DseforimDb, then SEFORIM_DB, else build/seforim.db. */
+private fun resolveDbPath(args: Array<String>): Path =
+    sequenceOf(args.firstOrNull(), System.getProperty("seforimDb"), System.getenv("SEFORIM_DB"))
+        .filterNotNull()
+        .map { Paths.get(it) }
+        .firstOrNull()
+        ?: Paths.get("build", "seforim.db")
+
+/** Extract the GitHub *asset API* url for an asset name from the release JSON (minified or pretty-printed); null if absent. */
+private fun assetApiUrl(json: String, name: String): String? {
+    val re = Regex(
+        "\\{\\s*\"url\"\\s*:\\s*\"(https://api\\.github\\.com/[^\"]+?/assets/\\d+)\"[^{}]*?\"name\"\\s*:\\s*\"" +
+            Regex.escape(name) + "\"",
+    )
+    return re.find(json)?.groupValues?.get(1)
+}
diff --git a/generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/PackageArtifacts.kt b/generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/PackageArtifacts.kt
index cb51569d..c553fe25 100644
--- a/generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/PackageArtifacts.kt
+++ b/generator/packaging/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/packaging/PackageArtifacts.kt
@@ -65,13 +65,18 @@ fun main(args: Array<String>) {
 
     // Resolve precomputed catalog next to the DB
     val catalogPath: Path = dbPath.resolveSibling("catalog.pb")
-    
+
     // Resolve release info file next to the DB
     val releaseInfoPath: Path = dbPath.resolveSibling("release_info.txt")
 
     // Resolve lexical DB next to the DB
     val lexicalDbPath: Path = dbPath.resolveSibling("lexical.db")
 
+    // Resolve the dense embedding model (int8 ONNX) + tokenizer next to the DB.
+    // Bundled so the app gets dense search out of the box; absent -> lexical only.
+    val embedModelPath: Path = dbPath.resolveSibling("seforim-embed-v5-int8.onnx")
+    val embedTokenizerPath: Path = dbPath.resolveSibling("tokenizer.json")
+
     if (!textIndexDir.toFile().isDirectory) {
         logger.w { "Lucene text index directory missing: $textIndexDir (will skip)" }
     }
@@ -126,6 +131,8 @@ fun main(args: Array<String>) {
             " - Catalog: $catalogPath\n" +
             " - Release info: $releaseInfoPath\n" +
             " - Lexical DB: $lexicalDbPath\n" +
+            " - Embed model: $embedModelPath\n" +
+            " - Embed tokenizer: $embedTokenizerPath\n" +
             " - Text index: $textIndexDir\n" +
             " - Lookup index: $lookupIndexDir\n" +
             " -> Bundle .tar.zst: $bundleOutputPath\n" +
@@ -171,6 +178,20 @@ fun main(args: Array<String>) {
                             logger.w { "Lexical DB missing: $lexicalDbPath (skipped)" }
                         }
 
+                        // Add the dense embedding model + tokenizer if available
+                        if (embedModelPath.exists()) {
+                            addFileToTar(tar, embedModelPath, embedModelPath.fileName.toString(), logger)
+                            logger.i { "Added embedding model to bundle" }
+                        } else {
+                            logger.w { "Embedding model missing: $embedModelPath (skipped, dense search disabled)" }
+                        }
+                        if (embedTokenizerPath.exists()) {
+                            addFileToTar(tar, embedTokenizerPath, embedTokenizerPath.fileName.toString(), logger)
+                            logger.i { "Added embedding tokenizer to bundle" }
+                        } else {
+                            logger.w { "Embedding tokenizer missing: $embedTokenizerPath (skipped)" }
+                        }
+
                         // Add the precomputed catalog if available
                         if (haveCatalog) {
                             addFileToTar(tar, catalogPath, catalogPath.fileName.toString(), logger)
@@ -178,7 +199,7 @@ fun main(args: Array<String>) {
                         } else {
                             logger.w { "Precomputed catalog missing: $catalogPath (skipped)" }
                         }
-                        
+
                         // Add the release info file if available
                         if (haveReleaseInfo) {
                             addFileToTar(tar, releaseInfoPath, releaseInfoPath.fileName.toString(), logger)
diff --git a/generator/searchindex/build.gradle.kts b/generator/searchindex/build.gradle.kts
index d723f4e4..f5284199 100644
--- a/generator/searchindex/build.gradle.kts
+++ b/generator/searchindex/build.gradle.kts
@@ -64,6 +64,12 @@ tasks.register<JavaExec>("buildLuceneIndexDefault") {
         systemProperty("inMemoryDb", "true")
     }
 
+    // Optional: -PvectorsBin=/path (dir with ids.i64+vecs.f32+meta.txt) → SINGLE
+    // fused index (text + dense KnnFloatVectorField per line).
+    (project.findProperty("vectorsBin") as String?)?.let { systemProperty("vectorsBin", it) }
+    // Optional: -PindexThreads=N to cap concurrent indexing threads (lower = less RAM).
+    (project.findProperty("indexThreads") as String?)?.let { systemProperty("indexThreads", it) }
+
     jvmArgs = listOf(
         "-Xmx$generatorHeap",
         "-XX:+UseG1GC",
@@ -72,4 +78,3 @@ tasks.register<JavaExec>("buildLuceneIndexDefault") {
         "--add-modules=jdk.incubator.vector"
     )
 }
-
diff --git a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt
index 2b96ca91..bf926120 100644
--- a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt
+++ b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt
@@ -22,7 +22,10 @@ import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper
 import org.apache.lucene.analysis.ngram.NGramTokenFilter
 import org.jsoup.Jsoup
 import org.jsoup.safety.Safelist
+import java.io.DataInputStream
 import java.io.File
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
 import java.nio.file.Files
 import java.nio.file.Path
 import java.nio.file.Paths
@@ -58,6 +61,12 @@ fun main() = runBlocking {
     runCatching { Files.createDirectories(indexDir) }
     runCatching { Files.createDirectories(lookupDir) }
 
+    // Optional dense embeddings -> SINGLE fused index. -DvectorsBin points to a dir
+    // with ids.i64 + vecs.f32 + meta.txt (produced by embed_corpus_bin.py against this
+    // same DB). Each line then also gets a KnnFloatVectorField in the text index.
+    val vectorProvider: ((Long) -> FloatArray?)? =
+        System.getProperty("vectorsBin")?.let { loadVectorProvider(Paths.get(it), logger) }
+
     // Open repository (prefer in-memory for faster reads)
     val useMemoryDb = (System.getProperty("inMemoryDb") ?: "true") != "false"
     // Use a shared in-memory DB so multiple connections can read concurrently when multithreading
@@ -115,7 +124,7 @@ fun main() = runBlocking {
         )
     )
 
-    LuceneTextIndexWriter(indexDir, analyzer = analyzer).use { writer ->
+    LuceneTextIndexWriter(indexDir, analyzer = analyzer, vectorProvider = vectorProvider).use { writer ->
         LuceneLookupIndexWriter(lookupDir, analyzer = analyzer).use { lookup ->
             val books = repo.getAllBooks()
             val indexThreads = (System.getProperty("indexThreads") ?: Runtime.getRuntime().availableProcessors().toString()).toInt().coerceAtLeast(1)
@@ -300,3 +309,29 @@ private fun sanitizeAcronymTerm(raw: String): String {
     if (raw.isEmpty()) return ""
     return normalizePostHtmlForIndex(raw)
 }
+
+/**
+ * Loads dense embeddings produced by embed_corpus_bin.py (ids.i64 + vecs.f32, little-endian, plus
+ * meta.txt = "<count> <dim>", whitespace-separated) into RAM and returns a lineId -> vector lookup
+ * (null when a line has no vector) that LuceneTextIndexWriter uses to build the single fused index.
+ */
+private fun loadVectorProvider(dir: Path, logger: co.touchlab.kermit.Logger): ((Long) -> FloatArray?) {
+    val (n, dim) = File(dir.toFile(), "meta.txt").readText().trim().split(Regex("\\s+")).take(2).map { it.toInt() }
+    // Offsets below use Int arithmetic (n * dim, dim * 4): fail fast instead of silently wrapping around.
+    require(n >= 0 && dim >= 0 && dim <= Int.MAX_VALUE / 4 && n.toLong() * dim <= Int.MAX_VALUE) { "Unsupported dense vector layout in $dir: n=$n dim=$dim" }
+    logger.i { "Loading $n dense vectors (dim $dim) from $dir for the fused index" }
+    val idsBuf = ByteBuffer.wrap(File(dir.toFile(), "ids.i64").readBytes()).order(ByteOrder.LITTLE_ENDIAN)
+    val rowOf = HashMap<Long, Int>((n.toLong() * 2).coerceAtMost(1L shl 30).toInt())
+    for (i in 0 until n) rowOf[idsBuf.long] = i
+    val vecs = FloatArray(n * dim)
+    DataInputStream(File(dir.toFile(), "vecs.f32").inputStream().buffered(1 shl 20)).use { din ->
+        val rec = ByteArray(dim * 4)
+        val bb = ByteBuffer.wrap(rec).order(ByteOrder.LITTLE_ENDIAN)
+        for (i in 0 until n) {
+            din.readFully(rec); bb.rewind()
+            for (j in 0 until dim) vecs[i * dim + j] = bb.float
+        }
+    }
+    logger.i { "Dense vectors loaded (${(n.toLong() * dim * 4L) / 1_000_000} MB in RAM)" }
+    return { lineId -> rowOf[lineId]?.let { row -> vecs.copyOfRange(row * dim, row * dim + dim) } }
+}
diff --git a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt
index 0b841112..00b2c63c 100644
--- a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt
+++ b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt
@@ -6,6 +6,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer
 import org.apache.lucene.document.Document
 import org.apache.lucene.document.Field
 import org.apache.lucene.document.IntPoint
+import org.apache.lucene.document.KnnFloatVectorField
 import org.apache.lucene.document.StoredField
 import org.apache.lucene.document.StringField
 import org.apache.lucene.document.TextField
@@ -24,12 +25,16 @@ class LuceneTextIndexWriter(
     indexDir: Path,
     analyzer: Analyzer = StandardAnalyzer(),
     private val indexHebrewField: Boolean = false,
-    private val indexPrimaryText: Boolean = true
+    private val indexPrimaryText: Boolean = true,
+    // Optional dense embeddings: if provided, each line doc also gets a
+    // KnnFloatVectorField("vec", COSINE) -> SINGLE index holding text + vectors.
+    private val vectorProvider: ((Long) -> FloatArray?)? = null,
 ) : TextIndexWriter {
     companion object Fields {
         const val FIELD_TYPE = "type"
         const val TYPE_LINE = "line"
         const val TYPE_BOOK_TITLE = "book_title"
+        const val FIELD_VEC = "vec"
 
         const val FIELD_BOOK_ID = "book_id"
         const val FIELD_CATEGORY_ID = "category_id"
@@ -106,6 +111,11 @@ class LuceneTextIndexWriter(
             // Index 4-gram tokens for substring search (per-field analyzer applies NGram filter)
             add(TextField(FIELD_TEXT_NG4, normalizedText, Field.Store.NO))
             // rawPlainText is no longer stored - snippet source is fetched from DB at query time
+
+            // Dense embedding (single fused index): attach the line's vector if available.
+            vectorProvider?.invoke(lineId)?.let { vec ->
+                add(KnnFloatVectorField(FIELD_VEC, vec, org.apache.lucene.index.VectorSimilarityFunction.COSINE))
+            }
         }
         writer.addDocument(doc)
     }
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index dc3ebdfc..65f06001 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -5,6 +5,8 @@ kotlin = "2.3.20"
 agp = "8.12.3"
 jvmToolchain = "25"
 lucene = "10.4.0"
+onnxruntime = "1.20.0"
+djlHuggingface = "0.30.0"
 maven-publish = "0.36.0"
 kotlinx-coroutines = "1.10.2"
 kotlinx-serialization = "1.10.0"
@@ -34,6 +36,8 @@ lucene-highlighter = { module = "org.apache.lucene:lucene-highlighter", version.
 lucene-queryparser = { module = "org.apache.lucene:lucene-queryparser", version.ref = "lucene" }
 lucene-analysis-common = { module = "org.apache.lucene:lucene-analysis-common", version.ref = "lucene" }
 lucene-core = { module = "org.apache.lucene:lucene-core", version.ref = "lucene" }
+onnxruntime = { module = "com.microsoft.onnxruntime:onnxruntime", version.ref = "onnxruntime" }
+djl-huggingface-tokenizers = { module = "ai.djl.huggingface:tokenizers", version.ref = "djlHuggingface" }
 sqlDelight-driver-sqlite = { module = "app.cash.sqldelight:sqlite-driver", version.ref = "sqlDelight" }
 sqlDelight-driver-android = { module = "app.cash.sqldelight:android-driver", version.ref = "sqlDelight" }
 sqlDelight-driver-native = { module = "app.cash.sqldelight:native-driver", version.ref = "sqlDelight" }
diff --git a/search/build.gradle.kts b/search/build.gradle.kts
index 52c283fb..4a2e4111 100644
--- a/search/build.gradle.kts
+++ b/search/build.gradle.kts
@@ -20,6 +20,11 @@ kotlin {
             implementation(libs.sqlDelight.driver.sqlite)
             implementation(libs.kermit)
             implementation(libs.jsoup)
+            // Dense semantic search: ONNX Runtime (query embedding) + HuggingFace tokenizer.
+            // Stock Maven `onnxruntime` is CPU-only on desktop JVM (no DirectML/CoreML/XNNPACK
+            // native — those need a custom build; CUDA needs the separate onnxruntime_gpu).
+            implementation(libs.onnxruntime)
+            implementation(libs.djl.huggingface.tokenizers)
         }
 
         jvmTest.dependencies {
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HebrewV5Normalizer.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HebrewV5Normalizer.kt
new file mode 100644
index 00000000..304d725b
--- /dev/null
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HebrewV5Normalizer.kt
@@ -0,0 +1,27 @@
+package io.github.kdroidfilter.seforimlibrary.search
+
+import org.jsoup.parser.Parser
+
+/**
+ * JVM port of `normalize_he_v5.py`, the normalization paired with the v5 embedding model:
+ * removes HTML tags, nikud/teamim and non-Hebrew characters, then folds final (sofit)
+ * letters to their base form (ך→כ ם→מ ן→נ ף→פ ץ→צ). Run it on every text embedded with a
+ * v5 model (indexed lines and queries alike) so vectors stay comparable with a v5-built index.
+ */
+object HebrewV5Normalizer {
+    private val TAG = Regex("<[^>]+>")
+    private val MARKS = Regex("[֑-ׇ]")
+    private val DROP = Regex("[^א-ת0-9\\s.,:;!?()\\[\\]\"'\\-/׳״]")
+    private val WS = Regex("\\s+")
+    // Sofit letter -> regular letter; characters not listed here are kept as they are.
+    private val FINALS = mapOf('ך' to 'כ', 'ם' to 'מ', 'ן' to 'נ', 'ף' to 'פ', 'ץ' to 'צ')
+
+    fun clean(text: String): String {
+        val untagged = Parser.unescapeEntities(TAG.replace(text, " "), false)
+        val filtered = DROP.replace(MARKS.replace(untagged, ""), " ")
+        val collapsed = WS.replace(filtered, " ").trim()
+        val folded = StringBuilder(collapsed.length)
+        collapsed.forEach { ch -> folded.append(FINALS[ch] ?: ch) }
+        return folded.toString()
+    }
+}
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HybridSearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HybridSearchEngine.kt
new file mode 100644
index 00000000..59deb5cf
--- /dev/null
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/HybridSearchEngine.kt
@@ -0,0 +1,187 @@
+package io.github.kdroidfilter.seforimlibrary.search
+
+import co.touchlab.kermit.Logger
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.sync.Mutex
+import kotlinx.coroutines.sync.withLock
+import kotlinx.coroutines.withContext
+import java.nio.file.Files
+import java.nio.file.Path
+
+/**
+ * Hybrid search = lexical (BM25 + MagicDictionary, [LuceneSearchEngine]) fused with
+ * dense semantic search (v5 embedding + [VectorSearcher]) via Reciprocal Rank Fusion.
+ *
+ *   RRF(doc) = Σ 1/(K + rank_in_list)
+ *
+ * Implements [SearchEngine] so the app uses it transparently: [openSession] runs the
+ * fused search (the session pages over the in-memory fused list); all other methods
+ * (facets, snippets, title prefix) delegate to the lexical engine.
+ *
+ * Falls back to pure lexical when: the model/vector index is absent, OR the filters
+ * are not supported by the dense index (categoryFilter / lineIds). Book / base-book
+ * filters ARE supported by both paths.
+ *
+ * Dense-only hits (lines the lexical path missed) are turned into full [LineHit]s via
+ * [resolveLine], supplied by the caller (fetches title + text + snippet by line id).
+ */
+class HybridSearchEngine(
+    private val lexical: LuceneSearchEngine,
+    private val modelDir: Path?,
+    private val indexDir: Path,
+    private val rrfK: Int = 60,
+    private val candidates: Int = 150,
+    private val resolveLine: suspend (lineId: Long, bookId: Long, query: String) -> LineHit?,
+) : SearchEngine {
+
+    private val logger = Logger.withTag("HybridSearch")
+
+    // The embedder (heavy OrtSession) + vector searcher are loaded LAZILY on the first
+    // dense search, off the main thread (see [ensureDense]) — so the first query just
+    // shows the normal search spinner instead of freezing the UI while the model loads.
+    @Volatile private var embedder: SeforimEmbedder? = null
+    @Volatile private var vectorSearcher: VectorSearcher? = null
+    @Volatile private var denseTried = false
+    private val denseMutex = Mutex()
+
+    // Cheap, no-load check: are the model + vector index even present? Decides whether to
+    // take the dense path; the actual loading happens in ensureDense(), called from fuse().
+    private val denseConfigured: Boolean =
+        Files.isDirectory(indexDir) && SeforimEmbedder.isAvailable(modelDir)
+
+    val denseEnabled: Boolean get() = embedder != null && vectorSearcher != null
+
+    /** Load the embedder + vector searcher once, on a background thread. Idempotent. */
+    private suspend fun ensureDense() {
+        if (denseTried) return
+        denseMutex.withLock {
+            if (denseTried) return
+            withContext(Dispatchers.IO) {
+                val emb = SeforimEmbedder.tryLoad(modelDir)
+                val vs = if (emb != null && Files.isDirectory(indexDir)) {
+                    runCatching { VectorSearcher(indexDir) }
+                        .onFailure { logger.w(it) { "could not open vector index $indexDir" } }
+                        .getOrNull()
+                } else null
+                if (vs != null) {
+                    embedder = emb; vectorSearcher = vs
+                    logger.i { "dense ready (vector index $indexDir)" }
+                } else {
+                    runCatching { emb?.close() }
+                    logger.i { "dense unavailable -> lexical only" }
+                }
+            }
+            denseTried = true
+        }
+    }
+
+    override fun openSession(
+        query: String,
+        near: Int,
+        bookFilter: Long?,
+        categoryFilter: Long?,
+        bookIds: Collection<Long>?,
+        lineIds: Collection<Long>?,
+        baseBookOnly: Boolean,
+    ): SearchSession? {
+        if (query.isBlank()) return null
+        // Dense index supports book / base-book filters only. For category/line filters
+        // (or when dense isn't configured), use pure lexical to stay correct. The model
+        // itself is loaded lazily in fuse() — denseConfigured is a cheap no-load check.
+        val denseOk = denseConfigured && categoryFilter == null && lineIds == null
+        if (!denseOk) {
+            return lexical.openSession(query, near, bookFilter, categoryFilter, bookIds, lineIds, baseBookOnly)
+        }
+        val effBookIds = bookIds ?: bookFilter?.let { listOf(it) }
+        return HybridSession(query, near, effBookIds, baseBookOnly)
+    }
+
+    override fun searchBooksByTitlePrefix(query: String, limit: Int): List<Long> =
+        lexical.searchBooksByTitlePrefix(query, limit)
+
+    override fun buildSnippet(rawText: String, query: String, near: Int): String =
+        lexical.buildSnippet(rawText, query, near)
+
+    override fun buildHighlightTerms(query: String): List<String> =
+        lexical.buildHighlightTerms(query)
+
+    override fun computeFacets(
+        query: String, near: Int, bookFilter: Long?, categoryFilter: Long?,
+        bookIds: Collection<Long>?, lineIds: Collection<Long>?, baseBookOnly: Boolean,
+    ): SearchFacets? = lexical.computeFacets(query, near, bookFilter, categoryFilter, bookIds, lineIds, baseBookOnly)
+
+    override fun close() {
+        runCatching { vectorSearcher?.close() }
+        runCatching { embedder?.close() }
+        runCatching { lexical.close() }
+    }
+
+    /** Lexical page + dense KNN, fused by RRF, resolved to full LineHits. */
+    private suspend fun fuse(query: String, near: Int, bookIds: Collection<Long>?, baseOnly: Boolean): List<LineHit> {
+        ensureDense()   // first call loads the model off-main (covered by the search spinner)
+        val lexHits = lexical.openSession(query, near = near, bookIds = bookIds, baseBookOnly = baseOnly)
+            ?.use { it.nextPage(candidates)?.hits ?: emptyList() } ?: emptyList()
+
+        // Dense failed to load -> lexical-only result (still correct, just no semantic recall).
+        val emb = embedder
+        val vs = vectorSearcher
+        if (emb == null || vs == null) return lexHits
+
+        val denseHits = withContext(Dispatchers.Default) {
+            val qVec = emb.embed(query)
+            vs.search(qVec, candidates, baseOnly, bookIds)
+        }
+
+        val rrf = HashMap<Long, Double>()
+        val bookOf = HashMap<Long, Long>()
+        lexHits.forEachIndexed { rank, h ->
+            rrf.merge(h.lineId, 1.0 / (rrfK + rank + 1), Double::plus); bookOf[h.lineId] = h.bookId
+        }
+        denseHits.forEachIndexed { rank, h ->
+            rrf.merge(h.lineId, 1.0 / (rrfK + rank + 1), Double::plus); bookOf.putIfAbsent(h.lineId, h.bookId)
+        }
+        val lexById = lexHits.associateBy { it.lineId }
+        val ordered = rrf.entries.sortedByDescending { it.value }
+        val out = ArrayList<LineHit>(ordered.size)
+        for ((lineId, score) in ordered) {
+            val hit = lexById[lineId] ?: resolveLine(lineId, bookOf[lineId] ?: -1L, query)
+            if (hit != null) out += hit.copy(score = score.toFloat())
+        }
+        return out
+    }
+
+    private inner class HybridSession(
+        private val query: String,
+        private val near: Int,
+        private val bookIds: Collection<Long>?,
+        private val baseOnly: Boolean,
+    ) : SearchSession {
+        private var fused: List<LineHit>? = null
+        private var offset = 0
+
+        override suspend fun nextPage(limit: Int): SearchPage? {
+            val all = fused ?: fuse(query, near, bookIds, baseOnly).also { fused = it }
+            if (offset >= all.size) return null
+            val end = minOf(offset + limit, all.size)
+            val slice = all.subList(offset, end)
+            offset = end
+            return SearchPage(hits = slice, totalHits = all.size.toLong(), isLastPage = offset >= all.size)
+        }
+
+        override fun close() {}
+    }
+
+    companion object {
+        fun create(
+            lexical: LuceneSearchEngine,
+            indexDir: Path,
+            modelDir: Path? = null,
+            resolveLine: suspend (Long, Long, String) -> LineHit?,
+        ): HybridSearchEngine {
+            // ONE index: dense vectors live in the SAME Lucene index as the text
+            // (seforim.db.lucene) — no separate vector index. The embedder + searcher
+            // load lazily on the first dense search (no UI freeze).
+            return HybridSearchEngine(lexical, modelDir, indexDir, resolveLine = resolveLine)
+        }
+    }
+}
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedder.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedder.kt
new file mode 100644
index 00000000..a72738ad
--- /dev/null
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedder.kt
@@ -0,0 +1,146 @@
+package io.github.kdroidfilter.seforimlibrary.search
+
+import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer
+import ai.onnxruntime.OnnxTensor
+import ai.onnxruntime.OrtEnvironment
+import ai.onnxruntime.OrtSession
+import co.touchlab.kermit.Logger
+import java.io.Closeable
+import java.nio.file.Files
+import java.nio.file.Path
+
+/**
+ * Produces L2-normalized 384-d sentence embeddings for Hebrew/Aramaic text on the JVM,
+ * using the v5 model trained in the SeforimEmbedding project (ONNX export).
+ *
+ * The ONNX graph bakes in pooling + projection + L2 normalization, so [embed] returns
+ * a vector ready for a Lucene `KnnFloatVectorField` (cosine). Query text is normalized
+ * with [HebrewV5Normalizer] to match the training distribution.
+ *
+ * Model artifacts (from the SeforimEmbedding release):
+ *  - `seforim-embed-v5-int8.onnx`  (the model)
+ *  - `tokenizer.json`              (the matching tokenizer)
+ * Place both in a directory and point to it via `-DseforimEmbedModelDir=…`,
+ * the `SEFORIM_EMBED_MODEL` env var, or one of the default candidate locations.
+ * If not found, [tryLoad] returns null and dense search is simply disabled.
+ */
+class SeforimEmbedder private constructor(
+    onnxModel: Path,
+    tokenizerJson: Path,
+    private val maxLen: Int = 128,
+) : Closeable {
+
+    /** Length of the vectors returned by [embed] (the v5 model is 384-dimensional). */
+    val dim: Int = 384
+
+    private val tokenizer: HuggingFaceTokenizer = HuggingFaceTokenizer.newInstance(tokenizerJson)
+    private val env: OrtEnvironment = OrtEnvironment.getEnvironment()
+    private val session: OrtSession = openSession(env, onnxModel)
+
+    // The stock Maven `onnxruntime` artifact is CPU-only on desktop JVM. Hardware EPs only
+    // work if a capable native is bundled: CUDA (swap to the onnxruntime_gpu artifact +
+    // system CUDA/cuDNN, NVIDIA), or DirectML/CoreML (custom ORT build). They're opt-in via
+    // -DseforimEmbedGpu=true / SEFORIM_EMBED_GPU=1 (`1` is matched explicitly: "1".toBoolean()
+    // is false); otherwise an optimized CPU session (full graph opt + all cores). int8 is CPU-friendly.
+    private fun openSession(env: OrtEnvironment, model: Path): OrtSession {
+        val wantGpu = listOf<String?>(System.getProperty("seforimEmbedGpu"), System.getenv("SEFORIM_EMBED_GPU"))
+            .any { flag -> flag == "1" || flag.toBoolean() }
+        if (wantGpu) {
+            val eps = listOf<Pair<String, (OrtSession.SessionOptions) -> Unit>>(
+                "CUDA" to { it.addCUDA(0) },
+                "DirectML" to { it.addDirectML(0) },
+                "CoreML" to { it.addCoreML() },
+            )
+            for ((name, add) in eps) {
+                val opts = OrtSession.SessionOptions()
+                runCatching {
+                    opts.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.ALL_OPT)
+                    add(opts)
+                    return env.createSession(model.toString(), opts).also { logger.i { "ONNX EP: $name (GPU)" } }
+                }.onFailure { opts.close(); logger.d(it) { "ONNX EP $name unavailable" } } // free options, try next EP
+            }
+            logger.i { "No GPU EP available in this build; using CPU" }
+        }
+        val o = OrtSession.SessionOptions().apply {
+            setOptimizationLevel(OrtSession.SessionOptions.OptLevel.ALL_OPT)
+            runCatching { setIntraOpNumThreads(Runtime.getRuntime().availableProcessors()) }
+        }
+        return env.createSession(model.toString(), o)
+    }
+
+    // v5 models were trained on final-folded text; queries must be normalized
+    // the same way so query and indexed vectors stay comparable.
+    private val normalize: (String) -> String = HebrewV5Normalizer::clean
+
+    /** Embed a single text (normalized like the corpus, cut to [maxLen] tokens) into a normalized float[dim]. */
+    fun embed(text: String): FloatArray {
+        val enc = tokenizer.encode(normalize(text))
+        var ids = enc.ids
+        var mask = enc.attentionMask
+        if (ids.size > maxLen) {
+            ids = ids.copyOf(maxLen)
+            mask = mask.copyOf(maxLen)
+        }
+        OnnxTensor.createTensor(env, arrayOf(ids)).use { idsT ->
+            OnnxTensor.createTensor(env, arrayOf(mask)).use { maskT ->
+                session.run(mapOf("input_ids" to idsT, "attention_mask" to maskT)).use { res ->
+                    @Suppress("UNCHECKED_CAST")
+                    return (res[0].value as Array<FloatArray>)[0]
+                }
+            }
+        }
+    }
+
+    override fun close() {
+        session.close()
+        env.close()
+        tokenizer.close()
+    }
+
+    companion object {
+        private val logger = Logger.withTag("SeforimEmbedder")
+
+        /** Directories probed for the model, in priority order; the last is a developer-checkout default. */
+        private fun candidateDirs(explicitDir: Path?): List<Path> = listOfNotNull(
+            explicitDir,
+            System.getProperty("seforimEmbedModelDir")?.let { Path.of(it) },
+            System.getenv("SEFORIM_EMBED_MODEL")?.let { Path.of(it) },
+            Path.of(System.getProperty("user.home"), "IdeaProjects/SeforimEmbedding/artifacts"),
+        ).distinct()
+
+        /** Cheap presence check (model + tokenizer files) WITHOUT creating the heavy
+         *  OrtSession — lets callers decide to take the dense path then load lazily. */
+        fun isAvailable(explicitDir: Path? = null): Boolean =
+            candidateDirs(explicitDir).any { findOnnx(it) != null && findTokenizer(it) != null }
+
+        /** Load an embedder from the first candidate directory that has both files AND loads
+         *  successfully; null if none does (dense search then degrades to lexical-only). */
+        fun tryLoad(explicitDir: Path? = null): SeforimEmbedder? {
+            val dirs = candidateDirs(explicitDir)
+            for (dir in dirs) {
+                val onnx = findOnnx(dir) ?: continue
+                val tok = findTokenizer(dir) ?: continue
+                runCatching {
+                    logger.i { "Loading dense embedder: onnx=$onnx tokenizer=$tok" }
+                    SeforimEmbedder(onnx, tok)
+                }.onFailure { logger.w(it) { "Failed to load embedder from $dir" } }.getOrNull()?.let { return it }
+            }
+            logger.i { "No usable embedding model found; dense search disabled. Checked: $dirs" }
+            return null
+        }
+
+        // Prefer the newest model and the int8-quantized variant (4x smaller, ~3x
+        // faster CPU embedding) when present. NOTE: [embed] always applies the v5
+        // normalization (final-letter folding), also when a v4 fallback file is picked.
+        private fun findOnnx(dir: Path): Path? = listOf(
+            "seforim-embed-v5-int8.onnx", "seforim-embed-v5.onnx",
+            "seforim-embed-v4-int8.onnx", "model.onnx", "seforim-embed-v4.onnx",
+        ).map { dir.resolve(it) }.firstOrNull { Files.isRegularFile(it) }
+
+        private fun findTokenizer(dir: Path): Path? = listOf(
+            dir.resolve("tokenizer.json"),
+            dir.resolve("tokenizer_v4/tokenizer.json"),
+            dir.resolve("model_v4_phase2a/tokenizer.json"),
+        ).firstOrNull { Files.isRegularFile(it) }
+    }
+}
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/VectorSearcher.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/VectorSearcher.kt
new file mode 100644
index 00000000..3868c8c0
--- /dev/null
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/VectorSearcher.kt
@@ -0,0 +1,59 @@
+package io.github.kdroidfilter.seforimlibrary.search
+
+import org.apache.lucene.document.IntPoint
+import org.apache.lucene.index.DirectoryReader
+import org.apache.lucene.search.BooleanClause
+import org.apache.lucene.search.BooleanQuery
+import org.apache.lucene.search.IndexSearcher
+import org.apache.lucene.search.KnnFloatVectorQuery
+import org.apache.lucene.search.Query
+import org.apache.lucene.store.FSDirectory
+import java.io.Closeable
+import java.nio.file.Path
+
+/** A dense (semantic) hit from the fused Lucene index: matched line, its book, and the KNN score. */
+data class DenseHit(val lineId: Long, val bookId: Long, val score: Float)
+
+/**
+ * Runs a filtered KNN query over the SINGLE fused Lucene index (text fields +
+ * dense `KnnFloatVectorField` per line, built by the generator's text index
+ * writer). Filters (base-books / specific books) are applied as a pre-filter so
+ * the KNN only considers eligible documents.
+ *
+ * Returns line ids that are joined back to the DB by the caller.
+ */
+class VectorSearcher(indexDir: Path) : Closeable {
+    private val dir = FSDirectory.open(indexDir)
+
+    private fun filterQuery(baseBookOnly: Boolean, bookIds: Collection<Long>?): Query? {
+        if (!baseBookOnly && bookIds.isNullOrEmpty()) return null
+        val b = BooleanQuery.Builder()
+        // Field name matches the fused text index (LuceneTextIndexWriter.FIELD_IS_BASE_BOOK).
+        if (baseBookOnly) b.add(IntPoint.newExactQuery("is_base_book", 1), BooleanClause.Occur.FILTER)
+        if (!bookIds.isNullOrEmpty()) {
+            // `book_id` is queried as an IntPoint: ids outside Int range can never match, and a
+            // bare toInt() would wrap them onto an unrelated book, so they are dropped instead.
+            val ids = bookIds.mapNotNull { id -> id.toInt().takeIf { it.toLong() == id } }
+            b.add(IntPoint.newSetQuery("book_id", *ids.toIntArray()), BooleanClause.Occur.FILTER)
+        }
+        return b.build()
+    }
+
+    /** Runs the (optionally pre-filtered) KNN query for [query]; a fresh index reader is opened per call. */
+    fun search(query: FloatArray, k: Int, baseBookOnly: Boolean = false, bookIds: Collection<Long>? = null): List<DenseHit> {
+        DirectoryReader.open(dir).use { reader ->
+            val searcher = IndexSearcher(reader)
+            val knn = KnnFloatVectorQuery("vec", query, k, filterQuery(baseBookOnly, bookIds))
+            val top = searcher.search(knn, k)
+            val stored = searcher.storedFields()
+            return top.scoreDocs.map { sd ->
+                val doc = stored.document(sd.doc)
+                val lineId = doc.getField("line_id").numericValue().toLong()
+                val bookId = doc.getField("book_id").numericValue().toLong()
+                DenseHit(lineId, bookId, sd.score)
+            }
+        }
+    }
+
+    override fun close() = dir.close()
+}
diff --git a/search/src/jvmTest/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedderTest.kt b/search/src/jvmTest/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedderTest.kt
new file mode 100644
index 00000000..5aa05a60
--- /dev/null
+++ b/search/src/jvmTest/kotlin/io/github/kdroidfilter/seforimlibrary/search/SeforimEmbedderTest.kt
@@ -0,0 +1,39 @@
+package io.github.kdroidfilter.seforimlibrary.search
+
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertTrue
+
+class SeforimEmbedderTest {
+
+    /** Plain dot product — equal to the cosine here, since the embedder emits L2-normalized vectors. */
+    private fun cos(a: FloatArray, b: FloatArray): Float =
+        a.indices.fold(0f) { acc, i -> acc + a[i] * b[i] }
+
+    @Test
+    fun embedderLoadsAndProducesUsableVectors() {
+        // Without model artifacts on this machine there is nothing to check: report it and
+        // return, which leaves the test passing.
+        val embedder = SeforimEmbedder.tryLoad() ?: run {
+            println("[skip] no embedding model found (set -DseforimEmbedModelDir) — dense search disabled")
+            return
+        }
+        embedder.use { e ->
+            val question = "מה מברכים על אוכל"
+            val q = e.embed(question)
+            assertEquals(384, q.size, "embedding dim")
+
+            // deterministic + normalized: embedding the same text again -> cosine ~1
+            val repeat = e.embed(question)
+            assertTrue(cos(q, repeat) > 0.999f, "self-cosine should be ~1.0")
+
+            // sanity: a topically related text should score higher than an unrelated one
+            val related = e.embed("ברכת הנהנין על פירות וירקות")
+            val unrelated = e.embed("הלכות טומאה וטהרה של כלים")
+            val simRelated = cos(q, related)
+            val simUnrelated = cos(q, unrelated)
+            println("[embedder] cos(related)=$simRelated  cos(unrelated)=$simUnrelated")
+            assertTrue(simRelated > simUnrelated, "related ($simRelated) should be closer than unrelated ($simUnrelated)")
+        }
+    }
+}