codebridger · navidshad · Jun 8, 2026 · Jun 8, 2026
diff --git a/server/src/modules/translation/__tests__/chunk-filter.test.ts b/server/src/modules/translation/__tests__/chunk-filter.test.ts
@@ -0,0 +1,63 @@
+import { describe, it, expect } from "@jest/globals";
+import { filterChunksToSelection } from "../chunk-filter";
+
+describe("filterChunksToSelection", () => {
+  it("drops chunks taken from the context, keeping only those in the marked text", () => {
+    // Regression for the reported failure: the model also answered with
+    // "related to" and "based on", which occur only in the surrounding context.
+    const marked = "improvements are included";
+    const inside = { text: "improvements are included", type: "other" };
+    const outside = [
+      { text: "related to", type: "collocation" },
+      { text: "based on", type: "collocation" },
+    ];
+
+    const kept = filterChunksToSelection([inside, ...outside], marked);
+    expect(kept).toEqual([
+      { text: "improvements are included", type: "other" },
+    ]);
+  });
+
+  it("keeps a chunk that appears verbatim inside a longer selection", () => {
+    const marked = "they finally decided to give up on the project";
+    const phrasalVerb = { text: "give up on", type: "phrasal_verb" };
+    // "in the end" never occurs in the selection, so it must be filtered out.
+    const marker = { text: "in the end", type: "discourse_marker" };
+
+    const kept = filterChunksToSelection([phrasalVerb, marker], marked);
+
+    expect(kept).toEqual([{ text: "give up on", type: "phrasal_verb" }]);
+  });
+
+  it("matches case- and whitespace-insensitively", () => {
+    const marked = "In Fact, the\nresults were good";
+    // Differs from the selection only by letter case.
+    const lowerCased = { text: "in fact" };
+    // Uses a run of spaces where the selection has a newline.
+    const respaced = { text: "the   results" };
+
+    const kept = filterChunksToSelection([lowerCased, respaced], marked);
+
+    // Survivors are returned as given, not in their normalised form.
+    expect(kept).toEqual([{ text: "in fact" }, { text: "the   results" }]);
+  });
+
+  it("drops chunks with empty or whitespace-only text", () => {
+    const marked = "a perfectly normal sentence";
+    const blanks = [{ text: "" }, { text: "   " }];
+    const word = { text: "normal" };
+
+    const kept = filterChunksToSelection([...blanks, word], marked);
+    expect(kept).toEqual([{ text: "normal" }]);
+  });
+
+  it("returns an empty array when the selection is empty", () => {
+    expect(filterChunksToSelection([{ text: "anything" }], "")).toEqual([]);
+  });
+
+  it("returns an empty array for missing or non-array chunks", () => {
+    for (const missing of [undefined, null]) {
+      expect(filterChunksToSelection(missing, "some phrase")).toEqual([]);
+    }
+  });
+});
diff --git a/server/src/modules/translation/chunk-filter.ts b/server/src/modules/translation/chunk-filter.ts
@@ -0,0 +1,41 @@
+/**
+ * Normalise text for the forgiving "does this appear in the selection?" check.
+ * Applies Unicode NFC (so composed and decomposed accents compare equal),
+ * lower-cases, collapses every whitespace run to one space, and trims.
+ * @param text Raw selection or chunk text.
+ * @returns The canonical form of `text` to compare against.
+ */
+function normaliseForMatch(text: string): string {
+  return text.normalize("NFC").toLowerCase().replace(/\s+/g, " ").trim();
+}
+
+/**
+ * Guardrail: discard every chunk whose `text` is not found inside the user's
+ * marked selection (`phrase`).
+ *
+ * The detailed-translation and advice models sometimes return patterns lifted
+ * from the surrounding `context` instead of the marked text — e.g. "related
+ * to" and "based on" for the selection "improvements are included". Filtering
+ * here enforces "chunks come from the marked text only" deterministically.
+ * Both sides go through `normaliseForMatch`, so case and whitespace are
+ * ignored; otherwise it is a verbatim substring test (no punctuation stripping).
+ *
+ * @param chunks Candidate chunks; anything that is not an array yields `[]`.
+ * @param phrase The marked selection; a non-string or blank value yields `[]`.
+ * @returns The matching chunks, unmodified and in their original order.
+ */
+export function filterChunksToSelection<T extends { text: string }>(
+  chunks: T[] | undefined | null,
+  phrase: string
+): T[] {
+  if (!Array.isArray(chunks)) return [];
+  const haystack = normaliseForMatch(typeof phrase === "string" ? phrase : "");
+  if (haystack === "") return [];
+
+  const isInSelection = (candidate: T): boolean => {
+    if (typeof candidate?.text !== "string") return false;
+    const needle = normaliseForMatch(candidate.text);
+    return needle !== "" && haystack.includes(needle);
+  };
+  return chunks.filter(isInSelection);
+}
diff --git a/server/src/modules/translation/service.ts b/server/src/modules/translation/service.ts
@@ -9,6 +9,7 @@ import {
   LanguageLearningDataSchema,
   TranslationAdviceSchema,
 } from "./schema";
+import { filterChunksToSelection } from "./chunk-filter";
 import { TRANSLATION_MODELS } from "../../utils/openrouter-models";
 
 /**
@@ -81,8 +82,8 @@ export async function getDetailedTranslation({
 
   Phonetic transliteration: spell out how to pronounce the SOURCE-language "phrase" itself (${sourceLanguage} -> read by a ${targetLanguage} speaker), written using the ${targetLanguage} alphabet. Do NOT transliterate the translation. For long selections (~5 words or more), return an empty string for the top-level transliteration and rely on the per-chunk transliterations instead.
 
-  Chunks: inside the user's selection ("phrase"), find the reusable language patterns worth learning (collocations, phrasal verbs, idioms, discourse markers).
-  Rules: at most one chunk per 5-8 words of the selection, hard ceiling of 2 chunks. Each chunk's "text" must appear verbatim inside the selection. For each chunk, also provide: "transliteration" (how to pronounce that chunk, source language, in the ${targetLanguage} alphabet) and "definition" (a short, self-contained explanation of that chunk's meaning and usage, 1-2 sentences, in ${targetLanguage}).
+  Chunks: look ONLY inside the user's selection ("phrase") and find the reusable language patterns worth learning there (collocations, phrasal verbs, idioms, discourse markers). The "context" is provided ONLY to disambiguate the selection's meaning — NEVER take a chunk from the context. A pattern is a valid chunk only if its exact words appear inside the selection itself.
+  Rules: at most one chunk per 5-8 words of the selection, hard ceiling of 2 chunks. Each chunk's "text" must appear verbatim inside the selection (not merely inside the context). For each chunk, also provide: "transliteration" (how to pronounce that chunk, source language, in the ${targetLanguage} alphabet) and "definition" (a short, self-contained explanation of that chunk's meaning and usage, 1-2 sentences, in ${targetLanguage}).
   Return an empty "chunks" array when the selection is under ~5 words, or when the selection is written in a different language than the target learning language.`;
 
   const userPrompt = `
@@ -117,6 +118,11 @@ export async function getDetailedTranslation({
         strict: true,
       });
 
+    // Guardrail: the model occasionally pulls chunks from the surrounding
+    // context instead of the marked text. Chunks must come from the selection
+    // only, so drop any whose text does not appear inside the phrase.
+    result.chunks = filterChunksToSelection(result.chunks, phrase);
+
     // Result is already validated by Zod
     return result;
   } catch (error: unknown) {
@@ -149,7 +155,7 @@ export async function getTranslationAdvice({
 
   Your MAIN job is to answer the user's questions about this phrase: meaning, grammar, usage, nuance, examples, differences between words, etc. Put your answer in "reply".
 
-  Editing the highlighted patterns is a SECONDARY ability. Only return a "chunks" array when the user EXPLICITLY asks to add, remove, or change which patterns are highlighted (e.g. "highlight X", "remove that", "don't include Y"). When you do, each chunk's "text" must appear verbatim in the selection, include its "transliteration" (pronunciation in the ${targetLanguage} alphabet), and you may also add a short "reply" explaining the change.
+  Editing the highlighted patterns is a SECONDARY ability. Only return a "chunks" array when the user EXPLICITLY asks to add, remove, or change which patterns are highlighted (e.g. "highlight X", "remove that", "don't include Y"). When you do, each chunk's "text" must appear verbatim in the selection — NEVER take a chunk from the context — include its "transliteration" (pronunciation in the ${targetLanguage} alphabet), and you may also add a short "reply" explaining the change.
   If the user is just asking a question (even one that mentions a phrase, like "what about 'on the'?"), answer it in "reply" and DO NOT change the chunks.`;
 
   // Maintain the conversation: replay prior turns so the model has context.
@@ -180,6 +186,12 @@ export async function getTranslationAdvice({
         strict: true,
       });
 
+    // Same guardrail as the save flow: any chunks the advisor proposes must
+    // come from the marked text, not the surrounding context.
+    if (result.chunks) {
+      result.chunks = filterChunksToSelection(result.chunks, phrase);
+    }
+
     return result;
   } catch (error: unknown) {
     console.error("Translation advice error:", error);