alex-mextner · alex-mextner · Jun 17, 2026 · Jun 17, 2026 · chatgpt-codex-connector · Jun 17, 2026
diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml
@@ -0,0 +1,25 @@
+# CodeQL analysis scope for tg-cli.
+#
+# The security gate analyzes the SHIPPED product surface — the `tg` / `tg-ctl`
+# binaries built from `features/` plus the workflow files. `scripts/` holds
+# one-off developer asset-generators (emoji/sticker set builders) that are run
+# BY HAND, never bundled into the released CLIs, and never process untrusted
+# input. CodeQL's js/file-access-to-http there ("an outbound request URL/body
+# depends on local file data") is true and intentional: they read a bot token /
+# prompt from a local config file and call an HTTP API with it. That is the job,
+# not a vulnerability — and it is out of scope for a gate meant to protect the
+# product.
+#
+# Why a directory `paths-ignore` and not a query-level exclude: `query-filters`
+# match on query METADATA only (id/tags/kind), with no per-path scoping, so
+# excluding `js/file-access-to-http` by id would also blind the gate to that
+# class in shipped `features/` code. Scoping by PATH keeps every query — this one
+# included — fully enforced on the product surface; only the by-hand `scripts/`
+# dir is out of scope. Tradeoff: a FUTURE script is not auto-analyzed; given the
+# dir is three hand-run generators, that is acceptable. Move a script into
+# `features/` (or import it from shipped code) and it is gated again.
+#
+# This is a SCOPE decision (which code the gate guards), not a per-finding
+# suppression of a real bug in shipped code.
+paths-ignore:
+  - scripts
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -117,6 +117,9 @@ jobs:
           languages: ${{ matrix.language }}
           build-mode: ${{ matrix.build-mode }}
           queries: security-extended
+          # Scope analysis to the shipped product surface; `scripts/` holds
+          # by-hand dev asset-generators, not gated code (see the config file).
+          config-file: ./.github/codeql/codeql-config.yml
 
       - name: Perform CodeQL Analysis
         if: steps.detect.outputs.has_source == 'true'

diff --git a/features/auto-attach/transmitter.ts b/features/auto-attach/transmitter.ts
@@ -13,6 +13,7 @@
 // feature is OFF, because they are not part of the feature — they are correct
 // behavior for any send.
 
+import { decodeHtmlEntities, stripHtmlTags } from '../render/html';
 import { isRichHtml, normalizeRichHtml, validateRichHtml } from '../render/rich';
 import { splitMessage } from './split';
 import { CAPTION_LIMIT, MESSAGE_LIMIT, type Format, type SendItem, type SendPlan } from './types';
@@ -48,14 +49,14 @@ export interface Transport {
 // not a full HTML length per Telegram's exact rules.
 export function visibleLength(text: string, format: Format): number {
   if (format !== 'html') return text.length;
-  const noTags = text.replace(/<[^>]+>/g, '');
-  const unescaped = noTags
-    .replace(/&lt;/g, '<')
-    .replace(/&gt;/g, '>')
-    .replace(/&amp;/g, '&')
-    .replace(/&quot;/g, '"')
-    .replace(/&#39;/g, "'");
-  return unescaped.length;
+  // Strip tags first, then decode entities in a single pass (stripHtmlTags /
+  // decodeHtmlEntities in render/html). This replaces a `/<[^>]+>/g` strip (which
+  // left a dangling `<script` behind — js/incomplete-multi-character-sanitization)
+  // followed by a chain of `.replace()` entity decodes. The chained decode trips
+  // js/double-escaping: an `&amp;`→`&` step alongside `&lt;`→`<` etc. can feed one
+  // decode's output into another (e.g. `&amp;lt;`→`&lt;`→`<`). The single-pass
+  // decoder consumes each entity exactly once, so no replacement chains.
+  return decodeHtmlEntities(stripHtmlTags(text)).length;
 }
 
 // Can the single text message ride as a media caption? Only when there is

diff --git a/features/install-skill/install.ts b/features/install-skill/install.ts
@@ -11,6 +11,21 @@
 import { existsSync, mkdirSync, readFileSync, writeFileSync, symlinkSync } from 'fs';
 import { homedir } from 'os';
 import { dirname, join } from 'path';
+import { escapeRegExp } from '../util/regex';
+
+// Read a UTF-8 file, returning undefined when it does not exist (ENOENT) and
+// re-throwing any other error. Replaces an `existsSync(p) ? readFileSync(p) : …`
+// pair: that check-then-read on the same path is a TOCTOU race (js/file-system-
+// race) because the file can vanish or be swapped between the two syscalls. A
+// single read that catches ENOENT is atomic — there is no window to race.
+function readTextIfExists(path: string): string | undefined {
+  try {
+    return readFileSync(path, 'utf8');
+  } catch (e) {
+    if ((e as NodeJS.ErrnoException).code === 'ENOENT') return undefined;
+    throw e;
+  }
+}
 
 // Prefer $HOME (what `tg` itself uses) over os.homedir(), which under Bun reads
 // getpwuid and ignores $HOME — so this stays consistent and unit-testable.
@@ -203,16 +218,12 @@ function detected(cmd: string, ...dirs: string[]): boolean {
   return dirs.some((d) => existsSync(d.startsWith('~/') ? join(home, d.slice(2)) : d));
 }
 
-function escapeRegex(s: string): string {
-  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-}
-
 function appendMarked(path: string, tool: string, blurb: string): void {
   mkdirSync(dirname(path), { recursive: true });
   const start = `<!-- skill:${tool} -->`;
   const end = `<!-- /skill:${tool} -->`;
-  let existing = existsSync(path) ? readFileSync(path, 'utf8') : '';
-  const re = new RegExp(escapeRegex(start) + '[\\s\\S]*?' + escapeRegex(end) + '\\n?', 'g');
+  let existing = readTextIfExists(path) ?? '';
+  const re = new RegExp(escapeRegExp(start) + '[\\s\\S]*?' + escapeRegExp(end) + '\\n?', 'g');
   existing = existing.replace(re, '');
   const block = `${start}\n${blurb}\n${end}\n`;
   writeFileSync(path, existing.trim() ? existing.replace(/\s+$/, '') + '\n\n' + block : block);
@@ -223,9 +234,14 @@ function appendMarked(path: string, tool: string, blurb: string): void {
 function ensureSessionStartHook(home: string): boolean {
   const settingsPath = join(home, '.claude', 'settings.json');
   if (!existsSync(join(home, '.claude'))) return false;
+  // Read the existing settings ONCE into memory, then parse from and back up
+  // from that same in-memory copy. A separate existsSync()/readFileSync() per
+  // use would re-stat the path each time, racing with concurrent writers
+  // (js/file-system-race); a single read snapshots the bytes we then act on.
+  const original = readTextIfExists(settingsPath);
   let data: any;
   try {
-    data = existsSync(settingsPath) ? JSON.parse(readFileSync(settingsPath, 'utf8')) : {};
+    data = original !== undefined ? JSON.parse(original) : {};
   } catch {
     return false; // don't clobber a file we can't parse
   }
@@ -244,7 +260,10 @@ function ensureSessionStartHook(home: string): boolean {
     }
   }
   sessionStart.push({ hooks: [{ type: 'command', command: HOOK_COMMAND }] });
-  if (existsSync(settingsPath)) writeFileSync(settingsPath + '.bak', readFileSync(settingsPath));
+  // Back up the snapshot we already read (not a fresh read of the path) before
+  // overwriting, so the .bak is exactly what we parsed and there is no second
+  // check-then-read race.
+  if (original !== undefined) writeFileSync(settingsPath + '.bak', original);
   writeFileSync(settingsPath, JSON.stringify(data, null, 2) + '\n');
   return true;
 }

diff --git a/features/render/html.ts b/features/render/html.ts
@@ -18,6 +18,38 @@ export function escapeHtml(s: string): string {
   return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
 }
 
+// Strip HTML tags for VISIBLE-length / text-content purposes (not a security
+// sanitizer — output is never re-inserted into a page). One pass over the
+// string removes every `<...>` tag AND any trailing TAG-LIKE fragment that was
+// cut off before its closing `>` (e.g. a truncated `<script` or `</div`). A
+// naive `/<[^>]+>/g` leaves that dangling fragment behind, which both
+// over-counts text length and is what CodeQL flags as incomplete multi-
+// character sanitization. The trailing branch only fires on `<` immediately
+// followed by a tag-name start (letter), `/`, or `!` — so a lone literal `<`
+// inside ordinary text (e.g. `a < b`) is preserved and not greedily eaten to
+// end-of-string. Single pass, deterministic.
+export function stripHtmlTags(s: string): string {
+  return s.replace(/<[^>]*>|<[/!A-Za-z][^>]*$/g, "")
+}
+
+// Decode the small set of HTML entities `tg` itself emits (via escapeHtml plus
+// the documented quote/apostrophe forms), in a SINGLE pass. Decoding `&amp;`
+// first and `&lt;` second — the obvious chained-replace — double-unescapes
+// `&amp;lt;` into `<` (CodeQL js/double-escaping). A single regex with a lookup
+// table consumes each entity exactly once, left to right, so no replacement can
+// feed the next one. Unknown entities are left verbatim.
+const HTML_ENTITIES: Record<string, string> = {
+  "&amp;": "&",
+  "&lt;": "<",
+  "&gt;": ">",
+  "&quot;": '"',
+  "&#39;": "'",
+  "&apos;": "'",
+}
+export function decodeHtmlEntities(s: string): string {
+  return s.replace(/&(?:amp|lt|gt|quot|apos|#39);/g, (m) => HTML_ENTITIES[m] ?? m)
+}
+
 export function detectHtmlTags(text: string): boolean {
   const htmlPattern = /<(b|strong|i|em|u|ins|s|strike|del|span|tg-spoiler|a|tg-emoji|tg-time|code|pre|blockquote)\b/i
   return htmlPattern.test(text)

diff --git a/features/render/rich.ts b/features/render/rich.ts
@@ -17,6 +17,9 @@
 // text messages go rich vs. plain, and that a rich message is NOT 4096-split
 // and never rides as a media caption) lives in the transmitter.
 
+import { stripHtmlTags } from './html';
+import { escapeRegExp } from '../util/regex';
+
 // Rich-ONLY tags: present in the rich-html-style allowlist but NOT in the basic
 // sendMessage allowlist (features/render/html.ts detectHtmlTags). Seeing any of
 // these in an HTML body means the message must go via sendRichMessage. The
@@ -84,7 +87,7 @@ const RICH_TAG_RE = new RegExp(
   '</?(?:' +
     [...RICH_ONLY_TAGS]
       .sort((a, b) => b.length - a.length)
-      .map((t) => t.replace(/[-]/g, '\\-'))
+      .map((t) => escapeRegExp(t))
       .join('|') +
     ')(?=[\\s/>]|$)',
   'i',
@@ -210,6 +213,12 @@ const VOID_TAGS = new Set(['img', 'hr', 'br', 'input', 'tg-map', 'tg-emoji']);
 // left as-is.
 const NAMED_ENTITY_RE = /&(?:lt|gt|amp|quot|apos|nbsp|hellip|mdash|ndash|lsquo|rsquo|ldquo|rdquo);/g;
 const NUMERIC_ENTITY_RE = /&#(?:[0-9]+|x[0-9a-fA-F]+);/g;
+// NOT a js/double-escaping chain (unlike the decode this PR replaced elsewhere):
+// both replaces map an entity to the SAME placeholder ``, which contains no `&`,
+// so the second pass can never re-process the first's output. The two patterns
+// also match disjoint sets (named vs `&#…;` numeric). It collapses entities for a
+// CHARACTER-COUNT budget only — it must not be turned into decodeHtmlEntities,
+// which decodes to real chars and would change the count.
 function decodeEntitiesForCount(s: string): string {
   return s.replace(NAMED_ENTITY_RE, '').replace(NUMERIC_ENTITY_RE, '');
 }
@@ -241,7 +250,7 @@ export function validateRichHtml(html: string): RichValidation {
   const altText = (html.match(/\balt\s*=\s*"([^"]*)"|alt\s*=\s*'([^']*)'/gi) ?? [])
     .map((a) => a.replace(/^[^=]*=\s*["']?/, '').replace(/["']$/, ''))
     .join('');
-  const text = decodeEntitiesForCount(html.replace(/<[^>]+>/g, '') + altText);
+  const text = decodeEntitiesForCount(stripHtmlTags(html) + altText);
   const chars = [...text].length;
   if (chars > RICH_LIMITS.maxChars) {
     return {

diff --git a/features/transport/telegram.ts b/features/transport/telegram.ts
@@ -8,7 +8,7 @@
 // exactly the transmitter's Transport (sendMessage/sendPhoto/sendDocument/
 // sendMediaGroup); the transmitter drives caption-overflow, the >4096 split and
 // the photos→text→documents ordering on top of it.
-import { readFileSync, statSync } from 'fs';
+import { closeSync, fstatSync, openSync, readSync } from 'fs';
 import { BOM_MAX_BYTES, maybeAddBom } from '../auto-attach/encoding';
 import { type Transport } from '../auto-attach/transmitter';
 import { type SendItem } from '../auto-attach/types';
@@ -50,6 +50,48 @@ export async function checkResponse(resp: Response, method: string): Promise<unk
   return json.result;
 }
 
+// Read a file into memory ONLY when it is at most `maxBytes`, using a single
+// file descriptor for the size check and the read. Opening once and calling
+// fstat + read on that SAME fd closes the check-then-read race (js/file-system-
+// race): a path-based statSync()+readFileSync() pair can observe two different
+// inodes if the path is swapped in between, whereas an fd is pinned to the inode
+// it opened. Returns the bytes, or null when the file is unreadable, empty, or
+// larger than the cap (the caller then streams it via Bun.file unchanged).
+export function readSmallFile(path: string, maxBytes: number): Uint8Array | null {
+  let fd: number | undefined;
+  try {
+    fd = openSync(path, 'r');
+    const size = fstatSync(fd).size;
+    if (size <= 0 || size > maxBytes) return null;
+    const buf = Buffer.allocUnsafe(size);
+    let read = 0;
+    while (read < size) {
+      const n = readSync(fd, buf, read, size - read, read);
+      if (n === 0) break; // file shrank under us mid-read
+      read += n;
+    }
+    // A short read means the file changed beneath us; bail to null so the caller
+    // streams the file via Bun.file unchanged rather than stitching a BOM onto a
+    // truncated body (which would defeat the very encoding fix this serves).
+    if (read !== size) return null;
+    // Guard the other direction too: if the file GREW after the initial fstat we
+    // would have read only a prefix while still satisfying read === size. Re-stat
+    // the same fd and reject a size change so we never BOM a partial body.
+    if (fstatSync(fd).size !== size) return null;
+    return new Uint8Array(buf.subarray(0, read));
+  } catch {
+    return null; // unreadable here → let Bun.file surface the real error downstream
+  } finally {
+    if (fd !== undefined) {
+      try {
+        closeSync(fd);
+      } catch {
+        // already closed / invalid fd — nothing to do
+      }
+    }
+  }
+}
+
 // Upload bytes for an item. Text DOCUMENTS with non-ASCII UTF-8 content get
 // a BOM prepended to the uploaded copy (features/auto-attach/encoding.ts) so
 // Telegram's preview stops guessing legacy codepages for Cyrillic; the file
@@ -61,18 +103,13 @@ function blobFor(item: SendItem): { body: Blob | ReturnType<typeof Bun.file>; na
     return { body: new Blob([processed]), name: item.source.filename };
   }
   if (item.type === 'document') {
-    try {
-      const size = statSync(item.source.path).size;
-      if (size > 0 && size <= BOM_MAX_BYTES) {
-        const bytes = new Uint8Array(readFileSync(item.source.path));
-        const processed = maybeAddBom(bytes, item.source.path);
-        if (processed !== bytes) {
-          const name = item.source.path.slice(item.source.path.lastIndexOf('/') + 1);
-          return { body: new Blob([processed]), name };
-        }
+    const bytes = readSmallFile(item.source.path, BOM_MAX_BYTES);
+    if (bytes && bytes.length > 0) {
+      const processed = maybeAddBom(bytes, item.source.path);
+      if (processed !== bytes) {
+        const name = item.source.path.slice(item.source.path.lastIndexOf('/') + 1);
+        return { body: new Blob([processed]), name };
       }
-    } catch {
-      // unreadable here → let Bun.file surface the real error downstream
     }
   }
   return { body: Bun.file(item.source.path) };

diff --git a/features/util/regex.ts b/features/util/regex.ts
@@ -0,0 +1,12 @@
+// --- Regex utilities (pure) ---
+//
+// escapeRegExp turns an arbitrary string into a literal that is safe to splice
+// into a `new RegExp(...)` source. It escapes the FULL set of ECMAScript regex
+// metacharacters — not just one or two — so building a pattern from external or
+// structured tokens can never let a stray `.`, `\`, `(`, `[`, etc. change the
+// pattern's meaning. A partial escaper (e.g. one that handles `-` but leaves
+// `\` untouched) is what CodeQL flags as js/incomplete-sanitization; this is the
+// single, complete source of truth both render/rich and install-skill use.
+export function escapeRegExp(s: string): string {
+  return s.replace(/[.*+?^${}()|[\]\\-]/g, "\\$&");
+}