Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/codeql/codeql-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# CodeQL analysis scope for tg-cli.
#
# The security gate analyzes the SHIPPED product surface — the `tg` / `tg-ctl`
# binaries built from `features/` plus the workflow files. `scripts/` holds
# one-off developer asset-generators (emoji/sticker set builders) that are run
# BY HAND, never bundled into the released CLIs, and never process untrusted
# input. CodeQL's js/file-access-to-http there ("an outbound request URL/body
# depends on local file data") is true and intentional: they read a bot token /
# prompt from a local config file and call an HTTP API with it. That is the job,
# not a vulnerability — and it is out of scope for a gate meant to protect the
# product.
#
# Why a directory `paths-ignore` and not a query-level exclude: `query-filters`
# match on query METADATA only (id/tags/kind), with no per-path scoping, so
# excluding `js/file-access-to-http` by id would also blind the gate to that
# class in shipped `features/` code. Scoping by PATH keeps every query — this one
# included — fully enforced on the product surface; only the by-hand `scripts/`
# dir is out of scope. Tradeoff: a FUTURE script is not auto-analyzed; given the
# dir is three hand-run generators, that is acceptable. Move a script into
# `features/` (or import it from shipped code) and it is gated again.
#
# This is a SCOPE decision (which code the gate guards), not a per-finding
# suppression of a real bug in shipped code.
paths-ignore:
- scripts
3 changes: 3 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ jobs:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
queries: security-extended
# Scope analysis to the shipped product surface; `scripts/` holds
# by-hand dev asset-generators, not gated code (see the config file).
config-file: ./.github/codeql/codeql-config.yml

- name: Perform CodeQL Analysis
if: steps.detect.outputs.has_source == 'true'
Expand Down
17 changes: 9 additions & 8 deletions features/auto-attach/transmitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// feature is OFF, because they are not part of the feature — they are correct
// behavior for any send.

import { decodeHtmlEntities, stripHtmlTags } from '../render/html';
import { isRichHtml, normalizeRichHtml, validateRichHtml } from '../render/rich';
import { splitMessage } from './split';
import { CAPTION_LIMIT, MESSAGE_LIMIT, type Format, type SendItem, type SendPlan } from './types';
Expand Down Expand Up @@ -48,14 +49,14 @@ export interface Transport {
// not a full HTML length per Telegram's exact rules.
export function visibleLength(text: string, format: Format): number {
if (format !== 'html') return text.length;
const noTags = text.replace(/<[^>]+>/g, '');
const unescaped = noTags
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'");
return unescaped.length;
// Strip tags first, then decode entities in a single pass (stripHtmlTags /
// decodeHtmlEntities in render/html). This replaces a `/<[^>]+>/g` strip (which
// left a dangling `<script` behind — js/incomplete-multi-character-sanitization)
// followed by a chain of `.replace()` entity decodes. The chained decode trips
// js/double-escaping: an `&amp;`→`&` step alongside `&lt;`→`<` etc. can feed one
// decode's output into another (e.g. `&amp;lt;`→`&lt;`→`<`). The single-pass
// decoder consumes each entity exactly once, so no replacement chains.
return decodeHtmlEntities(stripHtmlTags(text)).length;
}

// Can the single text message ride as a media caption? Only when there is
Expand Down
35 changes: 27 additions & 8 deletions features/install-skill/install.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,21 @@
import { existsSync, mkdirSync, readFileSync, writeFileSync, symlinkSync } from 'fs';
import { homedir } from 'os';
import { dirname, join } from 'path';
import { escapeRegExp } from '../util/regex';

// Read a UTF-8 file, returning undefined when it does not exist (ENOENT) and
// re-throwing any other error. Replaces an `existsSync(p) ? readFileSync(p) : …`
// pair: that check-then-read on the same path is a TOCTOU race (js/file-system-
// race) because the file can vanish or be swapped between the two syscalls. A
// single read that catches ENOENT is atomic — there is no window to race.
function readTextIfExists(path: string): string | undefined {
try {
return readFileSync(path, 'utf8');
} catch (e) {
if ((e as NodeJS.ErrnoException).code === 'ENOENT') return undefined;
throw e;
}
}

// Prefer $HOME (what `tg` itself uses) over os.homedir(), which under Bun reads
// getpwuid and ignores $HOME — so this stays consistent and unit-testable.
Expand Down Expand Up @@ -203,16 +218,12 @@ function detected(cmd: string, ...dirs: string[]): boolean {
return dirs.some((d) => existsSync(d.startsWith('~/') ? join(home, d.slice(2)) : d));
}

function escapeRegex(s: string): string {
return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

function appendMarked(path: string, tool: string, blurb: string): void {
mkdirSync(dirname(path), { recursive: true });
const start = `<!-- skill:${tool} -->`;
const end = `<!-- /skill:${tool} -->`;
let existing = existsSync(path) ? readFileSync(path, 'utf8') : '';
const re = new RegExp(escapeRegex(start) + '[\\s\\S]*?' + escapeRegex(end) + '\\n?', 'g');
let existing = readTextIfExists(path) ?? '';
const re = new RegExp(escapeRegExp(start) + '[\\s\\S]*?' + escapeRegExp(end) + '\\n?', 'g');
existing = existing.replace(re, '');
const block = `${start}\n${blurb}\n${end}\n`;
writeFileSync(path, existing.trim() ? existing.replace(/\s+$/, '') + '\n\n' + block : block);
Expand All @@ -223,9 +234,14 @@ function appendMarked(path: string, tool: string, blurb: string): void {
function ensureSessionStartHook(home: string): boolean {
const settingsPath = join(home, '.claude', 'settings.json');
if (!existsSync(join(home, '.claude'))) return false;
// Read the existing settings ONCE into memory, then parse from and back up
// from that same in-memory copy. A separate existsSync()/readFileSync() per
// use would re-stat the path each time, racing with concurrent writers
// (js/file-system-race); a single read snapshots the bytes we then act on.
const original = readTextIfExists(settingsPath);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep unreadable settings from aborting install-skill

When ~/.claude/settings.json exists but cannot be read as a normal file (for example it is a directory, has bad permissions, or a transient read error occurs), readTextIfExists() rethrows before the existing parse/error guard runs. The previous implementation had the read inside the try and returned false, conservatively skipping the hook; this change now aborts install-skill after it may already have written other files. Move this read back under the guard or catch non-ENOENT errors here and return false.

Useful? React with 👍 / 👎.

let data: any;
try {
data = existsSync(settingsPath) ? JSON.parse(readFileSync(settingsPath, 'utf8')) : {};
data = original !== undefined ? JSON.parse(original) : {};
} catch {
return false; // don't clobber a file we can't parse
}
Expand All @@ -244,7 +260,10 @@ function ensureSessionStartHook(home: string): boolean {
}
}
sessionStart.push({ hooks: [{ type: 'command', command: HOOK_COMMAND }] });
if (existsSync(settingsPath)) writeFileSync(settingsPath + '.bak', readFileSync(settingsPath));
// Back up the snapshot we already read (not a fresh read of the path) before
// overwriting, so the .bak is exactly what we parsed and there is no second
// check-then-read race.
if (original !== undefined) writeFileSync(settingsPath + '.bak', original);
writeFileSync(settingsPath, JSON.stringify(data, null, 2) + '\n');
return true;
}
Expand Down
32 changes: 32 additions & 0 deletions features/render/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,38 @@ export function escapeHtml(s: string): string {
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
}

// Strip HTML tags for VISIBLE-length / text-content purposes (not a security
// sanitizer — output is never re-inserted into a page). One pass over the
// string removes every `<...>` tag AND any trailing TAG-LIKE fragment that was
// cut off before its closing `>` (e.g. a truncated `<script` or `</div`). A
// naive `/<[^>]+>/g` leaves that dangling fragment behind, which both
// over-counts text length and is what CodeQL flags as incomplete multi-
// character sanitization. The trailing branch only fires on `<` immediately
// followed by a tag-name start (letter), `/`, or `!` — so a lone literal `<`
// inside ordinary text (e.g. `a < b`) is preserved and not greedily eaten to
// end-of-string. Single pass, deterministic.
export function stripHtmlTags(s: string): string {
return s.replace(/<[^>]*>|<[/!A-Za-z][^>]*$/g, "")
}

// Decode the small set of HTML entities `tg` itself emits (via escapeHtml plus
// the documented quote/apostrophe forms), in a SINGLE pass. Decoding `&amp;`
// first and `&lt;` second — the obvious chained-replace — double-unescapes
// `&amp;lt;` into `<` (CodeQL js/double-escaping). A single regex with a lookup
// table consumes each entity exactly once, left to right, so no replacement can
// feed the next one. Unknown entities are left verbatim.
const HTML_ENTITIES: Record<string, string> = {
"&amp;": "&",
"&lt;": "<",
"&gt;": ">",
"&quot;": '"',
"&#39;": "'",
"&apos;": "'",
}
export function decodeHtmlEntities(s: string): string {
return s.replace(/&(?:amp|lt|gt|quot|apos|#39);/g, (m) => HTML_ENTITIES[m] ?? m)
}

export function detectHtmlTags(text: string): boolean {
const htmlPattern = /<(b|strong|i|em|u|ins|s|strike|del|span|tg-spoiler|a|tg-emoji|tg-time|code|pre|blockquote)\b/i
return htmlPattern.test(text)
Expand Down
13 changes: 11 additions & 2 deletions features/render/rich.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
// text messages go rich vs. plain, and that a rich message is NOT 4096-split
// and never rides as a media caption) lives in the transmitter.

import { stripHtmlTags } from './html';
import { escapeRegExp } from '../util/regex';

// Rich-ONLY tags: present in the rich-html-style allowlist but NOT in the basic
// sendMessage allowlist (features/render/html.ts detectHtmlTags). Seeing any of
// these in an HTML body means the message must go via sendRichMessage. The
Expand Down Expand Up @@ -84,7 +87,7 @@ const RICH_TAG_RE = new RegExp(
'</?(?:' +
[...RICH_ONLY_TAGS]
.sort((a, b) => b.length - a.length)
.map((t) => t.replace(/[-]/g, '\\-'))
.map((t) => escapeRegExp(t))
.join('|') +
')(?=[\\s/>]|$)',
'i',
Expand Down Expand Up @@ -210,6 +213,12 @@ const VOID_TAGS = new Set(['img', 'hr', 'br', 'input', 'tg-map', 'tg-emoji']);
// left as-is.
const NAMED_ENTITY_RE = /&(?:lt|gt|amp|quot|apos|nbsp|hellip|mdash|ndash|lsquo|rsquo|ldquo|rdquo);/g;
const NUMERIC_ENTITY_RE = /&#(?:[0-9]+|x[0-9a-fA-F]+);/g;
// NOT a js/double-escaping chain (unlike the decode this PR replaced elsewhere):
// both replaces map an entity to the SAME placeholder ``, which contains no `&`,
// so the second pass can never re-process the first's output. The two patterns
// also match disjoint sets (named vs `&#…;` numeric). It collapses entities for a
// CHARACTER-COUNT budget only — it must not be turned into decodeHtmlEntities,
// which decodes to real chars and would change the count.
function decodeEntitiesForCount(s: string): string {
return s.replace(NAMED_ENTITY_RE, '').replace(NUMERIC_ENTITY_RE, '');
}
Expand Down Expand Up @@ -241,7 +250,7 @@ export function validateRichHtml(html: string): RichValidation {
const altText = (html.match(/\balt\s*=\s*"([^"]*)"|alt\s*=\s*'([^']*)'/gi) ?? [])
.map((a) => a.replace(/^[^=]*=\s*["']?/, '').replace(/["']$/, ''))
.join('');
const text = decodeEntitiesForCount(html.replace(/<[^>]+>/g, '') + altText);
const text = decodeEntitiesForCount(stripHtmlTags(html) + altText);
const chars = [...text].length;
if (chars > RICH_LIMITS.maxChars) {
return {
Expand Down
61 changes: 49 additions & 12 deletions features/transport/telegram.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// exactly the transmitter's Transport (sendMessage/sendPhoto/sendDocument/
// sendMediaGroup); the transmitter drives caption-overflow, the >4096 split and
// the photos→text→documents ordering on top of it.
import { readFileSync, statSync } from 'fs';
import { closeSync, fstatSync, openSync, readSync } from 'fs';
import { BOM_MAX_BYTES, maybeAddBom } from '../auto-attach/encoding';
import { type Transport } from '../auto-attach/transmitter';
import { type SendItem } from '../auto-attach/types';
Expand Down Expand Up @@ -50,6 +50,48 @@ export async function checkResponse(resp: Response, method: string): Promise<unk
return json.result;
}

// Read a file into memory ONLY when it is at most `maxBytes`, using a single
// file descriptor for the size check and the read. Opening once and calling
// fstat + read on that SAME fd closes the check-then-read race (js/file-system-
// race): a path-based statSync()+readFileSync() pair can observe two different
// inodes if the path is swapped in between, whereas an fd is pinned to the inode
// it opened. Returns the bytes, or null when the file is unreadable, empty, or
// larger than the cap (the caller then streams it via Bun.file unchanged).
export function readSmallFile(path: string, maxBytes: number): Uint8Array | null {
let fd: number | undefined;
try {
fd = openSync(path, 'r');
const size = fstatSync(fd).size;
if (size <= 0 || size > maxBytes) return null;
const buf = Buffer.allocUnsafe(size);
let read = 0;
while (read < size) {
const n = readSync(fd, buf, read, size - read, read);
if (n === 0) break; // file shrank under us mid-read
read += n;
}
// A short read means the file changed beneath us; bail to null so the caller
// streams the file via Bun.file unchanged rather than stitching a BOM onto a
// truncated body (which would defeat the very encoding fix this serves).
if (read !== size) return null;
// Guard the other direction too: if the file GREW after the initial fstat we
// would have read only a prefix while still satisfying read === size. Re-stat
// the same fd and reject a size change so we never BOM a partial body.
if (fstatSync(fd).size !== size) return null;
return new Uint8Array(buf.subarray(0, read));
} catch {
return null; // unreadable here → let Bun.file surface the real error downstream
} finally {
if (fd !== undefined) {
try {
closeSync(fd);
} catch {
// already closed / invalid fd — nothing to do
}
}
}
}

// Upload bytes for an item. Text DOCUMENTS with non-ASCII UTF-8 content get
// a BOM prepended to the uploaded copy (features/auto-attach/encoding.ts) so
// Telegram's preview stops guessing legacy codepages for Cyrillic; the file
Expand All @@ -61,18 +103,13 @@ function blobFor(item: SendItem): { body: Blob | ReturnType<typeof Bun.file>; na
return { body: new Blob([processed]), name: item.source.filename };
}
if (item.type === 'document') {
try {
const size = statSync(item.source.path).size;
if (size > 0 && size <= BOM_MAX_BYTES) {
const bytes = new Uint8Array(readFileSync(item.source.path));
const processed = maybeAddBom(bytes, item.source.path);
if (processed !== bytes) {
const name = item.source.path.slice(item.source.path.lastIndexOf('/') + 1);
return { body: new Blob([processed]), name };
}
const bytes = readSmallFile(item.source.path, BOM_MAX_BYTES);
if (bytes && bytes.length > 0) {
const processed = maybeAddBom(bytes, item.source.path);
if (processed !== bytes) {
const name = item.source.path.slice(item.source.path.lastIndexOf('/') + 1);
return { body: new Blob([processed]), name };
}
} catch {
// unreadable here → let Bun.file surface the real error downstream
}
}
return { body: Bun.file(item.source.path) };
Expand Down
12 changes: 12 additions & 0 deletions features/util/regex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// --- Regex utilities (pure) ---
//
// escapeRegExp turns an arbitrary string into a literal that is safe to splice
// into a `new RegExp(...)` source. It escapes the FULL set of ECMAScript regex
// metacharacters — not just one or two — so building a pattern from external or
// structured tokens can never let a stray `.`, `\`, `(`, `[`, etc. change the
// pattern's meaning. A partial escaper (e.g. one that handles `-` but leaves
// `\` untouched) is what CodeQL flags as js/incomplete-sanitization; this is the
// single, complete source of truth both render/rich and install-skill use.
export function escapeRegExp(s: string): string {
return s.replace(/[.*+?^${}()|[\]\\-]/g, "\\$&");
}
Loading
Loading