diff --git a/Dockerfile b/Dockerfile index efcdf063..5e1afee6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -108,7 +108,10 @@ COPY package.json README.md LICENSE BUILD.json* ./ EXPOSE 9090 # USER node # This would be great to use, but not possible as the volumes are mounted as root WORKDIR /app/worker -CMD ["node", "--disable-warning=ExperimentalWarning", "--optimize-for-size", "index.ts"] +# Heavy per-run work happens in a child process spawned by the orchestrator (see worker/src/worker.ts). +# The orchestrator itself only schedules; memory tuning should be done at the container level via +# NODE_OPTIONS=--max-old-space-size=... and mem_limit, which propagate to both the orchestrator and the child. +CMD ["node", "--disable-warning=ExperimentalWarning", "index.ts"] # ============================= # Install production dependencies for API diff --git a/tests/features/worker-utils/worker-operations.unit.spec.ts b/tests/features/worker-utils/worker-operations.unit.spec.ts index 10ff70cc..8d38ce4c 100644 --- a/tests/features/worker-utils/worker-operations.unit.spec.ts +++ b/tests/features/worker-utils/worker-operations.unit.spec.ts @@ -1,5 +1,5 @@ import { test, expect } from '@playwright/test' -import { buildErrorMessageFromStderr } from '../../../worker/src/utils/worker-operations.ts' +import { buildErrorMessageFromStderr, formatMemoryUsage, exitCodeHint } from '../../../worker/src/utils/worker-operations.ts' test.describe('buildErrorMessageFromStderr', () => { test('falls back to errMessage when stderr is empty', () => { @@ -30,3 +30,39 @@ test.describe('buildErrorMessageFromStderr', () => { expect(buildErrorMessageFromStderr('a\n\nb\n', 'fb')).toBe('a\nb') }) }) + +test.describe('formatMemoryUsage', () => { + test('renders all components rounded to MB', () => { + const mb = 1024 * 1024 + expect(formatMemoryUsage({ + rss: 256 * mb, + heapUsed: 128 * mb, + heapTotal: 200 * mb, + external: 16 * mb, + arrayBuffers: 0 + })).toBe('rss=256MB heap=128/200MB ext=16MB') + }) + + test('returns a string when called without arguments', () => { + expect(typeof formatMemoryUsage()).toBe('string') + }) +}) + +test.describe('exitCodeHint', () => { + test('returns a V8/SIGABRT hint for code 134', () => { + expect(exitCodeHint(134)).toContain('SIGABRT') + expect(exitCodeHint(134)).toContain('NODE_OPTIONS') + }) + + test('returns an OOM-kill hint for code 137', () => { + expect(exitCodeHint(137)).toContain('SIGKILL') + expect(exitCodeHint(137)).toContain('mem_limit') + }) + + test('returns empty string for unrelated codes', () => { + expect(exitCodeHint(1)).toBe('') + expect(exitCodeHint(143)).toBe('') + expect(exitCodeHint(null)).toBe('') + expect(exitCodeHint(undefined)).toBe('') + }) +}) diff --git a/worker/src/task/index.ts b/worker/src/task/index.ts index 862a208d..5993b960 100644 --- a/worker/src/task/index.ts +++ b/worker/src/task/index.ts @@ -3,9 +3,14 @@ import nodemailer from 'nodemailer' import config from '#config' import mongo from '#mongo' import { run, stop } from './task.ts' +import { formatMemoryUsage } from '../utils/worker-operations.ts' let exitCode = 0 +// Memory diagnostic: print on stderr so the parent worker captures it via +// buildErrorMessageFromStderr when the child exits non-zero. +console.error(`task start mem ${formatMemoryUsage()}`) + process.on('SIGTERM', function onSigterm () { console.info('Received SIGTERM signal, shutdown gracefully...') exitCode = 143 @@ -28,4 +33,5 @@ if (err) exitCode = 1 await mongo.close() mailTransport.close() +console.error(`task end mem ${formatMemoryUsage()}`) process.exit(exitCode) diff --git a/worker/src/utils/worker-operations.ts b/worker/src/utils/worker-operations.ts index b290fd71..4a4ddd41 100644 --- a/worker/src/utils/worker-operations.ts +++ b/worker/src/utils/worker-operations.ts @@ -18,3 +18,24 @@ export const buildErrorMessageFromStderr = (stderr: string, errMessage: string): if (!lines.length) lines.push(errMessage) return lines.join('\n') } + +/** + * Format a Node.js MemoryUsage as a compact one-liner, suitable for logging. + * All values are rounded to MB. + */ +export const formatMemoryUsage = (mem: NodeJS.MemoryUsage = process.memoryUsage()): string => { + const mb = (n: number) => Math.round(n / 1024 / 1024) + return `rss=${mb(mem.rss)}MB heap=${mb(mem.heapUsed)}/${mb(mem.heapTotal)}MB ext=${mb(mem.external)}MB` +} + +/** + * Map a non-zero child exit code to a human hint about likely causes. + * Returns an empty string when no specific hint applies. + * - 134 = SIGABRT, the signature of a V8 fatal allocation failure (std::bad_alloc / Check failed: (result.ptr) != nullptr). + * - 137 = SIGKILL, the signature of an OOM-kill from the host kernel / docker cgroup. + */ +export const exitCodeHint = (code: number | null | undefined): string => { + if (code === 134) return 'le processus enfant a abandonné (SIGABRT, code 134) — typique d\'une allocation V8 impossible. Vérifier NODE_OPTIONS=--max-old-space-size et la limite mémoire du conteneur (mem_limit / resources.limits.memory).' + if (code === 137) return 'le processus enfant a été tué (SIGKILL, code 137) — typique d\'un OOM-kill par le noyau / cgroup. Augmenter la limite mémoire du conteneur (mem_limit / resources.limits.memory).' + return '' +} diff --git a/worker/src/worker.ts b/worker/src/worker.ts index e107892d..f1b762df 100644 --- a/worker/src/worker.ts +++ b/worker/src/worker.ts @@ -18,7 +18,7 @@ import locks from '#locks' import limits from './utils/limits.ts' import { initMetrics } from './utils/metrics.ts' import { finish } from './utils/runs.ts' -import { buildErrorMessageFromStderr } from './utils/worker-operations.ts' +import { buildErrorMessageFromStderr, exitCodeHint } from './utils/worker-operations.ts' const debug = Debug('worker') const debugLoop = Debug('worker-loop') @@ -242,8 +242,11 @@ async function iter (run: Run) { }) await finish(run) } catch (err: any) { - // Build back the original error message from the stderr of the child process - const errorMessage = buildErrorMessageFromStderr(stderr, err.message) + // Build back the original error message from the stderr of the child process, + // appending a hint when the child exit code matches a known OOM signature. + const baseMessage = buildErrorMessageFromStderr(stderr, err.message) + const hint = exitCodeHint(err.code) + const errorMessage = hint ? `${baseMessage}\n${hint}` : baseMessage if (run) { // case of interruption by a SIGTERM