Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,10 @@ COPY package.json README.md LICENSE BUILD.json* ./
EXPOSE 9090
# USER node # This would be great to use, but not possible as the volumes are mounted as root
WORKDIR /app/worker
CMD ["node", "--disable-warning=ExperimentalWarning", "--optimize-for-size", "index.ts"]
# Heavy per-run work happens in a child process spawned by the orchestrator (see worker/src/worker.ts).
# The orchestrator itself only schedules; memory tuning should be done at the container level via
# NODE_OPTIONS=--max-old-space-size=... and mem_limit, which propagate to both the orchestrator and the child.
CMD ["node", "--disable-warning=ExperimentalWarning", "index.ts"]

# =============================
# Install production dependencies for API
Expand Down
38 changes: 37 additions & 1 deletion tests/features/worker-utils/worker-operations.unit.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { test, expect } from '@playwright/test'
import { buildErrorMessageFromStderr } from '../../../worker/src/utils/worker-operations.ts'
import { buildErrorMessageFromStderr, formatMemoryUsage, exitCodeHint } from '../../../worker/src/utils/worker-operations.ts'

test.describe('buildErrorMessageFromStderr', () => {
test('falls back to errMessage when stderr is empty', () => {
Expand Down Expand Up @@ -30,3 +30,39 @@ test.describe('buildErrorMessageFromStderr', () => {
expect(buildErrorMessageFromStderr('a\n\nb\n', 'fb')).toBe('a\nb')
})
})

// Unit tests for formatMemoryUsage: explicit-input rendering and the default-argument path.
test.describe('formatMemoryUsage', () => {
  const BYTES_PER_MB = 1024 * 1024

  test('renders all components rounded to MB', () => {
    // Whole-MB inputs so the expected string has no rounding ambiguity.
    const usage = {
      rss: 256 * BYTES_PER_MB,
      heapUsed: 128 * BYTES_PER_MB,
      heapTotal: 200 * BYTES_PER_MB,
      external: 16 * BYTES_PER_MB,
      arrayBuffers: 0
    }
    const rendered = formatMemoryUsage(usage)
    expect(rendered).toBe('rss=256MB heap=128/200MB ext=16MB')
  })

  test('returns a string when called without arguments', () => {
    // No argument: the function falls back to its own default memory snapshot.
    const rendered = formatMemoryUsage()
    expect(typeof rendered).toBe('string')
  })
})

// Unit tests for exitCodeHint: the two recognised exit codes and the no-hint fallback.
test.describe('exitCodeHint', () => {
  test('returns a V8/SIGABRT hint for code 134', () => {
    const hint = exitCodeHint(134)
    expect(hint).toContain('SIGABRT')
    expect(hint).toContain('NODE_OPTIONS')
  })

  test('returns an OOM-kill hint for code 137', () => {
    const hint = exitCodeHint(137)
    expect(hint).toContain('SIGKILL')
    expect(hint).toContain('mem_limit')
  })

  test('returns empty string for unrelated codes', () => {
    // Generic failure, SIGTERM-style exit, and missing codes must all yield no hint.
    const unrelatedCodes = [1, 143, null, undefined]
    for (const code of unrelatedCodes) {
      expect(exitCodeHint(code)).toBe('')
    }
  })
})
6 changes: 6 additions & 0 deletions worker/src/task/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@ import nodemailer from 'nodemailer'
import config from '#config'
import mongo from '#mongo'
import { run, stop } from './task.ts'
import { formatMemoryUsage } from '../utils/worker-operations.ts'

let exitCode = 0

// Memory diagnostic: print to stderr so the parent worker captures it via
// buildErrorMessageFromStderr when the child exits non-zero.
console.error(`task start mem ${formatMemoryUsage()}`)

process.on('SIGTERM', function onSigterm () {
console.info('Received SIGTERM signal, shutdown gracefully...')
exitCode = 143
Expand All @@ -28,4 +33,5 @@ if (err) exitCode = 1
await mongo.close()
mailTransport.close()

console.error(`task end mem ${formatMemoryUsage()}`)
process.exit(exitCode)
21 changes: 21 additions & 0 deletions worker/src/utils/worker-operations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,24 @@ export const buildErrorMessageFromStderr = (stderr: string, errMessage: string):
if (!lines.length) lines.push(errMessage)
return lines.join('\n')
}

/**
 * Render a Node.js memory snapshot as a single compact log-friendly line.
 *
 * Output shape: `rss=<n>MB heap=<used>/<total>MB ext=<n>MB`, where every figure
 * is a byte count converted to mebibytes and rounded to the nearest integer.
 *
 * @param mem - Snapshot to render; defaults to the current process's `process.memoryUsage()`.
 * @returns The formatted one-liner.
 */
export const formatMemoryUsage = (mem: NodeJS.MemoryUsage = process.memoryUsage()): string => {
  const bytesPerMb = 1024 * 1024
  const toMb = (bytes: number): number => Math.round(bytes / bytesPerMb)

  const { rss, heapUsed, heapTotal, external } = mem
  const segments = [
    `rss=${toMb(rss)}MB`,
    `heap=${toMb(heapUsed)}/${toMb(heapTotal)}MB`,
    `ext=${toMb(external)}MB`
  ]
  return segments.join(' ')
}

/**
 * Translate a child-process exit code into a human-readable hint about its probable cause.
 *
 * Recognised codes:
 * - 134 (SIGABRT): signature of a V8 fatal allocation failure
 *   (std::bad_alloc / Check failed: (result.ptr) != nullptr).
 * - 137 (SIGKILL): signature of an OOM-kill by the host kernel / docker cgroup.
 *
 * @param code - Exit code of the child process; may be null or undefined.
 * @returns The hint text, or an empty string when no specific hint applies.
 */
export const exitCodeHint = (code: number | null | undefined): string => {
  // switch compares with strict equality, so null, undefined and any other value fall through to default.
  switch (code) {
    case 134:
      return 'le processus enfant a abandonné (SIGABRT, code 134) — typique d\'une allocation V8 impossible. Vérifier NODE_OPTIONS=--max-old-space-size et la limite mémoire du conteneur (mem_limit / resources.limits.memory).'
    case 137:
      return 'le processus enfant a été tué (SIGKILL, code 137) — typique d\'un OOM-kill par le noyau / cgroup. Augmenter la limite mémoire du conteneur (mem_limit / resources.limits.memory).'
    default:
      return ''
  }
}
9 changes: 6 additions & 3 deletions worker/src/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import locks from '#locks'
import limits from './utils/limits.ts'
import { initMetrics } from './utils/metrics.ts'
import { finish } from './utils/runs.ts'
import { buildErrorMessageFromStderr } from './utils/worker-operations.ts'
import { buildErrorMessageFromStderr, exitCodeHint } from './utils/worker-operations.ts'

const debug = Debug('worker')
const debugLoop = Debug('worker-loop')
Expand Down Expand Up @@ -242,8 +242,11 @@ async function iter (run: Run) {
})
await finish(run)
} catch (err: any) {
// Build back the original error message from the stderr of the child process
const errorMessage = buildErrorMessageFromStderr(stderr, err.message)
// Build back the original error message from the stderr of the child process,
// appending a hint when the child exit code matches a known OOM signature.
const baseMessage = buildErrorMessageFromStderr(stderr, err.message)
const hint = exitCodeHint(err.code)
const errorMessage = hint ? `${baseMessage}\n${hint}` : baseMessage

if (run) {
// case of interruption by a SIGTERM
Expand Down
Loading