Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions src/runner/LightRunClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ export interface RunState {

// Delay between consecutive status polls of a run, in milliseconds.
const POLL_INTERVAL_MS = 500;

/*
* Safety net for a wedged run: extra grace, in milliseconds (300 s), added on
* top of a node's own timeout before polling gives up.
*
* A bounded node (timeout > 0) is killed by light-runner at its timeout and
* turns terminal shortly after. Image pull, extraction and teardown all happen
* outside the container's own timeout window, hence the grace. If polling
* still sees `running` past timeout + this grace, the run is treated as stuck
* and the poll loop throws instead of looping forever.
*
* Nodes with timeout 0 opt out of any limit and keep polling indefinitely.
*/
const POLL_GRACE_MS = 300_000;

// NOTE(review): mutable module-level number, presumably a sequence counter;
// its readers/writers are outside this excerpt - confirm its purpose.
let seq = 0;

function sleep(ms: number): Promise<void> {
Expand Down Expand Up @@ -122,7 +132,7 @@ export class LightRunClient {

const accepted = (await res.json()) as { id: string };
onRunId(accepted.id);
const state = await this.pollUntilDone(accepted.id, onLog);
const state = await this.pollUntilDone(accepted.id, onLog, node.timeout);

let output: Record<string, unknown> = {};
const artifactName = OUTPUT_FILE;
Expand Down Expand Up @@ -174,8 +184,9 @@ export class LightRunClient {
}
}

private async pollUntilDone(runId: string, onLog?: (line: string) => void): Promise<RunState> {
private async pollUntilDone(runId: string, onLog?: (line: string) => void, timeoutMs = 0): Promise<RunState> {
let printed = 0;
const deadline = timeoutMs > 0 ? Date.now() + timeoutMs + POLL_GRACE_MS : 0;
while (true) {
const res = await fetch(`${this.url}/runs/${runId}`, { headers: this.headers() });
if (!res.ok) {
Expand All @@ -189,6 +200,13 @@ export class LightRunClient {
printed = state.logs.length;
}
if (state.status !== 'running') return state;
if (deadline > 0 && Date.now() > deadline) {
throw new Error(
`light-run run ${runId} still running after ${Math.round(
(timeoutMs + POLL_GRACE_MS) / 1000,
)}s; treating it as wedged`,
);
}
await sleep(POLL_INTERVAL_MS);
}
}
Expand Down
Loading