Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions .github/workflows/sync-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,19 @@ jobs:
COMMIT: ${{ steps.payload.outputs.commit }}
run: |
set -euo pipefail
# The publisher pushed the cell in a separate commit; make sure we are
# validating exactly what was published, not a stale checkout.
git fetch origin "$COMMIT" || git fetch origin
git checkout "$COMMIT" -- results/ || echo "no results delta to sync"
# Bring the working tree to origin/main so the local results/
# holds the FULL tree (every previously-published cell) and not
# just the one commit the publisher pushed. The old `git checkout
# $COMMIT -- results/` replaced the whole results/ with the
# single commit's delta, which made the index-sync see a
# truncated tree and emit a manifest that listed only the
# most-recent run-K (e.g. only run-2 when run-1 + run-2 +
# run-3 were on disk). Fetching origin/main and checking out its
# full results/ tree is what makes update-index.mjs's whole-tree
# walk authoritative.
git fetch origin "$COMMIT" || true
git fetch origin main
git checkout origin/main -- results/

- name: Validate published cell
if: steps.payload.outputs.path != ''
Expand Down
26 changes: 23 additions & 3 deletions scripts/lib/results.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,16 @@ export function listArches(root, version, date) {
}

// listRuns tolerates the Phase 4 flat layout (four files directly under
// the arch dir as run-1), a future Phase 5 run-N/ subdirectory layout,
// and the run-N-rated/ subdirectory that a back-to-back rated pass
// publishes alongside its saturation pass (see mage_tier.go's
// RatedRunIDSuffix). The regex accepts the rated suffix as an optional
// tail so the same walker can enumerate both panels of a back-to-back run.
export function listRuns(root, version, date, arch) {
const dir = join(root, version, date, arch);
if (!existsSync(dir)) return [];
const subRuns = readdirSync(dir, { withFileTypes: true })
.filter((e) => e.isDirectory() && /^run-\d+$/.test(e.name))
.filter((e) => e.isDirectory() && /^run-\d+(?:-rated)?$/.test(e.name))
.map((e) => e.name);
if (subRuns.length > 0) return subRuns.sort(runCmp);
if (existsSync(join(dir, "summary.json"))) return [DEFAULT_RUN];
Expand All @@ -90,8 +94,24 @@ export function runFilePath(version, date, arch, runId, name) {
return posix.join("results", version, date, arch, runId, name);
}

// runKey decomposes a run id into a sortable triple:
//   [numeric k, variant suffix, original id]
// e.g. "run-2" -> [2, "", "run-2"] and "run-2-rated" -> [2, "rated", "run-2-rated"].
// Comparing k first and the variant second yields the canonical order
// run-1, run-1-rated, run-2, run-2-rated, ...; because the variant is
// compared as a plain string, a later variant such as "run-1-soak" slots
// in after "rated" without touching the comparator. Ids that do not look
// like run-<digits>[-<variant>] map to k = 0 and an empty variant so that
// every input still produces a usable key.
function runKey(r) {
  const pattern = /^run-(\d+)(?:-(.+))?$/;
  const parts = pattern.exec(r);
  if (parts === null) {
    return [0, "", r];
  }
  // The variant capture group is undefined when the id has no suffix.
  const [, digits, variant = ""] = parts;
  return [Number(digits), variant, r];
}

// runCmp is the Array.prototype.sort comparator for run ids.
// It orders by the numeric run index first and by the variant suffix
// second, so "run-1" < "run-1-rated" < "run-2" < "run-10".
//
// A plain `Number(id.replace("run-", ""))` comparison must NOT be used
// here: for a suffixed id such as "run-2-rated" it evaluates to NaN, which
// is not a valid comparator result and leaves the sort order undefined.
//
// @param {string} a - run id, e.g. "run-2" or "run-2-rated"
// @param {string} b - run id to compare against
// @returns {number} negative if a sorts first, positive if b does, 0 on a tie
export function runCmp(a, b) {
  const [na, sa] = runKey(a);
  const [nb, sb] = runKey(b);
  // Numeric run index decides first...
  if (na !== nb) return na - nb;
  // ...and the variant suffix ("" before "rated") breaks ties.
  return sa.localeCompare(sb);
}

// versionCmpDesc: semver, newest first; no-prerelease sorts ahead of prerelease.
Expand Down
85 changes: 83 additions & 2 deletions scripts/selftest.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
readFileSync,
existsSync,
rmSync,
writeFileSync,
} from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
Expand All @@ -40,8 +41,8 @@ try {
copyFileSync(join(fixtureDir, f), join(cellAbs, f));
}

const run = (script, args) => {
const r = spawnSync("node", [join(scriptsDir, script), ...args], { cwd: tmp, encoding: "utf8" });
const run = (script, args, cwd = tmp) => {
const r = spawnSync("node", [join(scriptsDir, script), ...args], { cwd, encoding: "utf8" });
const out = `${r.stdout || ""}${r.stderr || ""}`.trim();
if (out) process.stdout.write(` [${script}] ${out}\n`);
if (r.status !== 0) throw new Error(`${script} exited with status ${r.status}`);
Expand Down Expand Up @@ -74,6 +75,86 @@ try {
assert(run0 && run0.files && run0.files.summary && run0.files.timeseries, "run.files references all artifacts");
assert(existsSync(join(tmp, "results", "latest", ARCH, "summary.json")), `latest/${ARCH}/summary.json mirrored`);
assert(existsSync(join(tmp, "results", "latest", ARCH, "env.json")), `latest/${ARCH}/env.json mirrored`);

// ---- Back-to-back + rated subdirs (see mage_tier.go's RatedRunIDSuffix) ----
// A second self-test cell tree that mirrors a back-to-back run: two
// saturation passes (run-2, run-3) and their rated siblings
// (run-2-rated, run-3-rated). The single-cell fixture above
// validates the flat (run-1) layout; this one validates that the
// sibling run-K/ + run-K-rated/ subdirs coexist, get enumerated
// individually in the index, and that default_run resolves to the
// lowest saturation run present (run-2 here, because no run-1 subdir
// is created), never to a rated subdir.
const multiTmp = mkdtempSync(join(tmpdir(), "results-selftest-multi-"));
try {
const multiRel = `results/${VER}/${DATE}/${ARCH}`;
const multiAbs = join(multiTmp, multiRel);
// Saturation subdirs (run-2, run-3) — copy the fixture and re-stamp
// env.run_id so validate-results.mjs accepts each cell against its
// own dispatch payload. The fixture's env carries run-1; every
// subdir needs its own id.
for (const k of [2, 3]) {
const subAbs = join(multiAbs, `run-${k}`);
mkdirSync(subAbs, { recursive: true });
// The three data artifacts are copied verbatim; only env.json is rewritten.
for (const f of ["summary.json", "histograms.json.gz", "timeseries.json.gz"]) {
copyFileSync(join(fixtureDir, f), join(subAbs, f));
}
const envRaw = JSON.parse(readFileSync(join(fixtureDir, "env.json"), "utf8"));
envRaw.run_id = `run-${k}`;
writeFileSync(join(subAbs, "env.json"), JSON.stringify(envRaw));
}
// Rated subdirs (run-2-rated, run-3-rated) — same shape with the
// rated suffix; the producer's env.run_id is the full rated id.
for (const k of [2, 3]) {
const subAbs = join(multiAbs, `run-${k}-rated`);
mkdirSync(subAbs, { recursive: true });
for (const f of ["summary.json", "histograms.json.gz", "timeseries.json.gz"]) {
copyFileSync(join(fixtureDir, f), join(subAbs, f));
}
const envRaw = JSON.parse(readFileSync(join(fixtureDir, "env.json"), "utf8"));
envRaw.run_id = `run-${k}-rated`;
writeFileSync(join(subAbs, "env.json"), JSON.stringify(envRaw));
}

// Validate each cell against its dispatch payload: every saturation
// and rated subdir is checked with a --run value equal to the run_id
// stamped into its env.json above. We use the multiTmp cwd (not the
// single-cell tmp) so the relative --path resolves against the right
// tree.
for (const k of [2, 3]) {
run("validate-results.mjs", [
"--path", `${multiRel}/run-${k}`,
"--version", VER, "--arch", ARCH, "--date", DATE, "--run", `run-${k}`,
], multiTmp);
run("validate-results.mjs", [
"--path", `${multiRel}/run-${k}-rated`,
"--version", VER, "--arch", ARCH, "--date", DATE, "--run", `run-${k}-rated`,
], multiTmp);
}
// Rebuild the index for the multi-run tree; the assertions below read
// the results/index.json it is expected to produce under multiTmp.
run("update-index.mjs", [], multiTmp);

const multiIdx = JSON.parse(readFileSync(join(multiTmp, "results", "index.json"), "utf8"));
// Only one version/date/arch exists in this tree, so the first entry at
// each level is the cell under test.
const multiRuns = multiIdx.versions?.[0]?.dates?.[0]?.arches?.[0]?.runs || [];
const runIds = multiRuns.map((r) => r.run_id);
// Each saturation run must be immediately followed by its rated sibling.
assert(
JSON.stringify(runIds) === JSON.stringify(["run-2", "run-2-rated", "run-3", "run-3-rated"]),
`runs enumerated in sorted order: ${JSON.stringify(runIds)}`,
);
assert(
multiIdx.versions?.[0]?.dates?.[0]?.default_run === "run-2",
`default_run still picks the lowest run-K when run-1 absent, got ${multiIdx.versions?.[0]?.dates?.[0]?.default_run}`,
);
// The rated subdirs each carry their own files pointers, distinct
// from the saturation subdirs (proves the rated pass didn't
// overwrite the saturation grid at run-K).
const ratedFiles = multiRuns.find((r) => r.run_id === "run-2-rated")?.files || {};
assert(
ratedFiles.summary && ratedFiles.summary.endsWith("/run-2-rated/summary.json"),
`rated subdir files pointer stays in run-2-rated/, got ${ratedFiles.summary}`,
);
} catch (e) {
// Record the failure via the shared flag instead of rethrowing, so the
// finally below always removes the temporary tree.
console.error(" ERROR (multi-run):", e.message);
failed = true;
} finally {
rmSync(multiTmp, { recursive: true, force: true });
}
} catch (e) {
console.error(" ERROR:", e.message);
failed = true;
Expand Down
Loading