diff --git a/.github/workflows/sync-benchmarks.yml b/.github/workflows/sync-benchmarks.yml index c816d0e..a87bbb1 100644 --- a/.github/workflows/sync-benchmarks.yml +++ b/.github/workflows/sync-benchmarks.yml @@ -88,10 +88,19 @@ jobs: COMMIT: ${{ steps.payload.outputs.commit }} run: | set -euo pipefail - # The publisher pushed the cell in a separate commit; make sure we are - # validating exactly what was published, not a stale checkout. - git fetch origin "$COMMIT" || git fetch origin - git checkout "$COMMIT" -- results/ || echo "no results delta to sync" + # Bring results/ in the working tree up to origin/main so it + # holds the FULL tree (every previously-published cell) and not + # just the one commit the publisher pushed. The old `git checkout + # $COMMIT -- results/` replaced the whole results/ with the + # single commit's delta, which made the index-sync see a + # truncated tree and emit a manifest that listed only the + # most-recent run-K (e.g. only run-2 when run-1 + run-2 + + # run-3 were on disk). Fetching main and checking out + # origin/main's results/ is what makes update-index.mjs's whole- + # tree walk authoritative. + git fetch origin "$COMMIT" || true + git fetch origin main + git checkout origin/main -- results/ - name: Validate published cell if: steps.payload.outputs.path != '' diff --git a/scripts/lib/results.mjs b/scripts/lib/results.mjs index 89e1c2d..0b75dea 100644 --- a/scripts/lib/results.mjs +++ b/scripts/lib/results.mjs @@ -65,12 +65,16 @@ export function listArches(root, version, date) { } // listRuns tolerates both the Phase 4 flat layout (four files directly under -// the arch dir as run-1) and a future Phase 5 run-N/ subdirectory layout. +// the arch dir as run-1) and a future Phase 5 run-N/ subdirectory layout, +// and the run-N-rated/ subdirectory a back-to-back rated pass publishes +// to alongside its saturation pass (see mage_tier.go's RatedRunIDSuffix). 
+// The regex accepts the rated suffix as an optional tail so the same +// walker can enumerate both panels of a back-to-back run. export function listRuns(root, version, date, arch) { const dir = join(root, version, date, arch); if (!existsSync(dir)) return []; const subRuns = readdirSync(dir, { withFileTypes: true }) - .filter((e) => e.isDirectory() && /^run-\d+$/.test(e.name)) + .filter((e) => e.isDirectory() && /^run-\d+(?:-rated)?$/.test(e.name)) .map((e) => e.name); if (subRuns.length > 0) return subRuns.sort(runCmp); if (existsSync(join(dir, "summary.json"))) return [DEFAULT_RUN]; @@ -90,8 +94,24 @@ export function runFilePath(version, date, arch, runId, name) { return posix.join("results", version, date, arch, runId, name); } +// runKey splits a run id into its numeric k and optional variant suffix +// ("run-2" -> [2, ""], "run-2-rated" -> [2, "rated"]). The numeric part +// sorts first so the canonical order is run-1, run-1-rated, run-2, +// run-2-rated, ...; the variant part breaks ties lexicographically, so +// a future variant (e.g. "run-1-soak") would naturally land after the +// rated one with no comparator change. Unknown shapes sort as 0/"" to +// keep the comparator total. +function runKey(r) { + const m = /^run-(\d+)(?:-(.+))?$/.exec(r); + if (!m) return [0, "", r]; + return [Number(m[1]), m[2] || "", r]; +} + export function runCmp(a, b) { - return Number(a.replace("run-", "")) - Number(b.replace("run-", "")); + const [na, sa] = runKey(a); + const [nb, sb] = runKey(b); + if (na !== nb) return na - nb; + return sa.localeCompare(sb); } // versionCmpDesc: semver, newest first; no-prerelease sorts ahead of prerelease. 
diff --git a/scripts/selftest.mjs b/scripts/selftest.mjs index 20c4dc9..fdf13c3 100644 --- a/scripts/selftest.mjs +++ b/scripts/selftest.mjs @@ -17,6 +17,7 @@ import { readFileSync, existsSync, rmSync, + writeFileSync, } from "node:fs"; import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; @@ -40,8 +41,8 @@ try { copyFileSync(join(fixtureDir, f), join(cellAbs, f)); } - const run = (script, args) => { - const r = spawnSync("node", [join(scriptsDir, script), ...args], { cwd: tmp, encoding: "utf8" }); + const run = (script, args, cwd = tmp) => { + const r = spawnSync("node", [join(scriptsDir, script), ...args], { cwd, encoding: "utf8" }); const out = `${r.stdout || ""}${r.stderr || ""}`.trim(); if (out) process.stdout.write(` [${script}] ${out}\n`); if (r.status !== 0) throw new Error(`${script} exited with status ${r.status}`); @@ -74,6 +75,86 @@ try { assert(run0 && run0.files && run0.files.summary && run0.files.timeseries, "run.files references all artifacts"); assert(existsSync(join(tmp, "results", "latest", ARCH, "summary.json")), `latest/${ARCH}/summary.json mirrored`); assert(existsSync(join(tmp, "results", "latest", ARCH, "env.json")), `latest/${ARCH}/env.json mirrored`); + + // ---- Back-to-back + rated subdirs (see mage_tier.go's RatedRunIDSuffix) ---- + // A second self-test cell tree that mirrors passes 2 and 3 of a + // saturation and rated back-to-back run. The single-cell fixture above + // validates the flat (run-1) layout; this one validates the + // sibling run-K/ + run-K-rated/ subdirs coexist, get enumerated + // individually in the index, and that default_run resolves to the + // lowest saturation run (run-2 here; run-1 is absent), not a rated subdir. 
+ const multiTmp = mkdtempSync(join(tmpdir(), "results-selftest-multi-")); + try { + const multiRel = `results/${VER}/${DATE}/${ARCH}`; + const multiAbs = join(multiTmp, multiRel); + // Saturation subdirs (run-2, run-3) — copy the fixture and re-stamp + // env.run_id so validate-results.mjs accepts each cell against its + // own dispatch payload. The fixture's env carries run-1; every + // subdir needs its own id. + for (const k of [2, 3]) { + const subAbs = join(multiAbs, `run-${k}`); + mkdirSync(subAbs, { recursive: true }); + for (const f of ["summary.json", "histograms.json.gz", "timeseries.json.gz"]) { + copyFileSync(join(fixtureDir, f), join(subAbs, f)); + } + const envRaw = JSON.parse(readFileSync(join(fixtureDir, "env.json"), "utf8")); + envRaw.run_id = `run-${k}`; + writeFileSync(join(subAbs, "env.json"), JSON.stringify(envRaw)); + } + // Rated subdirs (run-2-rated, run-3-rated) — same shape with the + // rated suffix; the producer's env.run_id is the full rated id. + for (const k of [2, 3]) { + const subAbs = join(multiAbs, `run-${k}-rated`); + mkdirSync(subAbs, { recursive: true }); + for (const f of ["summary.json", "histograms.json.gz", "timeseries.json.gz"]) { + copyFileSync(join(fixtureDir, f), join(subAbs, f)); + } + const envRaw = JSON.parse(readFileSync(join(fixtureDir, "env.json"), "utf8")); + envRaw.run_id = `run-${k}-rated`; + writeFileSync(join(subAbs, "env.json"), JSON.stringify(envRaw)); + } + + // Validate each cell against its dispatch payload. The selftest's + // single-cell case ships --run; for the rated pass we re-stamp env + // to match. We use the multiTmp cwd (not the single-cell tmp) so + // the relative --path resolves against the right tree. 
+ for (const k of [2, 3]) { + run("validate-results.mjs", [ + "--path", `${multiRel}/run-${k}`, + "--version", VER, "--arch", ARCH, "--date", DATE, "--run", `run-${k}`, + ], multiTmp); + run("validate-results.mjs", [ + "--path", `${multiRel}/run-${k}-rated`, + "--version", VER, "--arch", ARCH, "--date", DATE, "--run", `run-${k}-rated`, + ], multiTmp); + } + run("update-index.mjs", [], multiTmp); + + const multiIdx = JSON.parse(readFileSync(join(multiTmp, "results", "index.json"), "utf8")); + const multiRuns = multiIdx.versions?.[0]?.dates?.[0]?.arches?.[0]?.runs || []; + const runIds = multiRuns.map((r) => r.run_id); + assert( + JSON.stringify(runIds) === JSON.stringify(["run-2", "run-2-rated", "run-3", "run-3-rated"]), + `runs enumerated in sorted order: ${JSON.stringify(runIds)}`, + ); + assert( + multiIdx.versions?.[0]?.dates?.[0]?.default_run === "run-2", + `default_run still picks the lowest run-K when run-1 absent, got ${multiIdx.versions?.[0]?.dates?.[0]?.default_run}`, + ); + // The rated subdirs each carry their own files pointers, distinct + // from the saturation subdirs (proves the rated pass didn't + // overwrite the saturation grid at run-K). + const ratedFiles = multiRuns.find((r) => r.run_id === "run-2-rated")?.files || {}; + assert( + ratedFiles.summary && ratedFiles.summary.endsWith("/run-2-rated/summary.json"), + `rated subdir files pointer stays in run-2-rated/, got ${ratedFiles.summary}`, + ); + } catch (e) { + console.error(" ERROR (multi-run):", e.message); + failed = true; + } finally { + rmSync(multiTmp, { recursive: true, force: true }); + } } catch (e) { console.error(" ERROR:", e.message); failed = true;