goceleris · FumingPower3925 · Jun 23, 2026 · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026
diff --git a/ansible/bench.yml b/ansible/bench.yml
@@ -134,7 +134,7 @@
         docker rm -f probatorium-pg probatorium-redis probatorium-mc 2>/dev/null
         docker run -d --rm --name probatorium-pg -p 127.0.0.1:54321:5432 \
           -e POSTGRES_USER=bench -e POSTGRES_PASSWORD=bench -e POSTGRES_DB=bench \
-          postgres:17-alpine
+          postgres:17-alpine -c synchronous_commit=off
         docker run -d --rm --name probatorium-redis -p 127.0.0.1:63791:6379 redis:8.2-alpine
         docker run -d --rm --name probatorium-mc -p 127.0.0.1:21211:11211 memcached:1.6.41-alpine
       args:

diff --git a/ansible/tasks/run_bench_cell.yml b/ansible/tasks/run_bench_cell.yml
@@ -168,9 +168,10 @@
     # dbservices role only installs docker + pulls images; it never runs a
     # container). Every driver-capable competitor reads its backend address
     # from these PROBATORIUM_* vars; an unset var leaves that client nil and
-    # the driver cell 404s (→ capability-lie hard error). gin/celeris read
-    # PROBATORIUM_MEMCACHED_ADDR, echo reads PROBATORIUM_MC_ADDR, so we set
-    # both spellings. Harmless for static-only competitors (they ignore them).
+    # the driver cell 404s (→ capability-lie hard error). Every adapter reads
+    # PROBATORIUM_MEMCACHED_ADDR (standardized in v1.5.4 — fasthttp/echo/iris
+    # used to read PROBATORIUM_MC_ADDR). Harmless for static-only competitors
+    # (they ignore these PROBATORIUM_* vars).
     # Ports MUST match bench.yml's container -p mappings AND validate.yml:
     #   postgres 54321  redis 63791  memcached 21211  (remapped off the
     # defaults so a stray system postgres/redis on the bench host can't shadow
@@ -179,7 +180,6 @@
     PROBATORIUM_PG_DSN: "postgres://bench:bench@127.0.0.1:54321/bench?sslmode=disable"
     PROBATORIUM_REDIS_ADDR: "127.0.0.1:63791"
     PROBATORIUM_MEMCACHED_ADDR: "127.0.0.1:21211"
-    PROBATORIUM_MC_ADDR: "127.0.0.1:21211"
     # aspnet (.NET) is a FRAMEWORK-DEPENDENT publish: the native apphost
     # (competitors/aspnet) must locate the shared .NET runtime at launch. We
     # installed the SDK (which carries the runtime) pristinely under

diff --git a/ansible/validate.yml b/ansible/validate.yml
@@ -90,7 +90,7 @@
             docker run -d --rm --name probatorium-pg \
               -p 127.0.0.1:54321:5432 \
               -e POSTGRES_USER=bench -e POSTGRES_PASSWORD=bench -e POSTGRES_DB=bench \
-              postgres:17-alpine
+              postgres:17-alpine -c synchronous_commit=off
           register: dbsvc_pg
           changed_when: dbsvc_pg.rc == 0
           failed_when: false
@@ -158,7 +158,7 @@
             # run is a no-op.
             PROBATORIUM_PG_DSN: "postgres://bench:bench@127.0.0.1:54321/bench?sslmode=disable"
             PROBATORIUM_REDIS_ADDR: "127.0.0.1:63791"
-            PROBATORIUM_MC_ADDR: "127.0.0.1:21211"
+            PROBATORIUM_MEMCACHED_ADDR: "127.0.0.1:21211"
           ansible.builtin.shell: |
             cd {{ bench_root }}
             ulimit -l unlimited

diff --git a/budget/budget.go b/budget/budget.go
@@ -222,7 +222,7 @@ func plural(n int) string {
 // Profile. Both "headline" and "full" now cover the SAME grid — every
 // registered server × every scenario, capability-gated (Globs "*/*") —
 // so NEITHER silently drops servers or scenarios. They differ only by the
-// per-cell window: "headline" (the weekly cadence) uses a shorter 60s/15s
+// per-cell window: "headline" (the weekly cadence) uses a shorter 40s/12s
 // window so the whole grid fits the 24h budget single-arch; "full" uses a
 // longer 90s/20s window for the occasional exhaustive sweep (over 24h,
 // run as a manual dispatch with a raised BENCH_BUDGET). The default (empty

diff --git a/budget/budget_test.go b/budget/budget_test.go
@@ -139,7 +139,7 @@ func TestForProfileDefaultHasFullCoverage(t *testing.T) {
 		t.Fatalf("default profile must cover the full grid (Globs '*/*'), got %v", def.Globs)
 	}
 	if def.Cells < 400 {
-		t.Errorf("default profile Cells: want >= 400 (the full matrix is ~800 capability-gated), got %d. "+
+		t.Errorf("default profile Cells: want >= 400 (the full matrix is ~1111 capability-gated), got %d. "+
 			"A value this low means the default was silently scoped down to a curated subset.",
 			def.Cells)
 	}
@@ -163,6 +163,31 @@ func TestGlobsAreNonEmpty(t *testing.T) {
 	}
 }
 
+// TestRatedRealizedCellsMatchSubset guards the rated-cell pins against the
+// auto-mix-111 class of drift. The rated sweep runs RatedScenarios x
+// RatedServers, and every curated rated scenario is a plain-H1 static row
+// that applies to every curated rated server, so the realized count is the
+// full cross product with no capability-gating loss. The test asserts that
+// HeadlineRatedRealizedCells, FullRatedRealizedCells and len(ratedGlobs())
+// all equal len(RatedScenarios) * len(RatedServers).
+// NOTE(review): no registry lookup is visible here, so unless ratedGlobs()
+// filters unregistered names a stale entry with a bumped pin still passes.
+func TestRatedRealizedCellsMatchSubset(t *testing.T) {
+	want := len(RatedScenarios) * len(RatedServers)
+	if HeadlineRatedRealizedCells != want {
+		t.Errorf("HeadlineRatedRealizedCells = %d, want %d (len(RatedScenarios)=%d * len(RatedServers)=%d); "+
+			"a mismatch means a rated scenario is unregistered or the pin is stale",
+			HeadlineRatedRealizedCells, want, len(RatedScenarios), len(RatedServers))
+	}
+	if FullRatedRealizedCells != want {
+		t.Errorf("FullRatedRealizedCells = %d, want %d", FullRatedRealizedCells, want)
+	}
+	if len(ratedGlobs()) != want {
+		t.Errorf("len(ratedGlobs()) = %d, want %d (the expanded glob set must match the pin)",
+			len(ratedGlobs()), want)
+	}
+}
+
 // TestColumnWallClock pins the per-column projection the ansible hang
 // guard is sized from. The "v3.8 rated column" case uses the REAL run
 // config (33 capability-gated scenarios on celeris-epoll-h1-sync,

diff --git a/budget/profiles.go b/budget/profiles.go
@@ -21,11 +21,15 @@ import "time"
 // because it is the expensive additive dimension — see RatedServers below.
 
 // RatedScenarios is the curated rated/SLO subset (#156): the SLO-knee
-// scenarios where throughput-at-SLO carries the most signal.
+// scenarios where throughput-at-SLO carries the most signal. These are the
+// two registered static rows every rated server runs: a GET read and a
+// 4 KiB POST. (A third entry, "auto-mix-111", used to be listed here, but
+// the scenario had been deleted and is not registered, so the -cells filter
+// silently matched nothing and the rated grid was 16 cells while the pin
+// claimed 24; it was removed from the rated pass in the v1.5.4 pre-run audit.)
 var RatedScenarios = []string{
 	"get-json",
 	"post-4k",
-	"auto-mix-111",
 }
 
 // RatedServers is the curated rated column subset: the four celeris modes
@@ -54,20 +58,20 @@ var RatedServers = []string{
 // rated sweep stays curated, so HeadlineRatedRealizedCells is unchanged.
 const (
 	HeadlineRealizedCells      = FullRealizedCells
-	HeadlineRatedRealizedCells = 24 // 8 rated servers x 3 rated scenarios, capability-gated
+	HeadlineRatedRealizedCells = 16 // 8 rated servers x 2 rated scenarios, capability-gated
 
-	// Full profile: every server x every scenario, capability-gated. After
-	// the mid-size payload rows (get/post-json-8k/16k) and the native h2c
-	// columns (axum/ntex/hyper/aspnet/fastapi/hono/elysia -h2) landed, the
-	// nominal grid is ~36 columns x 45 rows ~ 1620; capability gating (the
-	// streaming / driver / chain / TLS cells, plus the h2c-noupg columns
-	// skipping every H1 row) lands the realized count near ~800. Pinned
-	// conservatively high so FitWithin over-projects slightly and a registry
-	// change that blows the budget fails loudly rather than overflowing the
-	// run. Recompute with the scheduler's Applicable gate when the registry
-	// grows again.
-	FullRealizedCells      = 820
-	FullRatedRealizedCells = 24
+	// Full profile: every server x every scenario, capability-gated. This is
+	// the SAME realized "*/*" grid Fast runs (FullRealizedCells ==
+	// FastRealizedCells); the profiles differ only by per-cell window. The
+	// v1.5.4 redesign reshaped the grid — saturated static rows pruned (W1),
+	// the driver set deepened 4->10 (W3), WS/SSE coverage added to three more
+	// columns (W4), and the 12 middleware/chain scenarios REMOVED (pre-run
+	// audit: they compared unequal work across adapters) — so the realized
+	// count moved off the older ~800/1257/1111 pins to 835. Recompute with
+	// `cmd/runner -dry-run -cells '*/*' | grep -c '^run0'` when the registry
+	// changes; the grid is now 52 columns x 29 rows, capability-gated.
+	FullRealizedCells      = 835
+	FullRatedRealizedCells = 16
 )
 
 // HeadlineWeekly is the config the benchmark-tier workflow runs on the
@@ -88,16 +92,18 @@ const (
 // the correct loud failure, since the full grid x 2 serial arches cannot fit
 // 24h until ArchParallel (#168, blocked on loadgen linux/arm64) lands.
 //
-// Budget: ~820 cells x (15+60+5+12)s x 1 arch = ~20.9h saturation + ~0.7h
-// curated rated = ~21.6h < 24h. The rated sweep stays curated (RatedGlobs)
-// because it is the expensive additive dimension; expanding it to the full
-// grid would blow the budget many times over.
+// Budget: ~835 cells x (12+40+5+12)s x 1 arch = ~16.0h saturation + ~0.7h
+// curated rated = ~16.7h < 24h. The per-cell window stays at the v1.5.4
+// 40s/12s (the chain-scenario removal dropped the grid 1111->835, so there
+// is now ample headroom). The rated sweep stays curated (RatedGlobs) because
+// it is the expensive additive dimension; expanding it to the full grid
+// would blow the budget many times over.
 func HeadlineWeekly() Profile {
 	return Profile{
 		Name:          "headline",
 		Cells:         HeadlineRealizedCells,
-		Duration:      60 * time.Second,
-		Warmup:        15 * time.Second,
+		Duration:      40 * time.Second,
+		Warmup:        12 * time.Second,
 		Cooldown:      defaultCooldown,
 		Runs:          1,
 		Arches:        1,
@@ -116,7 +122,10 @@ func HeadlineWeekly() Profile {
 // Recompute with `cmd/runner -dry-run -cells '*/*' | grep -c '^run0'` when
 // the registry grows; FitWithin uses it to assert the fast profile still
 // fits 24h, so an over-large grid fails loudly instead of overrunning.
-const FastRealizedCells = 1257
+// v1.5.4 redesign: 1257 -> 1111 -> 835 (W1 pruned saturated static rows; W3
+// deepened drivers 4->10; W4 added WS/SSE to three columns; pre-run audit
+// REMOVED the 12 middleware/chain scenarios as unfair).
+const FastRealizedCells = 835
 
 // Fast is the DEFAULT routine + weekly profile: the FULL grid (every server
 // × every scenario, capability-gated, "*/*") in SATURATION ONLY — no rated
@@ -126,8 +135,8 @@ const FastRealizedCells = 1257
 // per cell, the dominant cost) is intentionally OFF here and belongs in a
 // separate, scoped dispatch when latency-under-controlled-load is the story.
 //
-// Budget: 1257 cells × (10+35+5+12)s × 1 arch = ~21.6h saturation, rated=0
-// → ~21.6h < 24h. RatedPasses=0 makes BenchTier skip the rated flag entirely
+// Budget: 835 cells × (10+35+5+12)s × 1 arch = ~14.4h saturation, rated=0
+// → ~14.4h < 24h. RatedPasses=0 makes BenchTier skip the rated flag entirely
 // (rated OFF for every cell), so this is the cheap, full-breadth mode.
 func Fast() Profile {
 	return Profile{

diff --git a/go.mod b/go.mod
@@ -5,7 +5,7 @@ go 1.26.4
 require (
 	github.com/HdrHistogram/hdrhistogram-go v1.2.0
 	github.com/bradfitz/gomemcache v0.0.0-20260422231931-4d751bb6e37c
-	github.com/goceleris/loadgen v1.4.9
+	github.com/goceleris/loadgen v1.4.10
 	github.com/google/gofuzz v1.2.0
 	github.com/jackc/pgx/v5 v5.10.0
 	github.com/pierrec/lz4/v4 v4.1.27

diff --git a/go.sum b/go.sum
@@ -15,8 +15,8 @@ github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxK
 github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/goceleris/loadgen v1.4.9 h1:Kd/AmLHP520Su3azQ9tCNoc6tsaeEf7Nx8ECr4AdYfg=
-github.com/goceleris/loadgen v1.4.9/go.mod h1:Olg2awQufUnRemRlCvFPFL6Ww3byUd+UvZYQAMJm6Co=
+github.com/goceleris/loadgen v1.4.10 h1:j8qi6xQK4Bk1AqObhHBTbE1ZsG+s3vbpinLE4uPSfCI=
+github.com/goceleris/loadgen v1.4.10/go.mod h1:9LvtFtzoZj8z3MkE4lFvOQ3VZt4jBBB0b/TXxfieIGA=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=

diff --git a/mage_bench_cellsglob_test.go b/mage_bench_cellsglob_test.go
@@ -83,23 +83,30 @@ func TestCellsGlobServersWildcardUsesFullRegistry(t *testing.T) {
 //     the exclude).
 //   - the full negative "get-*/celeris-std-h1" then
 //     "!get-*/celeris-std-h1" leaves an empty include glob → fallback
-//     to "*", which keeps the 4 celeris servers via the implicit
+//     to "*", which keeps the celeris servers via the implicit
 //     include from the empty include. Documented behaviour: empty
 //     include with all excludes = "use the registry, but respect the
 //     excludes against it" — same as the runner.
 func TestCellsGlobServersRespectsExcludes(t *testing.T) {
 	// Case 1: include "get-*/celeris-*" + exclude "get-simple/celeris-std-h1".
-	// The 4 celeris servers all match at least one (get-json etc.), so
+	// Every celeris engine column matches at least one (get-json etc.), so
 	// celeris-std-h1 stays (its get-json/celeris-std-h1 pair still
-	// matches the include, and no exclude covers the whole server).
+	// matches the include, and no exclude covers the whole server). The
+	// want set is the FULL celeris-* column family (the v1.5.4 redesign
+	// expanded it from 4 to the 9 engine modes below).
 	got, err := cellsGlobServers("get-*/celeris-*,!get-simple/celeris-std-h1")
 	if err != nil {
 		t.Fatalf("cellsGlobServers: %v", err)
 	}
 	want := []string{
+		"celeris-adaptive-auto+upg-async",
+		"celeris-adaptive-h1-async",
+		"celeris-epoll-auto+upg-async",
+		"celeris-epoll-h1-async",
 		"celeris-epoll-h1-sync",
 		"celeris-iouring-auto+upg-async",
 		"celeris-iouring-h1-async",
+		"celeris-iouring-h1-sync",
 		"celeris-std-h1",
 	}
 	if !reflect.DeepEqual(got, want) {
@@ -114,9 +121,14 @@ func TestCellsGlobServersRespectsExcludes(t *testing.T) {
 		t.Fatalf("cellsGlobServers: %v", err)
 	}
 	want = []string{
+		"celeris-adaptive-auto+upg-async",
+		"celeris-adaptive-h1-async",
+		"celeris-epoll-auto+upg-async",
+		"celeris-epoll-h1-async",
 		"celeris-epoll-h1-sync",
 		"celeris-iouring-auto+upg-async",
 		"celeris-iouring-h1-async",
+		"celeris-iouring-h1-sync",
 		"celeris-std-h1",
 	}
 	if !reflect.DeepEqual(got, want) {

diff --git a/mage_tier.go b/mage_tier.go
@@ -30,12 +30,12 @@ import (
 //
 // Per-cell execution: a cell visits ONE (server, scenario) pair and
 // runs the saturation pass unconditionally. If the scenario is in
-// the rated subset (currently get-json / post-4k / auto-mix-111) and
-// the runner is launched with BENCH_RATED=1, the same cell ALSO runs
-// the rated sweep after the saturation pass. The cell's JSON carries
-// both maps on the same row; the bench's published Document has a
-// per-scenario SaturationModeRPS (every scenario) + a per-scenario
-// LatencyAtSLO (only the rated 3).
+// the rated subset (currently get-json / post-4k) and the runner is
+// launched with BENCH_RATED=1, the same cell ALSO runs the rated
+// sweep after the saturation pass. The cell's JSON carries both maps
+// on the same row; the bench's published Document has a per-scenario
+// SaturationModeRPS (every scenario) + a per-scenario LatencyAtSLO
+// (only the rated 2).
 //
 // Flow:
 //
@@ -57,8 +57,8 @@ import (
 //	                           grid (every server × every scenario,
 //	                           capability-gated); they differ ONLY by the
 //	                           per-cell window. headline (the weekly
-//	                           cadence) uses 60s/15s so the whole grid fits
-//	                           24h single-arch (~21.6h); full uses 90s/20s
+//	                           cadence) uses 40s/12s so the whole grid fits
+//	                           24h single-arch (~16.7h); full uses 90s/20s
 //	                           for the exhaustive sweep (~30h on one arch,
 //	                           needs a raised BENCH_BUDGET). Default: full.
 //	BENCH_TARGET=both          msa2-server | msr1 | both (both = 2 arches)

diff --git a/report/document.go b/report/document.go
@@ -2,6 +2,7 @@ package report
 
 import (
 	"sort"
+	"strings"
 	"time"
 )
 
@@ -192,9 +193,13 @@ func BuildDocument(in BuildInput) *Document {
 		// loadgen still had CPU headroom is NIC-limited, not server-limited.
 		// Its saturation RPS converges across every fast adapter and must not
 		// be read as a ranking — the CPU efficiency in Resources is the real
-		// signal. Only flagged when the fabric's line rate is known (the LAN;
-		// the Tailscale overlay reports 0 and flags nothing).
-		if isNetworkBound(c.BytesMedian, c.LoadgenCPUP95, in.Environment.FabricLineRateBitsPerSec) {
+		// signal. Runtime detection only fires when the fabric's line rate is
+		// known (the LAN; the Tailscale overlay reports 0 and flags nothing),
+		// so the wire-bound-by-design scenarios are OR'd in unconditionally:
+		// post-1m is a documented wire-bound datapoint, never a ranking row,
+		// regardless of whether the line rate was measurable.
+		if isWireBoundByDesign(c.ScenarioName) ||
+			isNetworkBound(c.BytesMedian, c.LoadgenCPUP95, in.Environment.FabricLineRateBitsPerSec) {
 			if sr.NetworkBound == nil {
 				sr.NetworkBound = map[string]bool{}
 			}
@@ -240,6 +245,44 @@ const (
 	networkBoundLoadgenCPUCeiling = 8.0
 )
 
+// isWireBoundByDesign reports whether a scenario is wire-bound by design
+// rather than by runtime measurement: true only for the exact name "post-1m".
+// post-1m is a documented 1 MiB-payload datapoint whose saturation RPS is
+// dictated by the fabric, not the server, so it must always land in the
+// wire-bound section and never head a raw-RPS ranking — even on overlays
+// where the line rate is unknown and isNetworkBound cannot fire.
+func isWireBoundByDesign(scenarioName string) bool {
+	return scenarioName == "post-1m"
+}
+
+// isFanoutBound reports whether a scenario's throughput is paced by the
+// server's fixed publish tick rather than by CPU: true for exactly the four
+// names ws-hub-broadcast-128/-1024 and sse-fanout-128/-1024. Those cells push
+// to N subscribers on a 1 ms cadence, so their RPS ceiling is ~1000*N
+// regardless of server headroom — a fan-out rate, not a rankable throughput.
+// Their real signal is delivery latency (the tail-latency section), so they
+// are kept out of the headline ranking just like the wire-bound cells. The
+// echo modes (ws-echo / ws-large-echo) are client-driven round-trips and stay ranked.
+func isFanoutBound(scenarioName string) bool {
+	switch scenarioName {
+	case "ws-hub-broadcast-128", "ws-hub-broadcast-1024",
+		"sse-fanout-128", "sse-fanout-1024":
+		return true
+	}
+	return false
+}
+
+// isLatencyProbeByDesign reports whether a scenario is a single-connection
+// latency probe whose saturation "RPS" is a latency reciprocal (1/RTT)
+// rather than a throughput. At one connection requests serialize, so the
+// number rewards low per-request latency and must never head a raw-RPS
+// ranking — its real signal is the tail-latency section. Any name ending in
+// the single-conn marker "-1c" (scenarios.ProfileSingle) matches, so future
+// -1c scenarios are covered too; get-json-1c is the only one today.
+func isLatencyProbeByDesign(scenarioName string) bool {
+	return strings.HasSuffix(scenarioName, "-1c")
+}
+
 // isNetworkBound reports whether a cell's achieved egress bandwidth sat at
 // the fabric line rate (NIC-limited) rather than the server's CPU limit.
 // bytesPerSec is the median across-runs throughput; loadgenCPUP95 is the