api7 · membphis · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -39,6 +39,9 @@ jobs:
       - name: Build (release)
         run: cargo build --release
 
+      - name: Build benches (compile only, do not run)
+        run: cargo build --release --benches
+
       - name: Test (release)
         run: cargo test --release
 

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -14,7 +14,9 @@ The `Makefile` is the canonical entry point; `make help` lists targets.
 make build              # cargo build --release  → target/release/libqjson.so
 make test               # cargo test --release + busted Lua tests
 make lint               # cargo clippy --release --all-targets -- -D warnings
-make bench              # OpenResty LuaJIT benchmark vs lua-cjson and simdjson
+make bench              # Rust criterion (parse_eager) + OpenResty LuaJIT bench vs lua-cjson/simdjson
+make bench-rust         # Rust criterion only (fast inner loop for SIMD tuning)
+make bench-lua          # Lua bench only (qjson vs lua-cjson and simdjson)
 ```
 
 Under the hood / for narrower invocations:
@@ -79,7 +81,7 @@ src/
 lua/qjson.lua    LuaJIT wrapper (ffi.cdef + Doc/Cursor metatables)
 include/qjson.h  public C header
 tests/                Rust integration tests + tests/lua/ busted suite
-benches/              lua_bench.lua vs lua-cjson/simdjson; fixtures/ has small_api.json + medium_resp.json
+benches/              parse_eager.rs (criterion) + lua_bench.lua vs lua-cjson/simdjson; fixtures/ has small_api.json + medium_resp.json + medium_resp_{cjk,mixed,emoji}.json
 ```
 
 The enum values in `src/error.rs` are duplicated in `include/qjson.h` and `lua/qjson.lua` (the latter only encodes the `T_*` type tags and `NOT_FOUND = 2`). Keep all three in sync when adding/renumbering codes.

diff --git a/Cargo.toml b/Cargo.toml
@@ -22,7 +22,12 @@ rustc-hash = "2"
 once_cell = "1"
 
 [dev-dependencies]
-proptest = "1"
+proptest  = "1"
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "parse_eager"
+harness = false
 
 [profile.release]
 opt-level = 3

diff --git a/Makefile b/Makefile
@@ -17,7 +17,7 @@ else
 LUA_ENV := LD_LIBRARY_PATH=$(LIB_DIR) LUA_PATH='$(LUA_PATH)' LUA_CPATH='$(LUA_CPATH)'
 endif
 
-.PHONY: help build test lint bench clean
+.PHONY: help build test lint bench bench-rust bench-lua clean
 
 help: ## Show this help
 	@# FS uses [^#]* (not .*) so a description containing `##` isn't truncated.
@@ -34,7 +34,12 @@ test: build ## Run cargo tests + busted Lua tests
 lint: ## Run clippy with -D warnings
 	cargo clippy --release --all-targets -- -D warnings
 
-bench: build vendor/lua-cjson/cjson.so ## Run the OpenResty LuaJIT benchmark
+bench: bench-rust bench-lua ## Run all benchmarks (Rust criterion + Lua vs cjson)
+
+bench-rust: build ## Rust criterion microbench (parse path, MB/s + statistical CI)
+	cargo bench --bench parse_eager
+
+bench-lua: build vendor/lua-cjson/cjson.so ## Lua bench: qjson vs cjson vs simdjson
 	$(LUA_ENV) $(RESTY) benches/lua_bench.lua
 
 vendor/lua-cjson/cjson.so: | vendor/lua-cjson/Makefile

diff --git a/README.md b/README.md
@@ -118,12 +118,14 @@ and `simdjson` retain more Lua heap because they materialize the table tree.
 
 See [`docs/benchmarks.md`](docs/benchmarks.md) for the full size ladder,
 memory numbers, an "encode round-trip" row (passthrough emit via
-`memcpy`), exact environment, and the reproduction command. `make bench`
+`memcpy`), exact environment, and the reproduction command. `make bench-lua`
 uses `lua-resty-simdjson` when `resty.simdjson` is available in the
 OpenResty environment; otherwise it skips the simdjson rows.
 
 ```sh
-make bench       # qjson vs cjson and lua-resty-simdjson
+make bench       # full suite: Rust criterion + Lua vs cjson/simdjson
+make bench-lua   # Lua bench only (the table above)
+make bench-rust  # Rust criterion only (internal SIMD tuning)
 ```
 
 ## RFC 8259 conformance

diff --git a/benches/fixtures/medium_resp_cjk.json b/benches/fixtures/medium_resp_cjk.json
diff --git a/benches/fixtures/medium_resp_emoji.json b/benches/fixtures/medium_resp_emoji.json
diff --git a/benches/fixtures/medium_resp_mixed.json b/benches/fixtures/medium_resp_mixed.json
diff --git a/benches/parse_eager.rs b/benches/parse_eager.rs
@@ -0,0 +1,57 @@
+//! End-to-end parse benchmark across ASCII, CJK, mixed, and emoji
+//! fixtures, in both EAGER and LAZY mode. Used to measure the cost of
+//! value-level validation (the EAGER-vs-LAZY gap) — which is what the
+//! upcoming SIMD UTF-8 validator targets — and to guard against
+//! ASCII-path regressions.
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+use qjson::doc::Document;
+use qjson::options::{
+    Options, QJSON_DEFAULT_MAX_DEPTH, QJSON_MODE_EAGER, QJSON_MODE_LAZY,
+};
+use std::fs;
+
+const FIXTURES: &[(&str, &str)] = &[ // (group label, relative fixture path) pairs; the label becomes "parse/<label>"
+    ("ascii", "benches/fixtures/medium_resp.json"),
+    ("cjk",   "benches/fixtures/medium_resp_cjk.json"),
+    ("mixed", "benches/fixtures/medium_resp_mixed.json"),
+    ("emoji", "benches/fixtures/medium_resp_emoji.json"),
+];
+
+fn run(c: &mut Criterion) { // registers one "parse/<label>" group per fixture, each with an "eager" and a "lazy" bench
+    for (name, path) in FIXTURES {
+        let buf = fs::read(path) // whole fixture is read once, outside the timed closures
+            .unwrap_or_else(|e| panic!("read {}: {}", path, e)); // an unreadable fixture aborts the bench run
+        let mut group = c.benchmark_group(format!("parse/{}", name));
+        group.throughput(Throughput::Bytes(buf.len() as u64)); // throughput is accounted per input byte
+
+        let eager = Options { // identical to `lazy` below except for `mode`
+            mode:      QJSON_MODE_EAGER,
+            max_depth: QJSON_DEFAULT_MAX_DEPTH,
+        };
+        let lazy = Options {
+            mode:      QJSON_MODE_LAZY,
+            max_depth: QJSON_DEFAULT_MAX_DEPTH,
+        };
+
+        group.bench_function("eager", |b| {
+            b.iter(|| {
+                let doc = Document::parse_with_options(black_box(&buf), &eager) // black_box hides the input from the optimizer
+                    .expect("parse eager");
+                black_box(doc); // keeps the result observable; the Document is dropped here, inside the closure given to `iter`
+            })
+        });
+        group.bench_function("lazy", |b| {
+            b.iter(|| {
+                let doc = Document::parse_with_options(black_box(&buf), &lazy) // same input buffer as the eager bench
+                    .expect("parse lazy");
+                black_box(doc); // same handling as the eager bench
+            })
+        });
+
+        group.finish(); // close this fixture's group before moving to the next one
+    }
+}
+
+criterion_group!(benches, run); // group `benches` with `run` as its only target
+criterion_main!(benches); // supplies the bench binary's entry point (Cargo.toml sets `harness = false` for this bench)
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
@@ -61,17 +61,17 @@ parsing workloads with ~3-5% structural density.
 
 ## Reproducing
 
-Run the full comparison with one command:
+Run the Lua-vs-cjson comparison (the table on this page) with:
 
 ```sh
-make bench
+make bench-lua
 ```
 
 This builds `qjson`, builds the vendored `lua-cjson` against OpenResty's
 LuaJIT, then invokes `benches/lua_bench.lua` through OpenResty's `resty` so
-`lua-resty-simdjson` runs in its normal `ngx` environment.
-If `resty.simdjson` is not available on `package.path` / `package.cpath`, the
-harness prints a skip message and omits the simdjson rows.
+`lua-resty-simdjson` runs in its normal `ngx` environment. `make bench`
+runs this plus the Rust criterion microbenchmark used for internal
+optimization tracking.
 
 Numbers below come from one such run.
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -18,4 +18,9 @@ pub mod __test_api {
     pub use crate::scan::avx2::Avx2Scanner;
     #[cfg(target_arch = "aarch64")]
     pub use crate::scan::neon::NeonScanner;
+
+    // String validator backends for cross-backend property testing.
+    pub use crate::validate::strings::scalar::validate_span_scalar;
+    #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
+    pub use crate::validate::strings::avx2::validate_span_avx2;
 }