diff --git a/README.md b/README.md index c8a1462..59d7738 100644 --- a/README.md +++ b/README.md @@ -101,14 +101,15 @@ LD_LIBRARY_PATH="$PWD/target/release" \ `qjson` vs. `lua-cjson` and `lua-resty-simdjson` on multimodal chat-completion payloads, "parse + access model, temperature, and all messages[*].content paths" workload (median ops/s under OpenResty LuaJIT 2.1, -Intel Core i5-9400; 5 rounds, deterministic payload): +AMD EPYC Rome (Zen 2, 4 vCPUs); 5 rounds, deterministic payload): | Size | cjson | simdjson | `qjson.parse` | `qjson.decode + access content` | speedup vs. cjson | |---:|---:|---:|---:|---:|---:| -| 2 KB | 106,646 | 137,427 | 135,296 | 97,574 | 1.3× / 0.9× | -| 100 KB | 6,045 | 46,577 | 137,931 | 134,590 | 22.8× / 22.3× | -| 1 MB | 594 | 4,408 | 16,447 | 16,340 | 27.7× / 27.5× | -| 10 MB | 59 | 356 | 1,035 | 1,028 | 17.5× / 17.4× | +| 2 KB | 94,075 | 108,108 | 127,214 | 120,398 | 1.4× / 1.3× | +| 60 KB | 9,041 | 83,043 | 123,487 | 214,500 | 13.7× / 23.7× | +| 100 KB | 5,302 | 32,248 | 109,649 | 102,564 | 20.7× / 19.3× | +| 1 MB | 517 | 3,538 | 16,520 | 16,988 | 32.0× / 32.9× | +| 10 MB | 50 | 402 | 1,899 | 1,918 | 38.0× / 38.4× | `qjson.parse` wins because it skips building a Lua table for the parts you never read; `qjson.decode + t.field` adds a cjson-shaped table proxy on top @@ -161,4 +162,4 @@ qjson_doc* doc = qjson_parse_ex(buf, len, &opts, &err); There are no known strict-mode structural grammar gaps at this time: `tests/json_test_suite.rs::KNOWN_N_FAILURES` is empty, and the RFC 8259 suite has no ignored structural cases. Update this section whenever a -temporary conformance exception is introduced. +temporary conformance exception is introduced. \ No newline at end of file diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 471cef8..fe6f09f 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -14,9 +14,9 @@ Lua-table baselines. 
| | | |---|---| -| Host CPU | Intel Core i5-9400, 6 cores, AVX2 + PCLMUL | -| Memory | 15 GiB | -| OS | Ubuntu 24.04.4 LTS, Linux 6.8.0-110-generic, x86_64 | +| Host CPU | AMD EPYC Rome (Zen 2), 4 vCPUs, AVX2 + PCLMUL | +| Memory | 8 GiB | +| OS | Ubuntu 24.04, x86_64 | | Runtime | OpenResty `resty` 0.29 / OpenResty 1.21.4.4 / LuaJIT 2.1.1723681758 | | `qjson` | this repo, release build, AVX2 + PCLMUL scanner active | | `lua-cjson` | vendored `openresty/lua-cjson` | @@ -81,32 +81,32 @@ Each row is "parse + access request fields" on the named payload. | Scenario | Size | cjson | simdjson | `qjson.parse` | `qjson.decode + access content` | `qjson.decode + qjson.encode` | |---|---:|---:|---:|---:|---:|---:| -| small | 2.1 KB | 106,646 | 137,427 | 135,296 | 97,574 | 202,388 | -| medium | 60.4 KB | 10,086 | 86,029 | 189,970 | 198,098 | 175,562 | -| github-100k | 100 KB | 2,208 | 2,880 | 4,496 | 4,479 | 4,809 | -| 100k | 100 KB | 6,045 | 46,577 | 137,931 | 134,590 | 153,139 | -| 200k | 200 KB | 3,025 | 22,563 | 78,247 | 75,873 | 81,433 | -| 500k | 500 KB | 1,216 | 9,128 | 33,058 | 32,680 | 34,188 | -| 1m | 1.00 MB | 594 | 4,408 | 16,447 | 16,340 | 16,722 | -| 2m | 2.00 MB | 296 | 1,966 | 8,247 | 8,224 | 8,055 | -| 5m | 5.00 MB | 118 | 600 | 2,869 | 2,945 | 2,992 | -| 10m | 10.00 MB | 59 | 356 | 1,035 | 1,028 | 1,050 | -| interleaved (100k/200k/500k/1m, cycled) | — | 1,318 | 9,116 | 33,342 | 32,752 | 34,031 | +| small | 2.1 KB | 94,075 | 108,108 | 127,214 | 120,398 | 203,666 | +| medium | 60.4 KB | 9,041 | 83,043 | 123,487 | 214,500 | 214,408 | +| github-100k | 100 KB | 2,238 | 2,047 | 6,010 | 5,994 | 6,701 | +| 100k | 100 KB | 5,302 | 32,248 | 109,649 | 102,564 | 114,548 | +| 200k | 200 KB | 2,659 | 19,040 | 90,090 | 92,251 | 106,383 | +| 500k | 500 KB | 1,052 | 7,062 | 34,722 | 35,336 | 37,453 | +| 1m | 1.00 MB | 517 | 3,538 | 16,520 | 16,988 | 17,261 | +| 2m | 2.00 MB | 258 | 2,026 | 9,021 | 8,580 | 9,033 | +| 5m | 5.00 MB | 102 | 663 | 2,982 | 3,728 | 3,829 | +| 10m | 
10.00 MB | 50 | 402 | 1,899 | 1,918 | 1,925 | +| interleaved (100k/200k/500k/1m, cycled) | — | 1,141 | 9,544 | 34,043 | 33,611 | 32,752 | ### Speed-up vs. baselines | Scenario | `qjson.parse` / cjson | `qjson.parse` / simdjson | `qjson.decode + access content` / cjson | `qjson.decode + access content` / simdjson | |---|---:|---:|---:|---:| -| small | 1.3× | 1.0× | 0.9× | 0.7× | -| medium | 18.8× | 2.2× | 19.6× | 2.3× | -| github-100k | 2.0× | 1.6× | 2.0× | 1.6× | -| 100k | 22.8× | 3.0× | 22.3× | 2.9× | -| 200k | 25.9× | 3.5× | 25.1× | 3.4× | -| 500k | 27.2× | 3.6× | 26.9× | 3.6× | -| 1m | 27.7× | 3.7× | 27.5× | 3.7× | -| 2m | 27.9× | 4.2× | 27.8× | 4.2× | -| 5m | 24.3× | 4.8× | 25.0× | 4.9× | -| 10m | 17.5× | 2.9× | 17.4× | 2.9× | +| small | 1.4× | 1.2× | 1.3× | 1.1× | +| medium | 13.7× | 1.5× | 23.7× | 2.6× | +| github-100k | 2.7× | 2.9× | 2.7× | 2.9× | +| 100k | 20.7× | 3.4× | 19.3× | 3.2× | +| 200k | 33.9× | 4.7× | 34.7× | 4.8× | +| 500k | 33.0× | 4.9× | 33.6× | 5.0× | +| 1m | 32.0× | 4.7× | 32.9× | 4.8× | +| 2m | 35.0× | 4.5× | 33.3× | 4.2× | +| 5m | 29.2× | 4.5× | 36.5× | 5.6× | +| 10m | 38.0× | 4.7× | 38.4× | 4.8× | ## Results — memory delta (KB retained after 5 rounds) @@ -116,17 +116,17 @@ from the last round may still be included. 
| Scenario | cjson | simdjson | `qjson.parse` | `qjson.decode + access content` | `qjson.decode + qjson.encode` | |---|---:|---:|---:|---:|---:| -| small | +15,464 | +15,447 | +4,094 | +15,251 | +11,908 | -| medium | +1,955 | +2,660 | +160 | +1,210 | +1,216 | -| github-100k | +13,187 | +3,362 | +29 | +548 | +242 | -| 100k | +484 | +748 | +79 | +704 | +241 | -| 200k | +392 | +523 | +40 | +352 | +124 | -| 500k | +577 | +630 | +17 | +142 | +48 | -| 1m | +1,082 | +1,121 | +13 | +107 | +37 | -| 2m | +1,155 | +1,248 | +21 | +211 | +48 | -| 5m | +1,316 | +1,538 | +17 | +403 | +48 | -| 10m | +1,583 | +2,014 | +16 | +844 | +48 | -| interleaved | +3,355 | +4,404 | +314 | +2,825 | +945 | +| small | +15,493 | +15,500 | +4,066 | +15,116 | +11,140 | +| medium | +1,955 | +2,660 | +333 | +1,114 | +1,120 | +| github-100k | +12,018 | +3,527 | +14 | +536 | +230 | +| 100k | +485 | +748 | +67 | +692 | +229 | +| 200k | +392 | +523 | +34 | +346 | +112 | +| 500k | +577 | +630 | +14 | +139 | +45 | +| 1m | +1,082 | +1,121 | +10 | +104 | +34 | +| 2m | +1,155 | +1,248 | +14 | +208 | +45 | +| 5m | +1,316 | +1,538 | +14 | +400 | +45 | +| 10m | +1,583 | +2,014 | +14 | +708 | +45 | +| interleaved | +3,356 | +4,404 | +268 | +2,771 | +897 | `qjson.parse` retention is essentially constant across payload size: the only GC-rooted state is the reusable `indices: Vec` and `scratch` buffers. @@ -139,17 +139,16 @@ key into the Lua table heap. 1. **`qjson` is fastest once payloads move beyond tiny inputs.** The small 2 KB row is dominated by fixed Lua/FFI overhead, but medium and - larger multimodal payloads show roughly 18–28× higher throughput than + larger multimodal payloads show roughly 14–38× higher throughput than `cjson` and roughly 3–5× higher throughput than `lua-resty-simdjson` for request-field access. 2. 
**Reading every `messages[*].content` is still access-light for large multimodal bodies.** The benchmark touches the top-level request fields and one `content` field per message; the payload size comes from image data inside each message. -3. **The win drops at 10 MB.** `qjson.parse` is L3-bandwidth-bound at that - size, and the `qjson.decode` proxy's per-`__index` dispatch starts to - amortize less well against the cheaper structural scan. `cjson` is still - allocating into the table heap at that size, so the ratio remains large. +3. **Speedup remains high at 10 MB.** The eager-decode optimization + (eager-mode documents are validated once at parse time, so accessors skip re-validation) keeps `qjson.parse` throughput scaling well even at the 10 MB level, + maintaining ~38× over `cjson` and ~4.7× over `lua-resty-simdjson`. 4. **`qjson.decode + qjson.encode (unmodified)` is the headline number for passthrough workloads** — e.g. an LLM gateway re-emitting the original JSON after light-touch inspection. The substring fast path means @@ -159,7 +158,7 @@ key into the Lua table heap. size; the eager parsers retain more Lua heap after the first run because the Lua table tree stays GC-rooted until the next collection. The 10 MB case retains ~1.5 MB for `cjson`, ~2.0 MB for simdjson, - and ~16 KB for `qjson.parse`. + and ~14 KB for `qjson.parse`. 6. **REST API payloads (github-100k) show a smaller speedup** because their structural density is higher than the multimodal request ladder. Memory savings remain dramatic because `cjson` must materialize every nested @@ -188,4 +187,4 @@ key into the Lua table heap. - `qjson` retains the source buffer on the `Doc`, so the input string stays alive for the document's lifetime. If you parse and immediately discard the JSON string in the caller, GC can still free - the input — but only after the `Doc` is also unreachable. + the input — but only after the `Doc` is also unreachable. 
\ No newline at end of file diff --git a/src/decode/number.rs b/src/decode/number.rs index d24ebfb..74839ff 100644 --- a/src/decode/number.rs +++ b/src/decode/number.rs @@ -1,7 +1,23 @@ use crate::error::qjson_err; -pub(crate) fn parse_i64(bytes: &[u8]) -> Result { - crate::validate::validate_number(bytes)?; +pub(crate) fn parse_i64(bytes: &[u8], skip_validation: bool) -> Result { + if !skip_validation { + crate::validate::validate_number(bytes)?; + } + + // When validation is skipped the caller guarantees the input is a + // well-formed JSON number, but we still protect against empty input + // so a misuse of the skip-flag cannot panic on bytes[0]. + if bytes.is_empty() { + return Err(qjson_err::QJSON_INVALID_NUMBER); + } + + // Fast guard: first byte must plausibly start a number, otherwise + // the caller passed skip_validation=true on non-number input. + if skip_validation && !matches!(bytes[0], b'-' | b'0'..=b'9') { + return Err(qjson_err::QJSON_INVALID_NUMBER); + } + // After ABNF validation, integer-only inputs have no `.`/`e`/`E`. if bytes.iter().any(|&b| b == b'.' || b == b'e' || b == b'E') { return Err(qjson_err::QJSON_TYPE_MISMATCH); @@ -24,8 +40,20 @@ pub(crate) fn parse_i64(bytes: &[u8]) -> Result { Ok(v) } -pub(crate) fn parse_f64(bytes: &[u8]) -> Result { - crate::validate::validate_number(bytes)?; +pub(crate) fn parse_f64(bytes: &[u8], skip_validation: bool) -> Result { + if !skip_validation { + crate::validate::validate_number(bytes)?; + } + + // When validation is skipped, do a cheap precheck to avoid returning + // a mode-dependent error code for non-number input. The leading + // byte must plausibly start a JSON number: `-`, `.`, or digit. + if skip_validation { + if bytes.is_empty() || !matches!(bytes[0], b'-' | b'.' 
| b'0'..=b'9') { + return Err(qjson_err::QJSON_INVALID_NUMBER); + } + } + let s = std::str::from_utf8(bytes).map_err(|_| qjson_err::QJSON_DECODE_FAILED)?; match s.parse::() { Ok(v) if v.is_finite() => Ok(v), @@ -38,39 +66,76 @@ pub(crate) fn parse_f64(bytes: &[u8]) -> Result { mod tests { use super::*; - #[test] fn i64_zero() { assert_eq!(parse_i64(b"0"), Ok(0)); } - #[test] fn i64_positive() { assert_eq!(parse_i64(b"42"), Ok(42)); } - #[test] fn i64_negative() { assert_eq!(parse_i64(b"-7"), Ok(-7)); } - #[test] fn i64_max() { assert_eq!(parse_i64(b"9223372036854775807"), Ok(i64::MAX)); } - #[test] fn i64_min() { assert_eq!(parse_i64(b"-9223372036854775808"), Ok(i64::MIN)); } + #[test] fn i64_zero() { assert_eq!(parse_i64(b"0", false), Ok(0)); } + #[test] fn i64_positive() { assert_eq!(parse_i64(b"42", false), Ok(42)); } + #[test] fn i64_negative() { assert_eq!(parse_i64(b"-7", false), Ok(-7)); } + #[test] fn i64_max() { assert_eq!(parse_i64(b"9223372036854775807", false), Ok(i64::MAX)); } + #[test] fn i64_min() { assert_eq!(parse_i64(b"-9223372036854775808", false), Ok(i64::MIN)); } #[test] fn i64_overflow() { - assert_eq!(parse_i64(b"9223372036854775808"), Err(qjson_err::QJSON_OUT_OF_RANGE)); + assert_eq!(parse_i64(b"9223372036854775808", false), Err(qjson_err::QJSON_OUT_OF_RANGE)); } #[test] fn i64_rejects_decimal() { - assert_eq!(parse_i64(b"1.5"), Err(qjson_err::QJSON_TYPE_MISMATCH)); + assert_eq!(parse_i64(b"1.5", false), Err(qjson_err::QJSON_TYPE_MISMATCH)); } #[test] fn i64_rejects_exponent() { - assert_eq!(parse_i64(b"1e5"), Err(qjson_err::QJSON_TYPE_MISMATCH)); + assert_eq!(parse_i64(b"1e5", false), Err(qjson_err::QJSON_TYPE_MISMATCH)); } #[test] fn i64_rejects_empty() { - assert_eq!(parse_i64(b""), Err(qjson_err::QJSON_INVALID_NUMBER)); + assert_eq!(parse_i64(b"", false), Err(qjson_err::QJSON_INVALID_NUMBER)); } - #[test] fn f64_zero() { assert_eq!(parse_f64(b"0.0").unwrap(), 0.0); } - #[test] fn f64_inexact_decimal() { 
assert!((parse_f64(b"1.7").unwrap() - 1.7).abs() < 1e-12); } - #[test] fn f64_negative(){ assert_eq!(parse_f64(b"-1.5").unwrap(), -1.5); } - #[test] fn f64_exponent(){ assert_eq!(parse_f64(b"1e2").unwrap(), 100.0); } + #[test] fn f64_zero() { assert_eq!(parse_f64(b"0.0", false).unwrap(), 0.0); } + #[test] fn f64_inexact_decimal() { assert!((parse_f64(b"1.7", false).unwrap() - 1.7).abs() < 1e-12); } + #[test] fn f64_negative(){ assert_eq!(parse_f64(b"-1.5", false).unwrap(), -1.5); } + #[test] fn f64_exponent(){ assert_eq!(parse_f64(b"1e2", false).unwrap(), 100.0); } #[test] fn f64_rejects_garbage() { - assert_eq!(parse_f64(b"hello"), Err(qjson_err::QJSON_INVALID_NUMBER)); + assert_eq!(parse_f64(b"hello", false), Err(qjson_err::QJSON_INVALID_NUMBER)); } -} + + // ── skip_validation=true branch ──────────────────────────────── + + #[test] + fn i64_skip_validation_valid_input() { + assert_eq!(parse_i64(b"42", true), Ok(42)); + } + + #[test] + fn i64_skip_validation_empty_fails_gracefully() { + assert_eq!(parse_i64(b"", true), Err(qjson_err::QJSON_INVALID_NUMBER)); + } + + #[test] + fn i64_skip_validation_non_digit_returns_invalid_number() { + assert_eq!(parse_i64(b"true", true), Err(qjson_err::QJSON_INVALID_NUMBER)); + } + + #[test] + fn f64_skip_validation_valid_input() { + assert_eq!(parse_f64(b"3.14", true).unwrap(), 3.14); + } + + #[test] + fn f64_skip_validation_garbage_fails_at_parse() { + assert_eq!(parse_f64(b"hello", true), Err(qjson_err::QJSON_INVALID_NUMBER)); + } + + #[test] + fn f64_skip_validation_empty_returns_invalid_number() { + assert_eq!(parse_f64(b"", true), Err(qjson_err::QJSON_INVALID_NUMBER)); + } + + #[test] + fn f64_skip_validation_non_number_returns_invalid_number() { + assert_eq!(parse_f64(b"null", true), Err(qjson_err::QJSON_INVALID_NUMBER)); + } +} \ No newline at end of file diff --git a/src/decode/string.rs b/src/decode/string.rs index 7e1ac15..fddf213 100644 --- a/src/decode/string.rs +++ b/src/decode/string.rs @@ -5,9 +5,12 @@ use 
crate::error::qjson_err; /// (ptr, len) pointing into either `buf` (no escapes) or `scratch`. pub(crate) fn decode_string( buf: &[u8], start: usize, end: usize, scratch: &mut Vec, + skip_validation: bool, ) -> Result<(*const u8, usize), qjson_err> { let slice = &buf[start..end]; - crate::validate::validate_string_span(slice)?; + if !skip_validation { + crate::validate::validate_string_span(slice)?; + } if memchr::memchr(b'\\', slice).is_none() { return Ok((slice.as_ptr(), slice.len())); } @@ -101,7 +104,7 @@ mod tests { fn d(s: &[u8]) -> Result, qjson_err> { let mut scratch = Vec::new(); - let (p, n) = decode_string(s, 0, s.len(), &mut scratch)?; + let (p, n) = decode_string(s, 0, s.len(), &mut scratch, false)?; Ok(unsafe { std::slice::from_raw_parts(p, n) }.to_vec()) } @@ -181,4 +184,4 @@ mod tests { // validate_string_span catches a trailing lone backslash first. assert_eq!(d(b"a\\").unwrap_err(), qjson_err::QJSON_INVALID_STRING); } -} +} \ No newline at end of file diff --git a/src/doc.rs b/src/doc.rs index 326e6d9..82226f5 100644 --- a/src/doc.rs +++ b/src/doc.rs @@ -6,6 +6,7 @@ use crate::skip_cache::SkipCache; pub struct Document<'a> { pub(crate) buf: &'a [u8], pub(crate) indices: Vec, + pub(crate) eager_validated: bool, pub(crate) scratch: RefCell>, pub(crate) skip: RefCell, } @@ -30,16 +31,17 @@ impl<'a> Document<'a> { crate::scan::scan(buf, &mut indices).map_err(|_| qjson_err::QJSON_PARSE_ERROR)?; indices.push(u32::MAX); - crate::validate::validate_depth(buf, &indices, max_depth)?; - if opts.is_eager() { crate::validate::validate_trailing(buf, &indices)?; - crate::validate::validate_eager_values(buf, &indices)?; + crate::validate::validate_eager_values(buf, &indices, max_depth)?; + } else { + crate::validate::validate_depth(buf, &indices, max_depth)?; } Ok(Self { buf, indices, + eager_validated: opts.is_eager(), scratch: RefCell::new(Vec::new()), skip: RefCell::new(SkipCache::new()), }) @@ -205,4 +207,4 @@ mod tests { let opts = crate::options::Options { 
mode: crate::options::QJSON_MODE_LAZY, max_depth: 0 }; assert!(Document::parse_with_options(b"{}garbage", &opts).is_ok()); } -} +} \ No newline at end of file diff --git a/src/ffi.rs b/src/ffi.rs index d4d8cec..cbfb25a 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -285,7 +285,7 @@ pub unsafe extern "C" fn qjson_get_str( let close = d.indices[(cur.idx_start + 1) as usize] as usize; let mut scratch = d.scratch.borrow_mut(); - match string::decode_string(d.buf, pos + 1, close, &mut scratch) { + match string::decode_string(d.buf, pos + 1, close, &mut scratch, d.eager_validated) { Ok((p, n)) => { *out_ptr = p; *out_len = n; qjson_err::QJSON_OK as c_int } Err(e) => e as c_int, } @@ -312,7 +312,7 @@ pub unsafe extern "C" fn qjson_get_i64( let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int, }; - match number::parse_i64(bytes) { + match number::parse_i64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } Err(e) => e as c_int, } @@ -338,7 +338,7 @@ pub unsafe extern "C" fn qjson_get_f64( let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int, }; - match number::parse_f64(bytes) { + match number::parse_f64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } Err(e) => e as c_int, } @@ -563,7 +563,7 @@ pub unsafe extern "C" fn qjson_cursor_get_str( let close = d.indices[(cur.idx_start + 1) as usize] as usize; let mut scratch = d.scratch.borrow_mut(); - match string::decode_string(d.buf, pos + 1, close, &mut scratch) { + match string::decode_string(d.buf, pos + 1, close, &mut scratch, d.eager_validated) { Ok((p, n)) => { *out_ptr = p; *out_len = n; qjson_err::QJSON_OK as c_int } Err(e) => e as c_int, } @@ -591,7 +591,7 @@ pub unsafe extern "C" fn qjson_cursor_get_i64( }; let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; - match number::parse_i64(bytes) { + 
match number::parse_i64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } Err(e) => e as c_int, } @@ -618,7 +618,7 @@ pub unsafe extern "C" fn qjson_cursor_get_f64( }; let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; - match number::parse_f64(bytes) { + match number::parse_f64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } Err(e) => e as c_int, } @@ -794,7 +794,7 @@ pub unsafe extern "C" fn qjson_cursor_object_entry_at( let open_pos = d.indices[key_idx_start as usize] as usize; let close_pos = d.indices[(key_idx_start + 1) as usize] as usize; let mut scratch = d.scratch.borrow_mut(); - match string::decode_string(d.buf, open_pos + 1, close_pos, &mut scratch) { + match string::decode_string(d.buf, open_pos + 1, close_pos, &mut scratch, d.eager_validated) { Ok((p, n)) => { *key_ptr = p; *key_len = n; diff --git a/src/validate/mod.rs b/src/validate/mod.rs index 366e518..a9ce958 100644 --- a/src/validate/mod.rs +++ b/src/validate/mod.rs @@ -143,6 +143,7 @@ pub(crate) fn validate_trailing( pub(crate) fn validate_eager_values( buf: &[u8], indices: &[u32], + max_depth: u32, ) -> Result<(), qjson_err> { // Stack of container contexts; the top is the current state. // We use a single seed entry `CtxKind::Top` for the root value. @@ -177,6 +178,9 @@ pub(crate) fn validate_eager_values( // Transition parent to AfterValue ahead of the // descent; the inner container's close pops back. 
*cur = parent_after_value(*cur); + if stack.len() > max_depth as usize { + return Err(qjson_err::QJSON_NESTING_TOO_DEEP); + } stack.push(if b == b'{' { CtxKind::ObjAfterOpen } else { @@ -427,7 +431,7 @@ mod tests { #[test] fn grammar_accepts_empty_containers() { for buf in [&b"{}"[..], &b"[]"[..]] { - assert!(validate_eager_values(buf, &ix(buf)).is_ok(), + assert!(validate_eager_values(buf, &ix(buf), 1024).is_ok(), "grammar should accept {:?}", buf); } } @@ -439,7 +443,7 @@ mod tests { &b"[true,false,null]"[..], &b"\"hi\""[..], &b"42"[..], &b"{\"a\":[1,{\"b\":2}]}"[..], ] { - assert!(validate_eager_values(buf, &ix(buf)).is_ok(), + assert!(validate_eager_values(buf, &ix(buf), 1024).is_ok(), "grammar should accept {:?}", buf); } } @@ -447,42 +451,67 @@ mod tests { #[test] fn grammar_rejects_missing_colon() { let buf = b"{\"a\"}"; - assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjson_err::QJSON_PARSE_ERROR)); + assert_eq!(validate_eager_values(buf, &ix(buf), 1024), Err(qjson_err::QJSON_PARSE_ERROR)); } #[test] fn grammar_rejects_leading_comma_with_value() { let buf = b"[,1]"; - assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjson_err::QJSON_PARSE_ERROR)); + assert_eq!(validate_eager_values(buf, &ix(buf), 1024), Err(qjson_err::QJSON_PARSE_ERROR)); } #[test] fn grammar_rejects_missing_comma_in_object() { let buf = b"{\"a\":1\"b\":2}"; - assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjson_err::QJSON_PARSE_ERROR)); + assert_eq!(validate_eager_values(buf, &ix(buf), 1024), Err(qjson_err::QJSON_PARSE_ERROR)); } #[test] fn grammar_rejects_non_string_object_key() { let buf = b"{1:1}"; - assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjson_err::QJSON_PARSE_ERROR)); + assert_eq!(validate_eager_values(buf, &ix(buf), 1024), Err(qjson_err::QJSON_PARSE_ERROR)); } #[test] fn grammar_rejects_colon_in_array() { let buf = b"[1:2]"; - assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjson_err::QJSON_PARSE_ERROR)); + assert_eq!(validate_eager_values(buf, 
&ix(buf), 1024), Err(qjson_err::QJSON_PARSE_ERROR)); } #[test] fn grammar_rejects_missing_comma_between_arrays() { let buf = b"[3[4]]"; - assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjson_err::QJSON_PARSE_ERROR)); + assert_eq!(validate_eager_values(buf, &ix(buf), 1024), Err(qjson_err::QJSON_PARSE_ERROR)); } #[test] fn grammar_rejects_trailing_garbage_inside_object() { let buf = b"{\"a\":\"a\" 123}"; - assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjson_err::QJSON_PARSE_ERROR)); + assert_eq!(validate_eager_values(buf, &ix(buf), 1024), Err(qjson_err::QJSON_PARSE_ERROR)); } -} + + // ── depth enforcement via validate_eager_values ───────────────── + + #[test] + fn grammar_accepts_at_max_depth() { + // 1024 nested arrays at the default max_depth limit. + let mut buf = Vec::new(); + for _ in 0..1024 { buf.push(b'['); } + for _ in 0..1024 { buf.push(b']'); } + assert!( + validate_eager_values(&buf, &ix(&buf), 1024).is_ok(), + "should accept exactly at max_depth" + ); + } + + #[test] + fn grammar_rejects_over_max_depth() { + // 1025 nested arrays — one past the default max_depth limit. + let mut buf = Vec::new(); + for _ in 0..1025 { buf.push(b'['); } + for _ in 0..1025 { buf.push(b']'); } + assert_eq!( + validate_eager_values(&buf, &ix(&buf), 1024), Err(qjson_err::QJSON_NESTING_TOO_DEEP), + ); + } +} \ No newline at end of file