diff --git a/benches/lua_bench.lua b/benches/lua_bench.lua index 30a3977..10c5366 100644 --- a/benches/lua_bench.lua +++ b/benches/lua_bench.lua @@ -239,11 +239,25 @@ local function github_table_access(t) local _ = t[1] and t[1].user and t[1].user.login end +-- Mutate 5 shallow scalar fields on the first issue. All target keys +-- already exist in the source JSON, so __newindex records patch entries +-- and qjson.encode goes through the splice fast path. +local function github_modify_5_scalars(t) + local issue = t[1] + if not issue then return end + issue.id = 1234567 + issue.number = 9999 + issue.comments = 42 + issue.state = "closed" + issue.locked = true +end + local scenarios = { {name = "small", iters = 5000, payload = read_file("benches/fixtures/small_api.json")}, {name = "medium", iters = 500, payload = read_file("benches/fixtures/medium_resp.json")}, {name = "github-100k", iters = 100, payload = make_github_issues_payload(100 * 1024), - cjson_access = github_cjson_access, qjson_access = github_qjson_access, table_access = github_table_access}, + cjson_access = github_cjson_access, qjson_access = github_qjson_access, table_access = github_table_access, + modify_scalars = github_modify_5_scalars}, {name = "100k", iters = 100, payload = make_payload(100 * 1024)}, {name = "200k", iters = 50, payload = make_payload(200 * 1024)}, {name = "500k", iters = 20, payload = make_payload(500 * 1024)}, @@ -309,6 +323,14 @@ for _, s in ipairs(scenarios) do local t = qjson.decode(s.payload) local _ = qjson.encode(t) end) + + if s.modify_scalars then + bench("qjson.decode + modify-5-scalars + qjson.encode", s.iters, function() + local t = qjson.decode(s.payload) + s.modify_scalars(t) + local _ = qjson.encode(t) + end) + end end -- Interleaved scenario: cycle through several payloads of different sizes diff --git a/docs/superpowers/specs/2026-05-21-lazy-scalar-patch-design.md b/docs/superpowers/specs/2026-05-21-lazy-scalar-patch-design.md new file mode 100644 index 
0000000..32963ac --- /dev/null +++ b/docs/superpowers/specs/2026-05-21-lazy-scalar-patch-design.md @@ -0,0 +1,233 @@ +# Lazy scalar-patch on existing keys — design + +GitHub issue: [api7/lua-qjson#48](https://github.com/api7/lua-qjson/issues/48) + +## Goal + +Land the smallest slice of PR #44 (`feat(lazy): structural patching for decode+modify+encode`) that captures the bulk of its encode-path performance benefit while eliminating its −37% regression on the `github-100k parse + access fields` benchmark. + +Concretely: record **scalar replacements on existing keys** of a `LazyObject` in a side `_patches` list, and emit them via a splice encoder over the original JSON byte buffer. Any other mutation pattern (delete, new key, table-valued write) falls through to the existing materialization path unchanged. + +## Non-goals (deferred to future PRs) + +- Deletion (`t.x = nil`) — PR-2 candidate +- New-field insertion (`t.new_key = v` where `new_key` not in original JSON) — PR-3 candidate +- Sidecar container cache (`_child_cache`) +- `LazyArray` patches +- `is_dirty` / walking encoder rewrites beyond a minimal patch-aware tweak + +## Architecture + +### Rust / FFI surface + +One new exported symbol: + +```c +int qjson_cursor_field_bytes( + const qjson_cursor* c, + const char* key, + size_t key_len, + qjson_cursor* value_out, // may be NULL + size_t* value_bs, // required + size_t* value_be // required +); +``` + +- Equivalent to a fused `qjson_cursor_field` + `qjson_cursor_bytes` — saves one FFI crossing at write time. +- `value_out` may be NULL: this PR only needs the byte span; skipping the cursor copy avoids a memcpy. +- Returns `QJSON_NOT_FOUND` when the key is absent, propagating other `qjson_err` codes unchanged. +- Wrapped in the same `catch_unwind` panic barrier as every other `pub unsafe extern "C"` in `src/ffi.rs`. 
+ +Declared in three places (CLAUDE.md requirement — error enum sync rule applies to all FFI): +- `src/ffi.rs` (Rust impl) +- `include/qjson.h` (public C header) +- `lua/qjson/lib.lua` (`ffi.cdef` block) + +### Lua side — `lua/qjson/table.lua` + +`LazyObject` views gain **one** new field, lazily allocated: + +```lua +_patches = nil -- nil until first patch write + -- when non-nil: array of { k = string, v = scalar, bs = int, be = int } + -- k : the JSON key being patched + -- v : the replacement scalar value (Lua string/number/boolean/qjson.null) + -- bs : value byte-start in _doc._hold (inclusive) + -- be : value byte-end in _doc._hold (exclusive) +``` + +Added to `INTERNAL_KEYS` so code that walks the view with raw `next(view)` skips it. + +No other structural changes: no sidecar `_child_cache`, no `_deleted` set, no changes to `LazyArray`, no changes to the rawget-cache discipline for container children. + +## Flows + +### Write — `LazyObject.__newindex(t, k, v)` + +``` +1. Fast precheck (Lua-only, no FFI): + if type(k) ~= "string" → goto fallthrough + if v is not scalar (string | + number | + boolean | + qjson.null) → goto fallthrough + +2. FFI: qjson_cursor_field_bytes(t._cur, k, #k, NULL, &bs, &be) + if rc == QJSON_NOT_FOUND → goto fallthrough (key absent → would be insertion) + if rc != OK → check(rc) propagates error + +3. Record patch: + patches = rawget(t, "_patches") + if patches == nil: + patches = {} + rawset(t, "_patches", patches) + linear scan for existing entry with same k: + found → update entry.v (bs/be unchanged) + absent → append { k = k, v = v, bs = bs, be = be } + +4. Invalidate cached child: + rawset(t, k, nil) + — drops any previously cached container proxy at this key, so the + next read goes through __index and finds the patch. + +fallthrough: + delegate to existing materialize_object_contents path, but first + apply any pre-existing _patches into the materialized contents and + then clear _patches before setmetatable(t, nil). 
+``` + +The FFI call in step 2 is the only side effect that can fail. Steps 3-4 are pure Lua table ops. So patch recording is atomic: either the patch is fully recorded or the view falls through to materialization; no intermediate state. + +### Read — `LazyObject.__index = read_object_field(self, key)` + +``` +1. if type(key) ~= "string" → return nil (unchanged) + +2. patches = rawget(self, "_patches") + if patches ~= nil: + linear scan; on hit return patch.v + +3. Original FFI path (qjson_cursor_field + decode_cursor): unchanged +4. Container result rawset-cached on self: unchanged +``` + +Hot-path cost when no patches were ever recorded: 1 `rawget("_patches")` + 1 nil branch = ~2 LuaJIT bytecodes. This is the entire delta vs main on the read-only fast path. + +### Encode — `encode_proxy(t)` + +Decision tree (priority order): + +``` +A. descendant_is_dirty(t) → encode_lazy_object_walking(t) [existing, small tweak] +B. rawget(t, "_patches") → encode_lazy_object_splice(t) [new] +C. else → t._doc._hold:sub(t._bs+1, t._be) [existing, unchanged] +``` + +- **A** wins over **B**: a dirty (materialized) cached child means the walking encoder must run; it gains a `_patches` lookup per key so patched scalars still substitute correctly. A key cannot be both dirty and patched because write-step 4 clears the cached child. The top-level check is `descendant_is_dirty(t)` rather than `is_dirty(t)`: `is_dirty` also reports true for a view's own `_patches` (so that a parent walks into a patched child), whereas `descendant_is_dirty` ignores `t`'s own patches so that a view with only its own patches still reaches **B**. +- **B** new function `encode_lazy_object_splice(t)`: + 1. Sort `_patches` by `bs` ascending (≤5 entries typical; sort cost negligible). + 2. `local buf, cursor, parts = t._doc._hold, t._bs, {}` + 3. For each patch `p`: + - `parts[#parts+1] = buf:sub(cursor+1, p.bs)` — original bytes + - `parts[#parts+1] = encode(p.v)` — replacement scalar + - `cursor = p.be` + 4. `parts[#parts+1] = buf:sub(cursor+1, t._be)` — tail + 5. `return table.concat(parts)` +- **C** unchanged: no patches, no dirty children → slice the whole original range. 
+ +### Iteration — `lazy_object_iter` (the `__pairs` driver) + +Per yielded `(k, v)` from FFI, look up `_patches` and substitute when matched. Key order is the original JSON key order — unchanged. + +### `materialize(v)` + +Recursive full materialization must also apply patches. Simplest implementation: route through `__pairs` (which already substitutes), instead of calling `materialize_object_contents` directly. + +## Behavior preservation + +| Surface | Guarantee | Mechanism | +|---|---|---| +| `next(view)` | Identical to main | `_patches` added to `INTERNAL_KEYS` | +| `pairs(view)` / `qjson.pairs(view)` | Unpatched: identical; patched: substitutes value, preserves key order | `lazy_object_iter` patch lookup | +| `#view` / `qjson.len(view)` | Identical | `lazy_len` unchanged; scalar patches do not change child count | +| Lua string pointer lifetime | Identical | No new ptr caching; only size_t offsets stored | +| FFI panic barrier | Preserved | New FFI wrapped in `catch_unwind` | +| `LazyArray` | Identical | Out of scope this PR | + +## Edge cases + +1. **Repeated patch on same key** — `find_patch` hit, update `v` in place, list does not grow. +2. **Container → scalar replacement** — `bs/be` span covers the whole nested object/array. Splice replaces it with the encoded scalar. Cached child proxy is cleared by write-step 4; any external reference becomes a detached proxy (acceptable — caller explicitly replaced the slot). +3. **Write then read** — read path short-circuits via `_patches` and returns the patched value. +4. **scalar → different scalar type** — `bs/be` is the value's syntactic span (number digits, or `"…"` for string). Splice replaces with encoded new value. JSON remains valid. +5. **Non-scalar write or absent key** — falls through; any pre-recorded patches are applied into the materialized contents before `setmetatable(t, nil)`, then `_patches` is cleared. +6. **`pairs` then write** — `lazy_object_iter` doesn't mutate the view. 
Subsequent writes still work normally. +7. **Mixed dirty children + patches** — write-step 4 clears the cached child, so the same key cannot be both. Different keys: walking encoder runs (dirty wins decision), patch lookup is applied per key. +8. **Duplicate keys in source JSON** — `qjson_cursor_field_bytes` returns the first match's value span (matches existing `qjson_cursor_field` behavior). The splice replaces only that occurrence. Acceptable. +9. **`qjson.null` write** — accepted as scalar; `encode(qjson.null)` emits the JSON literal `null`. +10. **FFI failure (OOM etc.)** — `check(rc)` raises; view state untouched (patch list not yet modified). + +## Tests + +### Rust integration — `tests/ffi_cursor_field_bytes.rs` (~80 lines) + +| Case | Expectation | +|---|---| +| Simple object, query existing key | rc=OK, `(bs, be)` points to the value bytes | +| String value with escape `{"k":"a\\nb"}` | `(bs, be)` includes both surrounding quotes (syntactic, not decoded) | +| Nested container value `{"k":{"x":1}}` | `(bs, be)` spans the entire `{"x":1}` | +| Missing key | rc=QJSON_NOT_FOUND | +| Array cursor input | rc=QJSON_TYPE_MISMATCH (matches `qjson_cursor_field` behavior) | +| `value_out == NULL` | Still writes `bs/be`, no crash | +| Duplicate keys | Returns the first occurrence | + +Plus: add a case to `tests/scanner_crosscheck.rs` proptest so AVX2 and scalar scanners stay in agreement on `qjson_cursor_field_bytes` results. + +### Rust panic barrier — `tests/ffi_panic_safety.rs` + +Under the `test-panic` cargo feature, add one assertion that calling `qjson_cursor_field_bytes` with a deliberately corrupted cursor (so that an internal bounds check panics) returns `QJSON_OOM` (process does not crash). 
+ +### Lua — new `tests/lua/lazy_patch_spec.lua` (~150 lines) + +Grouped describes: + +- **scalar patches on existing keys** — happy-path matrix (string/number/bool/null/cross-type/container→scalar), repeated writes, read-after-write, `pairs` ordering, `qjson.pairs`, `#view` invariance, encode equivalence, encode whitespace preservation, multiple patches, `tostring`, `materialize`. +- **fall through to materialization** — nil write, table write, new key write, prior patches preserved across fall through. +- **mixed patches and dirty children** — both encoders honor patches. +- **LazyArray unchanged** — array `__newindex` still materializes (regression guard). + +### Regression guards — unchanged + +`tests/lua/lazy_table_spec.lua` and `tests/lua/cjson_compat_spec.lua` must pass **without modification**. This is an explicit acceptance criterion. + +## Benchmarks + +Add one new scenario to `benches/lua_bench.lua`, reusing the existing `github-100k` payload (generated by `make_github_issues_payload(100 * 1024)`): + +``` +qjson.decode + modify-5-scalars + qjson.encode +``` + +Implementation note: assign 5 shallow scalar keys known to exist on the first issue of the payload (`id`, `number`, `comments`, `state`, `locked`), so every write records a patch instead of falling through to materialization. Each iteration: decode → 5 assignments → encode → discard. + +The existing `decode + encode (unmodified)` and `parse + access fields` scenarios are unchanged and serve as regression evidence. 
+ +## Acceptance criteria + +| Criterion | How to verify | +|---|---| +| `github-100k parse + access fields` does not regress vs main, ±3% median over 5 runs | `make bench` before/after | +| `decode + encode (unmodified)` ≥ 100 KB does not regress | `make bench` | +| New `decode + modify-5-scalars + encode` ≥ 100 KB improves vs main | `make bench` | +| `lazy_table_spec.lua` and `cjson_compat_spec.lua` pass unchanged | `make test` | +| `lazy_patch_spec.lua` all green | `make test` | +| Lua diff: ≤ +100 net lines in `lua/qjson/table.lua` | `git diff --stat` | +| `cargo test --release --no-default-features` passes (scanner parity) | CI gate 2 | +| `cargo test --features test-panic --release` passes | CI gate 3 | +| `make lint` clean (clippy `-D warnings`) | CI | + +## Out-of-scope notes for follow-ups + +- **PR-2 (deletion)** would add `_deleted` set + a third encode-tree branch + matching `__index` short-circuit. +- **PR-3 (insertion)** would either require switching cached-child storage to a sidecar (`_child_cache`) or accept that new keys go through full materialization. Defer decision until usage data shows it matters. +- Both are tracked as separate issues, not bundled here. 
diff --git a/include/qjson.h b/include/qjson.h index 343e782..4948642 100644 --- a/include/qjson.h +++ b/include/qjson.h @@ -80,6 +80,9 @@ int qjson_cursor_get_bool (const qjson_cursor*, const char* path, size_t path_le int qjson_cursor_typeof (const qjson_cursor*, const char* path, size_t path_len, int* out); int qjson_cursor_len (const qjson_cursor*, const char* path, size_t path_len, size_t* out); int qjson_cursor_bytes (const qjson_cursor*, size_t* byte_start, size_t* byte_end); +int qjson_cursor_field_bytes(const qjson_cursor*, const char* key, size_t key_len, + qjson_cursor* value_out, + size_t* value_bs, size_t* value_be); int qjson_cursor_object_entry_at(const qjson_cursor*, size_t i, const uint8_t** key_ptr, size_t* key_len, qjson_cursor* value_out); diff --git a/lua/qjson/lib.lua b/lua/qjson/lib.lua index 3e6c686..17fd767 100644 --- a/lua/qjson/lib.lua +++ b/lua/qjson/lib.lua @@ -38,6 +38,9 @@ int qjson_cursor_get_bool(const qjson_cursor*, const char*, size_t, int*); int qjson_cursor_typeof (const qjson_cursor*, const char*, size_t, int*); int qjson_cursor_len (const qjson_cursor*, const char*, size_t, size_t*); int qjson_cursor_bytes(const qjson_cursor*, size_t* byte_start, size_t* byte_end); +int qjson_cursor_field_bytes(const qjson_cursor*, const char* key, size_t key_len, + qjson_cursor* value_out, + size_t* value_bs, size_t* value_be); int qjson_cursor_object_entry_at(const qjson_cursor*, size_t i, const uint8_t** key_ptr, size_t* key_len, qjson_cursor* value_out); @@ -69,6 +72,7 @@ local required_symbols = { "qjson_cursor_typeof", "qjson_cursor_len", "qjson_cursor_bytes", + "qjson_cursor_field_bytes", "qjson_cursor_object_entry_at", } diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index 86f50d0..6529eb1 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -56,6 +56,12 @@ end local LazyObject = {} local LazyArray = {} +-- Reserved bookkeeping keys; rawget-cache and iteration checks skip these. 
+local INTERNAL_KEYS = { + _doc = true, _cur_box = true, _cur = true, _bs = true, _be = true, + _patches = true, +} + -- Build a new lazy view for a child container cursor. -- src_box is an FFI cdata `qjson_cursor[1]`; src_box[0] is the cursor whose -- data we copy into a fresh per-view allocation so the new view's _cur @@ -102,6 +108,12 @@ local function decode_cursor(parent_view, src_box) return nil end +local function find_patch(patches, key) + for i = 1, #patches do + if patches[i].k == key then return patches[i] end + end +end + -- Resolve a child cursor at `key` (object) and decode it into a Lua value. -- Returns nil for missing keys (cjson semantics). -- Container results (lazy proxies) are rawset-cached into `self` so that @@ -111,6 +123,11 @@ end -- raw table rather than creating a fresh proxy. local function read_object_field(self, key) if type(key) ~= "string" then return nil end + local patches = rawget(self, "_patches") + if patches ~= nil then + local hit = find_patch(patches, key) + if hit ~= nil then return hit.v end + end -- Use child_box so the lookup result does not alias self._cur (which is -- itself stored in root_box's backing memory in the decode caller). local rc = C.qjson_cursor_field(self._cur, key, #key, child_box) @@ -153,7 +170,17 @@ local function lazy_object_iter(state, _prev_key) if rc == QJSON_NOT_FOUND then return nil end check(rc) local k = ffi.string(strp_box[0], size_box[0]) - local v = decode_cursor(state.view, child_box) + local view = state.view + -- Prefer a raw-slot override (cached child proxy, or a scalar from a + -- direct raw-table overwrite that bypassed __newindex). 
+ local cached = rawget(view, k) + local v = (cached ~= nil and not INTERNAL_KEYS[k]) and cached + or decode_cursor(view, child_box) + local patches = rawget(view, "_patches") + if patches ~= nil then + local hit = find_patch(patches, k) + if hit ~= nil then v = hit.v end + end return k, v end @@ -247,20 +274,43 @@ local function materialize_array_contents(view) return out end --- The set of keys reserved by the lazy view bookkeeping; user-supplied JSON --- keys with these names would collide (minor, deferred). Centralized here so --- the dirty check and __newindex can share the list. -local INTERNAL_KEYS = { - _doc = true, _cur_box = true, _cur = true, _bs = true, _be = true, -} +-- Try to record a scalar patch on an existing key. Returns true if recorded. +local function try_record_patch(t, k, v) + if type(k) ~= "string" then return false end + local is_scalar = rawequal(v, _M.null) or type(v) == "string" + or type(v) == "number" or type(v) == "boolean" + if not is_scalar then return false end + local rc = C.qjson_cursor_field_bytes(t._cur, k, #k, nil, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then return false end + check(rc) + local patches = rawget(t, "_patches") + if patches == nil then patches = {}; rawset(t, "_patches", patches) end + local hit = find_patch(patches, k) + if hit ~= nil then + hit.v = v + else + patches[#patches + 1] = { k = k, v = v, + bs = tonumber(sz_a[0]), be = tonumber(sz_b[0]) } + end + rawset(t, k, nil) -- drop cached child proxy so reads see the patch + return true +end --- On first write, walk all existing key/value pairs into a plain table, --- strip the lazy metatable, then apply the new assignment. Any FFI error --- during the walk leaves `t` in its original lazy state. --- Existing rawget-cached entries (e.g. previously returned child proxies) --- are preserved so callers' references remain valid. +-- On write: first try the scalar-patch fast path. 
Otherwise materialize all +-- existing key/value pairs (applying any pending patches), strip the lazy +-- metatable, then apply the new assignment. FFI errors during the walk leave +-- `t` in its original lazy state. Existing rawget-cached entries are +-- preserved so callers' references remain valid. LazyObject.__newindex = function(t, k, v) + if try_record_patch(t, k, v) then return end + local patches = rawget(t, "_patches") local contents = materialize_object_contents(t) + if patches ~= nil then + for _, kv in ipairs(contents) do + local hit = find_patch(patches, kv[1]) + if hit ~= nil then kv[2] = hit.v end + end + end -- Snapshot user-key cache BEFORE nilling internals. -- Use next() for raw iteration: pairs() invokes __pairs on lazy tables, -- walking the full JSON via FFI instead of the Lua-side rawget cache. @@ -273,6 +323,7 @@ LazyObject.__newindex = function(t, k, v) ck, cv = next(t, ck) end t._doc, t._cur_box, t._cur, t._bs, t._be = nil, nil, nil, nil, nil + rawset(t, "_patches", nil) setmetatable(t, nil) for _, kv in ipairs(contents) do rawset(t, kv[1], cache[kv[1]] or kv[2]) @@ -347,8 +398,9 @@ local function materialize(v) local mt = (type(v) == "table") and getmetatable(v) or nil if mt == LazyObject then local out = {} - for _, kv in ipairs(materialize_object_contents(v)) do - out[kv[1]] = materialize(kv[2]) + -- Iterate through __pairs so any recorded _patches substitute correctly. + for k, child in _M.pairs(v) do + out[k] = materialize(child) end return out elseif mt == LazyArray then @@ -400,15 +452,17 @@ local function encode_number(n) return string_format("%.14g", n) end --- A lazy subtree is "dirty" if any cached descendant has been materialized --- (no longer carries Lazy* metatable). Non-cached descendants are guaranteed --- untouched, so we only need to walk the rawget-cached entries. +-- True if v is itself materialized, has its own scalar patches, or has any +-- dirty descendant — i.e., a PARENT must walk to encode v. 
The top-level +-- encode_proxy uses descendant_is_dirty so a view with only its own patches +-- can still go through splice. local function is_dirty(v) if type(v) ~= "table" then return false end local mt = getmetatable(v) if mt ~= LazyObject and mt ~= LazyArray then return true -- materialized end + if rawget(v, "_patches") ~= nil then return true end -- Use next() for raw table iteration: pairs() would invoke __pairs on -- lazy tables, walking the full JSON via FFI instead of the Lua cache. local k, child = next(v) @@ -421,6 +475,21 @@ local function is_dirty(v) return false end +-- True if any cached descendant of v is dirty, OR a user-key raw slot holds +-- a non-table value (indicating a direct raw overwrite that bypassed +-- __newindex). v's own _patches does NOT count — splice handles those. +local function descendant_is_dirty(v) + local k, child = next(v) + while k ~= nil do + if not INTERNAL_KEYS[k] then + if is_dirty(child) then return true end + if type(child) ~= "table" then return true end + end + k, child = next(v, k) + end + return false +end + -- Forward declaration so encode_lazy_object_walking, encode_lazy_array_walking, -- and encode_array/encode_object can reference encode before its definition is -- complete (Lua resolves upvalues at call time, but the slot must be declared first). @@ -429,8 +498,10 @@ local encode -- Walk a dirty LazyObject and emit JSON, preferring cached children (which -- may be materialized) over freshly resolved cursors. Non-cached children -- emit through a fresh proxy and naturally fast-path their unmodified subtree. +-- If `_patches` is present, a patched scalar replaces the FFI-decoded value. 
local function encode_lazy_object_walking(t) local parts = {} + local patches = rawget(t, "_patches") local i = 0 while true do local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) @@ -438,11 +509,16 @@ local function encode_lazy_object_walking(t) check(rc) local k = ffi.string(strp_box[0], size_box[0]) local v - local cached = rawget(t, k) - if cached ~= nil and not INTERNAL_KEYS[k] then - v = cached + local hit = patches and find_patch(patches, k) or nil + if hit ~= nil then + v = hit.v else - v = decode_cursor(t, child_box) + local cached = rawget(t, k) + if cached ~= nil and not INTERNAL_KEYS[k] then + v = cached + else + v = decode_cursor(t, child_box) + end end parts[#parts + 1] = encode_string(k) .. ":" .. encode(v) i = i + 1 @@ -450,6 +526,22 @@ local function encode_lazy_object_walking(t) return "{" .. table.concat(parts, ",") .. "}" end +-- Splice patched scalars into the original JSON buffer. Patches sorted by +-- byte-start ascending; preserves whitespace outside patched value spans. +local function encode_lazy_object_splice(t) + local patches = rawget(t, "_patches") + table.sort(patches, function(a, b) return a.bs < b.bs end) + local buf, cursor, parts = t._doc._hold, t._bs, {} + for i = 1, #patches do + local p = patches[i] + parts[#parts + 1] = buf:sub(cursor + 1, p.bs) + parts[#parts + 1] = encode(p.v) + cursor = p.be + end + parts[#parts + 1] = buf:sub(cursor + 1, t._be) + return table.concat(parts) +end + local function encode_lazy_array_walking(t) local parts = {} local rc = C.qjson_cursor_len(t._cur, "", 0, size_box) @@ -471,14 +563,17 @@ local function encode_lazy_array_walking(t) end local function encode_proxy(t) - if not is_dirty(t) then - -- Fast path: no mutations — slice the original buffer bytes. 
- return t._doc._hold:sub(t._bs + 1, t._be) + if descendant_is_dirty(t) then + if getmetatable(t) == LazyObject then + return encode_lazy_object_walking(t) + end + return encode_lazy_array_walking(t) end - if getmetatable(t) == LazyObject then - return encode_lazy_object_walking(t) + if getmetatable(t) == LazyObject and rawget(t, "_patches") ~= nil then + return encode_lazy_object_splice(t) end - return encode_lazy_array_walking(t) + -- Fast path: no mutations — slice the original buffer bytes. + return t._doc._hold:sub(t._bs + 1, t._be) end local function is_array(t) diff --git a/src/ffi.rs b/src/ffi.rs index d4d8cec..53e3f8f 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -760,6 +760,78 @@ pub unsafe extern "C" fn qjson_cursor_bytes( }) } +/// Fused field lookup + byte-range query — equivalent to calling +/// [`qjson_cursor_field`] followed by [`qjson_cursor_bytes`], saving one +/// FFI crossing on the patch-write hot path. +/// +/// Looks up `key` in the object at `*c`. On success writes the value's +/// syntactic byte range `[*value_bs, *value_be)` into the input buffer and, +/// when `value_out` is non-NULL, the resolved value cursor. +/// +/// Returns `QJSON_NOT_FOUND` when the key is absent, `QJSON_TYPE_MISMATCH` +/// when the cursor is not an object, and other `qjson_err` codes from the +/// underlying calls unchanged. `value_bs` / `value_be` are only required +/// to be valid on `QJSON_OK`. +/// +/// # Safety +/// +/// See the module-level [shared safety contract](self#shared-safety-contract). +/// `c` must point to a cursor produced by an earlier `qjson_*` call whose +/// document is still alive; `key` must point to `key_len` bytes or be NULL +/// with `key_len == 0`; `value_bs` and `value_be` must be non-NULL and +/// writable. `value_out` may be NULL — when non-NULL it must be writable. 
+#[no_mangle] +pub unsafe extern "C" fn qjson_cursor_field_bytes( + c: *const qjson_cursor, + key: *const c_char, + key_len: usize, + value_out: *mut qjson_cursor, + value_bs: *mut usize, + value_be: *mut usize, +) -> c_int { + ffi_catch!({ + if value_bs.is_null() || value_be.is_null() || (key.is_null() && key_len != 0) { + return qjson_err::QJSON_INVALID_ARG as c_int; + } + let (d, cur) = match cursor_to_internal(c) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let k = if key.is_null() { + &[][..] + } else { + std::slice::from_raw_parts(key as *const u8, key_len) + }; + let child = match crate::cursor::resolve_single_key(d, cur, k) { + Ok(x) => x, Err(e) => return e as c_int, + }; + // Compute the syntactic byte span — same logic as qjson_cursor_bytes. + let pos = d.indices[child.idx_start as usize] as usize; + let lead = match d.buf.get(pos) { + Some(b) => *b, + None => return qjson_err::QJSON_PARSE_ERROR as c_int, + }; + let (bs, be) = match lead { + b'{' | b'[' | b'"' => { + let end = d.indices[child.idx_end as usize] as usize; + if end >= d.buf.len() { + return qjson_err::QJSON_PARSE_ERROR as c_int; + } + (pos, end + 1) + } + _ => match scalar_byte_range(d, child) { + Ok(x) => x, + Err(e) => return e as c_int, + }, + }; + *value_bs = bs; + *value_be = be; + if !value_out.is_null() { + *value_out = internal_to_cursor((*c).doc, child); + } + qjson_err::QJSON_OK as c_int + }) +} + /// Write the i-th object entry's key (decoded into the doc's scratch /// buffer) and value cursor into the out parameters. /// diff --git a/tests/ffi_cursor_field_bytes.rs b/tests/ffi_cursor_field_bytes.rs new file mode 100644 index 0000000..d2f20ea --- /dev/null +++ b/tests/ffi_cursor_field_bytes.rs @@ -0,0 +1,241 @@ +//! Integration tests for `qjson_cursor_field_bytes` — a fused field-lookup +//! plus byte-range query used by the lazy splice patch path. 
+ +use std::os::raw::c_int; +use std::ptr; + +use qjson::error::qjson_err; +use qjson::ffi::{ + qjson_cursor, qjson_cursor_field_bytes, qjson_doc, qjson_free, qjson_open, qjson_parse, +}; + +unsafe fn open_root(json: &[u8]) -> (*mut qjson_doc, qjson_cursor) { + let mut err: c_int = -1; + let doc = qjson_parse(json.as_ptr(), json.len(), &mut err); + assert!(!doc.is_null(), "parse failed: rc={}", err); + let mut cur: qjson_cursor = std::mem::zeroed(); + let rc = qjson_open(doc, ptr::null(), 0, &mut cur); + assert_eq!(rc, 0); + (doc, cur) +} + +#[test] +fn field_bytes_existing_key_scalar_value() { + let json = br#"{"a":1,"b":"x"}"#; + unsafe { + let (doc, root) = open_root(json); + let mut child: qjson_cursor = std::mem::zeroed(); + let mut bs: usize = 0; + let mut be: usize = 0; + let rc = qjson_cursor_field_bytes( + &root, + b"a".as_ptr() as *const i8, + 1, + &mut child, + &mut bs, + &mut be, + ); + assert_eq!(rc, qjson_err::QJSON_OK as c_int); + assert_eq!(&json[bs..be], b"1"); + // child cursor is now positioned at the value; doc field must match + assert_eq!(child.doc, doc as *const qjson_doc); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_string_value_with_escape_keeps_quotes() { + // Syntactic span — must include the surrounding quotes, not the decoded + // bytes. The "\\n" inside is two source bytes (backslash + n). 
+    let json = br#"{"k":"a\nb"}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"k".as_ptr().cast(),
+            1,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_OK as c_int);
+        assert_eq!(&json[bs..be], br#""a\nb""#);
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_nested_container_spans_entire_value() {
+    let json = br#"{"k":{"x":1}}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"k".as_ptr().cast(),
+            1,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_OK as c_int);
+        assert_eq!(&json[bs..be], br#"{"x":1}"#);
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_nested_array_value_spans_entire_value() {
+    let json = br#"{"k":[1,2,3]}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"k".as_ptr().cast(),
+            1,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_OK as c_int);
+        assert_eq!(&json[bs..be], b"[1,2,3]");
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_missing_key_returns_not_found() {
+    let json = br#"{"a":1,"b":2}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 999;
+        let mut be: usize = 999;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"missing".as_ptr().cast(),
+            7,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_NOT_FOUND as c_int);
+        // On NOT_FOUND we make no guarantee about the out parameters.
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_on_array_cursor_returns_type_mismatch() {
+    // Mirrors qjson_cursor_field behavior — calling it on a non-object
+    // cursor returns QJSON_TYPE_MISMATCH.
+    let json = br#"[1,2,3]"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"a".as_ptr().cast(),
+            1,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_TYPE_MISMATCH as c_int);
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_null_value_out_still_writes_byte_range() {
+    let json = br#"{"k":42}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"k".as_ptr().cast(),
+            1,
+            ptr::null_mut(),
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_OK as c_int);
+        assert_eq!(&json[bs..be], b"42");
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_duplicate_keys_returns_first_occurrence() {
+    let json = br#"{"a":1,"a":2}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"a".as_ptr().cast(),
+            1,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_OK as c_int);
+        assert_eq!(&json[bs..be], b"1");
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_null_outputs_for_byte_range_return_invalid_arg() {
+    let json = br#"{"a":1}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"a".as_ptr().cast(),
+            1,
+            &mut child,
+            ptr::null_mut(),
+            ptr::null_mut(),
+        );
+        assert_eq!(rc, qjson_err::QJSON_INVALID_ARG as c_int);
+        qjson_free(doc);
+    }
+}
+
+#[test]
+fn field_bytes_scalar_strips_surrounding_whitespace() {
+    let json = br#"{"k": 42 ,"x":1}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &root,
+            b"k".as_ptr().cast(),
+            1,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, qjson_err::QJSON_OK as c_int);
+        assert_eq!(&json[bs..be], b"42");
+        qjson_free(doc);
+    }
+}
diff --git a/tests/ffi_panic_safety.rs b/tests/ffi_panic_safety.rs
index 454dff6..f8f6692 100644
--- a/tests/ffi_panic_safety.rs
+++ b/tests/ffi_panic_safety.rs
@@ -6,6 +6,54 @@ fn panic_does_not_unwind_through_ffi() {
     assert_eq!(rc, 8); // QJSON_OOM
 }
 
+#[cfg(feature = "test-panic")]
+#[test]
+fn cursor_field_bytes_panic_returns_oom() {
+    // Forge a cursor whose idx_end is well past the end of the indices
+    // array (idx_start is left intact). Walking the children then indexes
+    // `doc.indices` out of bounds and panics; the `ffi_catch!` wrapper around
+    // qjson_cursor_field_bytes must convert that into QJSON_OOM instead of
+    // unwinding across the FFI boundary.
+    use std::os::raw::c_int;
+    use std::ptr;
+    use qjson::ffi::{
+        qjson_cursor, qjson_cursor_field_bytes, qjson_free, qjson_open, qjson_parse,
+    };
+
+    let json: &[u8] = br#"{"a":1}"#;
+    unsafe {
+        let mut err: c_int = -1;
+        let doc = qjson_parse(json.as_ptr(), json.len(), &mut err);
+        assert!(!doc.is_null());
+        let mut root: qjson_cursor = std::mem::zeroed();
+        let rc = qjson_open(doc, ptr::null(), 0, &mut root);
+        assert_eq!(rc, 0);
+
+        // Corrupt the cursor to extend past the end of the indices array.
+        // `idx_start` remains valid so the container check passes and the
+        // walker enters `walk_children`, which then runs off the end of
+        // `doc.indices` and panics. The `ffi_catch!` wrapper around
+        // qjson_cursor_field_bytes must convert that panic into QJSON_OOM.
+        let mut bad = root;
+        bad.idx_end = u32::MAX - 1;
+
+        let mut child: qjson_cursor = std::mem::zeroed();
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjson_cursor_field_bytes(
+            &bad,
+            b"a".as_ptr().cast(),
+            1,
+            &mut child,
+            &mut bs,
+            &mut be,
+        );
+        assert_eq!(rc, 8); // QJSON_OOM
+
+        qjson_free(doc);
+    }
+}
+
 #[cfg(not(feature = "test-panic"))]
 #[test]
 fn skip() {
diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua
new file mode 100644
index 0000000..9de4358
--- /dev/null
+++ b/tests/lua/lazy_patch_spec.lua
@@ -0,0 +1,256 @@
+local qjson = require("qjson")
+local cjson = require("cjson")
+
+local function collect_pairs(view)
+    local out = {}
+    local order = {}
+    for k, v in qjson.pairs(view) do
+        out[k] = v
+        order[#order + 1] = k
+    end
+    return out, order
+end
+
+describe("LazyObject scalar patches on existing keys", function()
+    it("replaces a string with a string", function()
+        local v = qjson.decode('{"a":"x","b":"y"}')
+        v.a = "z"
+        assert.are.equal("z", v.a)
+        assert.are.equal("y", v.b)
+    end)
+
+    it("replaces a number with a number", function()
+        local v = qjson.decode('{"a":1,"b":2}')
+        v.a = 999
+        assert.are.equal(999, v.a)
+        assert.are.equal(2, v.b)
+    end)
+
+    it("replaces a boolean with a boolean", function()
+        local v = qjson.decode('{"a":true,"b":false}')
+        v.a = false
+        v.b = true
+        assert.is_false(v.a)
+        assert.is_true(v.b)
+    end)
+
+    it("replaces a value with qjson.null", function()
+        local v = qjson.decode('{"a":1}')
+        v.a = qjson.null
+        assert.are.equal(qjson.null, v.a)
+    end)
+
+    it("replaces a scalar with a different scalar type (number -> string)", function()
+        local v = qjson.decode('{"a":1}')
+        v.a = "hello"
+        assert.are.equal("hello", v.a)
+        local out = qjson.encode(v)
+        assert.are.equal("hello", cjson.decode(out).a)
+    end)
+
+    it("replaces a container with a scalar and drops cached child proxy", function()
+        local v = qjson.decode('{"a":{"x":1},"b":2}')
+        local child = v.a
+        assert.is_table(child)
+        v.a = "now-scalar"
+        -- Subsequent read returns the patched scalar, not the cached proxy.
+        assert.are.equal("now-scalar", v.a)
+        -- pairs() also surfaces the scalar, in original key order.
+        local map, order = collect_pairs(v)
+        assert.are.equal("now-scalar", map.a)
+        assert.are.equal(2, map.b)
+        assert.are.equal("a", order[1])
+        assert.are.equal("b", order[2])
+        -- encode also reflects the replacement (via walking encoder, since
+        -- the raw-slot bypass is not a patch entry).
+        local parsed = cjson.decode(qjson.encode(v))
+        assert.are.equal("now-scalar", parsed.a)
+        assert.are.equal(2, parsed.b)
+    end)
+
+    it("repeated writes to the same key keep only one patch entry", function()
+        local v = qjson.decode('{"a":1,"b":2}')
+        v.a = "x"
+        v.a = "y"
+        local out = qjson.encode(v)
+        -- Only one occurrence of "y", no "x" leftover, b unchanged.
+        assert.is_truthy(out:find('"y"', 1, true))
+        assert.is_nil(out:find('"x"', 1, true))
+        -- Sanity: parse back and verify.
+        local parsed = cjson.decode(out)
+        assert.are.equal("y", parsed.a)
+        assert.are.equal(2, parsed.b)
+    end)
+
+    it("read after write returns patched value", function()
+        local v = qjson.decode('{"a":1}')
+        v.a = 42
+        assert.are.equal(42, v.a)
+    end)
+
+    it("pairs() yields patched value in original key order", function()
+        local v = qjson.decode('{"a":1,"b":2,"c":3}')
+        v.b = 999
+        local seen = {}
+        for k, val in qjson.pairs(v) do
+            seen[#seen + 1] = {k, val}
+        end
+        assert.are.equal("a", seen[1][1]); assert.are.equal(1, seen[1][2])
+        assert.are.equal("b", seen[2][1]); assert.are.equal(999, seen[2][2])
+        assert.are.equal("c", seen[3][1]); assert.are.equal(3, seen[3][2])
+    end)
+
+    it("qjson.pairs yields patched value", function()
+        local v = qjson.decode('{"a":1}')
+        v.a = "patched"
+        local map = {}
+        for k, val in qjson.pairs(v) do map[k] = val end
+        assert.are.equal("patched", map.a)
+    end)
+
+    it("#view is unchanged after scalar patch", function()
+        local v = qjson.decode('{"a":1,"b":2,"c":3}')
+        local before = qjson.len(v)
+        v.a = 999
+        v.b = "x"
+        assert.are.equal(before, qjson.len(v))
+    end)
+
+    it("encode produces JSON with patched values spliced in", function()
+        local v = qjson.decode('{"a":1,"b":"x","c":true}')
+        v.a = 999
+        v.b = "y"
+        v.c = qjson.null
+        assert.are.equal('{"a":999,"b":"y","c":null}', qjson.encode(v))
+    end)
+
+    it("encode round-trip matches expected logical value", function()
+        local v = qjson.decode('{"a":1,"b":[1,2,3],"c":{"x":true}}')
+        v.a = 42
+        local out = qjson.encode(v)
+        local parsed = cjson.decode(out)
+        assert.are.equal(42, parsed.a)
+        assert.are.same({1, 2, 3}, parsed.b)
+        assert.is_true(parsed.c.x)
+    end)
+
+    it("encode preserves whitespace outside patched ranges", function()
+        local v = qjson.decode('{ "a" : 1 , "b" : 2 }')
+        v.a = 99
+        local out = qjson.encode(v)
+        -- The encoder splices only the value bytes; original spaces remain.
+        assert.are.equal('{ "a" : 99 , "b" : 2 }', out)
+    end)
+
+    it("multiple patches on different keys are all applied", function()
+        local v = qjson.decode('{"a":1,"b":2,"c":3,"d":4}')
+        v.a = 10
+        v.c = 30
+        v.d = "z"
+        local out = qjson.encode(v)
+        local parsed = cjson.decode(out)
+        assert.are.equal(10, parsed.a)
+        assert.are.equal(2, parsed.b)
+        assert.are.equal(30, parsed.c)
+        assert.are.equal("z", parsed.d)
+    end)
+
+    it("tostring(view) reflects patches", function()
+        local v = qjson.decode('{"a":1,"b":2}')
+        v.a = 99
+        local s = tostring(v)
+        local parsed = cjson.decode(s)
+        assert.are.equal(99, parsed.a)
+        assert.are.equal(2, parsed.b)
+    end)
+
+    it("qjson.materialize reflects patches", function()
+        local v = qjson.decode('{"a":1,"b":{"x":2}}')
+        v.a = 999
+        local m = qjson.materialize(v)
+        assert.are.equal(999, m.a)
+        assert.are.equal(2, m.b.x)
+    end)
+end)
+
+describe("LazyObject fall through to materialization", function()
+    it("nil value triggers materialization", function()
+        local v = qjson.decode('{"a":1,"b":2}')
+        v.a = nil
+        -- View is now a plain table: no lazy metatable.
+        assert.are_not.equal(qjson._LazyObject, getmetatable(v))
+        assert.is_nil(v.a)
+        assert.are.equal(2, v.b)
+    end)
+
+    it("table value triggers materialization", function()
+        local v = qjson.decode('{"a":1}')
+        v.a = {x = 10}
+        assert.are_not.equal(qjson._LazyObject, getmetatable(v))
+        assert.are.equal(10, v.a.x)
+    end)
+
+    it("new (absent) key triggers materialization", function()
+        local v = qjson.decode('{"a":1}')
+        v.new_key = "fresh"
+        assert.are_not.equal(qjson._LazyObject, getmetatable(v))
+        assert.are.equal(1, v.a)
+        assert.are.equal("fresh", v.new_key)
+    end)
+
+    it("prior patches are preserved when fall through occurs", function()
+        local v = qjson.decode('{"a":1,"b":2}')
+        v.a = "p"
+        -- Now fall through with a nil delete.
+        v.b = nil
+        assert.are_not.equal(qjson._LazyObject, getmetatable(v))
+        assert.are.equal("p", v.a)
+        assert.is_nil(v.b)
+    end)
+
+    it("prior patches preserved across fall-through-on-table-write", function()
+        local v = qjson.decode('{"a":1,"b":2}')
+        v.a = 99
+        v.b = {nested = true}
+        assert.are_not.equal(qjson._LazyObject, getmetatable(v))
+        assert.are.equal(99, v.a)
+        assert.is_true(v.b.nested)
+    end)
+end)
+
+describe("Mixed patches and dirty children", function()
+    it("dirty child plus a patch on a different key are both applied in encode", function()
+        local v = qjson.decode('{"nested":{"x":1},"other":"orig"}')
+        -- Read and mutate the nested child to make v dirty.
+        v.nested.x = 99
+        -- Patch a sibling scalar.
+        v.other = "patched"
+        local out = qjson.encode(v)
+        local parsed = cjson.decode(out)
+        assert.are.equal(99, parsed.nested.x)
+        assert.are.equal("patched", parsed.other)
+    end)
+
+    it("walking encoder respects patches when invoked via dirty descendant", function()
+        local v = qjson.decode('{"a":{"b":1},"c":2}')
+        -- Make a deeper subtree dirty to force walking at the top.
+        v.a.b = 99
+        v.c = "patched"
+        local out = qjson.encode(v)
+        local parsed = cjson.decode(out)
+        assert.are.equal(99, parsed.a.b)
+        assert.are.equal("patched", parsed.c)
+    end)
+end)
+
+describe("LazyArray unchanged by patch feature", function()
+    it("array __newindex still materializes on first scalar write", function()
+        local v = qjson.decode('[10, 20, 30]')
+        v[1] = 99
+        -- After write, no longer a LazyArray (materialized into empty_array_mt).
+        assert.are_not.equal(qjson._LazyArray, getmetatable(v))
+        assert.are.equal(99, v[1])
+        assert.are.equal(20, v[2])
+        assert.are.equal(30, v[3])
+    end)
+end)
diff --git a/tests/scanner_crosscheck.rs b/tests/scanner_crosscheck.rs
index 7c9ab85..6490994 100644
--- a/tests/scanner_crosscheck.rs
+++ b/tests/scanner_crosscheck.rs
@@ -1,4 +1,3 @@
-#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
 use proptest::prelude::*;
 
 #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
@@ -57,10 +56,78 @@ fn valid_jsonish() -> impl Strategy {
 #[cfg(not(all(target_arch = "x86_64", feature = "avx2")))]
 #[test]
 fn skip_avx2() {}
-// ── NEON cross-check ──────────────────────────────────────────────────────────
+// ── FFI cross-check: qjson_cursor_field_bytes ─────────────────────────────────
+//
+// The above proptest already guarantees ScalarScanner and Avx2Scanner emit
+// bit-identical indices for any input both accept. Since `qjson_cursor_field_bytes`
+// reads only `doc.buf` and `doc.indices`, identical indices ⇒ identical FFI
+// output. CI gates "cargo test --release" (default features, AVX2-on-x86)
+// and "cargo test --release --no-default-features" (scalar) exercise both
+// dispatch paths against the same proptest, so any backend drift surfaces
+// as a failure on one but not the other.
 
-#[cfg(target_arch = "aarch64")]
-use proptest::prelude::*;
+use qjson::ffi::{
+    qjson_cursor, qjson_cursor_field_bytes, qjson_free, qjson_open, qjson_parse,
+};
+
+proptest! {
+    #![proptest_config(ProptestConfig::with_cases(512))]
+
+    #[test]
+    fn cursor_field_bytes_matches_source_span(
+        kvs in proptest::collection::vec(
+            ("[a-z]{1,4}", -100i64..100i64),
+            1..6usize,
+        ),
+    ) {
+        // Build a small valid JSON object {"k1":n1,"k2":n2,...}. Duplicate
+        // keys collapse to the first occurrence in our expectation map below
+        // (matches qjson_cursor_field_bytes semantics).
+        let mut json = String::from("{");
+        let mut expected: Vec<(String, String)> = Vec::new();
+        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
+        for (i, (k, v)) in kvs.iter().enumerate() {
+            if i > 0 { json.push(','); }
+            json.push('"'); json.push_str(k); json.push_str("\":");
+            let vs = v.to_string();
+            json.push_str(&vs);
+            if seen.insert(k.clone()) {
+                expected.push((k.clone(), vs));
+            }
+        }
+        json.push('}');
+
+        unsafe {
+            let mut err: std::os::raw::c_int = -1;
+            let doc = qjson_parse(json.as_ptr(), json.len(), &mut err);
+            prop_assert!(!doc.is_null());
+
+            let mut root: qjson_cursor = std::mem::zeroed();
+            let rc = qjson_open(doc, std::ptr::null(), 0, &mut root);
+            prop_assert_eq!(rc, 0);
+
+            for (k, want) in &expected {
+                let mut child: qjson_cursor = std::mem::zeroed();
+                let mut bs: usize = 0;
+                let mut be: usize = 0;
+                let rc = qjson_cursor_field_bytes(
+                    &root,
+                    k.as_ptr().cast(),
+                    k.len(),
+                    &mut child,
+                    &mut bs,
+                    &mut be,
+                );
+                prop_assert_eq!(rc, 0, "lookup of {:?} in {:?} failed rc={}", k, json, rc);
+                prop_assert_eq!(&json.as_bytes()[bs..be], want.as_bytes());
+            }
+
+            qjson_free(doc);
+        }
+    }
+}
+
+// ── NEON cross-check ──────────────────────────────────────────────────────────
 
 #[cfg(target_arch = "aarch64")]
 use qjson::__test_api::{Scanner, ScalarScanner, NeonScanner};