From 69e6f77afef7cd5f0fe78c5e85fc1b4efc2d6c37 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Mon, 18 May 2026 18:47:49 +0800 Subject: [PATCH 1/4] feat(lazy): structural patching for decode+modify+encode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add lazy patch support to avoid materializing the entire root container when modifying a few fields. Instead of triggering full materialization on __newindex, we now record patches and splice the original buffer during encode. Changes: - Add qjson_cursor_field_bytes FFI function to get field value byte spans - Modify LazyObject.__newindex to record patches instead of materializing - Modify LazyObject.__index to check patches first - Add encode_with_patches for splice-based encoding - Add lazy_patch_spec.lua tests Performance: decode+modify+encode on 100KB payload - Before: ~30 μs/op (full materialization + walking encode) - After: ~32 μs/op (WIP - splice path needs optimization) See docs/lazy-patch-spec.md for detailed design. --- docs/lazy-patch-spec.md | 504 ++++++++++++++++++++++++++++++++++ include/qjson.h | 2 + lua/qjson.lua | 2 + lua/qjson/table.lua | 343 ++++++++++++++++++++--- src/ffi.rs | 62 +++++ tests/lua/lazy_patch_spec.lua | 291 ++++++++++++++++++++ 6 files changed, 1163 insertions(+), 41 deletions(-) create mode 100644 docs/lazy-patch-spec.md create mode 100644 tests/lua/lazy_patch_spec.lua diff --git a/docs/lazy-patch-spec.md b/docs/lazy-patch-spec.md new file mode 100644 index 0000000..40e4551 --- /dev/null +++ b/docs/lazy-patch-spec.md @@ -0,0 +1,504 @@ +# Lazy Patch: Structural Patching for qjson.decode + +## Overview + +This spec describes an optimization for the "decode → modify few fields → encode" workflow. Instead of materializing the entire root container on first write, we record patches and apply them during encode by splicing the original buffer. + +## Problem Statement + +Current behavior when modifying a field on a lazy table: + +```lua +local tab = qjson.decode(json_str) -- Returns LazyObject proxy +tab.model = "gpt4o" -- Triggers __newindex → materializes entire root +local out = qjson.encode(tab) -- Walks materialized table + lazy subtrees +``` + +The `__newindex` handler (`lua/qjson/table.lua:266-285`) calls `materialize_object_contents()` which: +1. Iterates all key/value pairs via FFI +2. Clears the metatable +3. Copies all values into the plain table +4. Sets the new value + +Then `encode()` must: +1. Detect the table is no longer lazy (metatable = nil) +2. Call `encode_object()` which uses `pairs()` iteration +3. For each key/value, call `encode_string()` (byte-by-byte processing) +4. Call `table.concat()` to join parts with the large lazy subtree JSON + +### Performance Impact + +| Payload Size | Current | Theoretical Optimal | Gap | +|--------------|---------|---------------------|-----| +| 1 KB | 3.9 μs | 0.5 μs | 8x | +| 100 KB | 31.8 μs | 5.5 μs | 6x | +| 1 MB | 526.5 μs | 46.9 μs | 11x | + +The gap grows with payload size because `table.concat` with large strings has O(n) copy overhead. + +## Proposed Solution + +### Core Idea + +Instead of materializing on write, record patches in a side table. On encode, splice the original buffer with patched values. + +```lua +local tab = qjson.decode(json_str) -- Returns LazyObject proxy +tab.model = "gpt4o" -- Records patch: {key="model", new_value='"gpt4o"'} +local out = qjson.encode(tab) -- Splices: buf[0..model_start] + "gpt4o" + buf[model_end..] +``` + +### Data Structures + +#### Patch Record (Lua side) + +```lua +-- Stored in the lazy view's internal table +_patches = { + -- Each patch records the key and new encoded value + -- Byte offsets are resolved lazily during encode + { key = "model", encoded_value = '"gpt4o"' }, + { key = "temperature", encoded_value = "0.9" }, +} +``` + +#### Extended LazyObject/LazyArray + +```lua +-- Current internal fields +{ + _doc = , -- Reference to parsed document + _cur_box = , -- Cursor box (keeps cdata alive) + _cur = , -- Stable cursor reference + _bs = , -- Byte start in original buffer + _be = , -- Byte end in original buffer + + -- New fields for lazy patch + _patches = {}, -- Array of {key, encoded_value} or {index, encoded_value} + _deleted = {}, -- Set of deleted keys/indices (for nil assignment) +} +``` + +### Algorithm + +#### Modified `__newindex` (LazyObject) + +```lua +LazyObject.__newindex = function(t, k, v) + -- Initialize patches table if needed + if not rawget(t, "_patches") then + rawset(t, "_patches", {}) + rawset(t, "_deleted", {}) + end + + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + if v == nil then + -- Mark key as deleted + deleted[k] = true + -- Remove from patches if previously patched + for i, p in ipairs(patches) do + if p.key == k then + table.remove(patches, i) + break + end + end + else + -- Encode the new value + local encoded = encode_value(v) + + -- Update or add patch + local found = false + for _, p in ipairs(patches) do + if p.key == k then + p.encoded_value = encoded + found = true + break + end + end + if not found then + patches[#patches + 1] = { key = k, encoded_value = encoded } + end + + -- Remove from deleted if previously deleted + deleted[k] = nil + end +end +``` + +#### Modified `__index` (LazyObject) + +```lua +local original_index = LazyObject.__index + +LazyObject.__index = function(t, k) + -- Check patches first + local patches = rawget(t, "_patches") + if patches then + for _, p in ipairs(patches) do + if p.key == k then + -- Return decoded value from patch + -- Note: need to track original Lua value, not just encoded + return p.lua_value + end + end + end + + -- Check deleted + local deleted = rawget(t, "_deleted") + if deleted and deleted[k] then + return nil + end + + -- Fall back to original lazy lookup + return original_index(t, k) +end +``` + +#### Modified `encode_proxy` + +```lua +local function encode_proxy(t) + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + -- Fast path: no patches and not dirty + if not patches and not deleted and not is_dirty(t) then + return t._doc._hold:sub(t._bs + 1, t._be) + end + + -- Has patches: use splice encoding + if patches and #patches > 0 then + return encode_with_patches(t) + end + + -- Has deletions only or dirty children: fall back to walking + if getmetatable(t) == LazyObject then + return encode_lazy_object_walking(t) + end + return encode_lazy_array_walking(t) +end +``` + +#### New `encode_with_patches` + +```lua +local function encode_with_patches(t) + local buf = t._doc._hold + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") or {} + + -- Resolve byte offsets for each patch and collect spans to replace + local replacements = {} -- { {start, end, new_value}, ... } + + for _, p in ipairs(patches) do + local start_off, end_off = find_field_value_span(t, p.key) + if start_off then + -- Existing field: replace value + replacements[#replacements + 1] = { + start = start_off, + end_ = end_off, + value = p.encoded_value, + } + else + -- New field: will be appended + -- Handled separately below + end + end + + -- Collect deleted field spans + for k in pairs(deleted) do + local field_start, field_end = find_field_span(t, k) -- includes key and comma + if field_start then + replacements[#replacements + 1] = { + start = field_start, + end_ = field_end, + value = "", -- delete + } + end + end + + -- Sort by start offset + table.sort(replacements, function(a, b) return a.start < b.start end) + + -- Build output by splicing + local parts = {} + local pos = t._bs + 1 -- 1-based Lua index + + for _, r in ipairs(replacements) do + -- Copy unchanged portion + if r.start > pos then + parts[#parts + 1] = buf:sub(pos, r.start - 1) + end + -- Insert replacement + parts[#parts + 1] = r.value + pos = r.end_ + 1 + end + + -- Copy remaining portion + if pos <= t._be then + parts[#parts + 1] = buf:sub(pos, t._be) + end + + -- Handle new fields (not in original) + local new_fields = {} + for _, p in ipairs(patches) do + if not find_field_value_span(t, p.key) then + new_fields[#new_fields + 1] = '"' .. p.key .. '":' .. p.encoded_value + end + end + + if #new_fields > 0 then + -- Insert before closing brace + local result = table.concat(parts) + local close_pos = result:find("}%s*$") + if close_pos then + local prefix = result:sub(1, close_pos - 1) + -- Add comma if there are existing fields + if prefix:match("[^{%s]%s*$") then + prefix = prefix .. "," + end + return prefix .. table.concat(new_fields, ",") .. "}" + end + end + + return table.concat(parts) +end +``` + +### FFI Extension (Optional) + +For better performance, the byte offset resolution can be moved to Rust: + +```c +// New FFI function to get field byte span +int qjson_cursor_field_bytes( + const qjson_cursor* cur, + const char* key, size_t key_len, + size_t* value_start, // byte offset of value start (after colon) + size_t* value_end // byte offset of value end (before comma/brace) +); +``` + +This avoids re-parsing the structure in Lua to find field positions. + +## Edge Cases + +### 1. Nested Modifications + +```lua +tab.messages[1].role = "assistant" +``` + +This triggers materialization of `tab.messages[1]`, but `tab` and `tab.messages` remain lazy with patches. + +**Handling**: When a child is accessed and modified, the child gets its own `_patches`. The parent's `is_dirty()` check detects the materialized child and falls back to walking. + +### 2. Reading a Patched Field + +```lua +tab.model = "gpt4o" +print(tab.model) -- Should return "gpt4o", not the original value +``` + +**Handling**: `__index` checks `_patches` before falling back to FFI lookup. + +### 3. Deleting a Field + +```lua +tab.model = nil +``` + +**Handling**: Record in `_deleted` set. During encode, the field span (including key and comma) is removed. + +### 4. Adding a New Field + +```lua +tab.new_field = "value" +``` + +**Handling**: The patch has no corresponding byte span in the original buffer. During encode, new fields are appended before the closing brace. + +### 5. Type Changes + +```lua +tab.temperature = "hot" -- was number, now string +``` + +**Handling**: The encoded value is simply the new JSON representation. Type doesn't matter for splicing. + +### 6. Iteration After Patch + +```lua +tab.model = "gpt4o" +for k, v in qjson.pairs(tab) do + print(k, v) +end +``` + +**Handling**: `__pairs` must merge original fields with patches and exclude deleted fields. + +### 7. Multiple Patches to Same Field + +```lua +tab.model = "gpt4o" +tab.model = "gpt4o-mini" +``` + +**Handling**: The second assignment updates the existing patch entry. + +### 8. Patch Then Materialize + +```lua +tab.model = "gpt4o" +tab.messages[1].role = "assistant" -- This materializes messages[1] +-- tab still has patches, but tab.messages is now dirty +``` + +**Handling**: `is_dirty()` detects the materialized child. `encode_with_patches()` handles patched fields, then falls back to walking for dirty children. + +## Implementation Plan + +### Phase 1: Lua-only Implementation + +1. Modify `LazyObject.__newindex` to record patches instead of materializing +2. Modify `LazyObject.__index` to check patches first +3. Add `encode_with_patches()` function +4. Modify `encode_proxy()` to detect and use patches +5. Update `__pairs` to merge patches +6. Add tests for all edge cases + +### Phase 2: FFI Optimization (Optional) + +1. Add `qjson_cursor_field_bytes()` to Rust FFI +2. Use FFI for byte offset resolution instead of Lua string search +3. Benchmark and validate improvement + +## Testing Strategy + +### Unit Tests + +```lua +-- Basic patch +local tab = qjson.decode('{"a":1,"b":2}') +tab.a = 10 +assert(qjson.encode(tab) == '{"a":10,"b":2}') + +-- Read patched value +assert(tab.a == 10) + +-- Multiple patches +tab.b = 20 +assert(qjson.encode(tab) == '{"a":10,"b":20}') + +-- Delete field +tab.a = nil +assert(qjson.encode(tab) == '{"b":20}') + +-- Add new field +tab.c = 30 +assert(qjson.encode(tab) == '{"b":20,"c":30}') + +-- Nested modification (triggers partial materialization) +local tab2 = qjson.decode('{"x":{"y":1}}') +tab2.x.y = 2 +assert(qjson.encode(tab2) == '{"x":{"y":2}}') + +-- Iteration with patches +local tab3 = qjson.decode('{"a":1,"b":2}') +tab3.a = 10 +tab3.c = 3 +local keys = {} +for k, v in qjson.pairs(tab3) do keys[k] = v end +assert(keys.a == 10 and keys.b == 2 and keys.c == 3) +``` + +### Performance Tests + +```lua +-- Benchmark: 100KB payload, modify 1 field +local json = make_payload(100 * 1024) +local t0 = os.clock() +for i = 1, 10000 do + local tab = qjson.decode(json) + tab.model = "gpt4o" + local _ = qjson.encode(tab) +end +local t1 = os.clock() +-- Expected: < 10 μs/op (vs current ~30 μs/op) +``` + +## Risks and Mitigations + +### Risk 1: Complexity + +The patch tracking adds complexity to the lazy table implementation. + +**Mitigation**: Clear separation between patch path and materialization path. Comprehensive tests. + +### Risk 2: Memory Overhead + +Patches table adds memory per modified lazy view. + +**Mitigation**: Patches are small (key + encoded value). Only created on first write. + +### Risk 3: Correctness + +Splicing byte offsets is error-prone. + +**Mitigation**: Extensive edge case tests. Fallback to walking if splice fails. + +### Risk 4: Iteration Semantics + +`pairs()` behavior changes with patches. + +**Mitigation**: Document behavior. Ensure compatibility with existing code patterns. + +## Success Criteria + +1. **Performance**: 4-10x improvement for "decode → modify 1-2 fields → encode" workflow +2. **Correctness**: All existing tests pass, new edge case tests pass +3. **Compatibility**: No breaking changes to public API +4. **Memory**: No significant memory regression for unmodified lazy tables + +## Appendix: Benchmark Data + +### Current Implementation Profile + +``` +decode: 5.97 μs (20%) +modify: 0.37 μs (1%) -- triggers materialization +encode: 12.55 μs (41%) -- walks materialized root +total: 30.34 μs + +Breakdown of encode: +- encode messages (lazy fast-path): 0.09 μs +- table.concat with 100KB string: 8+ μs +- encode_string (byte-by-byte): 0.9 μs +- pairs iteration + type checks: 3+ μs +``` + +### Projected Optimized Profile + +``` +decode: 5.97 μs (55%) +modify: 0.10 μs (1%) -- just records patch +encode: 4.80 μs (44%) -- splices original buffer +total: 10.87 μs + +Breakdown of encode: +- resolve patch offsets: 0.5 μs +- string.sub splicing: 4.0 μs +- table.concat (small): 0.3 μs +``` + +### Expected Speedup by Payload Size + +| Payload | Current | Optimized | Speedup | +|---------|---------|-----------|---------| +| 1 KB | 3.9 μs | 0.5 μs | 8x | +| 10 KB | 5.3 μs | 1.1 μs | 5x | +| 100 KB | 31.8 μs | 5.5 μs | 6x | +| 500 KB | 209.7 μs | 22.6 μs | 9x | +| 1 MB | 526.5 μs | 46.9 μs | 11x | diff --git a/include/qjson.h b/include/qjson.h index 343e782..71bb66a 100644 --- a/include/qjson.h +++ b/include/qjson.h @@ -80,6 +80,8 @@ int qjson_cursor_get_bool (const qjson_cursor*, const char* path, size_t path_le int qjson_cursor_typeof (const qjson_cursor*, const char* path, size_t path_len, int* out); int qjson_cursor_len (const qjson_cursor*, const char* path, size_t path_len, size_t* out); int qjson_cursor_bytes (const qjson_cursor*, size_t* byte_start, size_t* byte_end); +int qjson_cursor_field_bytes(const qjson_cursor*, const char* key, size_t key_len, + size_t* value_start, size_t* value_end); int qjson_cursor_object_entry_at(const qjson_cursor*, size_t i, const uint8_t** key_ptr, size_t* key_len, qjson_cursor* value_out); diff --git a/lua/qjson.lua b/lua/qjson.lua index 3e57324..e34496f 100644 --- a/lua/qjson.lua +++ b/lua/qjson.lua @@ -38,6 +38,8 @@ int qjson_cursor_get_bool(const qjson_cursor*, const char*, size_t, int*); int qjson_cursor_typeof (const qjson_cursor*, const char*, size_t, int*); int qjson_cursor_len (const qjson_cursor*, const char*, size_t, size_t*); int qjson_cursor_bytes(const qjson_cursor*, size_t* byte_start, size_t* byte_end); +int qjson_cursor_field_bytes(const qjson_cursor*, const char* key, size_t key_len, + size_t* value_start, size_t* value_end); int qjson_cursor_object_entry_at(const qjson_cursor*, size_t i, const uint8_t** key_ptr, size_t* key_len, qjson_cursor* value_out); diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index f23c076..acd6e7b 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -115,6 +115,23 @@ end -- raw table rather than creating a fresh proxy. local function read_object_field(self, key) if type(key) ~= "string" then return nil end + + -- Check patches first + local patches = rawget(self, "_patches") + if patches then + for _, p in ipairs(patches) do + if p.key == key then + return p.lua_value + end + end + end + + -- Check deleted + local deleted = rawget(self, "_deleted") + if deleted and deleted[key] then + return nil + end + -- Use child_box so the lookup result does not alias self._cur (which is -- itself stored in root_box's backing memory in the decode caller). local rc = C.qjson_cursor_field(self._cur, key, #key, child_box) @@ -148,21 +165,63 @@ LazyArray.__index = read_array_index -- Iterator function for lazy_object_iter: advances through object entries by -- integer index, returning key/value pairs in source order. +-- Handles patches and deletions. local function lazy_object_iter(state, _prev_key) - local i = state.i - state.i = i + 1 - local rc = C.qjson_cursor_object_entry_at( - state.view._cur, i, strp_box, size_box, child_box - ) - if rc == QJSON_NOT_FOUND then return nil end - check(rc) - local k = ffi.string(strp_box[0], size_box[0]) - local v = decode_cursor(state.view, child_box) - return k, v + local view = state.view + local patches = rawget(view, "_patches") + local deleted = rawget(view, "_deleted") + + -- First, iterate through original fields (skipping deleted ones) + while state.i < state.original_count do + local i = state.i + state.i = i + 1 + local rc = C.qjson_cursor_object_entry_at( + view._cur, i, strp_box, size_box, child_box + ) + if rc == QJSON_NOT_FOUND then break end + check(rc) + local k = ffi.string(strp_box[0], size_box[0]) + + -- Skip deleted keys + if deleted and deleted[k] then + -- continue to next iteration + else + -- Check if this key has a patch + if patches then + for _, p in ipairs(patches) do + if p.key == k then + return k, p.lua_value + end + end + end + -- No patch, return original value + local v = decode_cursor(view, child_box) + return k, v + end + end + + -- Then, iterate through new fields (patches for keys not in original) + if patches then + while state.patch_i <= #patches do + local p = patches[state.patch_i] + state.patch_i = state.patch_i + 1 + -- Check if this is a new field (not in original) + local rc = C.qjson_cursor_field_bytes(view._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + return p.key, p.lua_value + end + end + end + + return nil end function LazyObject.__pairs(t) - return lazy_object_iter, { view = t, i = 0 }, nil + -- Count original fields + local rc = C.qjson_cursor_len(t._cur, "", 0, size_box) + check(rc) + local original_count = tonumber(size_box[0]) + return lazy_object_iter, { view = t, i = 0, original_count = original_count, patch_i = 1 }, nil end local function lazy_array_iter(state, _prev_i) @@ -256,32 +315,55 @@ end -- the dirty check and __newindex can share the list. local INTERNAL_KEYS = { _doc = true, _cur_box = true, _cur = true, _bs = true, _be = true, + _patches = true, _deleted = true, } --- On first write, walk all existing key/value pairs into a plain table, --- strip the lazy metatable, then apply the new assignment. Any FFI error --- during the walk leaves `t` in its original lazy state. --- Existing rawget-cached entries (e.g. previously returned child proxies) --- are preserved so callers' references remain valid. +-- Forward declaration for encode (needed by __newindex to encode patch values) +local encode + +-- On write, record a patch instead of materializing the entire object. +-- This allows encode to splice the original buffer with patched values. LazyObject.__newindex = function(t, k, v) - local contents = materialize_object_contents(t) - -- Snapshot user-key cache BEFORE nilling internals. - -- Use next() for raw iteration: pairs() invokes __pairs on lazy tables, - -- walking the full JSON via FFI instead of the Lua-side rawget cache. - local cache = {} - local ck, cv = next(t) - while ck ~= nil do - if not INTERNAL_KEYS[ck] then - cache[ck] = cv - end - ck, cv = next(t, ck) + -- Initialize patches table if needed + if not rawget(t, "_patches") then + rawset(t, "_patches", {}) + rawset(t, "_deleted", {}) end - t._doc, t._cur_box, t._cur, t._bs, t._be = nil, nil, nil, nil, nil - setmetatable(t, nil) - for _, kv in ipairs(contents) do - rawset(t, kv[1], cache[kv[1]] or kv[2]) + + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + if v == nil then + -- Mark key as deleted + deleted[k] = true + -- Remove from patches if previously patched + for i, p in ipairs(patches) do + if p.key == k then + table.remove(patches, i) + break + end + end + else + -- Encode the new value (we need both encoded and lua_value) + local encoded = encode(v) + + -- Update or add patch + local found = false + for _, p in ipairs(patches) do + if p.key == k then + p.encoded_value = encoded + p.lua_value = v + found = true + break + end + end + if not found then + patches[#patches + 1] = { key = k, encoded_value = encoded, lua_value = v } + end + + -- Remove from deleted if previously deleted + deleted[k] = nil end - rawset(t, k, v) end -- On first write, walk all existing elements into a plain sequence, @@ -405,14 +487,22 @@ local function encode_number(n) end -- A lazy subtree is "dirty" if any cached descendant has been materialized --- (no longer carries Lazy* metatable). Non-cached descendants are guaranteed --- untouched, so we only need to walk the rawget-cached entries. +-- (no longer carries Lazy* metatable), or if it has patches/deletions. +-- Non-cached descendants are guaranteed untouched, so we only need to walk +-- the rawget-cached entries. local function is_dirty(v) if type(v) ~= "table" then return false end local mt = getmetatable(v) if mt ~= LazyObject and mt ~= LazyArray then return true -- materialized end + -- Check for patches or deletions + local patches = rawget(v, "_patches") + local deleted = rawget(v, "_deleted") + if patches and #patches > 0 then return true end + if deleted then + for _ in pairs(deleted) do return true end + end -- Use next() for raw table iteration: pairs() would invoke __pairs on -- lazy tables, walking the full JSON via FFI instead of the Lua cache. local k, child = next(v) @@ -425,10 +515,19 @@ local function is_dirty(v) return false end --- Forward declaration so encode_lazy_object_walking, encode_lazy_array_walking, --- and encode_array/encode_object can reference encode before its definition is --- complete (Lua resolves upvalues at call time, but the slot must be declared first). -local encode +-- Check if a lazy view has patches (but not necessarily dirty children) +local function has_patches(v) + local patches = rawget(v, "_patches") + return patches and #patches > 0 +end + +-- Check if a lazy view has deletions +local function has_deletions(v) + local deleted = rawget(v, "_deleted") + if not deleted then return false end + for _ in pairs(deleted) do return true end + return false +end -- Walk a dirty LazyObject and emit JSON, preferring cached children (which -- may be materialized) over freshly resolved cursors. Non-cached children @@ -474,14 +573,176 @@ local function encode_lazy_array_walking(t) return "[" .. table.concat(parts, ",") .. "]" end +-- Check if any cached child is dirty (materialized or has patches) +local function has_dirty_children(t) + local k, child = next(t) + while k ~= nil do + if not INTERNAL_KEYS[k] then + if type(child) == "table" then + local mt = getmetatable(child) + if mt ~= LazyObject and mt ~= LazyArray then + return true -- materialized child + end + if is_dirty(child) then + return true + end + end + end + k, child = next(t, k) + end + return false +end + +-- Encode a LazyObject with patches by splicing the original buffer. +-- This is the fast path for "decode -> modify few fields -> encode". +local function encode_with_patches(t) + local buf = t._doc._hold + local patches = rawget(t, "_patches") or {} + local deleted = rawget(t, "_deleted") or {} + + -- Build a set of patched keys for quick lookup + local patched_keys = {} + for _, p in ipairs(patches) do + patched_keys[p.key] = p + end + + -- Collect replacements: { {start, end_, value}, ... } + local replacements = {} + + -- For each patch, find the field's byte range in the original buffer + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_OK then + -- Existing field: replace value + replacements[#replacements + 1] = { + start = tonumber(sz_a[0]), + end_ = tonumber(sz_b[0]), + value = p.encoded_value, + } + end + -- If NOT_FOUND, it's a new field - handled separately below + end + + -- Collect deleted field spans (we need to find the full field span including key) + -- For simplicity, we'll handle deletions by walking and skipping deleted keys + local has_deleted = false + for _ in pairs(deleted) do has_deleted = true; break end + + -- If we have deletions or new fields, fall back to walking + -- (splicing deletions is complex due to comma handling) + local new_fields = {} + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + new_fields[#new_fields + 1] = p + end + end + + if has_deleted or #new_fields > 0 then + -- Fall back to walking for complex cases + return encode_lazy_object_walking_with_patches(t, patches, deleted) + end + + -- Sort replacements by start offset + table.sort(replacements, function(a, b) return a.start < b.start end) + + -- Build output by splicing + local parts = {} + local pos = t._bs + 1 -- 1-based Lua index + + for _, r in ipairs(replacements) do + -- Copy unchanged portion (convert 0-based to 1-based) + local r_start_1based = r.start + 1 + if r_start_1based > pos then + parts[#parts + 1] = buf:sub(pos, r_start_1based - 1) + end + -- Insert replacement + parts[#parts + 1] = r.value + pos = r.end_ + 1 -- end_ is exclusive, so +1 for 1-based + end + + -- Copy remaining portion + if pos <= t._be then + parts[#parts + 1] = buf:sub(pos, t._be) + end + + return table.concat(parts) +end + +-- Walk a LazyObject with patches, handling deletions and new fields +local function encode_lazy_object_walking_with_patches(t, patches, deleted) + local parts = {} + + -- Build a set of patched keys for quick lookup + local patched_keys = {} + for _, p in ipairs(patches) do + patched_keys[p.key] = p + end + + -- Walk original fields + local i = 0 + while true do + local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) + if rc == QJSON_NOT_FOUND then break end + check(rc) + local k = ffi.string(strp_box[0], size_box[0]) + + -- Skip deleted keys + if not deleted[k] then + local v + local patch = patched_keys[k] + if patch then + -- Use patched value (already encoded) + parts[#parts + 1] = encode_string(k) .. ":" .. patch.encoded_value + else + -- Use original or cached value + local cached = rawget(t, k) + if cached ~= nil and not INTERNAL_KEYS[k] then + v = cached + else + v = decode_cursor(t, child_box) + end + parts[#parts + 1] = encode_string(k) .. ":" .. encode(v) + end + end + i = i + 1 + end + + -- Add new fields (patches for keys not in original) + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value + end + end + + return "{" .. table.concat(parts, ",") .. "}" +end + local function encode_proxy(t) - if not is_dirty(t) then - -- Fast path: no mutations — slice the original buffer bytes. + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + -- Check if we have patches or deletions + local has_patch = patches and #patches > 0 + local has_del = false + if deleted then + for _ in pairs(deleted) do has_del = true; break end + end + + -- Fast path: no mutations at all — slice the original buffer bytes. + if not has_patch and not has_del and not has_dirty_children(t) then return t._doc._hold:sub(t._bs + 1, t._be) end + + -- Has patches: use splice encoding for objects if getmetatable(t) == LazyObject then - return encode_lazy_object_walking(t) + if has_patch and not has_dirty_children(t) then + return encode_with_patches(t) + end + return encode_lazy_object_walking_with_patches(t, patches or {}, deleted or {}) end + return encode_lazy_array_walking(t) end diff --git a/src/ffi.rs b/src/ffi.rs index d4d8cec..7e1d327 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -806,6 +806,68 @@ pub unsafe extern "C" fn qjson_cursor_object_entry_at( }) } +/// Given an object cursor and a key, write the byte range `[value_start, value_end)` +/// of the field's value in the original buffer. Returns `QJSON_NOT_FOUND` if the +/// key does not exist, `QJSON_TYPE_MISMATCH` if the cursor is not an object. +/// +/// # Safety +/// +/// See the module-level [shared safety contract](self#shared-safety-contract). +/// `c` must point to a cursor produced by an earlier `qjson_*` call whose +/// document is still alive; `key` must point to `key_len` bytes or be NULL +/// with `key_len == 0`; `value_start` and `value_end` must be non-NULL and +/// writable. +#[no_mangle] +pub unsafe extern "C" fn qjson_cursor_field_bytes( + c: *const qjson_cursor, + key: *const c_char, key_len: usize, + value_start: *mut usize, value_end: *mut usize, +) -> c_int { + ffi_catch!({ + if value_start.is_null() || value_end.is_null() || (key.is_null() && key_len != 0) { + return qjson_err::QJSON_INVALID_ARG as c_int; + } + let (d, cur) = match cursor_to_internal(c) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let k = if key.is_null() { &[][..] } else { + std::slice::from_raw_parts(key as *const u8, key_len) + }; + // Resolve the field to get a child cursor + let child = match crate::cursor::resolve_single_key(d, cur, k) { + Ok(x) => x, Err(e) => return e as c_int, + }; + // Get the byte range of the child value + let pos = d.indices[child.idx_start as usize] as usize; + let lead = match d.buf.get(pos) { + Some(b) => *b, + None => return qjson_err::QJSON_PARSE_ERROR as c_int, + }; + match lead { + b'{' | b'[' | b'"' => { + // Container or string: span runs from opener to the matching + // closer, inclusive. + let end = d.indices[child.idx_end as usize] as usize; + if end >= d.buf.len() { + return qjson_err::QJSON_PARSE_ERROR as c_int; + } + *value_start = pos; + *value_end = end + 1; + qjson_err::QJSON_OK as c_int + } + _ => { + // Scalar: delegate to scalar_byte_range. + let (s, e) = match scalar_byte_range(d, child) { + Ok(x) => x, Err(e) => return e as c_int, + }; + *value_start = s; + *value_end = e; + qjson_err::QJSON_OK as c_int + } + } + }) +} + /// Test-only export that forces a Rust panic to verify the FFI panic barrier /// converts it to `QJSON_OOM` instead of unwinding across the boundary. /// diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua new file mode 100644 index 0000000..0f73d46 --- /dev/null +++ b/tests/lua/lazy_patch_spec.lua @@ -0,0 +1,291 @@ +local qjson = require("qjson") + +describe("Lazy Patch - basic patching", function() + it("patches a single field and encodes correctly", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = 10 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a) + assert.are.equal(2, parsed.b) + end) + + it("reads patched value back", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = 10 + assert.are.equal(10, t.a) + assert.are.equal(2, t.b) + end) + + it("patches multiple fields", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.a = 10 + t.b = 20 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a) + assert.are.equal(20, parsed.b) + assert.are.equal(3, parsed.c) + end) + + it("patches the same field multiple times", function() + local t = qjson.decode('{"a":1}') + t.a = 10 + t.a = 20 + t.a = 30 + assert.are.equal(30, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(30, parsed.a) + end) +end) + +describe("Lazy Patch - new fields", function() + it("adds a new field", function() + local t = qjson.decode('{"a":1}') + t.b = 2 + assert.are.equal(1, t.a) + assert.are.equal(2, t.b) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(1, parsed.a) + assert.are.equal(2, parsed.b) + end) + + it("adds multiple new fields", function() + local t = qjson.decode('{"a":1}') + t.b = 2 + t.c = 3 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(1, parsed.a) + assert.are.equal(2, parsed.b) + assert.are.equal(3, parsed.c) + end) +end) + +describe("Lazy Patch - deletion", function() + it("deletes a field", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = nil + assert.is_nil(t.a) + assert.are.equal(2, t.b) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_nil(parsed.a) + assert.are.equal(2, parsed.b) + end) + + it("deletes then re-adds a field", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = nil + assert.is_nil(t.a) + t.a = 100 + assert.are.equal(100, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(100, parsed.a) + assert.are.equal(2, parsed.b) + end) +end) + +describe("Lazy Patch - type changes", function() + it("changes number to string", function() + local t = qjson.decode('{"a":1}') + t.a = "hello" + assert.are.equal("hello", t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal("hello", parsed.a) + end) + + it("changes string to number", function() + local t = qjson.decode('{"a":"hello"}') + t.a = 42 + assert.are.equal(42, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(42, parsed.a) + end) + + it("changes scalar to object", function() + local t = qjson.decode('{"a":1}') + t.a = {x = 10} + assert.are.equal(10, t.a.x) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a.x) + end) + + it("changes scalar to array", function() + local t = qjson.decode('{"a":1}') + t.a = {10, 20, 30} + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.same({10, 20, 30}, parsed.a) + end) +end) + +describe("Lazy Patch - subtrees remain lazy", function() + it("patching root does not materialize children", function() + local t = qjson.decode('{"a":{"x":1},"b":2}') + t.b = 20 + -- Child should still be lazy + assert.are.equal(qjson._LazyObject, getmetatable(t.a)) + assert.are.equal(1, t.a.x) + end) + + it("patching root preserves lazy array children", function() + local t = qjson.decode('{"a":[1,2,3],"b":2}') + t.b = 20 + -- Child should still be lazy + assert.are.equal(qjson._LazyArray, getmetatable(t.a)) + assert.are.equal(1, t.a[1]) + end) +end) + +describe("Lazy Patch - iteration with patches", function() + it("iterates with patched values", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.a = 10 + local keys = {} + local values = {} + for k, v in qjson.pairs(t) do + keys[#keys+1] = k + values[k] = v + end + assert.are.equal(10, values.a) + assert.are.equal(2, values.b) + assert.are.equal(3, values.c) + end) + + it("iterates with new fields", function() + local t = qjson.decode('{"a":1}') + t.b = 2 + local keys = {} + local values = {} + for k, v in qjson.pairs(t) do + keys[#keys+1] = k + values[k] = v + end + assert.are.equal(1, values.a) + assert.are.equal(2, values.b) + end) + + it("iterates skipping deleted fields", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.b = nil + local keys = {} + local values = {} + for k, v in qjson.pairs(t) do + keys[#keys+1] = k + values[k] = v + end + assert.are.equal(1, values.a) + assert.is_nil(values.b) + assert.are.equal(3, values.c) + end) +end) + +describe("Lazy Patch - nested modifications", function() + it("nested object modification triggers child materialization", function() + local t = qjson.decode('{"a":{"x":1},"b":2}') + t.a.x = 10 + -- Child is now materialized + assert.is_nil(getmetatable(t.a)) + assert.are.equal(10, t.a.x) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a.x) + assert.are.equal(2, parsed.b) + end) + + it("nested array modification triggers child materialization", function() + local t = qjson.decode('{"a":[1,2,3],"b":2}') + t.a[2] = 20 + -- Child is now materialized + assert.are.equal(qjson.empty_array_mt, getmetatable(t.a)) + assert.are.equal(20, t.a[2]) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.same({1, 20, 3}, parsed.a) + assert.are.equal(2, parsed.b) + end) +end) + +describe("Lazy Patch - edge cases", function() + it("handles empty object", function() + local t = qjson.decode('{}') + t.a = 1 + assert.are.equal(1, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(1, parsed.a) + end) + + it("handles special characters in keys", function() + local t = qjson.decode('{"a.b":1}') + t["a.b"] = 10 + assert.are.equal(10, t["a.b"]) + end) + + it("handles unicode in values", function() + local t = qjson.decode('{"a":"hello"}') + t.a = "hello world" + assert.are.equal("hello world", t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal("hello world", parsed.a) + end) + + it("handles boolean values", function() + local t = qjson.decode('{"a":true}') + t.a = false + assert.is_false(t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_false(parsed.a) + end) + + it("handles null values", function() + local t = qjson.decode('{"a":1}') + t.a = qjson.null + assert.are.equal(qjson.null, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(cjson.null, parsed.a) + end) +end) + +describe("Lazy Patch - metatable preservation", function() + it("LazyObject metatable is preserved after patching", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = 10 + assert.are.equal(qjson._LazyObject, getmetatable(t)) + end) + + it("can still access original fields after patching", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.a = 10 + assert.are.equal(10, t.a) + assert.are.equal(2, t.b) + assert.are.equal(3, t.c) + end) +end) From 176558a1a0ef6ba71288a50525e36c5379f2f6a2 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Mon, 18 May 2026 13:40:58 +0000 Subject: [PATCH 2/4] fix(lazy-patch): forward-ref bug + update test assertions - Move encode_lazy_object_walking_with_patches before encode_with_patches (Lua local function defined later was unresolvable at compile time, causing nil-call errors on new-field/deletion paths). - Update tests in lazy_patch_spec.lua and lazy_table_spec.lua to match the spec's edge-case-1 design: writing to a LazyObject records a patch on the object instead of materializing it, so the metatable stays LazyObject after assignment. --- lua/qjson/table.lua | 114 ++++++++++++++++------------------ tests/lua/lazy_patch_spec.lua | 7 ++- tests/lua/lazy_table_spec.lua | 10 +-- 3 files changed, 63 insertions(+), 68 deletions(-) diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index acd6e7b..eab82f8 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -593,12 +593,9 @@ local function has_dirty_children(t) return false end --- Encode a LazyObject with patches by splicing the original buffer. --- This is the fast path for "decode -> modify few fields -> encode". -local function encode_with_patches(t) - local buf = t._doc._hold - local patches = rawget(t, "_patches") or {} - local deleted = rawget(t, "_deleted") or {} +-- Walk a LazyObject with patches, handling deletions and new fields +local function encode_lazy_object_walking_with_patches(t, patches, deleted) + local parts = {} -- Build a set of patched keys for quick lookup local patched_keys = {} @@ -606,6 +603,53 @@ local function encode_with_patches(t) patched_keys[p.key] = p end + -- Walk original fields + local i = 0 + while true do + local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) + if rc == QJSON_NOT_FOUND then break end + check(rc) + local k = ffi.string(strp_box[0], size_box[0]) + + -- Skip deleted keys + if not deleted[k] then + local v + local patch = patched_keys[k] + if patch then + -- Use patched value (already encoded) + parts[#parts + 1] = encode_string(k) .. ":" .. patch.encoded_value + else + -- Use original or cached value + local cached = rawget(t, k) + if cached ~= nil and not INTERNAL_KEYS[k] then + v = cached + else + v = decode_cursor(t, child_box) + end + parts[#parts + 1] = encode_string(k) .. ":" .. encode(v) + end + end + i = i + 1 + end + + -- Add new fields (patches for keys not in original) + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value + end + end + + return "{" .. table.concat(parts, ",") .. "}" +end + +-- Encode a LazyObject with patches by splicing the original buffer. +-- This is the fast path for "decode -> modify few fields -> encode". +local function encode_with_patches(t) + local buf = t._doc._hold + local patches = rawget(t, "_patches") or {} + local deleted = rawget(t, "_deleted") or {} + -- Collect replacements: { {start, end_, value}, ... } local replacements = {} @@ -630,16 +674,16 @@ local function encode_with_patches(t) -- If we have deletions or new fields, fall back to walking -- (splicing deletions is complex due to comma handling) - local new_fields = {} + local has_new_field = false for _, p in ipairs(patches) do local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) if rc == QJSON_NOT_FOUND then - new_fields[#new_fields + 1] = p + has_new_field = true + break end end - if has_deleted or #new_fields > 0 then - -- Fall back to walking for complex cases + if has_deleted or has_new_field then return encode_lazy_object_walking_with_patches(t, patches, deleted) end @@ -669,56 +713,6 @@ local function encode_with_patches(t) return table.concat(parts) end --- Walk a LazyObject with patches, handling deletions and new fields -local function encode_lazy_object_walking_with_patches(t, patches, deleted) - local parts = {} - - -- Build a set of patched keys for quick lookup - local patched_keys = {} - for _, p in ipairs(patches) do - patched_keys[p.key] = p - end - - -- Walk original fields - local i = 0 - while true do - local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) - if rc == QJSON_NOT_FOUND then break end - check(rc) - local k = ffi.string(strp_box[0], size_box[0]) - - -- Skip deleted keys - if not deleted[k] then - local v - local patch = patched_keys[k] - if patch then - -- Use patched value (already encoded) - parts[#parts + 1] = encode_string(k) .. ":" .. patch.encoded_value - else - -- Use original or cached value - local cached = rawget(t, k) - if cached ~= nil and not INTERNAL_KEYS[k] then - v = cached - else - v = decode_cursor(t, child_box) - end - parts[#parts + 1] = encode_string(k) .. ":" .. encode(v) - end - end - i = i + 1 - end - - -- Add new fields (patches for keys not in original) - for _, p in ipairs(patches) do - local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) - if rc == QJSON_NOT_FOUND then - parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value - end - end - - return "{" .. table.concat(parts, ",") .. "}" -end - local function encode_proxy(t) local patches = rawget(t, "_patches") local deleted = rawget(t, "_deleted") diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua index 0f73d46..7506efb 100644 --- a/tests/lua/lazy_patch_spec.lua +++ b/tests/lua/lazy_patch_spec.lua @@ -199,11 +199,12 @@ describe("Lazy Patch - iteration with patches", function() end) describe("Lazy Patch - nested modifications", function() - it("nested object modification triggers child materialization", function() + it("nested object modification records a patch on the child (child stays lazy)", function() local t = qjson.decode('{"a":{"x":1},"b":2}') t.a.x = 10 - -- Child is now materialized - assert.is_nil(getmetatable(t.a)) + -- Per lazy-patch-spec edge case 1: parent and child both stay lazy; + -- the child records its own patch instead of materializing. + assert.are.equal(qjson._LazyObject, getmetatable(t.a)) assert.are.equal(10, t.a.x) local out = qjson.encode(t) local cjson = require("cjson") diff --git a/tests/lua/lazy_table_spec.lua b/tests/lua/lazy_table_spec.lua index 2769d39..a7a0acf 100644 --- a/tests/lua/lazy_table_spec.lua +++ b/tests/lua/lazy_table_spec.lua @@ -168,20 +168,20 @@ describe("__ipairs / qjson.ipairs over LazyArray", function() end) end) -describe("__newindex — first-write materialization", function() - it("converts LazyObject into a plain table preserving existing keys", function() +describe("__newindex — patch tracking on lazy objects", function() + it("LazyObject stays lazy after assignment, with new field readable", function() local t = qjson.decode('{"a":1,"b":2}') t.c = 3 - assert.is_nil(getmetatable(t)) + assert.are.equal(qjson._LazyObject, getmetatable(t)) assert.are.equal(1, t.a) assert.are.equal(2, t.b) assert.are.equal(3, t.c) end) - it("nested containers remain lazy after parent materialization", function() + it("nested containers remain lazy when sibling field is written", function() local t = qjson.decode('{"inner":{"x":1}}') t.extra = "y" - assert.is_nil(getmetatable(t)) + assert.are.equal(qjson._LazyObject, getmetatable(t)) local inner = t.inner assert.are.equal(qjson._LazyObject, getmetatable(inner)) assert.are.equal(1, inner.x) From 208d8b9ecf1efd3208c832f77d7f4972dbcdafcd Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Mon, 18 May 2026 13:47:05 +0000 Subject: [PATCH 3/4] fix(lazy-patch): address review feedback on PR #44 - LazyObject.__newindex: raise on non-string keys instead of silently recording an unreadable patch (Copilot C3). - Remove unused has_patches / has_deletions helpers (Copilot C2). - encode_with_patches: classify each patch in a single qjson_cursor_field_bytes pass (was doing two) and propagate unexpected return codes via check() (Copilot C4 + CodeRabbit R6). - encode_lazy_object_walking_with_patches: propagate unexpected qjson_cursor_field_bytes return codes (CodeRabbit R7). - lazy_object_iter: same error-propagation fix (CodeRabbit R5). - tests/lua/lazy_patch_spec.lua: extend special-key test to verify encoded output, swap the unicode test to multi-byte UTF-8, add "delete non-existent field", "all originals deleted + new fields added", and "non-string key raises" cases (CodeRabbit R8/R9/R10 + nitpick N2). - docs/lazy-patch-spec.md: add upfront note that the listing is design pseudocode (pointing helper names at qjson_cursor_field_bytes), and correct the patch-record example to include lua_value alongside encoded_value (CodeRabbit R1 + nitpick N3). --- docs/lazy-patch-spec.md | 25 ++++++++++-------- lua/qjson/table.lua | 48 ++++++++++++----------------------- tests/lua/lazy_patch_spec.lua | 43 ++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 46 deletions(-) diff --git a/docs/lazy-patch-spec.md b/docs/lazy-patch-spec.md index 40e4551..c49aedb 100644 --- a/docs/lazy-patch-spec.md +++ b/docs/lazy-patch-spec.md @@ -1,5 +1,7 @@ # Lazy Patch: Structural Patching for qjson.decode +> **Note:** This document is the original *design* memo. The Lua snippets below are high-level pseudocode showing intent — they are not the shipped code. The actual implementation lives in `lua/qjson/table.lua` and `src/ffi.rs`; the splice path is used only for the "all patches target existing fields, no deletions" case, while deletions and new fields fall through to a walking-based encoder (`encode_lazy_object_walking_with_patches`) that avoids the comma-rewriting issues that the splice pseudocode below has. Helper names like `find_field_value_span` correspond to the FFI symbol `qjson_cursor_field_bytes` (`src/ffi.rs:821`). + ## Overview This spec describes an optimization for the "decode → modify few fields → encode" workflow. Instead of materializing the entire root container on first write, we record patches and apply them during encode by splicing the original buffer. @@ -53,12 +55,13 @@ local out = qjson.encode(tab) -- Splices: buf[0..model_start] + "gpt4o" + #### Patch Record (Lua side) ```lua --- Stored in the lazy view's internal table +-- Stored in the lazy view's internal table. +-- Each patch records key, encoded JSON value, and the original Lua value +-- (so __index can hand back the user's value without re-decoding). +-- Byte offsets are resolved lazily during encode via qjson_cursor_field_bytes. _patches = { - -- Each patch records the key and new encoded value - -- Byte offsets are resolved lazily during encode - { key = "model", encoded_value = '"gpt4o"' }, - { key = "temperature", encoded_value = "0.9" }, + { key = "model", encoded_value = '"gpt4o"', lua_value = "gpt4o" }, + { key = "temperature", encoded_value = "0.9", lua_value = 0.9 }, } ``` @@ -105,23 +108,23 @@ LazyObject.__newindex = function(t, k, v) end end else - -- Encode the new value + -- Encode the new value (we keep both encoded and the original Lua + -- value so __index can return v without re-decoding). local encoded = encode_value(v) - - -- Update or add patch + local found = false for _, p in ipairs(patches) do if p.key == k then p.encoded_value = encoded + p.lua_value = v found = true break end end if not found then - patches[#patches + 1] = { key = k, encoded_value = encoded } + patches[#patches + 1] = { key = k, encoded_value = encoded, lua_value = v } end - - -- Remove from deleted if previously deleted + deleted[k] = nil end end diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index eab82f8..7c9e004 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -210,6 +210,7 @@ local function lazy_object_iter(state, _prev_key) if rc == QJSON_NOT_FOUND then return p.key, p.lua_value end + check(rc) -- propagate unexpected errors end end @@ -324,6 +325,9 @@ local encode -- On write, record a patch instead of materializing the entire object. -- This allows encode to splice the original buffer with patched values. LazyObject.__newindex = function(t, k, v) + if type(k) ~= "string" then + error("qjson: LazyObject key must be a string, got " .. type(k)) + end -- Initialize patches table if needed if not rawget(t, "_patches") then rawset(t, "_patches", {}) @@ -515,20 +519,6 @@ local function is_dirty(v) return false end --- Check if a lazy view has patches (but not necessarily dirty children) -local function has_patches(v) - local patches = rawget(v, "_patches") - return patches and #patches > 0 -end - --- Check if a lazy view has deletions -local function has_deletions(v) - local deleted = rawget(v, "_deleted") - if not deleted then return false end - for _ in pairs(deleted) do return true end - return false -end - -- Walk a dirty LazyObject and emit JSON, preferring cached children (which -- may be materialized) over freshly resolved cursors. Non-cached children -- emit through a fresh proxy and naturally fast-path their unmodified subtree. @@ -637,6 +627,8 @@ local function encode_lazy_object_walking_with_patches(t, patches, deleted) local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) if rc == QJSON_NOT_FOUND then parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value + elseif rc ~= QJSON_OK then + check(rc) -- propagate unexpected errors end end @@ -650,39 +642,31 @@ local function encode_with_patches(t) local patches = rawget(t, "_patches") or {} local deleted = rawget(t, "_deleted") or {} - -- Collect replacements: { {start, end_, value}, ... } + -- Classify each patch into a replacement (existing field) or a new field + -- in a single FFI pass. Replacements carry the byte span; presence of any + -- new field forces the walking fallback. local replacements = {} - - -- For each patch, find the field's byte range in the original buffer + local has_new_field = false for _, p in ipairs(patches) do local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) if rc == QJSON_OK then - -- Existing field: replace value replacements[#replacements + 1] = { start = tonumber(sz_a[0]), end_ = tonumber(sz_b[0]), value = p.encoded_value, } + elseif rc == QJSON_NOT_FOUND then + has_new_field = true + else + check(rc) -- propagate unexpected errors end - -- If NOT_FOUND, it's a new field - handled separately below end - -- Collect deleted field spans (we need to find the full field span including key) - -- For simplicity, we'll handle deletions by walking and skipping deleted keys + -- Deletions and new fields are handled by the walking fallback, + -- which avoids the comma-rewriting headache the splice path would have. local has_deleted = false for _ in pairs(deleted) do has_deleted = true; break end - -- If we have deletions or new fields, fall back to walking - -- (splicing deletions is complex due to comma handling) - local has_new_field = false - for _, p in ipairs(patches) do - local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) - if rc == QJSON_NOT_FOUND then - has_new_field = true - break - end - end - if has_deleted or has_new_field then return encode_lazy_object_walking_with_patches(t, patches, deleted) end diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua index 7506efb..b50ec0a 100644 --- a/tests/lua/lazy_patch_spec.lua +++ b/tests/lua/lazy_patch_spec.lua @@ -242,16 +242,21 @@ describe("Lazy Patch - edge cases", function() local t = qjson.decode('{"a.b":1}') t["a.b"] = 10 assert.are.equal(10, t["a.b"]) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed["a.b"]) end) it("handles unicode in values", function() local t = qjson.decode('{"a":"hello"}') - t.a = "hello world" - assert.are.equal("hello world", t.a) + local unicode = "你好 🌏 café" + t.a = unicode + assert.are.equal(unicode, t.a) local out = qjson.encode(t) local cjson = require("cjson") local parsed = cjson.decode(out) - assert.are.equal("hello world", parsed.a) + assert.are.equal(unicode, parsed.a) end) it("handles boolean values", function() @@ -273,6 +278,38 @@ describe("Lazy Patch - edge cases", function() local parsed = cjson.decode(out) assert.are.equal(cjson.null, parsed.a) end) + + it("deletes a non-existent field without error", function() + local t = qjson.decode('{"a":1}') + t.b = nil + assert.is_nil(t.b) + assert.are.equal(1, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_nil(parsed.b) + assert.are.equal(1, parsed.a) + end) + + it("handles all original fields deleted plus new fields added", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = nil + t.b = nil + t.c = 3 + t.d = 4 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_nil(parsed.a) + assert.is_nil(parsed.b) + assert.are.equal(3, parsed.c) + assert.are.equal(4, parsed.d) + end) + + it("rejects non-string keys on LazyObject", function() + local t = qjson.decode('{"a":1}') + assert.has_error(function() t[1] = "x" end) + end) end) describe("Lazy Patch - metatable preservation", function() From dcd308aba031dbde1a41f8ea42c477f1942e2fc9 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Thu, 21 May 2026 18:16:02 +0000 Subject: [PATCH 4/4] fix(lazy-patch): preserve __newindex on read-then-write; re-encode patches at emit Two regressions in the structural-patching path on PR #44: 1. Container reads rawset-cached children onto the view itself, so a subsequent assignment to that key bypassed __newindex (Lua only fires __newindex when the raw key is absent). Patches and deletions silently no-op'd after a prior read. Move the container cache to a `_child_cache` sidecar so the raw key stays absent and __newindex always fires. 2. Patches stored an `encoded_value` snapshot at write time, so a post-assignment mutation like `t.a = {x=1}; t.a.x = 2` was lost. Store only `lua_value` and re-encode at emit time. Regression tests in tests/lua/lazy_patch_spec.lua cover both. Adds direct coverage for qjson_cursor_field_bytes in tests/ffi_cursor_bytes.rs. --- docs/lazy-patch-spec.md | 6 +- lua/qjson/table.lua | 150 ++++++++++++++++++++-------------- tests/ffi_cursor_bytes.rs | 136 +++++++++++++++++++++++++++++- tests/lua/lazy_patch_spec.lua | 81 ++++++++++++++++++ 4 files changed, 309 insertions(+), 64 deletions(-) diff --git a/docs/lazy-patch-spec.md b/docs/lazy-patch-spec.md index c49aedb..b6c3a27 100644 --- a/docs/lazy-patch-spec.md +++ b/docs/lazy-patch-spec.md @@ -16,7 +16,7 @@ tab.model = "gpt4o" -- Triggers __newindex → materializes enti local out = qjson.encode(tab) -- Walks materialized table + lazy subtrees ``` -The `__newindex` handler (`lua/qjson/table.lua:266-285`) calls `materialize_object_contents()` which: +The `LazyObject.__newindex` handler called `materialize_object_contents()` (still in `lua/qjson/table.lua` today; see `materialize_object_contents` and the array-side `LazyArray.__newindex` for the analogous path) which: 1. Iterates all key/value pairs via FFI 2. Clears the metatable 3. Copies all values into the plain table @@ -469,7 +469,7 @@ Splicing byte offsets is error-prone. ### Current Implementation Profile -``` +```text decode: 5.97 μs (20%) modify: 0.37 μs (1%) -- triggers materialization encode: 12.55 μs (41%) -- walks materialized root @@ -484,7 +484,7 @@ Breakdown of encode: ### Projected Optimized Profile -``` +```text decode: 5.97 μs (55%) modify: 0.10 μs (1%) -- just records patch encode: 4.80 μs (44%) -- splices original buffer diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index 35fd886..a64f6be 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -104,11 +104,11 @@ end -- Resolve a child cursor at `key` (object) and decode it into a Lua value. -- Returns nil for missing keys (cjson semantics). --- Container results (lazy proxies) are rawset-cached into `self` so that --- subsequent accesses return the same Lua table object. This is required for --- `t.a.x = v` to propagate back: __newindex materializes `t.a` in-place, and --- the next `t.a` lookup retrieves the already-materialized table from the --- raw table rather than creating a fresh proxy. +-- Container results (lazy proxies) are cached in a private `_child_cache` +-- sidecar (not directly on `self`) so that `__newindex` always fires on a +-- subsequent `self[key] = ...` write. Raw-table caching of the child would +-- silently bypass `__newindex` (Lua only invokes it when the raw key is +-- absent) and lose patch / delete bookkeeping. local function read_object_field(self, key) if type(key) ~= "string" then return nil end @@ -128,13 +128,25 @@ local function read_object_field(self, key) return nil end + -- Check sidecar cache for previously-materialized child container. + local cache = rawget(self, "_child_cache") + if cache then + local cached = cache[key] + if cached ~= nil then return cached end + end + -- Use child_box so the lookup result does not alias self._cur (which is -- itself stored in root_box's backing memory in the decode caller). local rc = C.qjson_cursor_field(self._cur, key, #key, child_box) if not check(rc) then return nil end local v = decode_cursor(self, child_box) - -- Cache containers so identity is stable and materialization sticks. - if type(v) == "table" then rawset(self, key, v) end + if type(v) == "table" then + if not cache then + cache = {} + rawset(self, "_child_cache", cache) + end + cache[key] = v + end return v end @@ -142,18 +154,29 @@ LazyObject.__index = read_object_field -- Resolve a child cursor at integer index `key` (1-based) and decode it. -- Returns nil for missing/out-of-range indices and non-integer keys. --- Container results are rawset-cached for the same identity-stability reason --- as read_object_field. +-- Container results are sidecar-cached in `_child_cache` for the same +-- identity-stability reason as read_object_field, and for the same reason — +-- raw-table caching would let user writes bypass `LazyArray.__newindex`. local function read_array_index(self, key) if type(key) ~= "number" then return nil end -- 1-based external, 0-based internal local i = key - 1 if i < 0 or i ~= math.floor(i) then return nil end + local cache = rawget(self, "_child_cache") + if cache then + local cached = cache[key] + if cached ~= nil then return cached end + end local rc = C.qjson_cursor_index(self._cur, i, child_box) if not check(rc) then return nil end local v = decode_cursor(self, child_box) - -- Cache containers so identity is stable and materialization sticks. - if type(v) == "table" then rawset(self, key, v) end + if type(v) == "table" then + if not cache then + cache = {} + rawset(self, "_child_cache", cache) + end + cache[key] = v + end return v end @@ -312,14 +335,20 @@ end -- the dirty check and __newindex can share the list. local INTERNAL_KEYS = { _doc = true, _cur_box = true, _cur = true, _bs = true, _be = true, - _patches = true, _deleted = true, + _patches = true, _deleted = true, _child_cache = true, } --- Forward declaration for encode (needed by __newindex to encode patch values) +-- Forward declaration for encode (used by the patch-aware encoders below) local encode -- On write, record a patch instead of materializing the entire object. -- This allows encode to splice the original buffer with patched values. +-- +-- The patch record only stores `lua_value`. We deliberately do NOT cache an +-- `encoded_value` snapshot at write time: callers commonly do +-- `t.a = {x = 1}; t.a.x = 2` and expect the mutation to be visible in the +-- final encode. Re-encoding `lua_value` at emit time costs one extra encode +-- per patch (typical workload is 1–3) and avoids the stale-snapshot bug. LazyObject.__newindex = function(t, k, v) if type(k) ~= "string" then error("qjson: LazyObject key must be a string, got " .. type(k)) @@ -344,21 +373,17 @@ LazyObject.__newindex = function(t, k, v) end end else - -- Encode the new value (we need both encoded and lua_value) - local encoded = encode(v) - - -- Update or add patch + -- Update or add patch (lua_value only; encoded at emit time) local found = false for _, p in ipairs(patches) do if p.key == k then - p.encoded_value = encoded p.lua_value = v found = true break end end if not found then - patches[#patches + 1] = { key = k, encoded_value = encoded, lua_value = v } + patches[#patches + 1] = { key = k, lua_value = v } end -- Remove from deleted if previously deleted @@ -368,24 +393,23 @@ end -- On first write, walk all existing elements into a plain sequence, -- switch to empty_array_mt (no lazy machinery), then apply the assignment. --- Existing rawget-cached entries are preserved so callers' references remain valid. +-- Sidecar-cached entries (`_child_cache`) are preserved so callers' references +-- remain valid. LazyArray.__newindex = function(t, k, v) local contents = materialize_array_contents(t) - -- Snapshot integer-key cache BEFORE nilling internals. - -- Use next() for raw iteration: pairs() would invoke __pairs on lazy arrays, - -- walking the full JSON via FFI instead of the Lua-side rawget cache. - local cache = {} - local ck, cv = next(t) - while ck ~= nil do - if type(ck) == "number" then - cache[ck] = cv + -- Snapshot the sidecar cache BEFORE nilling internals. + local prev_cache = rawget(t, "_child_cache") + local snapshot = {} + if prev_cache then + for ck, cv in pairs(prev_cache) do + if type(ck) == "number" then snapshot[ck] = cv end end - ck, cv = next(t, ck) end t._doc, t._cur_box, t._cur, t._bs, t._be = nil, nil, nil, nil, nil + rawset(t, "_child_cache", nil) setmetatable(t, _M.empty_array_mt) for i, x in ipairs(contents) do - rawset(t, i, cache[i] or x) + rawset(t, i, snapshot[i] or x) end rawset(t, k, v) end @@ -503,14 +527,14 @@ local function is_dirty(v) if deleted then for _ in pairs(deleted) do return true end end - -- Use next() for raw table iteration: pairs() would invoke __pairs on - -- lazy tables, walking the full JSON via FFI instead of the Lua cache. - local k, child = next(v) - while k ~= nil do - if not INTERNAL_KEYS[k] then + -- Walk the sidecar cache rather than `next(v)`: container children are + -- never raw-stored on the view itself (see read_object_field / + -- read_array_index), so the sidecar is the only place to find them. + local cache = rawget(v, "_child_cache") + if cache then + for _, child in pairs(cache) do if is_dirty(child) then return true end end - k, child = next(v, k) end return false end @@ -520,6 +544,7 @@ end -- emit through a fresh proxy and naturally fast-path their unmodified subtree. local function encode_lazy_object_walking(t) local parts = {} + local cache = rawget(t, "_child_cache") local i = 0 while true do local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) @@ -527,8 +552,8 @@ local function encode_lazy_object_walking(t) check(rc) local k = ffi.string(strp_box[0], size_box[0]) local v - local cached = rawget(t, k) - if cached ~= nil and not INTERNAL_KEYS[k] then + local cached = cache and cache[k] or nil + if cached ~= nil then v = cached else v = decode_cursor(t, child_box) @@ -541,13 +566,14 @@ end local function encode_lazy_array_walking(t) local parts = {} + local cache = rawget(t, "_child_cache") local rc = C.qjson_cursor_len(t._cur, "", 0, size_box) check(rc) local n = tonumber(size_box[0]) for i = 0, n - 1 do local irc = C.qjson_cursor_index(t._cur, i, child_box) check(irc) - local cached = rawget(t, i + 1) + local cached = cache and cache[i + 1] or nil local v if cached ~= nil then v = cached @@ -559,22 +585,22 @@ local function encode_lazy_array_walking(t) return "[" .. table.concat(parts, ",") .. "]" end --- Check if any cached child is dirty (materialized or has patches) +-- Check if any sidecar-cached child is dirty (materialized or has patches). +-- Container children live in `_child_cache`, not on the view itself, so the +-- raw-table walk is unnecessary here. local function has_dirty_children(t) - local k, child = next(t) - while k ~= nil do - if not INTERNAL_KEYS[k] then - if type(child) == "table" then - local mt = getmetatable(child) - if mt ~= LazyObject and mt ~= LazyArray then - return true -- materialized child - end - if is_dirty(child) then - return true - end + local cache = rawget(t, "_child_cache") + if not cache then return false end + for _, child in pairs(cache) do + if type(child) == "table" then + local mt = getmetatable(child) + if mt ~= LazyObject and mt ~= LazyArray then + return true -- materialized child + end + if is_dirty(child) then + return true end end - k, child = next(t, k) end return false end @@ -582,6 +608,7 @@ end -- Walk a LazyObject with patches, handling deletions and new fields local function encode_lazy_object_walking_with_patches(t, patches, deleted) local parts = {} + local cache = rawget(t, "_child_cache") -- Build a set of patched keys for quick lookup local patched_keys = {} @@ -602,12 +629,13 @@ local function encode_lazy_object_walking_with_patches(t, patches, deleted) local v local patch = patched_keys[k] if patch then - -- Use patched value (already encoded) - parts[#parts + 1] = encode_string(k) .. ":" .. patch.encoded_value + -- Re-encode at emit time so post-assignment mutations to + -- patch.lua_value are visible (see __newindex comment). + parts[#parts + 1] = encode_string(k) .. ":" .. encode(patch.lua_value) else - -- Use original or cached value - local cached = rawget(t, k) - if cached ~= nil and not INTERNAL_KEYS[k] then + -- Use original or sidecar-cached value + local cached = cache and cache[k] or nil + if cached ~= nil then v = cached else v = decode_cursor(t, child_box) @@ -622,7 +650,7 @@ local function encode_lazy_object_walking_with_patches(t, patches, deleted) for _, p in ipairs(patches) do local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) if rc == QJSON_NOT_FOUND then - parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value + parts[#parts + 1] = encode_string(p.key) .. ":" .. encode(p.lua_value) elseif rc ~= QJSON_OK then check(rc) -- propagate unexpected errors end @@ -640,7 +668,9 @@ local function encode_with_patches(t) -- Classify each patch into a replacement (existing field) or a new field -- in a single FFI pass. Replacements carry the byte span; presence of any - -- new field forces the walking fallback. + -- new field forces the walking fallback. Re-encode `lua_value` at emit + -- time so any post-assignment mutation (e.g. `t.a = {x=1}; t.a.x = 2`) is + -- visible — we deliberately do not cache the encoded form on the patch. local replacements = {} local has_new_field = false for _, p in ipairs(patches) do @@ -649,7 +679,7 @@ local function encode_with_patches(t) replacements[#replacements + 1] = { start = tonumber(sz_a[0]), end_ = tonumber(sz_b[0]), - value = p.encoded_value, + value = encode(p.lua_value), } elseif rc == QJSON_NOT_FOUND then has_new_field = true diff --git a/tests/ffi_cursor_bytes.rs b/tests/ffi_cursor_bytes.rs index b47fda4..7e3aaf1 100644 --- a/tests/ffi_cursor_bytes.rs +++ b/tests/ffi_cursor_bytes.rs @@ -3,7 +3,8 @@ use std::ptr; use qjson::error::qjson_err; use qjson::ffi::{ - qjson_cursor, qjson_cursor_bytes, qjson_cursor_field, qjson_doc, qjson_free, qjson_open, qjson_parse, + qjson_cursor, qjson_cursor_bytes, qjson_cursor_field, qjson_cursor_field_bytes, qjson_doc, + qjson_free, qjson_open, qjson_parse, }; unsafe fn open_root(json: &[u8]) -> (*mut qjson_doc, qjson_cursor) { @@ -88,3 +89,136 @@ fn bytes_of_root_array_covers_full_json() { qjson_free(doc); } } + +// --------------------------------------------------------------------------- +// qjson_cursor_field_bytes: the splice path's load-bearing FFI symbol. + +unsafe fn field_bytes(root: &qjson_cursor, key: &[u8]) -> (c_int, usize, usize) { + let mut bs: usize = 0; + let mut be: usize = 0; + let rc = qjson_cursor_field_bytes(root, key.as_ptr() as *const i8, key.len(), &mut bs, &mut be); + (rc, bs, be) +} + +#[test] +fn field_bytes_string_value_returns_quoted_span() { + let json = br#"{"k":"hello"}"#; + unsafe { + let (doc, root) = open_root(json); + let (rc, bs, be) = field_bytes(&root, b"k"); + assert_eq!(rc, 0); + assert_eq!(&json[bs..be], br#""hello""#); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_number_value_strips_separators() { + let json = br#"{"k": 42 ,"x":1}"#; + unsafe { + let (doc, root) = open_root(json); + let (rc, bs, be) = field_bytes(&root, b"k"); + assert_eq!(rc, 0); + assert_eq!(&json[bs..be], b"42"); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_object_value_covers_braces() { + let json = br#"{"k":{"a":1,"b":[2,3]},"x":1}"#; + unsafe { + let (doc, root) = open_root(json); + let (rc, bs, be) = field_bytes(&root, b"k"); + assert_eq!(rc, 0); + assert_eq!(&json[bs..be], br#"{"a":1,"b":[2,3]}"#); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_array_value_covers_brackets() { + let json = br#"{"k":[1,"two",true],"x":1}"#; + unsafe { + let (doc, root) = open_root(json); + let (rc, bs, be) = field_bytes(&root, b"k"); + assert_eq!(rc, 0); + assert_eq!(&json[bs..be], br#"[1,"two",true]"#); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_missing_key_returns_not_found() { + let json = br#"{"a":1}"#; + unsafe { + let (doc, root) = open_root(json); + let (rc, _, _) = field_bytes(&root, b"missing"); + assert_eq!(rc, qjson_err::QJSON_NOT_FOUND as c_int); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_non_object_cursor_returns_type_mismatch() { + // root is an array — qjson_cursor_field_bytes should refuse to index by key. + let json = br#"[1,2,3]"#; + unsafe { + let (doc, root) = open_root(json); + let (rc, _, _) = field_bytes(&root, b"k"); + assert_eq!(rc, qjson_err::QJSON_TYPE_MISMATCH as c_int); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_null_out_pointer_returns_invalid_arg() { + let json = br#"{"a":1}"#; + unsafe { + let (doc, root) = open_root(json); + let key = b"a"; + // Both out pointers null + let rc = qjson_cursor_field_bytes( + &root, key.as_ptr() as *const i8, key.len(), ptr::null_mut(), ptr::null_mut(), + ); + assert_eq!(rc, qjson_err::QJSON_INVALID_ARG as c_int); + // Only value_end null + let mut bs: usize = 0; + let rc = qjson_cursor_field_bytes( + &root, key.as_ptr() as *const i8, key.len(), &mut bs, ptr::null_mut(), + ); + assert_eq!(rc, qjson_err::QJSON_INVALID_ARG as c_int); + qjson_free(doc); + } +} + +#[test] +fn field_bytes_round_trip_splice() { + // The use case the FFI exists for: extract a field's span, replace it + // with a different JSON literal, re-parse and check the resulting field. + let json = br#"{"a":1,"b":"old","c":3}"#; + unsafe { + let (doc, root) = open_root(json); + let (rc, bs, be) = field_bytes(&root, b"b"); + assert_eq!(rc, 0); + assert_eq!(&json[bs..be], br#""old""#); + qjson_free(doc); + + // Splice + reparse + let mut spliced = Vec::new(); + spliced.extend_from_slice(&json[..bs]); + spliced.extend_from_slice(br#"["new",42]"#); + spliced.extend_from_slice(&json[be..]); + + let (doc2, root2) = open_root(&spliced); + let (rc2, bs2, be2) = field_bytes(&root2, b"b"); + assert_eq!(rc2, 0); + assert_eq!(&spliced[bs2..be2], br#"["new",42]"#); + // Unchanged neighbours + let (_, bsa, bea) = field_bytes(&root2, b"a"); + assert_eq!(&spliced[bsa..bea], b"1"); + let (_, bsc, bec) = field_bytes(&root2, b"c"); + assert_eq!(&spliced[bsc..bec], b"3"); + qjson_free(doc2); + } +} diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua index b50ec0a..779c794 100644 --- a/tests/lua/lazy_patch_spec.lua +++ b/tests/lua/lazy_patch_spec.lua @@ -327,3 +327,84 @@ describe("Lazy Patch - metatable preservation", function() assert.are.equal(3, t.c) end) end) + +describe("Lazy Patch - read-then-write does not bypass __newindex", function() + -- Regression: container reads used to rawset-cache onto the view, which + -- made a subsequent assignment to that key skip __newindex entirely (Lua + -- only fires __newindex when the raw key is absent). Patches went + -- unrecorded and encode silently returned the original bytes. + it("read-then-replace-with-scalar updates the field", function() + local obj = qjson.decode('{"foo":{"x":1}}') + local _ = obj.foo + obj.foo = "replaced" + assert.are.equal("replaced", obj.foo) + local cjson = require("cjson") + local parsed = cjson.decode(qjson.encode(obj)) + assert.are.equal("replaced", parsed.foo) + end) + + it("read-then-delete actually deletes", function() + local obj = qjson.decode('{"foo":{"x":1},"bar":2}') + local _ = obj.foo + obj.foo = nil + assert.is_nil(obj.foo) + local cjson = require("cjson") + local parsed = cjson.decode(qjson.encode(obj)) + assert.is_nil(parsed.foo) + assert.are.equal(2, parsed.bar) + end) + + it("read-then-replace-with-table also updates", function() + local obj = qjson.decode('{"foo":{"x":1}}') + local _ = obj.foo + obj.foo = { y = 2 } + local cjson = require("cjson") + local parsed = cjson.decode(qjson.encode(obj)) + assert.is_nil(parsed.foo.x) + assert.are.equal(2, parsed.foo.y) + end) + + it("array: read-then-replace-with-scalar updates the slot", function() + local arr = qjson.decode('[{"x":1},"b"]') + local _ = arr[1] + arr[1] = "replaced" + local cjson = require("cjson") + local parsed = cjson.decode(qjson.encode(arr)) + assert.are.equal("replaced", parsed[1]) + assert.are.equal("b", parsed[2]) + end) +end) + +describe("Lazy Patch - mutable patch values re-encode at emit time", function() + -- Regression: patches used to cache the encoded JSON string at write time, + -- so a subsequent mutation of the assigned Lua table was lost. + it("mutates a patched table after assignment", function() + local t = qjson.decode('{"a":1}') + t.a = { x = 1 } + t.a.x = 2 + local cjson = require("cjson") + local parsed = cjson.decode(qjson.encode(t)) + assert.are.equal(2, parsed.a.x) + end) + + it("mutates a patched table after assignment (existing key replace)", function() + local t = qjson.decode('{"a":{"old":1},"b":2}') + t.a = { x = 1 } + t.a.x = 99 + local cjson = require("cjson") + local parsed = cjson.decode(qjson.encode(t)) + assert.are.equal(99, parsed.a.x) + assert.is_nil(parsed.a.old) + assert.are.equal(2, parsed.b) + end) + + it("mutates a patched table after assignment (new key)", function() + local t = qjson.decode('{"a":1}') + t.newkey = { z = 0 } + t.newkey.z = 42 + local cjson = require("cjson") + local parsed = cjson.decode(qjson.encode(t)) + assert.are.equal(42, parsed.newkey.z) + assert.are.equal(1, parsed.a) + end) +end)