From a9a8c05b232e6421a0c0461a65f174d8799451ed Mon Sep 17 00:00:00 2001 From: ares <285551516+New1Direction@users.noreply.github.com> Date: Mon, 8 Jun 2026 10:47:25 -0700 Subject: [PATCH 1/6] feat(sandbox): verifiable sandboxed `bash` tool via just-bash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A persistent just-bash Node sidecar (in-memory FS, no host/network access) gives the agent a real shell that physically cannot touch the host. Each exec returns fs_hash — a hash of the full virtual-FS state — and KorgChat chains every tool call into the korg ledger, so the shell session is tamper-evident and replayable (same commands from a fresh sandbox reproduce the same hashes). - sandbox/sidecar.mjs: stdio JSON-RPC just-bash shell + deterministic FS hash - korgchat.sandbox: SandboxClient, bash_tool(), tools_with_sandbox() - --sandbox flag wires it into the CLI session - tests: persistence, determinism/replay, host isolation, end-to-end ledger recording of the exec with fs_hash (skips cleanly without node) --- CHANGELOG.md | 1 + README.md | 26 + sandbox/.gitignore | 1 + sandbox/package-lock.json | 997 ++++++++++++++++++++++++++++++++++++++ sandbox/package.json | 15 + sandbox/sidecar.mjs | 122 +++++ src/korgchat/__main__.py | 28 +- src/korgchat/sandbox.py | 214 ++++++++ tests/test_sandbox.py | 116 +++++ 9 files changed, 1518 insertions(+), 2 deletions(-) create mode 100644 sandbox/.gitignore create mode 100644 sandbox/package-lock.json create mode 100644 sandbox/package.json create mode 100644 sandbox/sidecar.mjs create mode 100644 src/korgchat/sandbox.py create mode 100644 tests/test_sandbox.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 2055c87..61ab902 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **Sandboxed `bash` tool with verifiable exec (`--sandbox`).** New `korgchat.sandbox` module adds a 
`bash` tool backed by [just-bash](https://github.com/vercel-labs/just-bash) — a JS reimplementation of bash + ~90 coreutils over an in-memory filesystem, run as a persistent Node sidecar. The shell physically cannot reach the host filesystem or network (no network/python/js by default). Every `exec` returns `fs_hash`, a hash of the full virtual-filesystem state after the command; because each tool call is hash-chained into the ledger, the agent's shell session is **tamper-evident and replayable** — the same commands from a fresh sandbox reproduce the same hashes. Exports `SandboxClient` (stdio JSON-RPC), `bash_tool()`, and `tools_with_sandbox()`; enable in the CLI with `--sandbox` (requires Node ≥18 and `npm install` in `sandbox/`). - **Auto-context injection is now a first-class ledger event.** Previously the recall-augmented preamble the model actually saw was a *ghost* — the journal recorded only the user's original prompt. Now, whenever auto-context injects a preamble, a `context_injection` event is written capturing the preamble text, the recall query, and the matched `seq_id`s + scores, causally chained `user_prompt → context_injection → llm_inference`. The user_prompt event still records only what the user typed; the injected context is a separate, auditable, replayable event. New `AutoContextEngine.build_context()` returns a `ContextInjection` (preamble + structured matches); `build_preamble()` is now a thin wrapper over it. - **Tool-schema snapshot + conformance events.** Every tool execution is now bracketed by two events: a `tool_schema_snapshot` *before* the call (the declared `input_schema`, `description`, and a deterministic `schema_hash`) and a `tool_validation` *after* (did the call's input conform to the declared schema? did the call succeed?). A replayed conversation stays meaningful even after a tool's schema changes — the contract it ran against is frozen on the ledger, and a stale call is detectable. 
New `korgchat.schema` module: `schema_hash()` (canonical sha256, byte-for-byte aligned with `korg-ledger@v1` canonicalization) and a dependency-free `validate_input()`. diff --git a/README.md b/README.md index c6dc4a9..1637a4f 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,34 @@ korgchat --mock --no-stream # Tune mock-mode streaming speed (default 0.005s/char): korgchat --mock --stream-delay 0.05 # slow & visible korgchat --mock --stream-delay 0 # instant + +# Give the agent a sandboxed `bash` tool (verifiable exec): +cd sandbox && npm install && cd .. # one-time: pulls just-bash +korgchat --sandbox ``` +## Sandboxed shell (`--sandbox`) + +`--sandbox` adds a `bash` tool backed by +[just-bash](https://github.com/vercel-labs/just-bash) — a JS reimplementation +of bash + ~90 coreutils over an **in-memory** filesystem, run as a persistent +Node sidecar (`sandbox/sidecar.mjs`). The shell physically cannot reach the +host filesystem or network (no network/python/js are enabled). + +Every command returns `fs_hash` — a hash of the full virtual-filesystem state +after it runs. Because each tool call is hash-chained into the ledger, the +agent's shell session becomes **tamper-evident and replayable**: the same +commands from a fresh sandbox reproduce the same hashes. + +```python +from korgchat import ChatSession +from korgchat.sandbox import tools_with_sandbox + +session = ChatSession(journal_path=..., responder=..., tools=tools_with_sandbox()) +``` + +Requires Node ≥18 (Node ≥20 recommended: the locked `file-type` and `ini` dependencies declare Node 20+ in their `engines` fields, so older runtimes get `EBADENGINE` warnings) and a one-time `npm install` in `sandbox/`. 
+ ## Streaming (v0.4.2) By default, every assistant text reply streams to stdout character-by-character diff --git a/sandbox/.gitignore b/sandbox/.gitignore new file mode 100644 index 0000000..c2658d7 --- /dev/null +++ b/sandbox/.gitignore @@ -0,0 +1 @@ +node_modules/ diff --git a/sandbox/package-lock.json b/sandbox/package-lock.json new file mode 100644 index 0000000..4199681 --- /dev/null +++ b/sandbox/package-lock.json @@ -0,0 +1,997 @@ +{ + "name": "sandbox", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "sandbox", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "just-bash": "^3.0.1" + } + }, + "node_modules/@borewit/text-codec": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.2.tgz", + "integrity": "sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/@jitl/quickjs-ffi-types": { + "version": "0.32.0", + "resolved": "https://registry.npmjs.org/@jitl/quickjs-ffi-types/-/quickjs-ffi-types-0.32.0.tgz", + "integrity": "sha512-v9T+GQpmk43VDJ7d72sf0Nexhk+ArvtUihW27dy7lqAl0zBObFKtSBBIm5RBjwIhE8VwsPPm9PNuvPvNqLWUEg==", + "license": "MIT" + }, + "node_modules/@jitl/quickjs-wasmfile-debug-asyncify": { + "version": "0.32.0", + "resolved": "https://registry.npmjs.org/@jitl/quickjs-wasmfile-debug-asyncify/-/quickjs-wasmfile-debug-asyncify-0.32.0.tgz", + "integrity": "sha512-EX8zbXwGqCgAE764M+qvkHtyXDi/FUoMBea0JnES7vCM3P7a2+EOZOjGv85wtZ2sJhI1oJ+nekmqpOODFDY+hw==", + "license": "MIT", + "dependencies": { + "@jitl/quickjs-ffi-types": "0.32.0" + } + }, + "node_modules/@jitl/quickjs-wasmfile-debug-sync": { + "version": "0.32.0", + "resolved": "https://registry.npmjs.org/@jitl/quickjs-wasmfile-debug-sync/-/quickjs-wasmfile-debug-sync-0.32.0.tgz", + "integrity": 
"sha512-LeYWrPGC1uNCTBWvibo3ZLJj0CSVNYUXvJpXMCmuQ5Sap2cCACc3uvGvYV4homHHBAzfw5akoTqMMS4YFRtw+Q==", + "license": "MIT", + "dependencies": { + "@jitl/quickjs-ffi-types": "0.32.0" + } + }, + "node_modules/@jitl/quickjs-wasmfile-release-asyncify": { + "version": "0.32.0", + "resolved": "https://registry.npmjs.org/@jitl/quickjs-wasmfile-release-asyncify/-/quickjs-wasmfile-release-asyncify-0.32.0.tgz", + "integrity": "sha512-3oSwPfja12ICz4aIblB58cuY8JlEq5Txt8Cut4VLo+LH47QN+mzCnSgnbB03hWzg1LBcc+VyyI9UOag7a1NF+Q==", + "license": "MIT", + "dependencies": { + "@jitl/quickjs-ffi-types": "0.32.0" + } + }, + "node_modules/@jitl/quickjs-wasmfile-release-sync": { + "version": "0.32.0", + "resolved": "https://registry.npmjs.org/@jitl/quickjs-wasmfile-release-sync/-/quickjs-wasmfile-release-sync-0.32.0.tgz", + "integrity": "sha512-BKNDI/TPBfGlLNGYpLrhcDGXmIk4xHm4MRAisOBnOzpXVn9HZWsfmMAc9WMBrAHjvvds6HOikKeaOBKdPdpVrg==", + "license": "MIT", + "dependencies": { + "@jitl/quickjs-ffi-types": "0.32.0" + } + }, + "node_modules/@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", + "license": "BSD-2-Clause" + }, + "node_modules/@mongodb-js/zstd": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/@mongodb-js/zstd/-/zstd-7.0.0.tgz", + "integrity": "sha512-mQ2s0pYYiav+tzCDR05Zptem8Ey2v8s11lri5RKGhTtL4COVCvVCk5vtyRYNT+9L8qSfyOqqefF9UtnW8mC5jA==", + "hasInstallScript": true, + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "node-addon-api": "^8.5.0", + "prebuild-install": "^7.1.3" + }, + "engines": { + "node": ">= 20.19.0" + } + }, + "node_modules/@nodable/entities": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@nodable/entities/-/entities-2.1.1.tgz", + "integrity": 
"sha512-Pig3HxDIoMgjdEH8OCf/dkcTmLFjJRjWuq8jSnklu284/TKOPibSRERmOykiwmyXTtv61mP+44f3GMx0tLAyjg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/nodable" + } + ], + "license": "MIT" + }, + "node_modules/@tokenizer/inflate": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@tokenizer/inflate/-/inflate-0.4.1.tgz", + "integrity": "sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "token-types": "^6.1.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/@tokenizer/token": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz", + "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==", + "license": "MIT" + }, + "node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": 
"sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "license": "MIT", + "optional": true, + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/brace-expansion": { + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.6.tgz", + "integrity": "sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g==", + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "license": "ISC", + "optional": true + }, + "node_modules/commander": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", + "integrity": "sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==", + "license": "MIT", + "engines": { + "node": ">= 6" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": 
"sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "license": "MIT", + "optional": true, + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "license": "Apache-2.0", + "optional": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/diff": { + "version": "8.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.4.tgz", + "integrity": "sha512-DPi0FmjiSU5EvQV0++GFDOJ9ASQUVFh5kD+OzOnYdi7n3Wpm9hWWGfB/O2blfHcMVTL5WkQXSnRiK9makhrcnw==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "license": 
"MIT", + "optional": true, + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "license": "(MIT OR WTFPL)", + "optional": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/fast-xml-builder": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.2.0.tgz", + "integrity": "sha512-00aAWieqff+ZJhsXA4g1g7M8k+7AYoMUUHF+/zFb5U6Uv/P0Vl4QZo84/IcufzYalLuEj9928bXN9PbbFzMF0Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "dependencies": { + "path-expression-matcher": "^1.5.0", + "xml-naming": "^0.1.0" + } + }, + "node_modules/fast-xml-parser": { + "version": "5.8.0", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.8.0.tgz", + "integrity": "sha512-6bIM7fsJxeo3uXv7OncQYsBAMPJ7V16Slahl/6M98C/i2q+vB1+4a0MtrvYwDFEUrwDSbAmeLDRXsOBwrL7yAg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "dependencies": { + "@nodable/entities": "^2.1.0", + "fast-xml-builder": "^1.2.0", + "path-expression-matcher": "^1.5.0", + "strnum": "^2.3.0", + "xml-naming": "^0.1.0" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, + "node_modules/file-type": { + "version": "21.3.4", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz", + "integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==", + "license": "MIT", + "dependencies": { + "@tokenizer/inflate": "^0.4.1", + "strtok3": "^10.3.4", + "token-types": "^6.1.1", + "uint8array-extras": "^1.4.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": 
"https://github.com/sindresorhus/file-type?sponsor=1" + } + }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "license": "MIT", + "optional": true + }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "license": "MIT", + "optional": true + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC", + "optional": true + }, + "node_modules/ini": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/ini/-/ini-6.0.0.tgz", + "integrity": "sha512-IBTdIkzZNOpqm7q3dRqJvMaldXjDHWkEDfrwGEQTs5eaQMWV+djAhR+wahyNNMAa+qpbDUhBMVt4ZKNwpPm7xQ==", + "license": "ISC", + "engines": { + "node": "^20.17.0 || >=22.9.0" + } + }, + "node_modules/just-bash": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/just-bash/-/just-bash-3.0.1.tgz", + "integrity": 
"sha512-YVyzCN08fKarUnwqy7rKOAcX+2MLYLnYInuowmUXn3mqhrtd4ieZNBuzdQG+qYV9DqnIWuv9Whiph0WRIWsBtw==", + "license": "Apache-2.0", + "dependencies": { + "diff": "^8.0.2", + "fast-xml-parser": "^5.7.3", + "file-type": "^21.2.0", + "ini": "^6.0.0", + "minimatch": "^10.1.1", + "modern-tar": "^0.7.3", + "papaparse": "^5.5.3", + "quickjs-emscripten": "^0.32.0", + "re2js": "^1.2.1", + "seek-bzip": "^2.0.0", + "smol-toml": "^1.6.0", + "sprintf-js": "^1.1.3", + "sql.js": "^1.13.0", + "turndown": "^7.2.2", + "yaml": "^2.8.2" + }, + "bin": { + "just-bash": "dist/bin/just-bash.js", + "just-bash-shell": "dist/bin/shell/shell.js" + }, + "optionalDependencies": { + "@mongodb-js/zstd": "^7.0.0", + "node-liblzma": "^2.0.3" + } + }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/minimatch": { + "version": "10.2.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz", + "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==", + "license": "BlueOak-1.0.0", + "dependencies": { + "brace-expansion": "^5.0.5" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "optional": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": 
"https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "license": "MIT", + "optional": true + }, + "node_modules/modern-tar": { + "version": "0.7.6", + "resolved": "https://registry.npmjs.org/modern-tar/-/modern-tar-0.7.6.tgz", + "integrity": "sha512-sweCIVXzx1aIGTCdzcMlSZt1h8k5Tmk08VNAuRk3IU28XamGiOH5ypi11g6De2CH7PhYqSSnGy2A/EFhbWnVKg==", + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "license": "MIT", + "optional": true + }, + "node_modules/node-abi": { + "version": "3.92.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.92.0.tgz", + "integrity": "sha512-KdHvFWZjEKDf0cakgFjebl371GPsISX2oZHcuyKqM7DtogIsHrqKeLTo8wBHxaXRAQlY2PsPlZmfo+9ZCxEREQ==", + "license": "MIT", + "optional": true, + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-addon-api": { + "version": "8.8.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.8.0.tgz", + "integrity": "sha512-c5Ko1fZJIJmzhFIkhRN76WTq+fC6tWnGy9CXA0fA+XygsWZmEwG8vmbkNqxMyoaa0Tin4djul49NzdVcJJcjeA==", + "license": "MIT", + "optional": true, + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": 
"sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", + "license": "MIT", + "optional": true, + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, + "node_modules/node-liblzma": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/node-liblzma/-/node-liblzma-2.2.0.tgz", + "integrity": "sha512-s0KzNOWwOJJgPG6wxg6cKohnAl9Wk/oW1KrQaVzJBjQwVcUGPQCzpR46Ximygjqj/3KhOrtJXnYMp/xYAXp75g==", + "hasInstallScript": true, + "license": "LGPL-3.0", + "optional": true, + "dependencies": { + "node-addon-api": "^8.5.0", + "node-gyp-build": "^4.8.4" + }, + "bin": { + "nxz": "lib/cli/nxz.js" + }, + "engines": { + "node": ">=16.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/oorabona" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "optional": true, + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/papaparse": { + "version": "5.5.3", + "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.5.3.tgz", + "integrity": "sha512-5QvjGxYVjxO59MGU2lHVYpRWBBtKHnlIAcSe1uNFCkkptUh63NFRj0FJQm7nR67puEruUci/ZkjmEFrjCAyP4A==", + "license": "MIT" + }, + "node_modules/path-expression-matcher": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.5.0.tgz", + "integrity": "sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": 
"https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.", + "license": "MIT", + "optional": true, + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "license": "MIT", + "optional": true, + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/quickjs-emscripten": { + "version": "0.32.0", + "resolved": "https://registry.npmjs.org/quickjs-emscripten/-/quickjs-emscripten-0.32.0.tgz", + "integrity": "sha512-So0Sqw869y/S2oE3Nuc0uT3Dhqgvsj8FSrwBdsuTosVsG8ME5/OcudU1GxsrIFdFABgy17GHnTVO9TYV/bLQcA==", + "license": "MIT", + "dependencies": { + "@jitl/quickjs-wasmfile-debug-asyncify": "0.32.0", + "@jitl/quickjs-wasmfile-debug-sync": "0.32.0", + "@jitl/quickjs-wasmfile-release-asyncify": "0.32.0", + "@jitl/quickjs-wasmfile-release-sync": "0.32.0", + "quickjs-emscripten-core": "0.32.0" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/quickjs-emscripten-core": { + "version": "0.32.0", + "resolved": "https://registry.npmjs.org/quickjs-emscripten-core/-/quickjs-emscripten-core-0.32.0.tgz", + "integrity": 
"sha512-QFnPfjFey8EqknSrSxe1hZrf1/8z7/6s1QzGOmKo6++02r7QRRX7ZoyNaZh7JuVjWsVW87KnQrbZqnHkOAzUyg==", + "license": "MIT", + "dependencies": { + "@jitl/quickjs-ffi-types": "0.32.0" + } + }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "optional": true, + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, + "node_modules/rc/node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "license": "ISC", + "optional": true + }, + "node_modules/re2js": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/re2js/-/re2js-1.3.3.tgz", + "integrity": "sha512-s/I5zEAo79SUK0Qw4dpZKpiMwbQ6Gz0KU2NRr7eaO4x/p2g7Vvmn3hdeXDg8VsaUjfj/ora+e9oi27LX/C9+mw==", + "license": "MIT" + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "optional": true, + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + 
"url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + "node_modules/seek-bzip": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/seek-bzip/-/seek-bzip-2.0.0.tgz", + "integrity": "sha512-SMguiTnYrhpLdk3PwfzHeotrcwi8bNV4iemL9tx9poR/yeaMYwB9VzR1w7b57DuWpuqR8n6oZboi0hj3AxZxQg==", + "license": "MIT", + "dependencies": { + "commander": "^6.0.0" + }, + "bin": { + "seek-bunzip": "bin/seek-bunzip", + "seek-table": "bin/seek-bzip-table" + } + }, + "node_modules/semver": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.3.tgz", + "integrity": "sha512-wnilbGyMxzbY7dNOl7jpKbLSjcfeweJWU5j4+u5qW+6/wuGD9KzIGOyZnQVSBM9E7DtWaaH3CyHkppYrKYoxwg==", + "license": "ISC", + "optional": true, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true + }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "optional": true, + 
"dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, + "node_modules/smol-toml": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/smol-toml/-/smol-toml-1.6.1.tgz", + "integrity": "sha512-dWUG8F5sIIARXih1DTaQAX4SsiTXhInKf1buxdY9DIg4ZYPZK5nGM1VRIYmEbDbsHt7USo99xSLFu5Q1IqTmsg==", + "license": "BSD-3-Clause", + "engines": { + "node": ">= 18" + }, + "funding": { + "url": "https://github.com/sponsors/cyyynthia" + } + }, + "node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", + "license": "BSD-3-Clause" + }, + "node_modules/sql.js": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/sql.js/-/sql.js-1.14.1.tgz", + "integrity": "sha512-gcj8zBWU5cFsi9WUP+4bFNXAyF1iRpA3LLyS/DP5xlrNzGmPIizUeBggKa8DbDwdqaKwUcTEnChtd2grWo/x/A==", + "license": "MIT" + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "optional": true, + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/strnum": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.3.0.tgz", + "integrity": "sha512-ums3KNd42PGyx5xaoVTO1mjU1bH3NpY4vsrVlnv9PNGqQj8wd7rJ6nEypLrJ7z5vxK5RP0yMLo6J/Gsm62DI5Q==", + "funding": [ + { + "type": 
"github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT" + }, + "node_modules/strtok3": { + "version": "10.3.5", + "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.5.tgz", + "integrity": "sha512-ki4hZQfh5rX0QDLLkOCj+h+CVNkqmp/CMf8v8kZpkNVK6jGQooMytqzLZYUVYIZcFZ6yDB70EfD8POcFXiF5oA==", + "license": "MIT", + "dependencies": { + "@tokenizer/token": "^0.3.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "license": "MIT", + "optional": true, + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "license": "MIT", + "optional": true, + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/token-types": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.1.2.tgz", + "integrity": "sha512-dRXchy+C0IgK8WPC6xvCHFRIWYUbqqdEIKPaKo/AcTUNzwLTK6AH7RjdLWsEZcAN/TBdtfUw3PYEgPr5VPr6ww==", + "license": "MIT", + "dependencies": { + "@borewit/text-codec": "^0.2.1", + "@tokenizer/token": "^0.3.0", + "ieee754": "^1.2.1" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/tunnel-agent": { + "version": "0.6.0", + 
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, + "node_modules/turndown": { + "version": "7.2.4", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.4.tgz", + "integrity": "sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==", + "license": "MIT", + "dependencies": { + "@mixmark-io/domino": "^2.2.0" + }, + "engines": { + "node": ">=18", + "npm": ">=9" + } + }, + "node_modules/uint8array-extras": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz", + "integrity": "sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT", + "optional": true + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC", + "optional": true + }, + "node_modules/xml-naming": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/xml-naming/-/xml-naming-0.1.0.tgz", + "integrity": "sha512-k8KO9hrMyNk6tUWqUfkTEZbezRRpONVOzUTnc97VnCvyj6Tf9lyUR9EDAIeiVLv56jsMcoXEwjW8Kv5yPY52lw==", + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/yaml": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.9.0.tgz", + "integrity": "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==", + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } + } + } +} diff --git a/sandbox/package.json b/sandbox/package.json new file mode 100644 index 0000000..d392e63 --- /dev/null +++ b/sandbox/package.json @@ -0,0 +1,15 @@ +{ + "name": "sandbox", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "just-bash": "^3.0.1" + } +} diff --git a/sandbox/sidecar.mjs b/sandbox/sidecar.mjs new file mode 100644 index 0000000..db8b388 --- /dev/null +++ b/sandbox/sidecar.mjs @@ -0,0 +1,122 @@ +// KorgChat sandbox sidecar — a persistent just-bash shell driven over stdio. +// +// One Bash instance lives for the process lifetime, so the in-memory +// filesystem is shared across exec calls (an agent session), while each +// exec gets fresh shell state (env/cwd) per just-bash semantics. +// +// Protocol: newline-delimited JSON, one request -> one response. +// -> {"id":1,"op":"ping"} +// <- {"id":1,"ok":true,"version":"..."} +// -> {"id":2,"op":"exec","cmd":"echo hi > a.txt; cat a.txt","timeoutMs":10000} +// <- {"id":2,"ok":true,"stdout":"hi\n","stderr":"","exit_code":0, +// "fs_hash":"","fs_files":N} +// -> {"id":3,"op":"reset"} // fresh sandbox +// +// Safe by default: InMemoryFs, no network, no python, no js-exec. The shell +// physically cannot reach the host filesystem or network. 
+ +import { Bash } from "just-bash"; +import { createHash } from "node:crypto"; +import { createInterface } from "node:readline"; + +const VERSION = "korgchat-sandbox/1 (just-bash 3.0.1)"; + +function newBash() { + // All capability flags omitted => safest defaults (no net/python/js, InMemoryFs). + return new Bash(); +} +let bash = newBash(); + +const sha256 = (data) => createHash("sha256").update(data).digest("hex"); + +// Walk the virtual filesystem into a deterministic manifest and hash it. +// The hash is a pure function of full FS state => same commands from genesis +// reproduce the same hash (replayable / tamper-evident when chained). +async function fsHash(root = "/") { + const fs = bash.fs; + const lines = []; + async function walk(dir) { + let names; + try { + names = await fs.readdir(dir); + } catch { + return; + } + for (const name of [...names].sort()) { + const p = dir === "/" ? "/" + name : dir + "/" + name; + let st; + try { + st = await fs.stat(p); + } catch { + continue; + } + if (st.isDirectory) { + lines.push("d " + p); + await walk(p); + } else if (st.isSymbolicLink) { + lines.push("l " + p); + } else if (st.isFile) { + try { + const buf = await fs.readFileBuffer(p); + lines.push("f " + p + " " + buf.length + " " + sha256(buf)); + } catch { + lines.push("f " + p + " ?"); + } + } + } + } + await walk(root); + return { hash: sha256(lines.join("\n")), files: lines.length }; +} + +async function handle(req) { + const { id, op } = req; + try { + if (op === "ping") return { id, ok: true, version: VERSION }; + if (op === "reset") { + bash = newBash(); + const f = await fsHash(); + return { id, ok: true, fs_hash: f.hash, fs_files: f.files }; + } + if (op === "exec") { + const opts = {}; + if (req.cwd) opts.cwd = req.cwd; + if (req.env && typeof req.env === "object") opts.env = req.env; + opts.signal = AbortSignal.timeout(req.timeoutMs ?? 10000); + const r = await bash.exec(String(req.cmd ?? ""), opts); + const f = await fsHash(req.fsRoot ?? 
"/"); + return { + id, + ok: true, + stdout: r.stdout, + stderr: r.stderr, + exit_code: r.exitCode, + fs_hash: f.hash, + fs_files: f.files, + }; + } + return { id, ok: false, error: `unknown op: ${op}` }; + } catch (e) { + return { id, ok: false, error: String((e && e.message) || e) }; + } +} + +// Serialize handling so concurrent lines never interleave async FS state. +let queue = Promise.resolve(); +const rl = createInterface({ input: process.stdin }); +rl.on("line", (line) => { + const text = line.trim(); + if (!text) return; + queue = queue.then(async () => { + let req; + try { + req = JSON.parse(text); + } catch { + process.stdout.write(JSON.stringify({ id: null, ok: false, error: "bad json" }) + "\n"); + return; + } + const res = await handle(req); + process.stdout.write(JSON.stringify(res) + "\n"); + }); +}); +rl.on("close", () => process.exit(0)); diff --git a/src/korgchat/__main__.py b/src/korgchat/__main__.py index 26a8a1c..8560f5c 100644 --- a/src/korgchat/__main__.py +++ b/src/korgchat/__main__.py @@ -13,6 +13,7 @@ from korgchat import __version__ from korgchat.chat import ChatSession, MockResponder, ToolCall, select_responder from korgchat.recall import RecallEngine, format_matches +from korgchat.sandbox import SandboxClient, SandboxError, tools_with_sandbox from korgchat.summary import SummarizeEngine @@ -70,6 +71,14 @@ def _build_parser() -> argparse.ArgumentParser: help="(--mock only) Delay between simulated tokens, seconds. Default " "0.005 gives a visible streaming effect; set to 0 for instant output.", ) + p.add_argument( + "--sandbox", + action="store_true", + help="Add a sandboxed `bash` tool backed by just-bash (in-memory " + "filesystem, no host or network access). Requires Node and " + "`npm install` in sandbox/. 
Every command and the resulting " + "filesystem hash are recorded to the ledger for verifiable replay.", + ) return p @@ -512,13 +521,28 @@ def main(argv: list[str] | None = None) -> int: if args.mock and streaming and args.stream_delay > 0: responder = MockResponder(stream_delay_secs=args.stream_delay) - session = ChatSession( + sandbox_client: SandboxClient | None = None + session_kwargs: dict = dict( journal_path=Path(args.journal), responder=responder, auto_context=args.auto_context, ) + if args.sandbox: + try: + sandbox_client = SandboxClient() + sandbox_client.ping() + except SandboxError as e: + print(f"[korgchat] --sandbox unavailable: {e}", file=sys.stderr) + return 2 + session_kwargs["tools"] = tools_with_sandbox(client=sandbox_client) + + session = ChatSession(**session_kwargs) _print_banner(session, mock=args.mock, streaming=streaming) - return _interactive_loop(session, args.turns, streaming=streaming) + try: + return _interactive_loop(session, args.turns, streaming=streaming) + finally: + if sandbox_client is not None: + sandbox_client.close() if __name__ == "__main__": diff --git a/src/korgchat/sandbox.py b/src/korgchat/sandbox.py new file mode 100644 index 0000000..bcb5cc3 --- /dev/null +++ b/src/korgchat/sandbox.py @@ -0,0 +1,214 @@ +"""Sandboxed ``bash`` tool for KorgChat, backed by a just-bash sidecar. + +The sidecar (``sandbox/sidecar.mjs``) is a persistent Node process running +`just-bash `_ — a JS reimplementation +of bash + ~90 coreutils over an in-memory filesystem. It physically cannot +reach the host filesystem or network, so the model can run real shell commands +safely. + +Every ``exec`` returns ``fs_hash`` — a hash of the full virtual-filesystem +state after the command. Because KorgChat records each tool call into the korg +ledger (hash-chained), embedding ``fs_hash`` in the tool result makes the +agent's shell session **tamper-evident and replayable**: the same commands +from a fresh sandbox reproduce the same hashes. 
+ +Usage:: + + from korgchat import ChatSession + from korgchat.sandbox import tools_with_sandbox + + session = ChatSession(journal_path=..., responder=..., tools=tools_with_sandbox()) +""" + +from __future__ import annotations + +import json +import shutil +import subprocess +import threading +from pathlib import Path +from typing import Any + +from .tools import Tool, ToolRegistry, default_tools + +# sandbox.py lives at src/korgchat/sandbox.py; the sidecar sits at /sandbox/. +_SIDECAR = Path(__file__).resolve().parent.parent.parent / "sandbox" / "sidecar.mjs" + +DEFAULT_TIMEOUT_MS = 10_000 + + +class SandboxError(RuntimeError): + """Raised when the sandbox sidecar is unavailable or an exec fails to run.""" + + +class SandboxClient: + """Manages a persistent just-bash sidecar over stdio JSON-RPC. + + The Node process is spawned lazily on first use and reused, so the + in-memory filesystem persists across commands within a session. Requests + are serialized under a lock (one request, one response line). 
+ """ + + def __init__( + self, + *, + node: str | None = None, + sidecar: Path | None = None, + default_timeout_ms: int = DEFAULT_TIMEOUT_MS, + ) -> None: + self._node = node or shutil.which("node") + self._sidecar = Path(sidecar) if sidecar else _SIDECAR + self._default_timeout_ms = default_timeout_ms + self._proc: subprocess.Popen[str] | None = None + self._lock = threading.Lock() + self._id = 0 + + def _ensure(self) -> subprocess.Popen[str]: + if self._proc is not None and self._proc.poll() is None: + return self._proc + if not self._node: + raise SandboxError( + "node not found on PATH; install Node >=18 to use the bash sandbox" + ) + if not self._sidecar.exists(): + raise SandboxError( + f"sandbox sidecar not found at {self._sidecar}; " + f"run `npm install` in {self._sidecar.parent}" + ) + self._proc = subprocess.Popen( + [self._node, str(self._sidecar)], + cwd=str(self._sidecar.parent), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + bufsize=1, + ) + return self._proc + + def _rpc(self, req: dict[str, Any]) -> dict[str, Any]: + with self._lock: + proc = self._ensure() + assert proc.stdin is not None and proc.stdout is not None + self._id += 1 + payload = {"id": self._id, **req} + try: + proc.stdin.write(json.dumps(payload) + "\n") + proc.stdin.flush() + line = proc.stdout.readline() + except (BrokenPipeError, ValueError) as e: # pragma: no cover - process died + raise SandboxError(f"sandbox sidecar communication failed: {e}") from e + if not line: + raise SandboxError("sandbox sidecar closed unexpectedly") + return json.loads(line) + + def ping(self) -> dict[str, Any]: + return self._rpc({"op": "ping"}) + + def exec( + self, command: str, *, timeout_ms: int | None = None, cwd: str | None = None + ) -> dict[str, Any]: + req: dict[str, Any] = { + "op": "exec", + "cmd": command, + "timeoutMs": timeout_ms or self._default_timeout_ms, + } + if cwd: + req["cwd"] = cwd + res = self._rpc(req) + if not res.get("ok"): + 
raise SandboxError(res.get("error", "sandbox exec failed")) + return res + + def reset(self) -> dict[str, Any]: + """Discard the virtual filesystem and start a fresh sandbox.""" + return self._rpc({"op": "reset"}) + + def close(self) -> None: + with self._lock: + proc = self._proc + self._proc = None + if proc is not None and proc.poll() is None: + try: + if proc.stdin is not None: + proc.stdin.close() + except Exception: + pass + try: + proc.terminate() + proc.wait(timeout=3) + except Exception: + try: + proc.kill() + except Exception: + pass + + def __enter__(self) -> "SandboxClient": + return self + + def __exit__(self, *exc: object) -> None: + self.close() + + +def bash_tool(client: SandboxClient | None = None, *, name: str = "bash") -> Tool: + """A ``bash`` :class:`~korgchat.tools.Tool` backed by the just-bash sandbox. + + The result includes ``fs_hash`` (hash of the full virtual-FS state after the + command), which KorgChat chains into the ledger — making the shell session + replayable and tamper-evident. + """ + sandbox = client or SandboxClient() + + def handler(args: dict[str, Any]) -> dict[str, Any]: + command = args.get("command") + if not isinstance(command, str) or not command.strip(): + raise ValueError("bash: 'command' must be a non-empty string") + timeout_ms = args.get("timeout_ms") + res = sandbox.exec( + command, + timeout_ms=timeout_ms if isinstance(timeout_ms, int) else None, + ) + return { + "stdout": res.get("stdout", ""), + "stderr": res.get("stderr", ""), + "exit_code": res.get("exit_code"), + "fs_hash": res.get("fs_hash"), + "fs_files": res.get("fs_files"), + } + + return Tool( + name=name, + description=( + "Run a shell command in a sandboxed bash environment with an " + "in-memory filesystem and NO host or network access. Supports " + "standard unix commands (ls, cat, grep, sed, awk, find, sort, " + "wc, jq, ...), pipes, redirects, variables, and loops. The " + "filesystem persists across calls within a session. 
Returns " + "stdout, stderr, exit_code, and fs_hash (a hash of the resulting " + "filesystem state, used for verifiable replay)." + ), + input_schema={ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to run.", + }, + "timeout_ms": { + "type": "integer", + "description": "Optional per-command timeout in milliseconds.", + }, + }, + "required": ["command"], + }, + handler=handler, + ) + + +def tools_with_sandbox( + *, frozen_time: float | None = None, client: SandboxClient | None = None +) -> ToolRegistry: + """The default builtins plus the sandboxed ``bash`` tool.""" + registry = default_tools(frozen_time=frozen_time) + registry.register(bash_tool(client)) + return registry diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py new file mode 100644 index 0000000..e61de38 --- /dev/null +++ b/tests/test_sandbox.py @@ -0,0 +1,116 @@ +"""Sandboxed `bash` tool + verifiable ledger-exec. + +These tests need Node and the just-bash sidecar (run `npm install` in +``sandbox/``). They skip gracefully when that isn't available so the rest of +the suite stays green on a bare checkout. 
+""" + +import json +import shutil + +import pytest + +from korgchat.chat import ChatSession, MockResponder, Reply, ToolUse +from korgchat.sandbox import SandboxClient, bash_tool, tools_with_sandbox + + +def _sandbox_available() -> bool: + if not shutil.which("node"): + return False + try: + SandboxClient().ping() + return True + except Exception: + return False + + +pytestmark = pytest.mark.skipif( + not _sandbox_available(), + reason="node + just-bash sandbox unavailable (run `npm install` in sandbox/)", +) + + +def test_exec_persists_and_hash_is_deterministic(): + sb = SandboxClient() + try: + run = bash_tool(sb).call + r1 = run({"command": "echo persisted > /w.txt; cat /w.txt"}) + assert r1["stdout"] == "persisted\n" + assert r1["exit_code"] == 0 + assert len(r1["fs_hash"]) == 64 + + # the in-memory filesystem persists across tool calls within a session + r2 = run({"command": "wc -c < /w.txt"}) + assert r2["stdout"].strip() == "10" + assert r2["fs_hash"] == r1["fs_hash"] # read-only: state unchanged + + # mutating then restoring state returns to the exact prior hash + r3 = run({"command": "rm /w.txt"}) + assert r3["fs_hash"] != r1["fs_hash"] + + # replay from a fresh sandbox reproduces the same hash (deterministic) + sb.reset() + r4 = run({"command": "echo persisted > /w.txt; cat /w.txt"}) + assert r4["fs_hash"] == r1["fs_hash"] + finally: + sb.close() + + +def test_sandbox_cannot_reach_host_filesystem(): + sb = SandboxClient() + try: + # the host's real /etc/passwd must not be visible inside the sandbox + out = bash_tool(sb).call({"command": "cat /etc/passwd 2>&1 || true"}) + assert "root:" not in out["stdout"] + finally: + sb.close() + + +def test_bash_exec_is_recorded_in_the_ledger_with_fs_hash(tmp_path): + jpath = str(tmp_path / "journal.json") + responder = MockResponder( + replies=[ + Reply( + tool_uses=[ + ToolUse( + id="t1", + name="bash", + input={"command": "echo verifiable > /proof.txt; wc -c < /proof.txt"}, + ) + ] + ), + Reply(text="done"), + ] + ) 
+ sb = SandboxClient() + try: + session = ChatSession( + journal_path=jpath, + responder=responder, + tools=tools_with_sandbox(client=sb), + ) + turn = session.send("write a file and count its bytes") + assert len(turn.tool_calls) == 1 + + events = json.load(open(jpath)) + + # the bash exec landed in the ledger, carrying its fs_hash + bash_event = next( + e + for e in events + if e["event"]["tool_name"] == "bash" + and "fs_hash" in (e["event"].get("result") or {}) + ) + result = bash_event["event"]["result"] + assert result["stdout"].strip() == "11" # len("verifiable\n") + assert result["exit_code"] == 0 + assert len(result["fs_hash"]) == 64 + + # the exec is hash-chained into the ledger (genesis -> ... continuity) + prev = "0" * 64 + for e in events: + assert e["prev_hash"] == prev, f"chain breaks at seq {e['seq_id']}" + assert e.get("entry_hash"), f"seq {e['seq_id']} not chained" + prev = e["entry_hash"] + finally: + sb.close() From 296a29624f9b307de7f66afe28fc3fafb54da013 Mon Sep 17 00:00:00 2001 From: ares <285551516+New1Direction@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:00:01 -0700 Subject: [PATCH 2/6] feat(sandbox): mandate-gated shell (command allowlist + ledger verdict) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sandboxed bash tool can be constrained to a command allowlist, enforced physically (just-bash registers only allowed commands) AND as a pre-exec AST verdict that fails closed on denied or dynamically-named commands ($CMD). Each call carries {decision, reasons, commands_used, mandate_hash} recorded to the ledger — so what the agent was allowed to run is itself provable. 
- sidecar: configure op + parse()-based command extraction + verdict - shell_mandate(), SandboxClient(mandate=)/.configure(), tools_with_sandbox(mandate=) - --mandate-allow CLI flag - tests: allow/deny gating, file-untouched-on-reject, dynamic fail-closed, verdict recorded in the ledger --- CHANGELOG.md | 1 + README.md | 17 ++++++ sandbox/sidecar.mjs | 122 ++++++++++++++++++++++++++++++++++----- src/korgchat/__main__.py | 18 +++++- src/korgchat/sandbox.py | 83 +++++++++++++++++++++----- tests/test_sandbox.py | 58 ++++++++++++++++++- 6 files changed, 266 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61ab902..4934a4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - **Sandboxed `bash` tool with verifiable exec (`--sandbox`).** New `korgchat.sandbox` module adds a `bash` tool backed by [just-bash](https://github.com/vercel-labs/just-bash) — a JS reimplementation of bash + ~90 coreutils over an in-memory filesystem, run as a persistent Node sidecar. The shell physically cannot reach the host filesystem or network (no network/python/js by default). Every `exec` returns `fs_hash`, a hash of the full virtual-filesystem state after the command; because each tool call is hash-chained into the ledger, the agent's shell session is **tamper-evident and replayable** — the same commands from a fresh sandbox reproduce the same hashes. Exports `SandboxClient` (stdio JSON-RPC), `bash_tool()`, and `tools_with_sandbox()`; enable in the CLI with `--sandbox` (requires Node ≥18 and `npm install` in `sandbox/`). +- **Mandate-gated shell (`--mandate-allow`).** The sandboxed `bash` tool can be constrained to a command allowlist. Enforced two ways: just-bash only registers the allowed commands (physical), and each line is parsed before exec so a disallowed or dynamically-named command (`$CMD`) is rejected — **fail-closed**. 
Every call carries a verdict (`{decision, reasons, commands_used, mandate_hash}`) recorded to the ledger, so what the agent was *allowed* to run is itself provable. New `shell_mandate(allow, deny)`; `SandboxClient(mandate=...)` / `.configure()`; `tools_with_sandbox(mandate=...)`; CLI `--mandate-allow ls,cat,grep,...`. - **Auto-context injection is now a first-class ledger event.** Previously the recall-augmented preamble the model actually saw was a *ghost* — the journal recorded only the user's original prompt. Now, whenever auto-context injects a preamble, a `context_injection` event is written capturing the preamble text, the recall query, and the matched `seq_id`s + scores, causally chained `user_prompt → context_injection → llm_inference`. The user_prompt event still records only what the user typed; the injected context is a separate, auditable, replayable event. New `AutoContextEngine.build_context()` returns a `ContextInjection` (preamble + structured matches); `build_preamble()` is now a thin wrapper over it. - **Tool-schema snapshot + conformance events.** Every tool execution is now bracketed by two events: a `tool_schema_snapshot` *before* the call (the declared `input_schema`, `description`, and a deterministic `schema_hash`) and a `tool_validation` *after* (did the call's input conform to the declared schema? did the call succeed?). A replayed conversation stays meaningful even after a tool's schema changes — the contract it ran against is frozen on the ledger, and a stale call is detectable. New `korgchat.schema` module: `schema_hash()` (canonical sha256, byte-for-byte aligned with `korg-ledger@v1` canonicalization) and a dependency-free `validate_input()`. 
diff --git a/README.md b/README.md index 1637a4f..cabd75c 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,23 @@ from korgchat.sandbox import tools_with_sandbox session = ChatSession(journal_path=..., responder=..., tools=tools_with_sandbox()) ``` +**Mandate (`--mandate-allow`).** Constrain the shell to a command allowlist. +It's enforced two ways — just-bash only registers the allowed commands +(physical), and each line is parsed before exec so a disallowed or +dynamically-named command (`$CMD`) is rejected (fail-closed). Every call +carries a verdict that's recorded to the ledger, so what the agent was +*allowed* to run is itself provable. + +```bash +korgchat --mandate-allow "ls,cat,grep,sed,awk,find,sort,wc,head,tail,echo" +``` + +```python +from korgchat.sandbox import tools_with_sandbox, shell_mandate + +tools = tools_with_sandbox(mandate=shell_mandate(["ls", "cat", "grep"], deny=["rm"])) +``` + Requires Node ≥18 and a one-time `npm install` in `sandbox/`. ## Streaming (v0.4.2) diff --git a/sandbox/sidecar.mjs b/sandbox/sidecar.mjs index db8b388..84160fb 100644 --- a/sandbox/sidecar.mjs +++ b/sandbox/sidecar.mjs @@ -7,31 +7,101 @@ // Protocol: newline-delimited JSON, one request -> one response. 
// -> {"id":1,"op":"ping"} // <- {"id":1,"ok":true,"version":"..."} -// -> {"id":2,"op":"exec","cmd":"echo hi > a.txt; cat a.txt","timeoutMs":10000} -// <- {"id":2,"ok":true,"stdout":"hi\n","stderr":"","exit_code":0, -// "fs_hash":"","fs_files":N} -// -> {"id":3,"op":"reset"} // fresh sandbox +// -> {"id":2,"op":"configure","mandate":{"allow":["ls","cat","grep"]}} +// <- {"id":2,"ok":true,"mandate_hash":"...","fs_hash":"...","fs_files":N} +// -> {"id":3,"op":"exec","cmd":"echo hi > a.txt; cat a.txt","timeoutMs":10000} +// <- {"id":3,"ok":true,"gate":{"decision":"ACCEPT",...},"stdout":"hi\n", +// "stderr":"","exit_code":0,"fs_hash":"...","fs_files":N} +// -> {"id":4,"op":"reset"} // fresh sandbox (keeps the mandate) // // Safe by default: InMemoryFs, no network, no python, no js-exec. The shell -// physically cannot reach the host filesystem or network. +// physically cannot reach the host filesystem or network. A mandate adds a +// command allowlist enforced two ways: just-bash only registers the allowed +// commands (physical), AND each line is parsed before exec so a disallowed or +// dynamically-named command is rejected with a verdict (fail-closed). -import { Bash } from "just-bash"; +import { Bash, parse } from "just-bash"; import { createHash } from "node:crypto"; import { createInterface } from "node:readline"; -const VERSION = "korgchat-sandbox/1 (just-bash 3.0.1)"; +const VERSION = "korgchat-sandbox/2 (just-bash 3.0.1)"; + +// mandate: { allow?: string[], deny?: string[] } | null (null = unrestricted) +let mandate = null; +let mandateHash = null; function newBash() { - // All capability flags omitted => safest defaults (no net/python/js, InMemoryFs). 
- return new Bash(); + const opts = {}; + if (mandate && Array.isArray(mandate.allow) && mandate.allow.length) { + opts.commands = mandate.allow; // physical: only these commands are registered + } + return new Bash(opts); } let bash = newBash(); const sha256 = (data) => createHash("sha256").update(data).digest("hex"); +// Stable JSON (sorted keys) so a mandate hashes the same regardless of order. +function stable(v) { + if (v === null || typeof v !== "object") return JSON.stringify(v); + if (Array.isArray(v)) return "[" + v.map(stable).join(",") + "]"; + return "{" + Object.keys(v).sort().map((k) => JSON.stringify(k) + ":" + stable(v[k])).join(",") + "}"; +} + +const DYNAMIC = ""; + +// Recursively collect the command names a parsed line invokes. A command whose +// name is not a single static literal (e.g. `$CMD`) is reported as DYNAMIC so +// the mandate can fail closed on it. +function collectCommands(node, out) { + if (!node || typeof node !== "object") return; + if (Array.isArray(node)) { + for (const n of node) collectCommands(n, out); + return; + } + if (node.type === "SimpleCommand" && node.name) { + const parts = node.name.parts; + if (Array.isArray(parts) && parts.length === 1 && parts[0].type === "Literal") { + out.add(String(parts[0].value)); + } else { + out.add(DYNAMIC); + } + } + for (const k of Object.keys(node)) collectCommands(node[k], out); +} + +function gateVerdict(cmd) { + if (!mandate) return { decision: "ACCEPT", reasons: [], commands_used: [], mandate_hash: null }; + let names; + try { + const out = new Set(); + collectCommands(parse(cmd), out); + names = [...out]; + } catch (e) { + return { + decision: "REJECT", + reasons: [`unparseable command: ${(e && e.message) || e}`], + commands_used: [], + mandate_hash: mandateHash, + }; + } + const allow = mandate.allow; + const deny = mandate.deny || []; + const reasons = []; + for (const n of names) { + if (n === DYNAMIC) reasons.push("dynamic command name (variable/expansion) not permitted under 
mandate"); + else if (deny.includes(n)) reasons.push(`command '${n}' is explicitly denied`); + else if (allow && allow.length && !allow.includes(n)) reasons.push(`command '${n}' not in mandate allowlist`); + } + return { + decision: reasons.length ? "REJECT" : "ACCEPT", + reasons, + commands_used: names, + mandate_hash: mandateHash, + }; +} + // Walk the virtual filesystem into a deterministic manifest and hash it. -// The hash is a pure function of full FS state => same commands from genesis -// reproduce the same hash (replayable / tamper-evident when chained). async function fsHash(root = "/") { const fs = bash.fs; const lines = []; @@ -73,21 +143,45 @@ async function handle(req) { const { id, op } = req; try { if (op === "ping") return { id, ok: true, version: VERSION }; - if (op === "reset") { + if (op === "configure") { + mandate = req.mandate && typeof req.mandate === "object" ? req.mandate : null; + mandateHash = mandate ? sha256(stable(mandate)) : null; bash = newBash(); const f = await fsHash(); - return { id, ok: true, fs_hash: f.hash, fs_files: f.files }; + return { id, ok: true, mandate_hash: mandateHash, fs_hash: f.hash, fs_files: f.files }; + } + if (op === "reset") { + bash = newBash(); // keeps the current mandate + const f = await fsHash(); + return { id, ok: true, mandate_hash: mandateHash, fs_hash: f.hash, fs_files: f.files }; } if (op === "exec") { + const cmd = String(req.cmd ?? ""); + const gate = gateVerdict(cmd); + if (gate.decision === "REJECT") { + const f = await fsHash(req.fsRoot ?? "/"); + return { + id, + ok: true, + blocked: true, + gate, + stdout: "", + stderr: "blocked by mandate: " + gate.reasons.join("; ") + "\n", + exit_code: 126, + fs_hash: f.hash, + fs_files: f.files, + }; + } const opts = {}; if (req.cwd) opts.cwd = req.cwd; if (req.env && typeof req.env === "object") opts.env = req.env; opts.signal = AbortSignal.timeout(req.timeoutMs ?? 10000); - const r = await bash.exec(String(req.cmd ?? 
""), opts); + const r = await bash.exec(cmd, opts); const f = await fsHash(req.fsRoot ?? "/"); return { id, ok: true, + gate, stdout: r.stdout, stderr: r.stderr, exit_code: r.exitCode, diff --git a/src/korgchat/__main__.py b/src/korgchat/__main__.py index 8560f5c..942cbea 100644 --- a/src/korgchat/__main__.py +++ b/src/korgchat/__main__.py @@ -13,7 +13,7 @@ from korgchat import __version__ from korgchat.chat import ChatSession, MockResponder, ToolCall, select_responder from korgchat.recall import RecallEngine, format_matches -from korgchat.sandbox import SandboxClient, SandboxError, tools_with_sandbox +from korgchat.sandbox import SandboxClient, SandboxError, shell_mandate, tools_with_sandbox from korgchat.summary import SummarizeEngine @@ -79,6 +79,14 @@ def _build_parser() -> argparse.ArgumentParser: "`npm install` in sandbox/. Every command and the resulting " "filesystem hash are recorded to the ledger for verifiable replay.", ) + p.add_argument( + "--mandate-allow", + metavar="CMDS", + help="Comma-separated allowlist of shell commands the sandbox may run " + "(e.g. 'ls,cat,grep,sed,awk,find'). Enables --sandbox. 
Commands " + "outside the list are blocked and the verdict is recorded to the " + "ledger; the allowlist is enforced physically and as a pre-exec check.", + ) return p @@ -527,9 +535,13 @@ def main(argv: list[str] | None = None) -> int: responder=responder, auto_context=args.auto_context, ) - if args.sandbox: + mandate = None + if args.mandate_allow: + allow = [c.strip() for c in args.mandate_allow.split(",") if c.strip()] + mandate = shell_mandate(allow) + if args.sandbox or mandate is not None: try: - sandbox_client = SandboxClient() + sandbox_client = SandboxClient(mandate=mandate) sandbox_client.ping() except SandboxError as e: print(f"[korgchat] --sandbox unavailable: {e}", file=sys.stderr) diff --git a/src/korgchat/sandbox.py b/src/korgchat/sandbox.py index bcb5cc3..00e929a 100644 --- a/src/korgchat/sandbox.py +++ b/src/korgchat/sandbox.py @@ -55,10 +55,12 @@ def __init__( node: str | None = None, sidecar: Path | None = None, default_timeout_ms: int = DEFAULT_TIMEOUT_MS, + mandate: dict[str, Any] | None = None, ) -> None: self._node = node or shutil.which("node") self._sidecar = Path(sidecar) if sidecar else _SIDECAR self._default_timeout_ms = default_timeout_ms + self._mandate = mandate self._proc: subprocess.Popen[str] | None = None self._lock = threading.Lock() self._id = 0 @@ -84,23 +86,34 @@ def _ensure(self) -> subprocess.Popen[str]: text=True, bufsize=1, ) + # Apply the mandate to the fresh process before any exec (lock already held). + if self._mandate is not None: + self._id += 1 + self._send_recv( + {"id": self._id, "op": "configure", "mandate": self._mandate}, self._proc + ) return self._proc + def _send_recv( + self, payload: dict[str, Any], proc: subprocess.Popen[str] + ) -> dict[str, Any]: + """Write one request + read one response line. 
Caller holds the lock.""" + assert proc.stdin is not None and proc.stdout is not None + try: + proc.stdin.write(json.dumps(payload) + "\n") + proc.stdin.flush() + line = proc.stdout.readline() + except (BrokenPipeError, ValueError) as e: # pragma: no cover - process died + raise SandboxError(f"sandbox sidecar communication failed: {e}") from e + if not line: + raise SandboxError("sandbox sidecar closed unexpectedly") + return json.loads(line) + def _rpc(self, req: dict[str, Any]) -> dict[str, Any]: with self._lock: proc = self._ensure() - assert proc.stdin is not None and proc.stdout is not None self._id += 1 - payload = {"id": self._id, **req} - try: - proc.stdin.write(json.dumps(payload) + "\n") - proc.stdin.flush() - line = proc.stdout.readline() - except (BrokenPipeError, ValueError) as e: # pragma: no cover - process died - raise SandboxError(f"sandbox sidecar communication failed: {e}") from e - if not line: - raise SandboxError("sandbox sidecar closed unexpectedly") - return json.loads(line) + return self._send_recv({"id": self._id, **req}, proc) def ping(self) -> dict[str, Any]: return self._rpc({"op": "ping"}) @@ -121,9 +134,19 @@ def exec( return res def reset(self) -> dict[str, Any]: - """Discard the virtual filesystem and start a fresh sandbox.""" + """Discard the virtual filesystem and start a fresh sandbox (keeps the mandate).""" return self._rpc({"op": "reset"}) + def configure(self, mandate: dict[str, Any] | None) -> dict[str, Any]: + """Set the command mandate (``{"allow": [...], "deny": [...]}``). + + The allowlist is enforced physically (only those commands are + registered in the sandbox) and as a pre-exec verdict. Resets the + virtual filesystem. 
+ """ + self._mandate = mandate + return self._rpc({"op": "configure", "mandate": mandate}) + def close(self) -> None: with self._lock: proc = self._proc @@ -168,13 +191,19 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: command, timeout_ms=timeout_ms if isinstance(timeout_ms, int) else None, ) - return { + out: dict[str, Any] = { "stdout": res.get("stdout", ""), "stderr": res.get("stderr", ""), "exit_code": res.get("exit_code"), "fs_hash": res.get("fs_hash"), "fs_files": res.get("fs_files"), } + # Mandate verdict (present only when a mandate is configured). + if "gate" in res: + out["gate"] = res["gate"] + if res.get("blocked"): + out["blocked"] = True + return out return Tool( name=name, @@ -205,10 +234,34 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: ) +def shell_mandate(allow: list[str], deny: list[str] | None = None) -> dict[str, Any]: + """Build a shell mandate: an allowlist of commands (plus optional denylist). + + The allowlist is the security boundary — only those commands are registered + in the sandbox. ``deny`` overrides ``allow`` for finer policy. + """ + mandate: dict[str, Any] = {"allow": list(allow)} + if deny: + mandate["deny"] = list(deny) + return mandate + + def tools_with_sandbox( - *, frozen_time: float | None = None, client: SandboxClient | None = None + *, + frozen_time: float | None = None, + client: SandboxClient | None = None, + mandate: dict[str, Any] | None = None, ) -> ToolRegistry: - """The default builtins plus the sandboxed ``bash`` tool.""" + """The default builtins plus the sandboxed ``bash`` tool. + + Pass ``mandate`` (e.g. from :func:`shell_mandate`) to gate the shell to an + allowlist of commands; the verdict for each call is carried in the tool + result and recorded to the ledger. 
+ """ + if client is None: + client = SandboxClient(mandate=mandate) + elif mandate is not None: + client.configure(mandate) registry = default_tools(frozen_time=frozen_time) registry.register(bash_tool(client)) return registry diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index e61de38..538faa5 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -11,7 +11,7 @@ import pytest from korgchat.chat import ChatSession, MockResponder, Reply, ToolUse -from korgchat.sandbox import SandboxClient, bash_tool, tools_with_sandbox +from korgchat.sandbox import SandboxClient, bash_tool, shell_mandate, tools_with_sandbox def _sandbox_available() -> bool: @@ -114,3 +114,59 @@ def test_bash_exec_is_recorded_in_the_ledger_with_fs_hash(tmp_path): prev = e["entry_hash"] finally: sb.close() + + +def test_mandate_allowlist_gates_commands(): + sb = SandboxClient(mandate=shell_mandate(["echo", "cat", "ls"], deny=["rm"])) + try: + run = bash_tool(sb).call + + ok = run({"command": "echo hi > /a.txt; cat /a.txt"}) + assert ok["gate"]["decision"] == "ACCEPT" + assert ok["stdout"] == "hi\n" + assert not ok.get("blocked") + + # explicitly denied command is blocked, and the exec never runs + denied = run({"command": "rm /a.txt"}) + assert denied["gate"]["decision"] == "REJECT" + assert denied.get("blocked") is True + assert denied["exit_code"] == 126 + assert any("rm" in r for r in denied["gate"]["reasons"]) + assert run({"command": "cat /a.txt"})["stdout"] == "hi\n" # file untouched + + # command outside the allowlist is rejected + assert run({"command": "curl http://x"})["gate"]["decision"] == "REJECT" + + # a dynamically-named command fails closed + assert run({"command": "C=ls; $C /"})["gate"]["decision"] == "REJECT" + finally: + sb.close() + + +def test_mandate_verdict_is_recorded_in_the_ledger(tmp_path): + jpath = str(tmp_path / "journal.json") + responder = MockResponder( + replies=[ + Reply(tool_uses=[ToolUse(id="t1", name="bash", input={"command": "rm -rf 
/"})]), + Reply(text="that was blocked"), + ] + ) + sb = SandboxClient(mandate=shell_mandate(["echo", "ls", "cat"])) + try: + ChatSession( + journal_path=jpath, + responder=responder, + tools=tools_with_sandbox(client=sb), + ).send("delete everything") + events = json.load(open(jpath)) + bash_event = next( + e + for e in events + if e["event"]["tool_name"] == "bash" and "gate" in (e["event"].get("result") or {}) + ) + result = bash_event["event"]["result"] + assert result["gate"]["decision"] == "REJECT" + assert result.get("blocked") is True + assert result["gate"]["mandate_hash"] # the mandate it ran against is on the ledger + finally: + sb.close() From 9f29a8fc2cfee882b4192ff62938e979c5ca33f0 Mon Sep 17 00:00:00 2001 From: ares <285551516+New1Direction@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:23:10 -0700 Subject: [PATCH 3/6] feat(gate): goldseel-gated `pay` tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A `pay` tool that authorizes payments through the owned goldseel mandate- enforcement model (Modal, serverless). Deterministic spend-cap floor first (short-circuits the pay-per-call model on over-cap), then goldseel judges the payment vs the authorized intent -> 3-way decision: ACCEPT / REJECT / ESCALATE (unreachable defers to a human, never auto-approves). Decision + verdict + mandate hash recorded to the ledger. - korgchat.gate: GoldseelGate (injectable), payment_mandate(), goldseel_pay_tool() - tests: offline with a fake judge + opt-in live test (KORGCHAT_GOLDSEEL_LIVE=1) Dogfood note: the wiring is correct and proven live; the currently-deployed goldseel over-rejects valid payments (a model-quality / retrain issue, separate from this integration — the gate is model-agnostic via GOLDSEEL_URL). 
--- CHANGELOG.md | 1 + README.md | 28 +++++++ src/korgchat/gate.py | 195 +++++++++++++++++++++++++++++++++++++++++++ tests/test_gate.py | 135 ++++++++++++++++++++++++++++++ 4 files changed, 359 insertions(+) create mode 100644 src/korgchat/gate.py create mode 100644 tests/test_gate.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4934a4b..173a4a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - **Sandboxed `bash` tool with verifiable exec (`--sandbox`).** New `korgchat.sandbox` module adds a `bash` tool backed by [just-bash](https://github.com/vercel-labs/just-bash) — a JS reimplementation of bash + ~90 coreutils over an in-memory filesystem, run as a persistent Node sidecar. The shell physically cannot reach the host filesystem or network (no network/python/js by default). Every `exec` returns `fs_hash`, a hash of the full virtual-filesystem state after the command; because each tool call is hash-chained into the ledger, the agent's shell session is **tamper-evident and replayable** — the same commands from a fresh sandbox reproduce the same hashes. Exports `SandboxClient` (stdio JSON-RPC), `bash_tool()`, and `tools_with_sandbox()`; enable in the CLI with `--sandbox` (requires Node ≥18 and `npm install` in `sandbox/`). - **Mandate-gated shell (`--mandate-allow`).** The sandboxed `bash` tool can be constrained to a command allowlist. Enforced two ways: just-bash only registers the allowed commands (physical), and each line is parsed before exec so a disallowed or dynamically-named command (`$CMD`) is rejected — **fail-closed**. Every call carries a verdict (`{decision, reasons, commands_used, mandate_hash}`) recorded to the ledger, so what the agent was *allowed* to run is itself provable. New `shell_mandate(allow, deny)`; `SandboxClient(mandate=...)` / `.configure()`; `tools_with_sandbox(mandate=...)`; CLI `--mandate-allow ls,cat,grep,...`. 
+- **goldseel-gated `pay` tool.** New `korgchat.gate` module: a `pay` tool that authorizes a payment through the owned **goldseel** mandate-enforcement model (served on Modal, serverless). A deterministic spend-cap floor runs first — and short-circuits the pay-per-call model on an over-cap payment; goldseel then judges the payment against the authorized intent. Maps to a three-way decision: **REJECT** (cap or goldseel), **ESCALATE** (goldseel unreachable → defer to a human, *never* auto-approve), **ACCEPT** (within cap and approved). The decision + verdict + mandate hash are recorded to the ledger, so what an agent was allowed to spend is provable. New `GoldseelGate` (injectable), `payment_mandate()`, `goldseel_pay_tool()`. Offline tests use a fake judge; a live endpoint test is opt-in (`KORGCHAT_GOLDSEEL_LIVE=1`). - **Auto-context injection is now a first-class ledger event.** Previously the recall-augmented preamble the model actually saw was a *ghost* — the journal recorded only the user's original prompt. Now, whenever auto-context injects a preamble, a `context_injection` event is written capturing the preamble text, the recall query, and the matched `seq_id`s + scores, causally chained `user_prompt → context_injection → llm_inference`. The user_prompt event still records only what the user typed; the injected context is a separate, auditable, replayable event. New `AutoContextEngine.build_context()` returns a `ContextInjection` (preamble + structured matches); `build_preamble()` is now a thin wrapper over it. - **Tool-schema snapshot + conformance events.** Every tool execution is now bracketed by two events: a `tool_schema_snapshot` *before* the call (the declared `input_schema`, `description`, and a deterministic `schema_hash`) and a `tool_validation` *after* (did the call's input conform to the declared schema? did the call succeed?). 
A replayed conversation stays meaningful even after a tool's schema changes — the contract it ran against is frozen on the ledger, and a stale call is detectable. New `korgchat.schema` module: `schema_hash()` (canonical sha256, byte-for-byte aligned with `korg-ledger@v1` canonicalization) and a dependency-free `validate_input()`. diff --git a/README.md b/README.md index cabd75c..9db6dc7 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,34 @@ tools = tools_with_sandbox(mandate=shell_mandate(["ls", "cat", "grep"], deny=["r Requires Node ≥18 and a one-time `npm install` in `sandbox/`. +## Payments (goldseel-gated) + +The `pay` tool (`korgchat.gate`) authorizes a payment through **goldseel**, an +owned mandate-enforcement model served on Modal. A deterministic spend-cap runs +first; goldseel then judges the payment against the authorized intent. The +outcome is three-way: + +- **ACCEPT** — within cap and goldseel approved +- **REJECT** — over cap, or goldseel rejected +- **ESCALATE** — goldseel unreachable → defer to a human (never auto-approved) + +```python +from korgchat import ChatSession +from korgchat.gate import goldseel_pay_tool, payment_mandate +from korgchat.tools import default_tools + +tools = default_tools() +tools.register(goldseel_pay_tool(payment_mandate( + "Pay only for AI inference / GPU compute. No gambling, adult, or crypto-trading.", + spend_cap_usd=50, +))) +session = ChatSession(journal_path=..., responder=..., tools=tools) +``` + +The decision, the goldseel verdict, and the mandate hash are recorded to the +ledger — so *what an agent was allowed to spend, and why,* is provable. The +gate is model-agnostic: point `GOLDSEEL_URL` at any goldseel deployment. 
+ ## Streaming (v0.4.2) By default, every assistant text reply streams to stdout character-by-character diff --git a/src/korgchat/gate.py b/src/korgchat/gate.py new file mode 100644 index 0000000..a64b95d --- /dev/null +++ b/src/korgchat/gate.py @@ -0,0 +1,195 @@ +"""goldseel-gated ``pay`` tool for KorgChat. + +goldseel is an owned mandate-enforcement model (served on Modal, serverless): +given an INTENT (what the human authorized), a MANDATE_SUMMARY (spend cap, +expiry, recipient policy), and a REDEMPTION (the proposed payment), it returns +``approve`` / ``reject``. The ``pay`` tool consults it *before* authorizing a +payment, layered over a deterministic spend-cap floor, and maps the outcome to +a three-way decision: + + * **REJECT** — a deterministic check failed, or goldseel rejected it. + * **ESCALATE** — goldseel was unreachable (defer to a human; fail-safe, never + auto-approve when the owned model is down). + * **ACCEPT** — within cap and goldseel approved. + +The decision + the goldseel verdict + the mandate hash are returned in the tool +result, which KorgChat hash-chains into the korg ledger — so *what an agent was +allowed to spend, and why,* is provable. + +Settlement itself is out of scope here (the x402 on-chain path lives in the +quaestor demo); ``pay`` records the authorization decision and, on ACCEPT, +marks a simulated settlement. +""" + +from __future__ import annotations + +import hashlib +import json +import os +import urllib.error +import urllib.request +from typing import Any, Protocol + +from .tools import Tool + +# goldseel on Modal (serverless, native shape: {intent, mandate_summary, redemption} -> {verdict, reasoning}). +DEFAULT_GOLDSEEL_URL = "https://kabukich0--goldseel-endpoint-goldseel-evaluate.modal.run" + + +class Gate(Protocol): + """Anything that can judge a redemption against a mandate.""" + + def evaluate( + self, intent: str, mandate_summary: dict[str, Any], redemption: dict[str, Any] + ) -> dict[str, Any]: ... 
+ + +class GoldseelGate: + """Calls the goldseel Modal endpoint (native shape). Never raises. + + Returns ``{"verdict": "approve"|"reject"|"skip", "reasoning": str}``; + ``"skip"`` means goldseel was unreachable (the caller should escalate). + """ + + def __init__(self, url: str | None = None, *, timeout: float = 35.0) -> None: + self.url = url or os.environ.get("GOLDSEEL_URL") or DEFAULT_GOLDSEEL_URL + self.timeout = timeout + + def evaluate( + self, intent: str, mandate_summary: dict[str, Any], redemption: dict[str, Any] + ) -> dict[str, Any]: + body = json.dumps( + {"intent": intent, "mandate_summary": mandate_summary, "redemption": redemption} + ).encode() + req = urllib.request.Request( + self.url, data=body, headers={"Content-Type": "application/json"}, method="POST" + ) + try: + with urllib.request.urlopen(req, timeout=self.timeout) as resp: + data = json.loads(resp.read()) + except (urllib.error.URLError, TimeoutError, OSError, json.JSONDecodeError) as e: + return {"verdict": "skip", "reasoning": f"goldseel unreachable: {e}"} + verdict = str(data.get("verdict", "")).lower() + normalized = "approve" if "approve" in verdict else "reject" if "reject" in verdict else "skip" + return {"verdict": normalized, "reasoning": str(data.get("reasoning", ""))} + + +def payment_mandate( + intent: str, spend_cap_usd: float, *, recipient_policy: Any = None +) -> dict[str, Any]: + """Build a payment mandate: the human-authorized intent + a spend cap.""" + return { + "intent": intent, + "spend_cap_usd": float(spend_cap_usd), + "recipient_policy": recipient_policy, + } + + +def _mandate_hash(mandate: dict[str, Any]) -> str: + return hashlib.sha256( + json.dumps(mandate, sort_keys=True, separators=(",", ":")).encode() + ).hexdigest() + + +def goldseel_pay_tool( + mandate: dict[str, Any], + *, + gate: Gate | None = None, + name: str = "pay", + simulate: bool = True, +) -> Tool: + """A ``pay`` :class:`~korgchat.tools.Tool` gated by goldseel. 
+ + ``mandate`` is a :func:`payment_mandate`. The tool tracks remaining cap + across calls within the session. + """ + judge: Gate = gate or GoldseelGate() + mandate_hash = _mandate_hash(mandate) + state = {"remaining": float(mandate["spend_cap_usd"])} + + def handler(args: dict[str, Any]) -> dict[str, Any]: + amount = args.get("amount_usd") + if not isinstance(amount, (int, float)) or amount <= 0: + raise ValueError("pay: 'amount_usd' must be a positive number") + amount = float(amount) + recipient = args.get("recipient_name") or args.get("recipient_domain") or "unknown" + + reasons: list[str] = [] + # deterministic floor: never exceed the signed cap + if amount > state["remaining"]: + reasons.append( + f"amount ${amount:.2f} exceeds remaining mandate cap ${state['remaining']:.2f}" + ) + + # Consult goldseel only if the deterministic floor passed — the owned + # model is pay-per-call, so don't spend a call on an already-doomed pay. + verdict = {"verdict": "skip", "reasoning": "not evaluated (deterministic reject)"} + escalate = False + if not reasons: + mandate_summary = { + "spend_cap_remaining": state["remaining"], + "use_counter_remaining": None, + "expiry": None, + "recipient_policy": mandate.get("recipient_policy"), + } + redemption = { + "recipient_domain": args.get("recipient_domain"), + "recipient_name": args.get("recipient_name"), + "recipient_categories": args.get("recipient_categories"), + "vendor_known": bool(args.get("recipient_name") or args.get("recipient_domain")), + "amount_usdc": int(round(amount * 1e6)), + "resource_description": args.get("resource_description"), + } + verdict = judge.evaluate(mandate["intent"], mandate_summary, redemption) + if verdict["verdict"] == "reject": + reasons.append(f"goldseel: {verdict['reasoning']}") + escalate = verdict["verdict"] == "skip" # owned model down -> defer to a human + + decision = "REJECT" if reasons else ("ESCALATE" if escalate else "ACCEPT") + settled = False + if decision == "ACCEPT": + 
state["remaining"] -= amount + settled = simulate + + return { + "decision": decision, + "amount_usd": amount, + "recipient": recipient, + "reasons": reasons, + "goldseel": verdict, + "remaining_after": round(state["remaining"], 6), + "mandate_hash": mandate_hash, + "settled": settled, + "settlement": "simulated" if settled else None, + } + + return Tool( + name=name, + description=( + "Authorize a payment, gated by the goldseel mandate-enforcement " + "model. Returns a decision (ACCEPT / REJECT / ESCALATE) and the " + "goldseel verdict. A deterministic spend-cap is enforced first; " + "goldseel then judges the payment against the authorized intent. " + "If goldseel is unreachable the payment ESCALATEs to a human " + "(never auto-approved). The decision is recorded to the ledger." + ), + input_schema={ + "type": "object", + "properties": { + "amount_usd": {"type": "number", "description": "Amount to pay in USD."}, + "recipient_name": {"type": "string", "description": "Payee name."}, + "recipient_domain": {"type": "string", "description": "Payee domain."}, + "recipient_categories": { + "type": "array", + "items": {"type": "string"}, + "description": "Vendor categories, e.g. ['ml-inference'].", + }, + "resource_description": { + "type": "string", + "description": "What the payment is for.", + }, + }, + "required": ["amount_usd"], + }, + handler=handler, + ) diff --git a/tests/test_gate.py b/tests/test_gate.py new file mode 100644 index 0000000..05dfa55 --- /dev/null +++ b/tests/test_gate.py @@ -0,0 +1,135 @@ +"""goldseel-gated `pay` tool. + +The gate is injectable, so these run offline with a fake judge. The live +endpoint test is opt-in (set ``KORGCHAT_GOLDSEEL_LIVE=1``). +""" + +import json +import os + +import pytest + +from korgchat.chat import ChatSession, MockResponder, Reply, ToolUse +from korgchat.gate import GoldseelGate, goldseel_pay_tool, payment_mandate +from korgchat.tools import default_tools + +INTENT = "Pay only for AI inference / GPU compute. 
No gambling, adult, or crypto-trading." + + +class FakeGate: + def __init__(self, verdict: str, reasoning: str = "fake") -> None: + self.verdict = verdict + self.reasoning = reasoning + self.calls: list = [] + + def evaluate(self, intent, mandate_summary, redemption): + self.calls.append((intent, mandate_summary, redemption)) + return {"verdict": self.verdict, "reasoning": self.reasoning} + + +def _tool(verdict, cap=50.0): + gate = FakeGate(verdict) + return goldseel_pay_tool(payment_mandate(INTENT, cap), gate=gate), gate + + +def test_accept_within_cap_and_approved(): + tool, gate = _tool("approve", cap=50) + r = tool.call( + {"amount_usd": 12, "recipient_name": "OpenAI", "recipient_categories": ["ml-inference"]} + ) + assert r["decision"] == "ACCEPT" + assert r["settled"] is True + assert r["remaining_after"] == 38.0 + assert r["mandate_hash"] + assert len(gate.calls) == 1 + assert gate.calls[0][1]["spend_cap_remaining"] == 50.0 # goldseel saw the right cap + + +def test_remaining_cap_decrements_across_calls(): + tool, _ = _tool("approve", cap=50) + tool.call({"amount_usd": 30, "recipient_name": "OpenAI"}) + r2 = tool.call({"amount_usd": 25, "recipient_name": "OpenAI"}) # 30+25 > 50 + assert r2["decision"] == "REJECT" + assert any("cap" in x for x in r2["reasons"]) + + +def test_reject_when_goldseel_rejects(): + tool, _ = _tool("reject") + r = tool.call( + {"amount_usd": 10, "recipient_name": "Bet365", "recipient_categories": ["gambling"]} + ) + assert r["decision"] == "REJECT" + assert r["settled"] is False + assert any("goldseel" in x for x in r["reasons"]) + + +def test_over_cap_does_not_spend_a_goldseel_call(): + tool, gate = _tool("approve", cap=50) + r = tool.call({"amount_usd": 100, "recipient_name": "OpenAI"}) + assert r["decision"] == "REJECT" + assert gate.calls == [] # deterministic floor short-circuited the paid call + + +def test_escalate_when_goldseel_unreachable(): + tool, _ = _tool("skip") # "skip" == unreachable + r = tool.call({"amount_usd": 10, 
"recipient_name": "OpenAI"}) + assert r["decision"] == "ESCALATE" + assert r["settled"] is False + + +def test_pay_decision_is_recorded_in_the_ledger(tmp_path): + jpath = str(tmp_path / "journal.json") + registry = default_tools() + registry.register(goldseel_pay_tool(payment_mandate(INTENT, 50), gate=FakeGate("approve"))) + responder = MockResponder( + replies=[ + Reply( + tool_uses=[ + ToolUse( + id="p1", + name="pay", + input={ + "amount_usd": 12, + "recipient_name": "OpenAI", + "recipient_categories": ["ml-inference"], + }, + ) + ] + ), + Reply(text="paid"), + ] + ) + ChatSession(journal_path=jpath, responder=responder, tools=registry).send( + "pay OpenAI $12 for inference" + ) + events = json.load(open(jpath)) + pay_event = next( + e + for e in events + if e["event"]["tool_name"] == "pay" and "decision" in (e["event"].get("result") or {}) + ) + result = pay_event["event"]["result"] + assert result["decision"] == "ACCEPT" + assert result["mandate_hash"] + assert result["goldseel"]["verdict"] == "approve" + + +@pytest.mark.skipif( + os.environ.get("KORGCHAT_GOLDSEEL_LIVE") != "1", + reason="set KORGCHAT_GOLDSEEL_LIVE=1 to hit the live goldseel Modal endpoint", +) +def test_live_goldseel_endpoint(): + gate = GoldseelGate() + verdict = gate.evaluate( + INTENT, + {"spend_cap_remaining": 80, "use_counter_remaining": None, "expiry": None, "recipient_policy": None}, + { + "recipient_name": "Bet365", + "recipient_domain": "bet365.com", + "recipient_categories": ["gambling"], + "vendor_known": True, + "amount_usdc": 20_000_000, + "resource_description": "casino deposit", + }, + ) + assert verdict["verdict"] in ("approve", "reject", "skip") From ece3035394d446e1f86f062206c1a20b7f323ec6 Mon Sep 17 00:00:00 2001 From: ares <285551516+New1Direction@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:57:30 -0700 Subject: [PATCH 4/6] feat(ontology): deterministic category knowledge floor that compounds korgchat.ontology: a controlled vocabulary of recipient categories 
(synonyms + is-a hierarchy) + a vendor registry. The pay tool resolves known recipients deterministically (ALLOW/DENY, no model call) and only sends genuine unknowns to goldseel -- making the ml-inference != ai-inference false-reject structurally impossible. It compounds: learn() writes newly-classified recipients back to the registry (optionally persisted), so the known set grows monotonically (fewer model calls, more consistent decisions over time -- a data network effect). - payment_mandate() gains allow_classes/deny_classes (default deny prohibited) - pay result records decided_by (ontology vs goldseel), floor verdict, learned - tests: synonyms/hierarchy, ALLOW/DENY/UNKNOWN, learn+persist, and the pay-tool paths (allow bypasses a reject-happy model; deny bypasses an approve-happy model; unknown consults the model; compounding) --- CHANGELOG.md | 1 + README.md | 26 +++++ src/korgchat/gate.py | 100 ++++++++++++----- src/korgchat/ontology.py | 235 +++++++++++++++++++++++++++++++++++++++ tests/test_gate.py | 54 ++++++++- tests/test_ontology.py | 60 ++++++++++ 6 files changed, 446 insertions(+), 30 deletions(-) create mode 100644 src/korgchat/ontology.py create mode 100644 tests/test_ontology.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 173a4a4..64829c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Sandboxed `bash` tool with verifiable exec (`--sandbox`).** New `korgchat.sandbox` module adds a `bash` tool backed by [just-bash](https://github.com/vercel-labs/just-bash) — a JS reimplementation of bash + ~90 coreutils over an in-memory filesystem, run as a persistent Node sidecar. The shell physically cannot reach the host filesystem or network (no network/python/js by default). 
Every `exec` returns `fs_hash`, a hash of the full virtual-filesystem state after the command; because each tool call is hash-chained into the ledger, the agent's shell session is **tamper-evident and replayable** — the same commands from a fresh sandbox reproduce the same hashes. Exports `SandboxClient` (stdio JSON-RPC), `bash_tool()`, and `tools_with_sandbox()`; enable in the CLI with `--sandbox` (requires Node ≥18 and `npm install` in `sandbox/`). - **Mandate-gated shell (`--mandate-allow`).** The sandboxed `bash` tool can be constrained to a command allowlist. Enforced two ways: just-bash only registers the allowed commands (physical), and each line is parsed before exec so a disallowed or dynamically-named command (`$CMD`) is rejected — **fail-closed**. Every call carries a verdict (`{decision, reasons, commands_used, mandate_hash}`) recorded to the ledger, so what the agent was *allowed* to run is itself provable. New `shell_mandate(allow, deny)`; `SandboxClient(mandate=...)` / `.configure()`; `tools_with_sandbox(mandate=...)`; CLI `--mandate-allow ls,cat,grep,...`. - **goldseel-gated `pay` tool.** New `korgchat.gate` module: a `pay` tool that authorizes a payment through the owned **goldseel** mandate-enforcement model (served on Modal, serverless). A deterministic spend-cap floor runs first — and short-circuits the pay-per-call model on an over-cap payment; goldseel then judges the payment against the authorized intent. Maps to a three-way decision: **REJECT** (cap or goldseel), **ESCALATE** (goldseel unreachable → defer to a human, *never* auto-approve), **ACCEPT** (within cap and approved). The decision + verdict + mandate hash are recorded to the ledger, so what an agent was allowed to spend is provable. New `GoldseelGate` (injectable), `payment_mandate()`, `goldseel_pay_tool()`. Offline tests use a fake judge; a live endpoint test is opt-in (`KORGCHAT_GOLDSEEL_LIVE=1`). 
+- **Recipient-category ontology — the gate's deterministic knowledge floor (`korgchat.ontology`).** A controlled vocabulary of recipient categories with **synonyms** and an **is-a hierarchy** (`ml-inference` ≡ `ai-inference` ≡ `llm-inference`, all *is-a* `ai-compute`; `gambling`/`adult`/`crypto-trading` *is-a* `prohibited`), plus a seeded **vendor registry**. The `pay` tool now resolves *known* recipients deterministically — **ALLOW/DENY without a model call** — and only genuine unknowns reach goldseel, making the `ml-inference ≠ ai-inference` false-reject *structurally impossible*. **It compounds:** `learn()` writes newly-classified recipients back to the registry (optionally persisted), so the known set grows monotonically — more decisions → fewer model calls → more consistent outcomes (a data network effect). The `pay` result records `decided_by` (ontology vs goldseel), the floor verdict, and what was learned. `payment_mandate()` gains `allow_classes` / `deny_classes` (default deny `["prohibited"]`). - **Auto-context injection is now a first-class ledger event.** Previously the recall-augmented preamble the model actually saw was a *ghost* — the journal recorded only the user's original prompt. Now, whenever auto-context injects a preamble, a `context_injection` event is written capturing the preamble text, the recall query, and the matched `seq_id`s + scores, causally chained `user_prompt → context_injection → llm_inference`. The user_prompt event still records only what the user typed; the injected context is a separate, auditable, replayable event. New `AutoContextEngine.build_context()` returns a `ContextInjection` (preamble + structured matches); `build_preamble()` is now a thin wrapper over it. 
- **Tool-schema snapshot + conformance events.** Every tool execution is now bracketed by two events: a `tool_schema_snapshot` *before* the call (the declared `input_schema`, `description`, and a deterministic `schema_hash`) and a `tool_validation` *after* (did the call's input conform to the declared schema? did the call succeed?). A replayed conversation stays meaningful even after a tool's schema changes — the contract it ran against is frozen on the ledger, and a stale call is detectable. New `korgchat.schema` module: `schema_hash()` (canonical sha256, byte-for-byte aligned with `korg-ledger@v1` canonicalization) and a dependency-free `validate_input()`. diff --git a/README.md b/README.md index 9db6dc7..764ce4e 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,32 @@ The decision, the goldseel verdict, and the mandate hash are recorded to the ledger — so *what an agent was allowed to spend, and why,* is provable. The gate is model-agnostic: point `GOLDSEEL_URL` at any goldseel deployment. +### Knowledge floor (ontology) — and how it compounds + +Before goldseel is consulted, the `pay` tool resolves the recipient against a +**category ontology** (`korgchat.ontology`): a controlled vocabulary with +synonyms and an is-a hierarchy (`ml-inference` ≡ `ai-inference` ≡ +`llm-inference`, all *is-a* `ai-compute`; `gambling`/`adult`/`crypto-trading` +*is-a* `prohibited`) plus a vendor registry. **Known recipients resolve +deterministically** — ALLOW/DENY with no model call — so `ml-inference ≠ +ai-inference` mistakes are impossible, and the model is only spent on genuine +unknowns. + +It **compounds**: every newly-classified recipient is learned back into the +registry (`learn()`, optionally persisted), so the known set grows +monotonically — the more decisions the system makes, the fewer reach the model +and the more consistent it gets (a data network effect). Each `pay` result +records `decided_by` (`ontology` vs `goldseel`) so the audit shows *which* +layer decided. 
+ +```python +from korgchat.gate import payment_mandate, goldseel_pay_tool +tool = goldseel_pay_tool(payment_mandate( + "Pay only for AI inference / GPU compute. No gambling.", + spend_cap_usd=50, allow_classes=["ai-compute"], deny_classes=["prohibited"], +)) +``` + ## Streaming (v0.4.2) By default, every assistant text reply streams to stdout character-by-character diff --git a/src/korgchat/gate.py b/src/korgchat/gate.py index a64b95d..e59deee 100644 --- a/src/korgchat/gate.py +++ b/src/korgchat/gate.py @@ -30,6 +30,7 @@ import urllib.request from typing import Any, Protocol +from .ontology import CategoryOntology from .tools import Tool # goldseel on Modal (serverless, native shape: {intent, mandate_summary, redemption} -> {verdict, reasoning}). @@ -75,12 +76,25 @@ def evaluate( def payment_mandate( - intent: str, spend_cap_usd: float, *, recipient_policy: Any = None + intent: str, + spend_cap_usd: float, + *, + allow_classes: list[str] | None = None, + deny_classes: list[str] | None = None, + recipient_policy: Any = None, ) -> dict[str, Any]: - """Build a payment mandate: the human-authorized intent + a spend cap.""" + """Build a payment mandate. + + ``allow_classes`` / ``deny_classes`` are ontology category classes (e.g. + ``["ai-compute"]`` / ``["prohibited"]``) used by the deterministic floor. + ``deny_classes`` defaults to ``["prohibited"]``. ``intent`` is the free-text + purpose used by goldseel for recipients the ontology can't resolve. 
+ """ return { "intent": intent, "spend_cap_usd": float(spend_cap_usd), + "allow_classes": list(allow_classes) if allow_classes else [], + "deny_classes": list(deny_classes) if deny_classes is not None else ["prohibited"], "recipient_policy": recipient_policy, } @@ -95,15 +109,22 @@ def goldseel_pay_tool( mandate: dict[str, Any], *, gate: Gate | None = None, + ontology: CategoryOntology | None = None, + learn: bool = True, name: str = "pay", simulate: bool = True, ) -> Tool: - """A ``pay`` :class:`~korgchat.tools.Tool` gated by goldseel. + """A ``pay`` :class:`~korgchat.tools.Tool` gated by the ontology + goldseel. - ``mandate`` is a :func:`payment_mandate`. The tool tracks remaining cap + ``mandate`` is a :func:`payment_mandate`. The deterministic category + ontology resolves known recipients (ALLOW/DENY) without a model call; only + genuine unknowns reach goldseel. With ``learn=True`` newly-classified + recipients are written back to the ontology so future calls resolve + deterministically — the system compounds. The tool tracks remaining cap across calls within the session. 
""" judge: Gate = gate or GoldseelGate() + ont: CategoryOntology = ontology or CategoryOntology() mandate_hash = _mandate_hash(mandate) state = {"remaining": float(mandate["spend_cap_usd"])} @@ -114,36 +135,56 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: amount = float(amount) recipient = args.get("recipient_name") or args.get("recipient_domain") or "unknown" + redemption = { + "recipient_domain": args.get("recipient_domain"), + "recipient_name": args.get("recipient_name"), + "recipient_categories": args.get("recipient_categories"), + "vendor_known": bool(args.get("recipient_name") or args.get("recipient_domain")), + "amount_usdc": int(round(amount * 1e6)), + "resource_description": args.get("resource_description"), + } + reasons: list[str] = [] - # deterministic floor: never exceed the signed cap + decided_by = None + floor = {"floor": "SKIPPED", "categories": []} + verdict = {"verdict": "skip", "reasoning": "not evaluated"} + escalate = False + + # 1. deterministic spend-cap floor if amount > state["remaining"]: reasons.append( f"amount ${amount:.2f} exceeds remaining mandate cap ${state['remaining']:.2f}" ) + decided_by = "deterministic-cap" - # Consult goldseel only if the deterministic floor passed — the owned - # model is pay-per-call, so don't spend a call on an already-doomed pay. - verdict = {"verdict": "skip", "reasoning": "not evaluated (deterministic reject)"} - escalate = False + # 2. 
ontology floor — known recipients resolve without a model call if not reasons: - mandate_summary = { - "spend_cap_remaining": state["remaining"], - "use_counter_remaining": None, - "expiry": None, - "recipient_policy": mandate.get("recipient_policy"), - } - redemption = { - "recipient_domain": args.get("recipient_domain"), - "recipient_name": args.get("recipient_name"), - "recipient_categories": args.get("recipient_categories"), - "vendor_known": bool(args.get("recipient_name") or args.get("recipient_domain")), - "amount_usdc": int(round(amount * 1e6)), - "resource_description": args.get("resource_description"), - } - verdict = judge.evaluate(mandate["intent"], mandate_summary, redemption) - if verdict["verdict"] == "reject": - reasons.append(f"goldseel: {verdict['reasoning']}") - escalate = verdict["verdict"] == "skip" # owned model down -> defer to a human + floor = ont.resolve( + redemption, allow=mandate.get("allow_classes"), deny=mandate.get("deny_classes") + ) + if floor["floor"] == "DENY": + reasons.append(f"ontology: {floor['reasons'][0]}") + decided_by = "ontology" + elif floor["floor"] == "ALLOW": + decided_by = "ontology" # deterministic accept — goldseel not consulted + else: + # 3. 
genuine unknown -> consult the owned model (pay-per-call) + mandate_summary = { + "spend_cap_remaining": state["remaining"], + "use_counter_remaining": None, + "expiry": None, + "recipient_policy": mandate.get("recipient_policy"), + } + verdict = judge.evaluate(mandate["intent"], mandate_summary, redemption) + decided_by = "goldseel" + if verdict["verdict"] == "reject": + reasons.append(f"goldseel: {verdict['reasoning']}") + escalate = verdict["verdict"] == "skip" # model down -> defer to a human + + # compounding: cache any explicit classification so the next call is deterministic + learned_key = None + if learn and redemption.get("recipient_categories"): + learned_key = ont.learn(redemption, redemption["recipient_categories"]) decision = "REJECT" if reasons else ("ESCALATE" if escalate else "ACCEPT") settled = False @@ -155,10 +196,15 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: "decision": decision, "amount_usd": amount, "recipient": recipient, + "decided_by": decided_by, + "floor": floor["floor"], + "categories": floor.get("categories", []), "reasons": reasons, "goldseel": verdict, "remaining_after": round(state["remaining"], 6), "mandate_hash": mandate_hash, + "learned": learned_key, + "vendors_known": ont.stats()["vendors_known"], "settled": settled, "settlement": "simulated" if settled else None, } diff --git a/src/korgchat/ontology.py b/src/korgchat/ontology.py new file mode 100644 index 0000000..a1aa5ae --- /dev/null +++ b/src/korgchat/ontology.py @@ -0,0 +1,235 @@ +"""Recipient-category ontology — the gate's deterministic knowledge floor. + +A controlled vocabulary of recipient categories with **synonyms** and an +**is-a hierarchy**, plus a **vendor registry** (domain/name -> categories). The +gate resolves a payment against the mandate's allowed/denied category *classes* +here FIRST; only genuinely unknown recipients fall through to the model. 
This +is what makes `ml-inference ≡ ai-inference ≡ llm-inference` (all *is-a* +`ai-compute`) a structural fact instead of something a 3B model has to +re-derive — and occasionally get wrong. + +**It compounds.** `learn()` writes a newly-classified recipient back into the +registry (optionally persisted to disk), so the known set grows monotonically: +the more decisions the system makes, the fewer reach the (fallible, pay-per- +call) model, and the more consistent it gets. That's a data network effect — +each decision makes the next one cheaper and surer. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +# ── is-a hierarchy: leaf category -> parent class ────────────────────────── +HIERARCHY: dict[str, str] = { + # ai-compute + "ml-inference": "ai-compute", + "llm-inference": "ai-compute", + "gpu-compute": "ai-compute", + "training-compute": "ai-compute", + "fine-tuning": "ai-compute", + "embeddings": "ai-compute", + "vector-db": "ai-compute", + # infra + "cloud-hosting": "infra", + "object-storage": "infra", + "cdn": "infra", + "bandwidth": "infra", + "serverless": "infra", + # data + "data-api": "data", + "market-data": "data", + "search-api": "data", + "web-scraping": "data", + # software / saas (neutral) + "saas": "software", + "api-credits": "software", + "dev-tools": "software", + # prohibited (default-deny class) + "gambling": "prohibited", + "adult": "prohibited", + "crypto-trading": "prohibited", + "weapons": "prohibited", + "drugs": "prohibited", + "darknet": "prohibited", +} + +# ── synonyms -> canonical leaf category ──────────────────────────────────── +SYNONYMS: dict[str, str] = { + "ai-inference": "ml-inference", + "ai inference": "ml-inference", + "inference": "ml-inference", + "model-inference": "ml-inference", + "ai-compute-inference": "ml-inference", + "llm": "llm-inference", + "llm-api": "llm-inference", + "gpu": "gpu-compute", + "gpu-rental": "gpu-compute", + "compute": "gpu-compute", 
+ "training": "training-compute", + "finetuning": "fine-tuning", + "embedding": "embeddings", + "vectors": "vector-db", + "vector-database": "vector-db", + "hosting": "cloud-hosting", + "vps": "cloud-hosting", + "storage": "object-storage", + "blob-storage": "object-storage", + "lambda": "serverless", + "casino": "gambling", + "betting": "gambling", + "sportsbook": "gambling", + "poker": "gambling", + "porn": "adult", + "nsfw": "adult", + "crypto": "crypto-trading", + "trading": "crypto-trading", + "exchange": "crypto-trading", + "defi": "crypto-trading", +} + +# ── seed vendor registry: domain/name (lowercased) -> categories ─────────── +SEED_VENDORS: dict[str, list[str]] = { + "api.openai.com": ["ml-inference"], + "openai": ["ml-inference"], + "api.anthropic.com": ["ml-inference"], + "anthropic": ["ml-inference"], + "api.x.ai": ["ml-inference"], + "replicate.com": ["ml-inference"], + "api.together.xyz": ["ml-inference"], + "huggingface.co": ["ml-inference", "embeddings"], + "vast.ai": ["gpu-compute"], + "runpod.io": ["gpu-compute"], + "lambdalabs.com": ["gpu-compute"], + "modal.com": ["serverless", "gpu-compute"], + "aws.amazon.com": ["cloud-hosting"], + "cloud.google.com": ["cloud-hosting"], + "cloudflare.com": ["cdn", "cloud-hosting"], + "vercel.com": ["serverless", "cloud-hosting"], + "pinecone.io": ["vector-db"], + "bet365.com": ["gambling"], + "draftkings.com": ["gambling"], + "binance.com": ["crypto-trading"], + "coinbase.com": ["crypto-trading"], + "kraken.com": ["crypto-trading"], + "pornhub.com": ["adult"], +} + + +def canonical(category: str) -> str: + """Resolve a category string to its canonical leaf via synonyms.""" + c = str(category).lower().strip() + return SYNONYMS.get(c, c) + + +def parent_class(category: str) -> str: + """The is-a parent class of a category (or itself if it's already a class).""" + c = canonical(category) + return HIERARCHY.get(c, c) + + +def expand(category: str) -> set[str]: + """A category plus its parent class — the set to 
test membership against.""" + c = canonical(category) + return {c, parent_class(c)} + + +class CategoryOntology: + """Deterministic recipient classification + the learning loop. + + Pass ``store_path`` to persist learned vendor mappings across runs (the + compounding knowledge base). Without it, learning is in-memory only. + """ + + def __init__(self, store_path: str | Path | None = None) -> None: + self.vendors: dict[str, list[str]] = {k: list(v) for k, v in SEED_VENDORS.items()} + self.store_path = Path(store_path) if store_path else None + self.learned_count = 0 + if self.store_path and self.store_path.exists(): + try: + learned = json.loads(self.store_path.read_text()) + for k, v in learned.items(): + self.vendors[k.lower()] = sorted(set(self.vendors.get(k.lower(), [])) | set(v)) + except (json.JSONDecodeError, OSError): + pass + + # ── classification ──────────────────────────────────────────────────── + def categories_for(self, redemption: dict[str, Any]) -> set[str]: + """Canonical categories for a redemption: explicit ones + registry lookup.""" + cats = {canonical(c) for c in (redemption.get("recipient_categories") or [])} + for key in (redemption.get("recipient_domain"), redemption.get("recipient_name")): + if key and str(key).lower() in self.vendors: + cats.update(canonical(c) for c in self.vendors[str(key).lower()]) + return cats + + def resolve( + self, + redemption: dict[str, Any], + *, + allow: list[str] | None = None, + deny: list[str] | None = None, + ) -> dict[str, Any]: + """Deterministic floor verdict against allowed/denied category classes. + + ``allow`` / ``deny`` may contain leaf categories or parent classes. + Returns floor ``DENY`` / ``ALLOW`` / ``UNKNOWN`` (defer to the model). 
+ """ + allow_set = {canonical(a) for a in (allow or [])} + deny_set = {canonical(d) for d in (deny or [])} + cats = self.categories_for(redemption) + if not cats: + return {"floor": "UNKNOWN", "reasons": ["recipient not classified by ontology"], "categories": []} + + denied = sorted(c for c in cats if expand(c) & deny_set) + if denied: + return { + "floor": "DENY", + "reasons": [f"category {denied} is in a denied class"], + "categories": sorted(cats), + } + if allow_set and all(expand(c) & allow_set for c in cats): + return { + "floor": "ALLOW", + "reasons": [f"all categories {sorted(cats)} within allowed classes"], + "categories": sorted(cats), + } + return { + "floor": "UNKNOWN", + "reasons": ["category not clearly within allowed or denied classes"], + "categories": sorted(cats), + } + + # ── the compounding loop ────────────────────────────────────────────── + def learn(self, redemption: dict[str, Any], categories: list[str]) -> str | None: + """Remember a recipient's categories so future payments resolve deterministically. + + Caches the *classification* (domain -> categories), never a verdict — + the verdict is always re-derived from the mandate. Returns the key + learned, or None if there was nothing to key on. 
+ """ + key = (redemption.get("recipient_domain") or redemption.get("recipient_name") or "") + key = str(key).lower().strip() + cats = sorted({canonical(c) for c in categories if c}) + if not key or not cats: + return None + merged = sorted(set(self.vendors.get(key, [])) | set(cats)) + if merged != self.vendors.get(key): + self.vendors[key] = merged + self.learned_count += 1 + self._persist() + return key + + def _persist(self) -> None: + if not self.store_path: + return + # persist only the delta vs the seed (the learned knowledge) + learned = {k: v for k, v in self.vendors.items() if v != SEED_VENDORS.get(k)} + try: + self.store_path.parent.mkdir(parents=True, exist_ok=True) + self.store_path.write_text(json.dumps(learned, indent=1, sort_keys=True)) + except OSError: + pass + + def stats(self) -> dict[str, int]: + return {"vendors_known": len(self.vendors), "learned_this_session": self.learned_count} diff --git a/tests/test_gate.py b/tests/test_gate.py index 05dfa55..662015b 100644 --- a/tests/test_gate.py +++ b/tests/test_gate.py @@ -55,10 +55,10 @@ def test_remaining_cap_decrements_across_calls(): def test_reject_when_goldseel_rejects(): tool, _ = _tool("reject") - r = tool.call( - {"amount_usd": 10, "recipient_name": "Bet365", "recipient_categories": ["gambling"]} - ) + # an UNKNOWN recipient (no ontology match) actually reaches the model + r = tool.call({"amount_usd": 10, "recipient_domain": "unknownshop.io"}) assert r["decision"] == "REJECT" + assert r["decided_by"] == "goldseel" assert r["settled"] is False assert any("goldseel" in x for x in r["reasons"]) @@ -114,6 +114,54 @@ def test_pay_decision_is_recorded_in_the_ledger(tmp_path): assert result["goldseel"]["verdict"] == "approve" +def _ont_tool(verdict, cap=100.0): + gate = FakeGate(verdict) + mandate = payment_mandate(INTENT, cap, allow_classes=["ai-compute"], deny_classes=["prohibited"]) + return goldseel_pay_tool(mandate, gate=gate), gate + + +def test_ontology_allow_bypasses_the_model(): + # the 
model would (wrongly) reject — but a known-good recipient never reaches it + tool, gate = _ont_tool("reject") + r = tool.call( + {"amount_usd": 12, "recipient_domain": "api.openai.com", "recipient_categories": ["ml-inference"]} + ) + assert r["decision"] == "ACCEPT" + assert r["decided_by"] == "ontology" + assert r["floor"] == "ALLOW" + assert gate.calls == [] # false-reject is structurally impossible here + + +def test_ontology_deny_bypasses_the_model(): + # the model would (wrongly) approve — but a prohibited recipient is blocked first + tool, gate = _ont_tool("approve") + r = tool.call( + {"amount_usd": 10, "recipient_domain": "bet365.com", "recipient_categories": ["gambling"]} + ) + assert r["decision"] == "REJECT" + assert r["decided_by"] == "ontology" + assert r["floor"] == "DENY" + assert gate.calls == [] + + +def test_unknown_recipient_consults_the_model(): + tool, gate = _ont_tool("approve") + r = tool.call({"amount_usd": 5, "recipient_domain": "mystery.io"}) + assert r["decided_by"] == "goldseel" + assert len(gate.calls) == 1 + + +def test_compounding_learns_a_new_vendor(): + tool, gate = _ont_tool("approve") + r1 = tool.call({"amount_usd": 5, "recipient_domain": "newgpu.io", "recipient_categories": ["gpu-compute"]}) + assert r1["decided_by"] == "ontology" + assert r1["learned"] == "newgpu.io" + # second call has NO explicit category — resolved from the learned registry + r2 = tool.call({"amount_usd": 5, "recipient_domain": "newgpu.io"}) + assert r2["decided_by"] == "ontology" + assert gate.calls == [] # the model was never needed + + @pytest.mark.skipif( os.environ.get("KORGCHAT_GOLDSEEL_LIVE") != "1", reason="set KORGCHAT_GOLDSEEL_LIVE=1 to hit the live goldseel Modal endpoint", diff --git a/tests/test_ontology.py b/tests/test_ontology.py new file mode 100644 index 0000000..927b909 --- /dev/null +++ b/tests/test_ontology.py @@ -0,0 +1,60 @@ +"""Recipient-category ontology: the deterministic knowledge floor + learning loop.""" + +import json + +from 
korgchat.ontology import CategoryOntology, canonical, expand, parent_class + + +def test_synonyms_and_hierarchy(): + assert canonical("ai-inference") == "ml-inference" + assert canonical("AI Inference") == "ml-inference" + assert canonical("casino") == "gambling" + assert parent_class("ml-inference") == "ai-compute" + assert parent_class("ai-inference") == "ai-compute" # resolved via synonym first + assert parent_class("gambling") == "prohibited" + assert "ai-compute" in expand("llm-inference") + + +def test_resolve_allow_deny_unknown(): + ont = CategoryOntology() + allow, deny = ["ai-compute"], ["prohibited"] + + # known-good vendor (registry) -> ALLOW + assert ont.resolve({"recipient_domain": "api.openai.com"}, allow=allow, deny=deny)["floor"] == "ALLOW" + # synonym category resolves to an allowed class + assert ont.resolve({"recipient_categories": ["ai-inference"]}, allow=allow, deny=deny)["floor"] == "ALLOW" + # prohibited class -> DENY (even via synonym) + assert ont.resolve({"recipient_categories": ["casino"]}, allow=allow, deny=deny)["floor"] == "DENY" + # unknown vendor, no category -> defer to the model + assert ont.resolve({"recipient_domain": "mystery.io"}, allow=allow, deny=deny)["floor"] == "UNKNOWN" + # a known category outside the allowed (and not denied) classes -> defer + assert ont.resolve({"recipient_categories": ["market-data"]}, allow=allow, deny=deny)["floor"] == "UNKNOWN" + + +def test_learn_grows_registry_and_persists(tmp_path): + store = tmp_path / "ontology.json" + ont = CategoryOntology(store_path=store) + before = ont.stats()["vendors_known"] + + key = ont.learn({"recipient_domain": "newvendor.io"}, ["gpu-compute"]) + assert key == "newvendor.io" + assert ont.stats()["vendors_known"] == before + 1 + # now resolves deterministically + assert ont.resolve({"recipient_domain": "newvendor.io"}, allow=["ai-compute"])["floor"] == "ALLOW" + + # persisted -> a fresh ontology pointed at the same store reloads it + reloaded = 
CategoryOntology(store_path=store) + assert "newvendor.io" in reloaded.vendors + assert reloaded.resolve({"recipient_domain": "newvendor.io"}, allow=["ai-compute"])["floor"] == "ALLOW" + + +def test_persist_stores_only_the_learned_delta(tmp_path): + store = tmp_path / "ontology.json" + ont = CategoryOntology(store_path=store) + # re-learning a seed vendor with the same categories is a no-op (no growth, not persisted) + ont.learn({"recipient_domain": "api.openai.com"}, ["ml-inference"]) + ont.learn({"recipient_domain": "brandnew.ai"}, ["ml-inference"]) + if store.exists(): + learned = json.loads(store.read_text()) + assert "api.openai.com" not in learned # unchanged seed isn't written + assert learned.get("brandnew.ai") == ["ml-inference"] From 65d7a74897efb874cada59c626189cead6156b1f Mon Sep 17 00:00:00 2001 From: ares <285551516+New1Direction@users.noreply.github.com> Date: Mon, 8 Jun 2026 12:28:10 -0700 Subject: [PATCH 5/6] fix(gate): send goldseel dollar-denominated amounts (not micros) goldseel was trained on dollar amounts (e.g. "12.00") + a positive use-counter. The pay tool sent amount_usdc in on-chain micros (12 -> 12000000) and a null counter, so the model saw every $12 payment as $12M over cap (or counter exhausted) and rejected everything. Send the dollar view + a positive counter to the model; the canonical redemption (micros) is unchanged for settlement. This was the real cause of goldseel's 'harshness' (compounded by an old deployed checkpoint). With v0.3.2 @ Q8 + this format, the two-sided benchmark goes 0 false-approve / 0 false-reject (was 0/6 approve). 
--- src/korgchat/gate.py | 17 +++++++++++------ tests/test_gate.py | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/korgchat/gate.py b/src/korgchat/gate.py index e59deee..68dffb4 100644 --- a/src/korgchat/gate.py +++ b/src/korgchat/gate.py @@ -168,14 +168,19 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: elif floor["floor"] == "ALLOW": decided_by = "ontology" # deterministic accept — goldseel not consulted else: - # 3. genuine unknown -> consult the owned model (pay-per-call) + # 3. genuine unknown -> consult the owned model (pay-per-call). + # goldseel was trained on DOLLAR-denominated amounts (e.g. "12.00"), + # NOT on-chain micros — send the dollar view so it reads the cap right. mandate_summary = { - "spend_cap_remaining": state["remaining"], - "use_counter_remaining": None, - "expiry": None, - "recipient_policy": mandate.get("recipient_policy"), + # match goldseel's training distribution: dollar cap, a POSITIVE + # use-counter (None reads as "exhausted -> reject"), no expiry. 
+ "spend_cap_remaining": f"{state['remaining']:.2f} USDC", + "use_counter_remaining": 999, + "expiry_iso": None, + "recipient_policy": mandate.get("recipient_policy") or "any", } - verdict = judge.evaluate(mandate["intent"], mandate_summary, redemption) + gs_redemption = {**redemption, "amount_usdc": f"{amount:.2f}"} + verdict = judge.evaluate(mandate["intent"], mandate_summary, gs_redemption) decided_by = "goldseel" if verdict["verdict"] == "reject": reasons.append(f"goldseel: {verdict['reasoning']}") diff --git a/tests/test_gate.py b/tests/test_gate.py index 662015b..227340e 100644 --- a/tests/test_gate.py +++ b/tests/test_gate.py @@ -42,7 +42,7 @@ def test_accept_within_cap_and_approved(): assert r["remaining_after"] == 38.0 assert r["mandate_hash"] assert len(gate.calls) == 1 - assert gate.calls[0][1]["spend_cap_remaining"] == 50.0 # goldseel saw the right cap + assert gate.calls[0][1]["spend_cap_remaining"] == "50.00 USDC" # dollar-formatted for goldseel def test_remaining_cap_decrements_across_calls(): From 7c8fc25a2c77652baf4707b62eafda3be4a35ce0 Mon Sep 17 00:00:00 2001 From: ares <285551516+New1Direction@users.noreply.github.com> Date: Mon, 8 Jun 2026 13:11:56 -0700 Subject: [PATCH 6/6] =?UTF-8?q?feat(escalation):=20harvest=20loop=20?= =?UTF-8?q?=E2=80=94=20escalations=20become=20training=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The second compounding loop. When the pay gate ESCALATEs (goldseel defers or is unreachable), the case is logged; a human resolves it; resolved escalations export in goldseel's training format and feed the next retrain — the cases the model couldn't handle become the ones it learns. Ontology compounds knowledge; this compounds judgment. - korgchat.escalation: EscalationLog (record/pending/resolve/export/persist) - goldseel_pay_tool(escalation_log=...) 
logs on ESCALATE, returns escalation_id - GoldseelGate now recognizes the model's 'escalate' verdict (was -> skip) - tests: record/resolve/export/persist, idempotency, pay-tool wiring, unreachable --- CHANGELOG.md | 1 + src/korgchat/escalation.py | 121 +++++++++++++++++++++++++++++++++++++ src/korgchat/gate.py | 36 +++++++++-- tests/test_escalation.py | 91 ++++++++++++++++++++++++++++ 4 files changed, 244 insertions(+), 5 deletions(-) create mode 100644 src/korgchat/escalation.py create mode 100644 tests/test_escalation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 64829c0..40fd708 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Mandate-gated shell (`--mandate-allow`).** The sandboxed `bash` tool can be constrained to a command allowlist. Enforced two ways: just-bash only registers the allowed commands (physical), and each line is parsed before exec so a disallowed or dynamically-named command (`$CMD`) is rejected — **fail-closed**. Every call carries a verdict (`{decision, reasons, commands_used, mandate_hash}`) recorded to the ledger, so what the agent was *allowed* to run is itself provable. New `shell_mandate(allow, deny)`; `SandboxClient(mandate=...)` / `.configure()`; `tools_with_sandbox(mandate=...)`; CLI `--mandate-allow ls,cat,grep,...`. - **goldseel-gated `pay` tool.** New `korgchat.gate` module: a `pay` tool that authorizes a payment through the owned **goldseel** mandate-enforcement model (served on Modal, serverless). A deterministic spend-cap floor runs first — and short-circuits the pay-per-call model on an over-cap payment; goldseel then judges the payment against the authorized intent. Maps to a three-way decision: **REJECT** (cap or goldseel), **ESCALATE** (goldseel unreachable → defer to a human, *never* auto-approve), **ACCEPT** (within cap and approved). 
The decision + verdict + mandate hash are recorded to the ledger, so what an agent was allowed to spend is provable. New `GoldseelGate` (injectable), `payment_mandate()`, `goldseel_pay_tool()`. Offline tests use a fake judge; a live endpoint test is opt-in (`KORGCHAT_GOLDSEEL_LIVE=1`). - **Recipient-category ontology — the gate's deterministic knowledge floor (`korgchat.ontology`).** A controlled vocabulary of recipient categories with **synonyms** and an **is-a hierarchy** (`ml-inference` ≡ `ai-inference` ≡ `llm-inference`, all *is-a* `ai-compute`; `gambling`/`adult`/`crypto-trading` *is-a* `prohibited`), plus a seeded **vendor registry**. The `pay` tool now resolves *known* recipients deterministically — **ALLOW/DENY without a model call** — and only genuine unknowns reach goldseel, making the `ml-inference ≠ ai-inference` false-reject *structurally impossible*. **It compounds:** `learn()` writes newly-classified recipients back to the registry (optionally persisted), so the known set grows monotonically — more decisions → fewer model calls → more consistent outcomes (a data network effect). The `pay` result records `decided_by` (ontology vs goldseel), the floor verdict, and what was learned. `payment_mandate()` gains `allow_classes` / `deny_classes` (default deny `["prohibited"]`). +- **Escalation harvest — the second compounding loop (`korgchat.escalation`).** When the pay gate **ESCALATEs** (goldseel defers, or is unreachable), the case is logged; a human resolves it (approve/reject + why); resolved escalations export in goldseel's training format and feed the *next* retrain. So the cases the model *couldn't* handle become the ones it *learns* — the ontology compounds **knowledge**, this compounds **judgment**. 
`EscalationLog` (`record` / `pending` / `resolve` / `export_training_cases` / `write_training_jsonl`); `goldseel_pay_tool(escalation_log=...)` logs on ESCALATE and returns an `escalation_id`; `GoldseelGate` now recognizes the model's `escalate` verdict (it was collapsing to `skip`). - **Auto-context injection is now a first-class ledger event.** Previously the recall-augmented preamble the model actually saw was a *ghost* — the journal recorded only the user's original prompt. Now, whenever auto-context injects a preamble, a `context_injection` event is written capturing the preamble text, the recall query, and the matched `seq_id`s + scores, causally chained `user_prompt → context_injection → llm_inference`. The user_prompt event still records only what the user typed; the injected context is a separate, auditable, replayable event. New `AutoContextEngine.build_context()` returns a `ContextInjection` (preamble + structured matches); `build_preamble()` is now a thin wrapper over it. - **Tool-schema snapshot + conformance events.** Every tool execution is now bracketed by two events: a `tool_schema_snapshot` *before* the call (the declared `input_schema`, `description`, and a deterministic `schema_hash`) and a `tool_validation` *after* (did the call's input conform to the declared schema? did the call succeed?). A replayed conversation stays meaningful even after a tool's schema changes — the contract it ran against is frozen on the ledger, and a stale call is detectable. New `korgchat.schema` module: `schema_hash()` (canonical sha256, byte-for-byte aligned with `korg-ledger@v1` canonicalization) and a dependency-free `validate_input()`. diff --git a/src/korgchat/escalation.py b/src/korgchat/escalation.py new file mode 100644 index 0000000..140982e --- /dev/null +++ b/src/korgchat/escalation.py @@ -0,0 +1,121 @@ +"""Escalation harvest — the second compounding loop. 
class EscalationLog:
    """A JSONL log of escalations and their human resolutions.

    Each line of the backing file is one JSON object with the keys ``id``,
    ``status`` (``"pending"`` or ``"resolved"``), ``case``, ``context`` and
    ``resolution``.  The whole log is kept in memory and the file is rewritten
    after every mutation.
    """

    def __init__(self, path: str | Path) -> None:
        """Open the log at *path*, loading existing entries if the file exists.

        Blank lines are ignored.  The file is not created until the first
        mutation.
        """
        self.path = Path(path)
        self._entries: list[dict[str, Any]] = []
        if self.path.exists():
            # Pin the encoding: the platform default differs between hosts.
            text = self.path.read_text(encoding="utf-8")
            self._entries = [json.loads(line) for line in text.splitlines() if line.strip()]

    @staticmethod
    def _write_jsonl(target: Path, rows: list[dict[str, Any]]) -> None:
        """Write *rows* to *target* as JSONL, replacing the file in one step."""
        target.parent.mkdir(parents=True, exist_ok=True)
        payload = "".join(json.dumps(row) + "\n" for row in rows)
        # Write a sibling temp file first, then swap it in: an interrupted
        # write must never leave a truncated log behind.
        tmp = target.with_name(target.name + ".tmp")
        try:
            tmp.write_text(payload, encoding="utf-8")
            tmp.replace(target)
        except OSError:
            if tmp.exists():
                tmp.unlink()
            raise

    def _save(self) -> None:
        """Persist every in-memory entry to ``self.path``."""
        self._write_jsonl(self.path, self._entries)

    def record(
        self,
        *,
        intent: str,
        mandate_summary: dict[str, Any],
        redemption: dict[str, Any],
        reason: str,
        amount_usd: float,
        recipient: str,
        mandate_hash: str,
    ) -> str:
        """Log an escalation as pending and return its id.

        The id is the first 12 hex characters of the SHA-256 of the JSON
        encoding of ``[intent, redemption, amount_usd]``; ``mandate_summary``,
        ``recipient``, ``reason`` and ``mandate_hash`` are stored but are not
        part of the key.  Recording a case that is already pending is a no-op
        that returns the existing id.  An entry that has already been resolved
        does not block the same case from being logged as pending again.
        """
        eid = hashlib.sha256(
            json.dumps([intent, redemption, amount_usd], sort_keys=True).encode()
        ).hexdigest()[:12]
        if any(e["id"] == eid and e["status"] == "pending" for e in self._entries):
            return eid  # already pending; don't duplicate
        self._entries.append(
            {
                "id": eid,
                "status": "pending",
                "case": {
                    "intent": intent,
                    "mandate_summary": mandate_summary,
                    "redemption": redemption,
                },
                "context": {
                    "amount_usd": amount_usd,
                    "recipient": recipient,
                    "mandate_hash": mandate_hash,
                    "reason": reason,
                },
                "resolution": None,
            }
        )
        self._save()
        return eid

    def pending(self) -> list[dict[str, Any]]:
        """Return the entries whose status is ``"pending"``, in log order."""
        return [e for e in self._entries if e["status"] == "pending"]

    def resolve(self, eid: str, verdict: str, reasoning: str = "", by: str = "human") -> dict[str, Any]:
        """Record a human's resolution of a pending escalation.

        Marks the first pending entry with id *eid* as resolved, stores the
        verdict, reasoning and resolver, saves the log and returns the entry.

        Raises:
            ValueError: if *verdict* is not ``"approve"`` or ``"reject"``.
            KeyError: if no pending entry has id *eid*.
        """
        if verdict not in ("approve", "reject"):
            raise ValueError("resolve verdict must be 'approve' or 'reject'")
        for e in self._entries:
            if e["id"] == eid and e["status"] == "pending":
                e["status"] = "resolved"
                e["resolution"] = {"verdict": verdict, "reasoning": reasoning, "by": by}
                self._save()
                return e
        raise KeyError(f"no pending escalation {eid!r}")

    def export_training_cases(self) -> list[dict[str, Any]]:
        """Resolved escalations as goldseel training cases (the harvest).

        An empty human reasoning is replaced by a generic sentence naming the
        verdict, so every exported case carries a non-empty reasoning.
        """
        cases = []
        for e in self._entries:
            if e["status"] == "resolved" and e["resolution"]:
                c = e["case"]
                res = e["resolution"]
                cases.append(
                    {
                        "intent": c["intent"],
                        "mandate_summary": c["mandate_summary"],
                        "redemption": c["redemption"],
                        "expected_verdict": res["verdict"],
                        "expected_reasoning": res["reasoning"]
                        or f"Human-resolved escalation: {res['verdict']}.",
                        "_archetype": "harvested-escalation",
                    }
                )
        return cases

    def write_training_jsonl(self, path: str | Path) -> int:
        """Write the harvest to a jsonl ready to merge into the next training set.

        Missing parent directories of *path* are created, as they are for the
        log file itself.  Returns the number of cases written.
        """
        cases = self.export_training_cases()
        self._write_jsonl(Path(path), cases)
        return len(cases)

    def stats(self) -> dict[str, int]:
        """Return counts of total, pending and resolved entries.

        ``resolved`` is computed as ``total - pending``.
        """
        pending = sum(1 for e in self._entries if e["status"] == "pending")
        resolved = len(self._entries) - pending
        return {"total": len(self._entries), "pending": pending, "resolved": resolved}
genuine unknown -> consult the owned model (pay-per-call). # goldseel was trained on DOLLAR-denominated amounts (e.g. "12.00"), # NOT on-chain micros — send the dollar view so it reads the cap right. - mandate_summary = { + gs_summary = { # match goldseel's training distribution: dollar cap, a POSITIVE # use-counter (None reads as "exhausted -> reject"), no expiry. "spend_cap_remaining": f"{state['remaining']:.2f} USDC", @@ -179,12 +191,12 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: "expiry_iso": None, "recipient_policy": mandate.get("recipient_policy") or "any", } - gs_redemption = {**redemption, "amount_usdc": f"{amount:.2f}"} - verdict = judge.evaluate(mandate["intent"], mandate_summary, gs_redemption) + verdict = judge.evaluate(mandate["intent"], gs_summary, gs_redemption) decided_by = "goldseel" if verdict["verdict"] == "reject": reasons.append(f"goldseel: {verdict['reasoning']}") - escalate = verdict["verdict"] == "skip" # model down -> defer to a human + # model defers OR is unreachable -> defer to a human + escalate = verdict["verdict"] in ("escalate", "skip") # compounding: cache any explicit classification so the next call is deterministic learned_key = None @@ -197,6 +209,19 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: state["remaining"] -= amount settled = simulate + # harvest: log escalations so a human's later resolution becomes training data + escalation_id = None + if decision == "ESCALATE" and escalation_log is not None: + escalation_id = escalation_log.record( + intent=mandate["intent"], + mandate_summary=gs_summary or {}, + redemption=gs_redemption, + reason=verdict.get("reasoning") or "goldseel deferred or unreachable", + amount_usd=amount, + recipient=recipient, + mandate_hash=mandate_hash, + ) + return { "decision": decision, "amount_usd": amount, @@ -210,6 +235,7 @@ def handler(args: dict[str, Any]) -> dict[str, Any]: "mandate_hash": mandate_hash, "learned": learned_key, "vendors_known": ont.stats()["vendors_known"], + 
"escalation_id": escalation_id, "settled": settled, "settlement": "simulated" if settled else None, } diff --git a/tests/test_escalation.py b/tests/test_escalation.py new file mode 100644 index 0000000..bc017ca --- /dev/null +++ b/tests/test_escalation.py @@ -0,0 +1,91 @@ +"""Escalation harvest — the second compounding loop.""" + +import pytest + +from korgchat.escalation import EscalationLog +from korgchat.gate import goldseel_pay_tool, payment_mandate + + +class FakeGate: + def __init__(self, verdict): + self.verdict = verdict + + def evaluate(self, *a): + return {"verdict": self.verdict, "reasoning": "ambiguous, defer to a human"} + + +def _record(log, dom="mystery.io"): + return log.record( + intent="AI inference only", + mandate_summary={"spend_cap_remaining": "50.00 USDC"}, + redemption={"recipient_domain": dom, "amount_usdc": "12.00", "recipient_categories": None}, + reason="ambiguous", + amount_usd=12, + recipient=dom, + mandate_hash="abc", + ) + + +def test_record_resolve_export_persist(tmp_path): + log = EscalationLog(tmp_path / "esc.jsonl") + eid = _record(log) + assert log.stats() == {"total": 1, "pending": 1, "resolved": 0} + + # idempotent: same case doesn't double-log while pending + assert _record(log) == eid + assert log.stats()["total"] == 1 + + log.resolve(eid, "approve", "human verified it is a legit inference vendor") + assert log.stats() == {"total": 1, "pending": 0, "resolved": 1} + + cases = log.export_training_cases() + assert len(cases) == 1 + c = cases[0] + assert c["expected_verdict"] == "approve" + assert c["intent"] == "AI inference only" + assert c["_archetype"] == "harvested-escalation" + assert "human verified" in c["expected_reasoning"] + + # persistence round-trip + reloaded = EscalationLog(tmp_path / "esc.jsonl") + assert reloaded.stats()["resolved"] == 1 + assert reloaded.export_training_cases()[0]["expected_verdict"] == "approve" + + +def test_resolve_rejects_bad_verdict(tmp_path): + log = EscalationLog(tmp_path / "e.jsonl") + 
eid = _record(log) + with pytest.raises(ValueError): + log.resolve(eid, "maybe") + + +def test_pay_tool_logs_escalation_then_harvests(tmp_path): + log = EscalationLog(tmp_path / "esc.jsonl") + # unknown recipient (no category) + a gate that escalates -> ESCALATE -> logged + tool = goldseel_pay_tool( + payment_mandate("AI inference only.", 50), gate=FakeGate("escalate"), escalation_log=log + ) + r = tool.call( + {"amount_usd": 9, "recipient_domain": "ambiguous-vendor.io", "resource_description": "general services"} + ) + assert r["decision"] == "ESCALATE" + assert r["escalation_id"] + assert log.stats()["pending"] == 1 + + # a human resolves it -> it becomes a labeled training case for the next retrain + log.resolve(r["escalation_id"], "reject", "not actually an AI vendor") + harvest = log.export_training_cases() + assert len(harvest) == 1 + assert harvest[0]["expected_verdict"] == "reject" + # carries goldseel's dollar-format inputs, ready to train on + assert harvest[0]["redemption"]["amount_usdc"] == "9.00" + + +def test_unreachable_model_also_escalates_and_logs(tmp_path): + log = EscalationLog(tmp_path / "esc.jsonl") + tool = goldseel_pay_tool( + payment_mandate("AI inference only.", 50), gate=FakeGate("skip"), escalation_log=log + ) + r = tool.call({"amount_usd": 5, "recipient_domain": "down-vendor.io"}) + assert r["decision"] == "ESCALATE" # model unreachable -> defer, never auto-approve + assert log.stats()["pending"] == 1