diff --git a/README.md b/README.md index 7685447..09f469a 100644 --- a/README.md +++ b/README.md @@ -47,35 +47,18 @@ parse('.{ 1, 2, 3 }') ```go import zon "github.com/jsonicjs/zon/go" -import jsonic "github.com/jsonicjs/jsonic/go" -j := jsonic.Make() -j.UseDefaults(zon.Zon, zon.Defaults) - -result, _ := j.Parse(`.{ .name = "Alice", .age = 30 }`) +result, _ := zon.Parse(`.{ .name = "Alice", .age = 30 }`) // map[string]any{"name": "Alice", "age": 30} ``` -## Supported syntax - -- Anonymous struct literals: `.{ .field = value, ... }` -- Tuple / array literals: `.{ value, value, ... }` -- Field names: `.identifier` -- Enum literals (used as values): `.identifier` (parsed as bare strings) -- Strings: `"..."` with Zig escape sequences (`\n`, `\r`, `\t`, `\\`, `\"`, `\'`) -- Multi-line strings: consecutive lines starting with `\\` -- Numbers: decimal, `0x` hex, `0o` octal, `0b` binary, with `_` separators -- Character literals: `'x'`, `'\n'`, `'\x41'`, `'\u{1F600}'` -- Keywords: `true`, `false`, `null` -- Line comments: `// ...` -- Trailing commas allowed - -## Options - -| Option | Default | Description | -| -------------- | ------- | ---------------------------------------------------- | -| `charAsNumber` | `false` | Parse character literals as numeric code points. | -| `enumTag` | `null` | If set, wrap enum literals in `{ [enumTag]: name }`. 
| +## Documentation + +Full documentation following the [Diataxis](https://diataxis.fr) +framework (tutorials, how-to guides, explanation, reference): + +- [TypeScript documentation](doc/zon-ts.md) +- [Go documentation](doc/zon-go.md) ## License diff --git a/doc/zon-go.md b/doc/zon-go.md new file mode 100644 index 0000000..c50d42e --- /dev/null +++ b/doc/zon-go.md @@ -0,0 +1,283 @@ +# ZON plugin for Jsonic (Go) + +A Jsonic syntax plugin that parses +[Zig Object Notation (ZON)](https://ziglang.org/documentation/master/#ZON) +into Go values, with support for anonymous struct literals, tuples, +enum literals, numeric bases, character literals, multi-line strings, +and trailing commas. + +```go +import ( + jsonic "github.com/jsonicjs/jsonic/go" + zon "github.com/jsonicjs/zon/go" +) +``` + +```bash +go get github.com/jsonicjs/zon/go@latest +``` + + +## Tutorials + +### Parse a basic ZON document + +Use the `Parse` convenience function to parse a top-level struct or +tuple literal: + +```go +result, err := zon.Parse(`.{ .name = "Alice", .age = 30 }`) +// map[string]any{"name": "Alice", "age": float64(30)} + +result, err = zon.Parse(`.{ 1, 2, 3 }`) +// []any{float64(1), float64(2), float64(3)} +``` + +### Parse a realistic build.zig.zon + +ZON files typically have nested structs mixed with tuple-style +`paths` lists: + +```go +src := `.{ + .name = "example", + .version = "0.0.1", + .minimum_zig_version = "0.14.0", + .dependencies = .{ + .foo = .{ + .url = "https://example.com/foo.tar.gz", + .hash = "1220deadbeef", + }, + }, + .paths = .{ + "build.zig", + "src", + }, +}` + +result, err := zon.Parse(src) +// map[string]any{ +// "name": "example", +// "version": "0.0.1", +// "minimum_zig_version": "0.14.0", +// "dependencies": map[string]any{ +// "foo": map[string]any{ +// "url": "https://example.com/foo.tar.gz", "hash": "1220deadbeef", +// }, +// }, +// "paths": []any{"build.zig", "src"}, +// } +``` + +### Parse numbers in every ZON base + +ZON numbers accept hex, octal, 
binary, and `_` separators: + +```go +zon.Parse("0x2a") // float64(42) +zon.Parse("0o52") // float64(42) +zon.Parse("0b101010") // float64(42) +zon.Parse("1_000_000") // float64(1000000) +zon.Parse("3.14") // float64(3.14) +``` + + +## How-to guides + +### Parse character literals as code points + +By default Zig char literals (`'A'`, `'\n'`, `'\u{1F600}'`) parse as +one-character strings. Set `CharAsNumber` to receive numeric code +points instead: + +```go +charAsNum := true +result, err := zon.Parse(`'A'`, zon.ZonOptions{CharAsNumber: &charAsNum}) +// float64(65) +``` + +### Tag enum literals to distinguish them from strings + +Without options, an enum literal value like `.red` becomes the plain +string `"red"`. If you need to tell it apart from an ordinary string +in the parsed tree, set `EnumTag`: + +```go +result, err := zon.Parse( + `.{ .kind = .red, .label = "red" }`, + zon.ZonOptions{EnumTag: "$enum"}, +) +// map[string]any{ +// "kind": map[string]any{"$enum": "red"}, +// "label": "red", +// } +``` + +### Read multi-line Zig strings + +Consecutive lines prefixed with `\\` become a single string joined +by `\n`: + +```go +src := ".{\n" + + " .description =\n" + + " \\\\first line\n" + + " \\\\second line\n" + + " ,\n" + + "}" + +result, err := zon.Parse(src) +// map[string]any{"description": "first line\nsecond line"} +``` + +### Reuse a parser for many inputs + +`Parse` rebuilds a Jsonic instance on every call. For hot paths, cache +an instance with `MakeJsonic` and reuse it: + +```go +j := zon.MakeJsonic() +for _, src := range inputs { + result, err := j.Parse(src) + _ = result + _ = err +} +``` + +### Reject extra alternates contributed by this plugin + +Every grammar alternate added by the plugin carries the group tag +`zon`. 
To re-enable strict JSON while the plugin is loaded, exclude +that tag: + +```go +j := jsonic.Make() +j.UseDefaults(zon.Zon, zon.Defaults) +j.SetOptions(jsonic.Options{Rule: &jsonic.RuleOptions{Exclude: "zon"}}) +``` + + +## Explanation + +### How ZON parsing works + +ZON is not a superset of JSON — it uses a distinct opening syntax +(`.{`), a different key/value separator (`=`), and key identifiers +prefixed with `.`. The plugin reshapes Jsonic into a ZON parser by +combining four mechanisms: + +1. **Custom lex matchers** for the `.`-prefixed tokens: + + - `.{` peeks ahead and emits `#OB` (struct/map) if followed by + `.ident=` or `#OS` (tuple/list) otherwise. This resolves + the ambiguity at lex time so only two-token grammar lookahead is + needed. + - `.identifier` emits `#TX` whose `Val` is the identifier (dot + stripped) and whose `Use["zonEnum"]` flag marks it for optional + enum-tag wrapping. + - `\\`-prefixed multi-line strings emit a single `#ST` token with + the joined content. + - Character literals (`'x'`, `'\n'`, `'\xNN'`, `'\u{...}'`) emit a + `#NR` token whose value is either the one-char string or the + numeric code point (controlled by `CharAsNumber`). + +2. **Token remapping**: `#CL` is rebound from `:` to `=`; `#OB`, + `#OS`, and `#CS` drop their default char mappings so stray `{`, + `[`, or `]` in source produce a syntax error rather than silently + opening a map or list. + +3. **Key-set restriction**: the `KEY` token set is narrowed to `#TX` + alone so only identifiers (not numbers or strings) can appear on + the left of `=`. + +4. **Grammar overlay**: small alts prepended to `val`, `list`, + `elem`, and `pair` swap the list terminator from `#CS` to `#CB` + and accept trailing commas before `}`. + +All four are applied atomically through the `GrammarSpec` passed to +`j.Grammar(gs, &jsonic.GrammarSetting{...G: "zon"})`, which tags +every ZON alt with the `zon` group. + +### Struct vs tuple disambiguation + +ZON uses the same `.{ ... 
}` opener for both struct literals (with +`.field = value` pairs) and tuple literals (bare values). Jsonic's +parser allows only two tokens of lookahead, so the decision is made +by the lex matcher: it scans past the opening `.{`, whitespace, and +`//` comments, then checks for `.ident` followed by `=`. This means +the grammar only ever sees an already-classified `#OB` or `#OS` +token. + +### Enum literals as values + +A bare `.foo` token is both a valid key (when followed by `=`) and a +valid value (enum literal). The `#TX` token set membership in both +`KEY` and `VAL` lets the parser pick the right interpretation by +context — no grammar branching is needed. + + +## Reference + +### `Parse(src string, opts ...ZonOptions) (any, error)` + +Parses a ZON string and returns the resulting value. Convenience +wrapper around `MakeJsonic(opts...).Parse(src)`. + +### `MakeJsonic(opts ...ZonOptions) *jsonic.Jsonic` + +Returns a reusable Jsonic instance configured for ZON parsing. Use +this when parsing multiple ZON strings with the same options. + +### `Zon(j *jsonic.Jsonic, options map[string]any) error` + +The raw plugin function. Usually called indirectly through +`j.UseDefaults(zon.Zon, zon.Defaults, opts...)` or via the `Parse` +and `MakeJsonic` helpers above. + +### `Defaults` + +```go +var Defaults = map[string]any{ + "charAsNumber": false, + "enumTag": "", +} +``` + +### `ZonOptions` + +```go +type ZonOptions struct { + // When non-nil and true, parses Zig char literals ('x') as numeric + // code points. When nil or false (default), they are one-char strings. + CharAsNumber *bool + + // When non-empty, wraps enum literals (.foo used as value) in + // map[string]any{enumTag: name} instead of producing bare strings. 
+ EnumTag string +} +``` + +### Supported ZON syntax + +| Construct | Example | Result | +| -------------------- | -------------------------------- | ------------------------------ | +| Struct literal | `.{ .a = 1, .b = 2 }` | `map[string]any{"a":1,"b":2}` | +| Empty struct literal | `.{}` | `[]any{}` (empty list) | +| Tuple literal | `.{ 1, 2, 3 }` | `[]any{1, 2, 3}` | +| Nested | `.{ .a = .{ .b = 1 } }` | nested maps | +| String | `"hello\nworld"` | `"hello\nworld"` | +| Multi-line string | `\\line1\n\\line2` | `"line1\nline2"` | +| Number | `42`, `0x2a`, `0o52`, `0b101010` | `float64(42)` | +| Number separator | `1_000_000` | `float64(1000000)` | +| Float | `3.14` | `float64(3.14)` | +| Boolean / null | `true`, `false`, `null` | `true`, `false`, `nil` | +| Char literal | `'A'` | `"A"` (or `float64(65)`) | +| Enum literal | `.red` | `"red"` | +| Trailing comma | `.{ .a = 1, }` | `map[string]any{"a":1}` | +| Line comment | `// ...` | *(ignored)* | + +### Grammar group tags + +All grammar alternates added by the plugin carry the group tag +`zon`, so callers may exclude them via +`Options{Rule: &RuleOptions{Exclude: "zon"}}`. diff --git a/doc/zon-ts.md b/doc/zon-ts.md new file mode 100644 index 0000000..32e15f5 --- /dev/null +++ b/doc/zon-ts.md @@ -0,0 +1,255 @@ +# ZON plugin for Jsonic (TypeScript) + +A Jsonic syntax plugin that parses +[Zig Object Notation (ZON)](https://ziglang.org/documentation/master/#ZON) +into JavaScript values, with support for anonymous struct literals, +tuples, enum literals, numeric bases, character literals, multi-line +strings, and trailing commas. + +```bash +npm install @jsonic/zon +``` + +Requires `jsonic` >= 2 as a peer dependency. 
+ + +## Tutorials + +### Parse a basic ZON document + +Register the plugin and parse a top-level struct literal: + +```typescript +import { Jsonic } from 'jsonic' +import { Zon } from '@jsonic/zon' + +const j = Jsonic.make().use(Zon) + +j('.{ .name = "Alice", .age = 30 }') +// { name: 'Alice', age: 30 } + +j('.{ 1, 2, 3 }') +// [1, 2, 3] +``` + +### Parse a realistic build.zig.zon + +ZON files typically have nested structs mixed with tuple-style +`paths` lists: + +```typescript +import { Jsonic } from 'jsonic' +import { Zon } from '@jsonic/zon' + +const j = Jsonic.make().use(Zon) + +j(`.{ + .name = "example", + .version = "0.0.1", + .minimum_zig_version = "0.14.0", + .dependencies = .{ + .foo = .{ + .url = "https://example.com/foo.tar.gz", + .hash = "1220deadbeef", + }, + }, + .paths = .{ + "build.zig", + "src", + }, +}`) +// { +// name: 'example', +// version: '0.0.1', +// minimum_zig_version: '0.14.0', +// dependencies: { foo: { url: '...', hash: '1220deadbeef' } }, +// paths: ['build.zig', 'src'], +// } +``` + +### Parse numbers in every ZON base + +ZON numbers accept hex, octal, binary, and `_` separators: + +```typescript +const j = Jsonic.make().use(Zon) + +j('0x2a') // 42 +j('0o52') // 42 +j('0b101010') // 42 +j('1_000_000') // 1000000 +j('3.14') // 3.14 +``` + + +## How-to guides + +### Parse character literals as code points + +By default Zig char literals (`'A'`, `'\n'`, `'\u{1F600}'`) parse as +one-character strings. Set `charAsNumber: true` to receive numeric +code points instead: + +```typescript +const j = Jsonic.make().use(Zon, { charAsNumber: true }) + +j("'A'") // 65 +j("'\\n'") // 10 +j("'\\u{1F600}'") // 128512 +``` + +### Tag enum literals to distinguish them from strings + +Without options, an enum literal value like `.red` becomes the plain +string `'red'`. 
If you need to tell it apart from an ordinary string +in the parsed tree, set `enumTag`: + +```typescript +const j = Jsonic.make().use(Zon, { enumTag: '$enum' }) + +j('.{ .kind = .red, .label = "red" }') +// { kind: { $enum: 'red' }, label: 'red' } +``` + +### Read multi-line Zig strings + +Consecutive lines prefixed with `\\` become a single string joined by +`\n`: + +```typescript +const j = Jsonic.make().use(Zon) + +j(`.{ + .description = + \\\\first line + \\\\second line + , +}`) +// { description: 'first line\nsecond line' } +``` + +### Reject extra alternates contributed by this plugin + +Every grammar alternate added by the plugin carries the group tag +`zon`. To re-enable strict JSON while the plugin is loaded (rarely +useful, but supported), exclude that tag: + +```typescript +const j = Jsonic.make().use(Zon).options({ + rule: { exclude: 'zon' }, +}) +``` + + +## Explanation + +### How ZON parsing works + +ZON is not a superset of JSON — it uses a distinct opening syntax +(`.{`), a different key/value separator (`=`), and key identifiers +prefixed with `.`. The plugin reshapes Jsonic into a ZON parser by +combining four mechanisms: + +1. **Custom lex matchers** for the `.`-prefixed tokens: + + - `.{` peeks ahead and emits `#OB` (struct/map) if followed by + `.ident=` or `#OS` (tuple/list) otherwise. This resolves + the ambiguity at lex time so only two-token grammar lookahead is + needed. + - `.identifier` emits `#TX` whose `val` is the identifier (dot + stripped) and whose `use.zonEnum` flag marks it for optional + enum-tag wrapping. + - `\\`-prefixed multi-line strings emit a single `#ST` token with + the joined content. + - Character literals (`'x'`, `'\n'`, `'\xNN'`, `'\u{...}'`) emit a + `#NR` token whose value is either the one-char string or the + numeric code point (controlled by `charAsNumber`). + +2. 
**Token remapping**: `#CL` is rebound from `:` to `=`; `#OB`, + `#OS`, and `#CS` drop their default char mappings so stray `{`, + `[`, or `]` in source produce a syntax error rather than silently + opening a map or list. + +3. **Key-set restriction**: the `KEY` token set is narrowed to `#TX` + alone so only identifiers (not numbers or strings) can appear on + the left of `=`. + +4. **Grammar overlay**: small alts prepended to `val`, `list`, + `elem`, and `pair` swap the list terminator from `#CS` to `#CB` + and accept trailing commas before `}`. + +All four are applied atomically through the `GrammarSpec` passed to +`jsonic.grammar(grammarDef, { rule: { alt: { g: 'zon' } } })`, which +tags every ZON alt with the `zon` group. + +### Struct vs tuple disambiguation + +ZON uses the same `.{ ... }` opener for both struct literals (with +`.field = value` pairs) and tuple literals (bare values). Jsonic's +parser allows only two tokens of lookahead, so the decision is made +by the lex matcher: it scans past the opening `.{`, whitespace, and +`//` comments, then checks for `.ident` followed by `=`. This means +the grammar only ever sees an already-classified `#OB` or `#OS` +token. + +### Enum literals as values + +A bare `.foo` token is both a valid key (when followed by `=`) and a +valid value (enum literal). The `#TX` token set membership in both +`KEY` and `VAL` lets the parser pick the right interpretation by +context — no grammar branching is needed. + + +## Reference + +### `Zon` (Plugin) + +The plugin function. Register with `Jsonic.make().use(Zon, options)`. +`Zon.defaults` holds the merged default options. + +### `ZonOptions` + +```typescript +type ZonOptions = { + // When true, parse Zig char literals ('x') as numeric code points. + // When false (default), parse them as one-character strings. + charAsNumber: boolean + + // When set, wrap enum literals (.foo used as value) in + // `{ [enumTag]: name }` objects instead of producing bare strings. 
+ enumTag: null | string +} +``` + +Defaults: + +```typescript +{ + charAsNumber: false, + enumTag: null, +} +``` + +### Supported ZON syntax + +| Construct | Example | Result | +| -------------------- | -------------------------------- | ------------------------ | +| Struct literal | `.{ .a = 1, .b = 2 }` | `{ a: 1, b: 2 }` | +| Empty struct literal | `.{}` | `[]` (empty list) | +| Tuple literal | `.{ 1, 2, 3 }` | `[1, 2, 3]` | +| Nested | `.{ .a = .{ .b = 1 } }` | `{ a: { b: 1 } }` | +| String | `"hello\nworld"` | `'hello\nworld'` | +| Multi-line string | `\\line1\n\\line2` | `'line1\nline2'` | +| Number | `42`, `0x2a`, `0o52`, `0b101010` | `42` | +| Number separator | `1_000_000` | `1000000` | +| Float | `3.14` | `3.14` | +| Boolean / null | `true`, `false`, `null` | `true`, `false`, `null` | +| Char literal | `'A'` | `'A'` (or `65`) | +| Enum literal | `.red` | `'red'` | +| Trailing comma | `.{ .a = 1, }` | `{ a: 1 }` | +| Line comment | `// ...` | *(ignored)* | + +### Grammar group tags + +All grammar alternates added by the plugin carry the group tag +`zon`, so callers may exclude them via `rule.exclude: 'zon'`. 
diff --git a/go/go.mod b/go/go.mod index 8191f07..db19806 100644 --- a/go/go.mod +++ b/go/go.mod @@ -2,4 +2,4 @@ module github.com/jsonicjs/zon/go go 1.24.7 -require github.com/jsonicjs/jsonic/go v0.1.18 +require github.com/jsonicjs/jsonic/go v0.1.19 diff --git a/go/go.sum b/go/go.sum index 7142a59..f137ca3 100644 --- a/go/go.sum +++ b/go/go.sum @@ -1,2 +1,6 @@ github.com/jsonicjs/jsonic/go v0.1.18 h1:OW15hjFisrw2n7HE6zDuQAikW8A5NUW8OyP4SCG2oFg= github.com/jsonicjs/jsonic/go v0.1.18/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= +github.com/jsonicjs/jsonic/go v0.1.19-0.20260418194431-54100be22847 h1:+utFlbRO7upKu+DLO9tjUzWHacbxwsyiuFcCkylV3IA= +github.com/jsonicjs/jsonic/go v0.1.19-0.20260418194431-54100be22847/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= +github.com/jsonicjs/jsonic/go v0.1.19 h1:jEP+GSxMGKV+eTJEjuU0qRMUQ8GAIl1SRigc+mbZzVo= +github.com/jsonicjs/jsonic/go v0.1.19/go.mod h1:ObNKlCG7esWoi4AHCpdgkILvPINV8bpvkbCd4llGGUg= diff --git a/go/zon.go b/go/zon.go index 6a75663..2c834a6 100644 --- a/go/zon.go +++ b/go/zon.go @@ -49,31 +49,34 @@ const grammarText = ` # A bare .identifier emits #TX with val = identifier (the leading dot is # stripped). This token is both a valid KEY (when followed by =) and a # valid VAL (when used as an enum literal). +# +# The grammar is applied with { rule: { alt: { g: 'zon' } } } so every +# alt below is automatically tagged with the 'zon' group. { rule: val: open: [ # Empty .{} -> empty list. 
- { s: '#OS #CB' b: 2 p: list g: 'zon,list,empty' } + { s: '#OS #CB' b: 2 p: list g: 'list,empty' } ] rule: list: open: [ - { s: '#OS #CB' b: 1 g: 'zon,list,empty' } - { s: '#OS' p: elem g: 'zon,list,open' } + { s: '#OS #CB' b: 1 g: 'list,empty' } + { s: '#OS' p: elem g: 'list,open' } ] rule: list: close: [ - { s: '#CB' g: 'zon,list,close' } + { s: '#CB' g: 'list,close' } ] rule: elem: close: [ - { s: '#CA #CB' b: 1 g: 'zon,elem,trailing' } - { s: '#CA' r: elem g: 'zon,elem,next' } - { s: '#CB' b: 1 g: 'zon,elem,end' } + { s: '#CA #CB' b: 1 g: 'elem,trailing' } + { s: '#CA' r: elem g: 'elem,next' } + { s: '#CB' b: 1 g: 'elem,end' } ] rule: pair: close: [ - { s: '#CA #CB' b: 1 g: 'zon,pair,trailing' } - { s: '#CA' r: pair g: 'zon,pair,next' } - { s: '#CB' b: 1 g: 'zon,pair,end' } + { s: '#CA #CB' b: 1 g: 'pair,trailing' } + { s: '#CA' r: pair g: 'pair,next' } + { s: '#CB' b: 1 g: 'pair,end' } ] } ` @@ -91,8 +94,41 @@ func Zon(j *jsonic.Jsonic, options map[string]any) error { charAsNumber := toBool(options["charAsNumber"]) enumTag := toString(options["enumTag"]) + // If enumTag is set, wrap enum-literal values into `{ [enumTag]: name }`. + // Runs before the default `@val-bc` (via /prepend) so it takes precedence. + refs := map[jsonic.FuncRef]any{} + if enumTag != "" { + refs["@val-bc/prepend"] = jsonic.StateAction(func(r *jsonic.Rule, _ *jsonic.Context) { + if !jsonic.IsUndefined(r.Node) { + return + } + if r.Child != nil && !jsonic.IsUndefined(r.Child.Node) { + return + } + if r.OS == 0 || r.O0 == nil { + return + } + tkn := r.O0 + if tkn.Use == nil { + return + } + if _, ok := tkn.Use["zonEnum"]; !ok { + return + } + if name, ok := tkn.Val.(string); ok { + r.Node = map[string]any{enumTag: name} + } + }) + } + + gs, err := parseGrammarText(grammarText, refs) + if err != nil { + return err + } + // All jsonic option overrides live on the grammar object so the plugin + // applies them atomically alongside its rule alts. 
eqSrc := "=" - jsonicOptions := jsonic.Options{ + gs.Options = &jsonic.Options{ Rule: &jsonic.RuleOptions{ // Remove jsonic extensions (implicit maps/lists, top-level commas, // path dives). ZON uses explicit struct literals only. @@ -157,41 +193,14 @@ func Zon(j *jsonic.Jsonic, options map[string]any) error { }, }, } - - j.SetOptions(jsonicOptions) - - // If enumTag is set, wrap enum-literal values into `{ [enumTag]: name }`. - // Runs before the default `@val-bc` (via /prepend) so it takes precedence. - refs := map[jsonic.FuncRef]any{} - if enumTag != "" { - refs["@val-bc/prepend"] = jsonic.StateAction(func(r *jsonic.Rule, _ *jsonic.Context) { - if !jsonic.IsUndefined(r.Node) { - return - } - if r.Child != nil && !jsonic.IsUndefined(r.Child.Node) { - return - } - if r.OS == 0 || r.O0 == nil { - return - } - tkn := r.O0 - if tkn.Use == nil { - return - } - if _, ok := tkn.Use["zonEnum"]; !ok { - return - } - if name, ok := tkn.Val.(string); ok { - r.Node = map[string]any{enumTag: name} - } - }) - } - - gs, err := parseGrammarText(grammarText, refs) - if err != nil { - return err + // Tag every alt in this grammar with the 'zon' group so callers can + // selectively exclude zon alts via rule.exclude. + setting := &jsonic.GrammarSetting{ + Rule: &jsonic.GrammarSettingRule{ + Alt: &jsonic.GrammarSettingAlt{G: "zon"}, + }, } - if err := j.Grammar(gs); err != nil { + if err := j.Grammar(gs, setting); err != nil { return fmt.Errorf("zon: failed to apply grammar: %w", err) } @@ -204,6 +213,50 @@ var Defaults = map[string]any{ "enumTag": "", } +// ZonOptions is a typed wrapper for common plugin options. +// CharAsNumber is a pointer so nil means "omit"; an empty EnumTag likewise means "omit". +type ZonOptions struct { + // CharAsNumber, when true, parses Zig char literals ('x') as numeric + // code points. When false (default), they are parsed as one-char strings. 
+ CharAsNumber *bool + // EnumTag, when non-empty, wraps enum literals (.foo used as value) in + // map[string]any{enumTag: name} instead of producing the bare string. + EnumTag string +} + +func (o ZonOptions) toMap() map[string]any { + m := map[string]any{} + if o.CharAsNumber != nil { + m["charAsNumber"] = *o.CharAsNumber + } + if o.EnumTag != "" { + m["enumTag"] = o.EnumTag + } + return m +} + +// MakeJsonic returns a reusable Jsonic instance configured for ZON parsing. +// Use this when parsing multiple ZON strings with the same options. +func MakeJsonic(opts ...ZonOptions) *jsonic.Jsonic { + j := jsonic.Make() + var m map[string]any + if len(opts) > 0 { + m = opts[0].toMap() + } + if err := j.UseDefaults(Zon, Defaults, m); err != nil { + // Plugin registration errors are programming errors with static + // inputs; surface them via panic rather than silent misbehavior. + panic(fmt.Sprintf("zon: plugin initialisation failed: %v", err)) + } + return j +} + +// Parse parses a ZON string and returns the resulting value. Convenience +// wrapper around MakeJsonic(opts...).Parse(src). +func Parse(src string, opts ...ZonOptions) (any, error) { + return MakeJsonic(opts...).Parse(src) +} + // Custom lex matcher for `.`-prefixed tokens: // // `.{` -> #OB if followed by `.ident =`, else #OS diff --git a/package.json b/package.json index bb5b350..e9e9b1d 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ ], "devDependencies": { "@types/node": "^25.6.0", + "jsonic": "github:jsonicjs/jsonic#main", "typescript": "^5.9.3" }, "peerDependencies": { diff --git a/src/zon.ts b/src/zon.ts index 51890f0..ad242ab 100644 --- a/src/zon.ts +++ b/src/zon.ts @@ -46,31 +46,34 @@ const grammarText = ` # A bare .identifier emits #TX with val = identifier (the leading dot is # stripped). This token is both a valid KEY (when followed by =) and a # valid VAL (when used as an enum literal). 
+# +# The grammar is applied with { rule: { alt: { g: 'zon' } } } so every +# alt below is automatically tagged with the 'zon' group. { rule: val: open: [ # Empty .{} -> empty list. - { s: '#OS #CB' b: 2 p: list g: 'zon,list,empty' } + { s: '#OS #CB' b: 2 p: list g: 'list,empty' } ] rule: list: open: [ - { s: '#OS #CB' b: 1 g: 'zon,list,empty' } - { s: '#OS' p: elem g: 'zon,list,open' } + { s: '#OS #CB' b: 1 g: 'list,empty' } + { s: '#OS' p: elem g: 'list,open' } ] rule: list: close: [ - { s: '#CB' g: 'zon,list,close' } + { s: '#CB' g: 'list,close' } ] rule: elem: close: [ - { s: '#CA #CB' b: 1 g: 'zon,elem,trailing' } - { s: '#CA' r: elem g: 'zon,elem,next' } - { s: '#CB' b: 1 g: 'zon,elem,end' } + { s: '#CA #CB' b: 1 g: 'elem,trailing' } + { s: '#CA' r: elem g: 'elem,next' } + { s: '#CB' b: 1 g: 'elem,end' } ] rule: pair: close: [ - { s: '#CA #CB' b: 1 g: 'zon,pair,trailing' } - { s: '#CA' r: pair g: 'zon,pair,next' } - { s: '#CB' b: 1 g: 'zon,pair,end' } + { s: '#CA #CB' b: 1 g: 'pair,trailing' } + { s: '#CA' r: pair g: 'pair,next' } + { s: '#CB' b: 1 g: 'pair,end' } ] } ` @@ -81,8 +84,27 @@ const Zon: Plugin = (jsonic: Jsonic, options: ZonOptions) => { const charAsNumber = !!options.charAsNumber const enumTag = options.enumTag || null - // Configure jsonic for ZON syntax. - jsonic.options({ + // If enumTag is set, wrap enum-literal values (produced by zonDot) into + // `{ [enumTag]: name }` objects. The `/prepend` form runs before the + // default `@val-bc` handler sets r.node from the token. 
+ const refs: Record = { + '@val-bc/prepend': (r: Rule, _ctx: Context) => { + if (!enumTag) return + if (undefined !== r.node) return + if (undefined !== r.child.node) return + if (0 === r.os) return + const tkn: any = r.o0 + if (tkn && tkn.use && tkn.use.zonEnum) { + r.node = { [enumTag]: tkn.val } + } + }, + } + + const grammarDef = Jsonic.make()(grammarText) + grammarDef.ref = refs + // All jsonic option overrides live on the grammar object so the plugin + // applies them atomically alongside its rule alts. + grammarDef.options = { rule: { // Remove jsonic extensions (implicit maps/lists, top-level commas, // path dives). ZON uses explicit struct literals only. @@ -150,27 +172,11 @@ const Zon: Plugin = (jsonic: Jsonic, options: ZonOptions) => { zonChar: { order: 1.2e5, make: buildZonCharMatcher(charAsNumber) }, }, }, - }) - - // If enumTag is set, wrap enum-literal values (produced by zonDot) into - // `{ [enumTag]: name }` objects. The `/prepend` form runs before the - // default `@val-bc` handler sets r.node from the token. - const refs: Record = { - '@val-bc/prepend': (r: Rule, _ctx: Context) => { - if (!enumTag) return - if (undefined !== r.node) return - if (undefined !== r.child.node) return - if (0 === r.os) return - const tkn: any = r.o0 - if (tkn && tkn.use && tkn.use.zonEnum) { - r.node = { [enumTag]: tkn.val } - } - }, } - const grammarDef = Jsonic.make()(grammarText) - grammarDef.ref = refs - jsonic.grammar(grammarDef) + // Tag every alt in this grammar with the 'zon' group so callers can + // selectively exclude zon alts via `rule.exclude: 'zon'`. + jsonic.grammar(grammarDef, { rule: { alt: { g: 'zon' } } }) } // Custom lex matcher for `.`-prefixed tokens. diff --git a/zon-grammar.jsonic b/zon-grammar.jsonic index 7eb3ae9..c090b72 100644 --- a/zon-grammar.jsonic +++ b/zon-grammar.jsonic @@ -21,30 +21,33 @@ # A bare .identifier emits #TX with val = identifier (the leading dot is # stripped). 
This token is both a valid KEY (when followed by =) and a # valid VAL (when used as an enum literal). +# +# The grammar is applied with { rule: { alt: { g: 'zon' } } } so every +# alt below is automatically tagged with the 'zon' group. { rule: val: open: [ # Empty .{} -> empty list. - { s: '#OS #CB' b: 2 p: list g: 'zon,list,empty' } + { s: '#OS #CB' b: 2 p: list g: 'list,empty' } ] rule: list: open: [ - { s: '#OS #CB' b: 1 g: 'zon,list,empty' } - { s: '#OS' p: elem g: 'zon,list,open' } + { s: '#OS #CB' b: 1 g: 'list,empty' } + { s: '#OS' p: elem g: 'list,open' } ] rule: list: close: [ - { s: '#CB' g: 'zon,list,close' } + { s: '#CB' g: 'list,close' } ] rule: elem: close: [ - { s: '#CA #CB' b: 1 g: 'zon,elem,trailing' } - { s: '#CA' r: elem g: 'zon,elem,next' } - { s: '#CB' b: 1 g: 'zon,elem,end' } + { s: '#CA #CB' b: 1 g: 'elem,trailing' } + { s: '#CA' r: elem g: 'elem,next' } + { s: '#CB' b: 1 g: 'elem,end' } ] rule: pair: close: [ - { s: '#CA #CB' b: 1 g: 'zon,pair,trailing' } - { s: '#CA' r: pair g: 'zon,pair,next' } - { s: '#CB' b: 1 g: 'zon,pair,end' } + { s: '#CA #CB' b: 1 g: 'pair,trailing' } + { s: '#CA' r: pair g: 'pair,next' } + { s: '#CB' b: 1 g: 'pair,end' } ] }