diff --git a/.gitignore b/.gitignore index 853c4a4..c2ad347 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,6 @@ lib-cov *.seed *.log -*.csv -!test/fixtures/*.csv *.dat *.out *.pid @@ -19,7 +17,6 @@ node_modules .idea/ - trial test/coverage.html @@ -32,4 +29,6 @@ dist-test package-lock.json yarn.lock - +# W3C XML Conformance Test Suite (downloaded on demand via +# scripts/fetch-xml-suite.sh — not redistributed). +test/xmlconf/ diff --git a/coverage/lcov.info b/coverage/lcov.info deleted file mode 100644 index b0b03ab..0000000 --- a/coverage/lcov.info +++ /dev/null @@ -1,340 +0,0 @@ -TN: -SF:csv.js -FN:7,(anonymous_0) -FN:60,(anonymous_1) -FN:137,(anonymous_2) -FN:138,(anonymous_3) -FN:154,(anonymous_4) -FN:160,(anonymous_5) -FN:175,(anonymous_6) -FN:188,(anonymous_7) -FN:243,(anonymous_8) -FN:255,(anonymous_9) -FN:262,(anonymous_10) -FN:273,(anonymous_11) -FN:279,(anonymous_12) -FN:290,(anonymous_13) -FN:301,(anonymous_14) -FN:312,(anonymous_15) -FN:326,(anonymous_16) -FN:337,(anonymous_17) -FN:350,(anonymous_18) -FN:359,buildCsvStringMatcher -FN:360,makeCsvStringMatcher -FN:361,csvStringMatcher -FNF:22 -FNH:20 -FNDA:137,(anonymous_0) -FNDA:0,(anonymous_1) -FNDA:137,(anonymous_2) -FNDA:216,(anonymous_3) -FNDA:208,(anonymous_4) -FNDA:137,(anonymous_5) -FNDA:137,(anonymous_6) -FNDA:457,(anonymous_7) -FNDA:137,(anonymous_8) -FNDA:137,(anonymous_9) -FNDA:30,(anonymous_10) -FNDA:13,(anonymous_11) -FNDA:137,(anonymous_12) -FNDA:137,(anonymous_13) -FNDA:28,(anonymous_14) -FNDA:63,(anonymous_15) -FNDA:52,(anonymous_16) -FNDA:0,(anonymous_17) -FNDA:71,(anonymous_18) -FNDA:132,buildCsvStringMatcher -FNDA:396,makeCsvStringMatcher -FNDA:1488,csvStringMatcher -DA:3,1 -DA:4,1 -DA:5,1 -DA:7,1 -DA:10,137 -DA:11,137 -DA:12,137 -DA:14,137 -DA:15,137 -DA:16,137 -DA:17,137 -DA:18,137 -DA:19,137 -DA:21,137 -DA:22,134 -DA:24,132 -DA:32,134 -DA:38,3 -DA:40,0 -DA:48,3 -DA:49,3 -DA:50,3 -DA:51,3 -DA:52,3 -DA:57,137 -DA:58,1 -DA:59,1 -DA:60,1 -DA:61,0 -DA:62,0 -DA:65,0 -DA:69,137 
-DA:70,137 -DA:72,134 -DA:81,137 -DA:82,8 -DA:85,137 -DA:87,137 -DA:134,137 -DA:135,137 -DA:137,137 -DA:138,137 -DA:139,216 -DA:140,216 -DA:141,216 -DA:155,208 -DA:157,137 -DA:160,137 -DA:161,137 -DA:175,137 -DA:176,137 -DA:190,457 -DA:192,457 -DA:193,107 -DA:197,350 -DA:199,350 -DA:200,137 -DA:201,137 -DA:202,137 -DA:203,135 -DA:204,2 -DA:205,2 -DA:210,133 -DA:211,133 -DA:212,234 -DA:215,133 -DA:218,135 -DA:219,12 -DA:220,12 -DA:223,135 -DA:227,213 -DA:228,537 -DA:232,348 -DA:233,3 -DA:236,345 -DA:239,455 -DA:241,137 -DA:243,137 -DA:244,137 -DA:255,137 -DA:256,137 -DA:263,30 -DA:264,30 -DA:273,13 -DA:279,137 -DA:280,137 -DA:290,137 -DA:291,137 -DA:303,28 -DA:304,28 -DA:314,63 -DA:315,63 -DA:328,52 -DA:329,52 -DA:338,0 -DA:339,0 -DA:340,0 -DA:351,71 -DA:355,1 -DA:360,132 -DA:361,396 -DA:362,1488 -DA:363,1488 -DA:364,1488 -DA:365,1488 -DA:366,1488 -DA:367,102 -DA:368,102 -DA:369,102 -DA:370,102 -DA:371,102 -DA:372,102 -DA:374,102 -DA:375,291 -DA:376,291 -DA:378,291 -DA:379,148 -DA:380,148 -DA:381,148 -DA:382,47 -DA:385,101 -DA:390,143 -DA:392,143 -DA:393,143 -DA:394,143 -DA:395,263 -DA:396,263 -DA:398,143 -DA:399,143 -DA:400,28 -DA:401,22 -DA:403,28 -DA:404,28 -DA:406,115 -DA:407,0 -DA:408,0 -DA:409,0 -DA:412,115 -DA:413,115 -DA:417,102 -DA:418,1 -DA:419,1 -DA:421,101 -DA:424,101 -DA:425,101 -DA:426,101 -DA:427,101 -DA:433,1 -LF:148 -LH:138 -BRDA:18,0,0,0 -BRDA:18,0,1,137 -BRDA:18,1,0,137 -BRDA:18,1,1,137 -BRDA:21,2,0,134 -BRDA:21,2,1,3 -BRDA:22,3,0,132 -BRDA:22,3,1,2 -BRDA:38,4,0,0 -BRDA:38,4,1,3 -BRDA:48,5,0,3 -BRDA:48,5,1,0 -BRDA:49,6,0,3 -BRDA:49,6,1,0 -BRDA:50,7,0,3 -BRDA:50,7,1,0 -BRDA:51,8,0,3 -BRDA:51,8,1,0 -BRDA:57,9,0,1 -BRDA:57,9,1,136 -BRDA:70,10,0,134 -BRDA:70,10,1,3 -BRDA:81,11,0,8 -BRDA:81,11,1,129 -BRDA:98,12,0,134 -BRDA:98,12,1,3 -BRDA:117,13,0,136 -BRDA:117,13,1,1 -BRDA:120,14,0,136 -BRDA:120,14,1,1 -BRDA:140,15,0,216 -BRDA:140,15,1,1 -BRDA:148,16,0,129 -BRDA:148,16,1,8 -BRDA:155,17,0,208 -BRDA:155,17,1,1 -BRDA:186,18,0,8 -BRDA:186,18,1,129 
-BRDA:190,19,0,457 -BRDA:190,19,1,313 -BRDA:192,20,0,107 -BRDA:192,20,1,350 -BRDA:192,21,0,457 -BRDA:192,21,1,201 -BRDA:193,22,0,0 -BRDA:193,22,1,107 -BRDA:197,23,0,350 -BRDA:197,23,1,0 -BRDA:199,24,0,137 -BRDA:199,24,1,213 -BRDA:202,25,0,135 -BRDA:202,25,1,2 -BRDA:203,26,0,2 -BRDA:203,26,1,133 -BRDA:204,27,0,2 -BRDA:204,27,1,0 -BRDA:205,28,0,1 -BRDA:205,28,1,1 -BRDA:213,29,0,6 -BRDA:213,29,1,228 -BRDA:221,30,0,0 -BRDA:221,30,1,12 -BRDA:229,31,0,0 -BRDA:229,31,1,537 -BRDA:232,32,0,3 -BRDA:232,32,1,345 -BRDA:303,33,0,28 -BRDA:303,33,1,0 -BRDA:304,34,0,28 -BRDA:304,34,1,0 -BRDA:314,35,0,41 -BRDA:314,35,1,22 -BRDA:316,36,0,41 -BRDA:316,36,1,22 -BRDA:317,37,0,45 -BRDA:317,37,1,18 -BRDA:317,38,0,63 -BRDA:317,38,1,41 -BRDA:328,39,0,2 -BRDA:328,39,1,50 -BRDA:330,40,0,2 -BRDA:330,40,1,50 -BRDA:330,41,0,23 -BRDA:330,41,1,29 -BRDA:338,42,0,0 -BRDA:338,42,1,0 -BRDA:339,43,0,0 -BRDA:339,43,1,0 -BRDA:341,44,0,0 -BRDA:341,44,1,0 -BRDA:341,45,0,0 -BRDA:341,45,1,0 -BRDA:344,46,0,134 -BRDA:344,46,1,3 -BRDA:351,47,0,71 -BRDA:351,47,1,0 -BRDA:366,48,0,102 -BRDA:366,48,1,1386 -BRDA:378,49,0,148 -BRDA:378,49,1,143 -BRDA:381,50,0,47 -BRDA:381,50,1,101 -BRDA:394,51,0,406 -BRDA:394,51,1,405 -BRDA:394,51,2,377 -BRDA:399,52,0,28 -BRDA:399,52,1,115 -BRDA:400,53,0,22 -BRDA:400,53,1,6 -BRDA:406,54,0,0 -BRDA:406,54,1,115 -BRDA:417,55,0,1 -BRDA:417,55,1,101 -BRDA:417,56,0,102 -BRDA:417,56,1,101 -BRF:115 -BRH:92 -end_of_record -TN: -SF:test/csv-fixtures.js -FN:381,(anonymous_0) -FN:400,(anonymous_1) -FN:647,(anonymous_2) -FN:661,(anonymous_3) -FNF:4 -FNH:4 -FNDA:1,(anonymous_0) -FNDA:1,(anonymous_1) -FNDA:1,(anonymous_2) -FNDA:1,(anonymous_3) -DA:4,1 -DA:5,1 -DA:7,1 -DA:382,1 -DA:401,1 -DA:648,1 -DA:662,1 -LF:7 -LH:7 -BRF:0 -BRH:0 -end_of_record diff --git a/csv-grammar.jsonic b/csv-grammar.jsonic deleted file mode 100644 index b7c599b..0000000 --- a/csv-grammar.jsonic +++ /dev/null @@ -1,52 +0,0 @@ -# CSV Grammar Definition -# Parsed by a standard Jsonic instance and passed to jsonic.grammar() 
-# Function references (@ prefixed) are resolved against the refs map -# -# Token naming: -# #LN - line ending (removed from per-instance IGNORE set) -# #SP - whitespace (removed from per-instance IGNORE set in strict mode) -# #CA - comma / field separator -# #ZZ - end of input -# #VAL - token set: text, string, number, value literals -# -# Rules csv, newline, record, text are fully defined here. -# Rules list, elem, val are modified in code (strict mode defines from scratch; -# non-strict prepends to existing defaults to preserve JSON parsing). - -{ - rule: csv: open: [ - { s: '#ZZ' } - { s: '#LN' p: newline c: '@not-record-empty' } - { p: record } - ] - - rule: newline: open: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - rule: newline: close: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - - rule: record: open: [ - { p: list } - ] - rule: record: close: [ - { s: '#ZZ' } - { s: '#LN #ZZ' b: 1 } - { s: '#LN' r: '@record-close-next' } - ] - - rule: text: open: [ - { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } - { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } - { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } - { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } - {} - ] -} diff --git a/doc/csv-go.md b/doc/csv-go.md deleted file mode 100644 index 9c7322f..0000000 --- a/doc/csv-go.md +++ /dev/null @@ -1,264 +0,0 @@ -# CSV plugin for Jsonic (Go) - -A Jsonic syntax plugin that parses CSV text into Go slices of maps -or slices, with support for headers, quoted fields, custom -delimiters, streaming, and strict/non-strict modes. 
- -```bash -go get github.com/jsonicjs/csv/go@latest -``` - - -## Tutorials - -### Parse a basic CSV file - -Parse CSV text with a header row into a slice of ordered maps: - -```go -package main - -import ( - "fmt" - csv "github.com/jsonicjs/csv/go" -) - -func main() { - result, _ := csv.Parse("name,age\nAlice,30\nBob,25") - fmt.Println(result) - // [{name:Alice age:30} {name:Bob age:25}] -} -``` - -### Parse CSV without headers - -Return rows as slices instead of maps, with no header row: - -```go -result, _ := csv.Parse("a,b,c\n1,2,3", csv.CsvOptions{ - Header: boolPtr(false), - Object: boolPtr(false), -}) -// [[a b c] [1 2 3]] -``` - -### Parse CSV with quoted fields - -Double-quoted fields handle commas, newlines, and escaped quotes: - -```go -result, _ := csv.Parse(`name,bio -Alice,"Likes ""cats"" and dogs" -Bob,"Line1 -Line2"`) -// [{name:Alice bio:Likes "cats" and dogs} {name:Bob bio:Line1\nLine2}] -``` - - -## How-to guides - -### Use a custom field delimiter - -Set `Field.Separation` to use a delimiter other than comma: - -```go -result, _ := csv.Parse("name\tage\nAlice\t30", csv.CsvOptions{ - Field: &csv.FieldOptions{Separation: "\t"}, -}) -// [{name:Alice age:30}] -``` - -### Enable number and value parsing - -By default in strict mode, all values are strings. 
Enable `Number` -and `Value` to parse numeric and boolean values: - -```go -result, _ := csv.Parse("a,b,c\n1,true,null", csv.CsvOptions{ - Number: boolPtr(true), - Value: boolPtr(true), -}) -// [{a:1 b:true c:}] -``` - -### Trim whitespace from fields - -Enable `Trim` to remove leading and trailing whitespace from field -values: - -```go -result, _ := csv.Parse("a , b \n 1 , 2 ", csv.CsvOptions{ - Trim: boolPtr(true), -}) -// [{a:1 b:2}] -``` - -### Stream records as they are parsed - -Use the `Stream` callback to receive records one at a time: - -```go -var records []any - -result, _ := csv.Parse("a,b\n1,2\n3,4", csv.CsvOptions{ - Stream: func(what string, record any) { - if what == "record" { - records = append(records, record) - } - }, -}) -// result is [] (empty, records were streamed) -// records contains [{a:1 b:2}, {a:3 b:4}] -``` - -### Provide explicit field names - -Set `Field.Names` when the CSV has no header row but you want -map output with named fields: - -```go -result, _ := csv.Parse("1,2,3\n4,5,6", csv.CsvOptions{ - Header: boolPtr(false), - Field: &csv.FieldOptions{Names: []string{"x", "y", "z"}}, -}) -// [{x:1 y:2 z:3} {x:4 y:5 z:6}] -``` - -### Enforce exact field counts - -Set `Field.Exact` to error when a row has more or fewer fields -than the header: - -```go -_, err := csv.Parse("a,b\n1,2,3", csv.CsvOptions{ - Field: &csv.FieldOptions{Exact: true}, -}) -// err: unexpected extra field value -``` - -### Create a reusable parser - -Use `MakeJsonic` to create a configured Jsonic instance you can -call repeatedly: - -```go -j := csv.MakeJsonic(csv.CsvOptions{ - Number: boolPtr(true), -}) - -r1, _ := j.Parse("a,b\n1,2") -r2, _ := j.Parse("x,y\n3,4") -``` - -### Enable comment lines - -Enable `Comment` to skip lines starting with `#`: - -```go -result, _ := csv.Parse("a,b\n# skip\n1,2", csv.CsvOptions{ - Comment: boolPtr(true), -}) -// [{a:1 b:2}] -``` - - -## Explanation - -### Strict vs non-strict mode - -In **strict mode** (default), the CSV 
plugin disables Jsonic's -built-in JSON parsing. All field values are treated as raw strings -unless `Number` or `Value` options are enabled. This matches the -behaviour of standard CSV parsers. - -In **non-strict mode** (`Strict: boolPtr(false)`), the plugin -preserves Jsonic's ability to parse JSON values. Fields can contain -objects, arrays, booleans, numbers, and quoted strings using Jsonic -syntax. Non-strict mode enables `Trim`, `Comment`, and `Number` by -default. - -### How quoted fields work - -The plugin includes a custom CSV string matcher that handles the -RFC 4180 double-quote escaping convention: - -- A field wrapped in double quotes can contain commas, newlines, - and quotes. -- A literal quote inside a quoted field is represented as `""`. -- For example: `"a""b"` parses to `a"b`. - - -## Reference - -### `Parse` (Function) - -```go -func Parse(src string, opts ...CsvOptions) ([]any, error) -``` - -Parse CSV text with the given options. Returns a slice of records. - -### `MakeJsonic` (Function) - -```go -func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic -``` - -Create a reusable Jsonic instance configured for CSV parsing. - -### `CsvOptions` - -```go -type CsvOptions struct { - Object *bool // Return maps (true) or slices (false). Default: true - Header *bool // First row is header. Default: true - Trim *bool // Trim whitespace. Default: nil (false strict, true non-strict) - Comment *bool // Enable # comments. Default: nil (false strict, true non-strict) - Number *bool // Parse numbers. Default: nil (false strict, true non-strict) - Value *bool // Parse true/false/null. Default: nil - Strict *bool // Strict CSV mode. Default: true - Field *FieldOptions - Record *RecordOptions - String *StringOptions - Stream StreamFunc -} -``` - -### `FieldOptions` - -```go -type FieldOptions struct { - Separation string // Field separator. Default: "," - NonamePrefix string // Prefix for unnamed extra fields. 
Default: "field~" - Empty string // Value for empty fields. Default: "" - Names []string // Explicit field names. - Exact bool // Error on field count mismatch. Default: false -} -``` - -### `RecordOptions` - -```go -type RecordOptions struct { - Separators string // Custom record separator characters. - Empty bool // Preserve empty lines as records. Default: false -} -``` - -### `StringOptions` - -```go -type StringOptions struct { - Quote string // Quote character. Default: `"` - Csv *bool // Force CSV string mode (nil=auto). -} -``` - -### `StreamFunc` - -```go -type StreamFunc func(what string, record any) -``` - -Callback for streaming CSV parsing. Called with `"start"`, `"record"`, -`"end"`, or `"error"`. diff --git a/doc/csv-ts.md b/doc/csv-ts.md deleted file mode 100644 index 2e8f9b5..0000000 --- a/doc/csv-ts.md +++ /dev/null @@ -1,286 +0,0 @@ -# CSV plugin for Jsonic (TypeScript) - -A Jsonic syntax plugin that parses CSV text into JavaScript arrays -of objects or arrays, with support for headers, quoted fields, -custom delimiters, streaming, and strict/non-strict modes. - -```bash -npm install @jsonic/csv -``` - -Requires `jsonic` >= 2 as a peer dependency. 
- - -## Tutorials - -### Parse a basic CSV file - -Parse CSV text with a header row into an array of objects: - -```typescript -import { Jsonic } from 'jsonic' -import { Csv } from '@jsonic/csv' - -const j = Jsonic.make().use(Csv) - -j("name,age\nAlice,30\nBob,25") -// [{ name: 'Alice', age: '30' }, { name: 'Bob', age: '25' }] -``` - -### Parse CSV without headers - -Return rows as arrays instead of objects, with no header row: - -```typescript -import { Jsonic } from 'jsonic' -import { Csv } from '@jsonic/csv' - -const j = Jsonic.make().use(Csv, { header: false, object: false }) - -j("a,b,c\n1,2,3") -// [['a', 'b', 'c'], ['1', '2', '3']] -``` - -### Parse CSV with quoted fields - -Double-quoted fields handle commas, newlines, and escaped quotes: - -```typescript -import { Jsonic } from 'jsonic' -import { Csv } from '@jsonic/csv' - -const j = Jsonic.make().use(Csv) - -j('name,bio\nAlice,"Likes ""cats"" and dogs"\nBob,"Line1\nLine2"') -// [ -// { name: 'Alice', bio: 'Likes "cats" and dogs' }, -// { name: 'Bob', bio: 'Line1\nLine2' } -// ] -``` - - -## How-to guides - -### Use a custom field delimiter - -Set `field.separation` to use a delimiter other than comma: - -```typescript -const j = Jsonic.make().use(Csv, { - field: { separation: '\t' } -}) - -j("name\tage\nAlice\t30") -// [{ name: 'Alice', age: '30' }] -``` - -### Enable number and value parsing - -By default in strict mode, all values are strings. 
Enable `number` -and `value` to parse numeric and boolean values: - -```typescript -const j = Jsonic.make().use(Csv, { - number: true, - value: true, -}) - -j("a,b,c\n1,true,null") -// [{ a: 1, b: true, c: null }] -``` - -### Trim whitespace from fields - -Enable `trim` to remove leading and trailing whitespace from field -values: - -```typescript -const j = Jsonic.make().use(Csv, { trim: true }) - -j("a , b \n 1 , 2 ") -// [{ a: '1', b: '2' }] -``` - -### Stream records as they are parsed - -Use the `stream` callback to receive records one at a time without -storing them all in memory: - -```typescript -const records: any[] = [] - -const j = Jsonic.make().use(Csv, { - stream: (what, record) => { - if (what === 'record') records.push(record) - }, -}) - -j("a,b\n1,2\n3,4") -// returns [] (empty, records were streamed) -// records === [{ a: '1', b: '2' }, { a: '3', b: '4' }] -``` - -### Provide explicit field names - -Set `field.names` when the CSV has no header row but you want -object output with named fields: - -```typescript -const j = Jsonic.make().use(Csv, { - header: false, - field: { names: ['x', 'y', 'z'] }, -}) - -j("1,2,3\n4,5,6") -// [{ x: '1', y: '2', z: '3' }, { x: '4', y: '5', z: '6' }] -``` - -### Enforce exact field counts - -Set `field.exact` to error when a row has more or fewer fields -than the header: - -```typescript -const j = Jsonic.make().use(Csv, { - field: { exact: true }, -}) - -// j("a,b\n1,2,3") // throws: unexpected extra field value -// j("a,b\n1") // throws: missing field -``` - -### Use non-strict mode for embedded JSON - -Disable `strict` to allow Jsonic syntax inside CSV fields, -including JSON objects, arrays, and expressions: - -```typescript -const j = Jsonic.make().use(Csv, { strict: false }) - -j("a,b\ntrue,[1,2]") -// [{ a: true, b: [1, 2] }] -``` - -### Enable comment lines - -Enable `comment` to skip lines starting with `#`: - -```typescript -const j = Jsonic.make().use(Csv, { comment: true }) - -j("a,b\n# skip this\n1,2") 
-// [{ a: '1', b: '2' }] -``` - -### Preserve empty records - -By default, blank lines are skipped. Set `record.empty` to -preserve them as empty-field records: - -```typescript -const j = Jsonic.make().use(Csv, { record: { empty: true } }) - -j("a\n1\n\n2") -// [{ a: '1' }, { a: '' }, { a: '2' }] -``` - - -## Explanation - -### Strict vs non-strict mode - -In **strict mode** (default), the CSV plugin disables Jsonic's -built-in JSON parsing. All field values are treated as raw strings -unless `number` or `value` options are enabled. This matches the -behaviour of standard CSV parsers. - -In **non-strict mode** (`strict: false`), the plugin preserves -Jsonic's ability to parse JSON values. Fields can contain objects -(`{x:1}`), arrays (`[1,2]`), booleans, numbers, and quoted strings -using Jsonic syntax. Non-strict mode enables `trim`, `comment`, and -`number` by default. - -### How quoted fields work - -The plugin includes a custom CSV string matcher that handles the -RFC 4180 double-quote escaping convention: - -- A field wrapped in double quotes can contain commas, newlines, - and quotes. -- A literal quote inside a quoted field is represented as `""`. -- For example: `"a""b"` parses to `a"b`. - - -## Reference - -### `Csv` (Plugin) - -The plugin function. Register with `Jsonic.make().use(Csv, options)`. - -### `CsvOptions` - -```typescript -type CsvOptions = { - // Trim surrounding whitespace. Default: null (false in strict, true in non-strict) - trim: boolean | null - - // Enable # line comments. Default: null (false in strict, true in non-strict) - comment: boolean | null - - // Parse numeric values. Default: null (false in strict, true in non-strict) - number: boolean | null - - // Parse value keywords (true/false/null). Default: null (false in strict, false in non-strict) - value: boolean | null - - // First row is a header row. Default: true - header: boolean - - // Return records as objects (true) or arrays (false). 
Default: true - object: boolean - - // Stream callback. Default: null - stream: null | ((what: string, record?: Record | Error) => void) - - // Strict CSV mode (disables Jsonic syntax). Default: true - strict: boolean - - field: { - // Field separator string. Default: null (uses comma) - separation: null | string - - // Prefix for unnamed extra fields. Default: 'field~' - nonameprefix: string - - // Value for empty fields. Default: '' - empty: any - - // Explicit field names (overrides header). Default: undefined - names: undefined | string[] - - // Error on field count mismatch. Default: false - exact: boolean - } - - record: { - // Custom record separator characters. Default: null - separators: null | string - - // Preserve empty lines as records. Default: false - empty: boolean - } - - string: { - // Quote character. Default: '"' - quote: string - - // Force CSV string mode (null=auto). Default: null - csv: null | boolean - } -} -``` - -### `buildCsvStringMatcher` (Function) - -Exported for advanced use. Creates the custom CSV double-quote -string matcher used internally by the plugin. diff --git a/embed-grammar.js b/embed-grammar.js index 499715e..92b91f2 100644 --- a/embed-grammar.js +++ b/embed-grammar.js @@ -1,17 +1,16 @@ #!/usr/bin/env node -// Embed csv-grammar.jsonic into TypeScript and Go source files. +// Embed xml-grammar.jsonic into TypeScript source files. 
// Run via: npm run embed (or: node embed-grammar.js) const fs = require('fs') const path = require('path') -const GRAMMAR_FILE = path.join(__dirname, 'csv-grammar.jsonic') -const TS_FILE = path.join(__dirname, 'src', 'csv.ts') -const GO_FILE = path.join(__dirname, 'go', 'csv.go') +const GRAMMAR_FILE = path.join(__dirname, 'xml-grammar.jsonic') +const TS_FILE = path.join(__dirname, 'src', 'xml.ts') -const BEGIN = '// --- BEGIN EMBEDDED csv-grammar.jsonic ---' -const END = '// --- END EMBEDDED csv-grammar.jsonic ---' +const BEGIN = '// --- BEGIN EMBEDDED xml-grammar.jsonic ---' +const END = '// --- END EMBEDDED xml-grammar.jsonic ---' const grammar = fs.readFileSync(GRAMMAR_FILE, 'utf8') @@ -43,32 +42,4 @@ function embedTS() { console.log('Embedded grammar into', TS_FILE) } -// --- Go embedding --- -function embedGo() { - let src = fs.readFileSync(GO_FILE, 'utf8') - const startIdx = src.indexOf(BEGIN) - const endIdx = src.indexOf(END) - if (startIdx === -1 || endIdx === -1) { - console.error('Go markers not found in', GO_FILE) - process.exit(1) - } - - if (grammar.includes('`')) { - console.error('Grammar contains backticks, incompatible with Go raw strings') - process.exit(1) - } - - const replacement = - BEGIN + - '\nconst grammarText = `\n' + - grammar + - '`\n' + - END - - src = src.substring(0, startIdx) + replacement + src.substring(endIdx + END.length) - fs.writeFileSync(GO_FILE, src) - console.log('Embedded grammar into', GO_FILE) -} - embedTS() -embedGo() diff --git a/go/csv.go b/go/csv.go deleted file mode 100644 index 05686ba..0000000 --- a/go/csv.go +++ /dev/null @@ -1,780 +0,0 @@ -/* Copyright (c) 2021-2025 Richard Rodger, MIT License */ - -package csv - -import ( - "fmt" - "strconv" - "strings" - - jsonic "github.com/jsonicjs/jsonic/go" -) - -const Version = "0.1.3" - -// --- BEGIN EMBEDDED csv-grammar.jsonic --- -const grammarText = ` -# CSV Grammar Definition -# Parsed by a standard Jsonic instance and passed to jsonic.grammar() -# Function 
references (@ prefixed) are resolved against the refs map -# -# Token naming: -# #LN - line ending (removed from per-instance IGNORE set) -# #SP - whitespace (removed from per-instance IGNORE set in strict mode) -# #CA - comma / field separator -# #ZZ - end of input -# #VAL - token set: text, string, number, value literals -# -# Rules csv, newline, record, text are fully defined here. -# Rules list, elem, val are modified in code (strict mode defines from scratch; -# non-strict prepends to existing defaults to preserve JSON parsing). - -{ - rule: csv: open: [ - { s: '#ZZ' } - { s: '#LN' p: newline c: '@not-record-empty' } - { p: record } - ] - - rule: newline: open: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - rule: newline: close: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - - rule: record: open: [ - { p: list } - ] - rule: record: close: [ - { s: '#ZZ' } - { s: '#LN #ZZ' b: 1 } - { s: '#LN' r: '@record-close-next' } - ] - - rule: text: open: [ - { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } - { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } - { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } - { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } - {} - ] -} -` -// --- END EMBEDDED csv-grammar.jsonic --- - -// Csv is a jsonic plugin that adds CSV parsing support. -// Options are pre-merged with Defaults by jsonic.UseDefaults. -func Csv(j *jsonic.Jsonic, options map[string]any) error { - // Guard against re-invocation: Use() re-runs plugins on SetOptions calls. 
- if j.Decoration("csv-init") != nil { - return nil - } - j.Decorate("csv-init", true) - - strict := toBool(options["strict"]) - objres := toBool(options["object"]) - header := toBool(options["header"]) - - trim := toBool(options["trim"]) - comment := toBool(options["comment"]) - opt_number := toBool(options["number"]) - opt_value := toBool(options["value"]) - - fieldOpts, _ := options["field"].(map[string]any) - recordOpts, _ := options["record"].(map[string]any) - stringOpts, _ := options["string"].(map[string]any) - - record_empty := toBool(recordOpts["empty"]) - - stream, _ := options["stream"].(func(string, any)) - - // In strict mode, Jsonic field content is not parsed. - if strict { - if stringOpts["csv"] != false { - j.SetOptions(jsonic.Options{Lex: &jsonic.LexOptions{ - Match: map[string]*jsonic.MatchSpec{ - "stringcsv": {Order: 1e5, Make: buildCsvStringMatcher(stringOpts)}, - }, - }}) - } - j.SetOptions(jsonic.Options{Rule: &jsonic.RuleOptions{Exclude: "jsonic,imp"}}) - } else { - // Fields may contain Jsonic content. - if stringOpts["csv"] == true { - j.SetOptions(jsonic.Options{Lex: &jsonic.LexOptions{ - Match: map[string]*jsonic.MatchSpec{ - "stringcsv": {Order: 1e5, Make: buildCsvStringMatcher(stringOpts)}, - }, - }}) - } - if options["trim"] == nil { - trim = true - } - if options["comment"] == nil { - comment = true - } - if options["number"] == nil { - opt_number = true - } - if options["value"] == nil { - opt_value = true - } - j.SetOptions(jsonic.Options{Rule: &jsonic.RuleOptions{Exclude: "imp"}}) - } - - fieldSep := toString(fieldOpts["separation"]) - recordSep := toString(recordOpts["separators"]) - - // Jsonic option overrides (matching TS jsonicOptions). 
- jsonicOptions := jsonic.Options{ - Rule: &jsonic.RuleOptions{Start: "csv"}, - Number: &jsonic.NumberOptions{ - Lex: boolPtr(opt_number), - }, - Value: &jsonic.ValueOptions{ - Lex: boolPtr(opt_value), - }, - Comment: &jsonic.CommentOptions{ - Lex: boolPtr(comment), - }, - Lex: &jsonic.LexOptions{ - EmptyResult: []any{}, - }, - Line: &jsonic.LineOptions{ - Single: boolPtr(record_empty), - }, - Error: map[string]string{ - "csv_extra_field": "unexpected extra field value: $fsrc", - "csv_missing_field": "missing field", - }, - Hint: map[string]string{ - "csv_extra_field": "Row $row has too many fields (the first of which is: $fsrc). Only $len\nfields per row are expected.", - "csv_missing_field": "Row $row has too few fields. $len fields per row are expected.", - }, - } - - if strict { - csvStringOpt := stringOpts["csv"] - if csvStringOpt == nil || csvStringOpt == true { - jsonicOptions.String = &jsonic.StringOptions{ - Lex: boolPtr(false), - Chars: "", - } - } - } - - if recordSep != "" { - jsonicOptions.Line.Chars = recordSep - jsonicOptions.Line.RowChars = recordSep - } - - // Fixed-token overrides: in strict mode disable JSON structural tokens - // and the ':' key separator; swap the field separator when configured. - if strict || fieldSep != "" { - jsonicOptions.Fixed = &jsonic.FixedOptions{Token: map[string]*string{}} - if strict { - jsonicOptions.Fixed.Token["#OB"] = nil - jsonicOptions.Fixed.Token["#CB"] = nil - jsonicOptions.Fixed.Token["#OS"] = nil - jsonicOptions.Fixed.Token["#CS"] = nil - jsonicOptions.Fixed.Token["#CL"] = nil - } - if fieldSep != "" { - sep := fieldSep - jsonicOptions.Fixed.Token["#CA"] = &sep - } - } - - // IGNORE set: drop #LN so row breaks are significant; in strict mode - // also drop #SP so whitespace inside fields is preserved. 
- if strict { - jsonicOptions.TokenSet = map[string][]string{"IGNORE": {"#CM"}} - } else { - jsonicOptions.TokenSet = map[string][]string{"IGNORE": {"#SP", "#CM"}} - } - - j.SetOptions(jsonicOptions) - - // Named function references for declarative grammar definition. - emptyField := toString(fieldOpts["empty"]) - nonameprefix := toString(fieldOpts["nonameprefix"]) - fieldExact := toBool(fieldOpts["exact"]) - var fieldNames []string - if names, ok := fieldOpts["names"].([]string); ok { - fieldNames = names - } else if names, ok := fieldOpts["names"].([]any); ok { - for _, n := range names { - if s, ok := n.(string); ok { - fieldNames = append(fieldNames, s) - } - } - } - - refs := map[jsonic.FuncRef]any{ - - "@csv-bo": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if ctx.Meta == nil { - ctx.Meta = make(map[string]any) - } - ctx.Meta["recordI"] = 0 - if stream != nil { - stream("start", nil) - } - r.Node = make([]any, 0) - }), - - "@csv-ac": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if stream != nil { - stream("end", nil) - } - }), - - "@record-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - recordI, _ := ctx.Meta["recordI"].(int) - var fields []string - if fs, ok := ctx.Meta["fields"].([]string); ok { - fields = fs - } - if fields == nil { - fields = fieldNames - } - - if recordI == 0 && header { - if childArr, ok := r.Child.Node.([]any); ok { - names := make([]string, len(childArr)) - for i, v := range childArr { - names[i], _ = v.(string) - } - ctx.Meta["fields"] = names - } else { - ctx.Meta["fields"] = []string{} - } - } else { - record, _ := r.Child.Node.([]any) - if record == nil { - record = []any{} - } - - if objres { - obj := make(map[string]any) - var keys []string - i := 0 - - if fields != nil { - if fieldExact && len(record) != len(fields) { - errCode := "csv_missing_field" - if len(record) > len(fields) { - errCode = "csv_extra_field" - } - ctx.ParseErr = &jsonic.Token{ - Name: "#BD", Tin: 
jsonic.TinBD, - Why: errCode, Src: errCode, - } - return - } - - for fI := 0; fI < len(fields); fI++ { - var val any = emptyField - if fI < len(record) && !jsonic.IsUndefined(record[fI]) { - val = record[fI] - } - obj[fields[fI]] = val - keys = append(keys, fields[fI]) - } - i = len(fields) - } - - for ; i < len(record); i++ { - fname := nonameprefix + strconv.Itoa(i) - val := record[i] - if jsonic.IsUndefined(val) { - val = emptyField - } - obj[fname] = val - keys = append(keys, fname) - } - - out := orderedMap{keys: keys, m: obj} - if stream != nil { - stream("record", out) - } else if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, out) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - } else { - for i := range record { - if jsonic.IsUndefined(record[i]) { - record[i] = emptyField - } - } - if stream != nil { - stream("record", record) - } else if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, record) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - } - } - ctx.Meta["recordI"] = recordI + 1 - }), - - "@text-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if !jsonic.IsUndefined(r.Child.Node) { - r.Parent.Node = r.Child.Node - } else { - r.Parent.Node = r.Node - } - }), - - "@text-follows": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - result := prev + tokenStr(r.O0) - r.Node = result - if r.N["text"] == 1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }), - - "@text-leads": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - sp := "" - if r.N["text"] >= 2 || !trim { - sp = r.O0.Src - } - result := prev + sp + r.O1.Src - r.Node = result - if r.N["text"] == 
1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }), - - "@text-end": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - sp := "" - if !trim { - sp = r.O0.Src - } - result := prev + sp - r.Node = result - if r.N["text"] == 1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }), - - "@text-space": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if strict { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - sp := "" - if !trim { - sp = r.O0.Src - } - result := prev + sp - r.Node = result - if r.N["text"] == 1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - } - }), - - "@not-record-empty": jsonic.AltCond(func(r *jsonic.Rule, ctx *jsonic.Context) bool { - return !record_empty - }), - - "@record-close-next": func(r *jsonic.Rule, ctx *jsonic.Context) string { - if record_empty { - return "record" - } - return "newline" - }, - - "@text-space-push": func(r *jsonic.Rule, ctx *jsonic.Context) string { - if strict { - return "" - } - return "val" - }, - } - - // Parse embedded grammar definition using a separate standard Jsonic instance. - gs, err := parseGrammarText(grammarText, refs) - if err != nil { - return err - } - if err := j.Grammar(gs); err != nil { - return fmt.Errorf("failed to apply csv grammar: %w", err) - } - - // Rules list, elem, val are modified in code rather than the grammar file, - // because in non-strict mode the default jsonic alternatives must be preserved - // to support embedded JSON values like [1,2] and {x:1}. 
- - LN := j.Token("#LN") - CA := j.Token("#CA") - SP := j.Token("#SP") - ZZ := j.Token("#ZZ") - VAL := j.TokenSet("VAL") - - j.Rule("list", func(rs *jsonic.RuleSpec) { - rs.Clear() - rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { - r.Node = make([]any, 0) - }) - rs.Open = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{LN}}, B: 1}, - {P: "elem"}, - } - rs.Close = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{LN}}, B: 1}, - {S: [][]jsonic.Tin{{ZZ}}}, - } - }) - - j.Rule("elem", func(rs *jsonic.RuleSpec) { - rs.Clear() - rs.Open = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{CA}}, B: 1, - A: jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, emptyField) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - r.U["done"] = true - })}, - {P: "val"}, - } - rs.Close = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{CA}, {LN, ZZ}}, B: 1, - A: jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, emptyField) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - })}, - {S: [][]jsonic.Tin{{CA}}, R: "elem"}, - {S: [][]jsonic.Tin{{LN}}, B: 1}, - {S: [][]jsonic.Tin{{ZZ}}}, - } - rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { - done, _ := r.U["done"].(bool) - if !done && !jsonic.IsUndefined(r.Child.Node) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, r.Child.Node) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - } - }) - }) - - j.Rule("val", func(rs *jsonic.RuleSpec) { - rs.Clear() - rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { - r.Node = jsonic.Undefined - }) - rs.Open = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{VAL, {SP}}, B: 2, P: "text"}, - {S: [][]jsonic.Tin{{SP}}, B: 1, P: "text"}, - {S: [][]jsonic.Tin{VAL}}, - {S: [][]jsonic.Tin{{LN}}, B: 1}, - } - rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { - if 
jsonic.IsUndefined(r.Node) { - if jsonic.IsUndefined(r.Child.Node) { - if r.OS == 0 { - r.Node = jsonic.Undefined - } else { - r.Node = r.O0.ResolveVal() - } - } else { - r.Node = r.Child.Node - } - } - }) - }) - - return nil -} - -// Custom CSV String matcher factory. -// Handles "a""b" -> a"b quoting. -// Matches TS: buildCsvStringMatcher(options) returns make(cfg, opts) => matcher(lex). -func buildCsvStringMatcher(stringOpts map[string]any) jsonic.MakeLexMatcher { - quote := toString(stringOpts["quote"]) - return func(cfg *jsonic.LexConfig, opts *jsonic.Options) jsonic.LexMatcher { - return func(lex *jsonic.Lex, rule *jsonic.Rule) *jsonic.Token { - pnt := lex.Cursor() - src := lex.Src - sI := pnt.SI - srclen := len(src) - - if sI >= srclen || !strings.HasPrefix(src[sI:], quote) { - return nil - } - - // Only match when quote is at the start of a field. - if sI > 0 { - prev := rune(src[sI-1]) - _, isFixed := cfg.FixedTokens[string(prev)] - if !isFixed && !cfg.LineChars[prev] && !cfg.SpaceChars[prev] { - return nil - } - } - - q := quote - qLen := len(q) - rI := pnt.RI - cI := pnt.CI - sI += qLen - cI += qLen - - var s strings.Builder - for sI < srclen { - cI++ - if strings.HasPrefix(src[sI:], q) { - sI += qLen - cI += qLen - 1 - if sI < srclen && strings.HasPrefix(src[sI:], q) { - s.WriteString(q) - sI += qLen - cI += qLen - continue - } - val := s.String() - ssrc := src[pnt.SI:sI] - tkn := lex.Token("#ST", jsonic.TinST, val, ssrc) - pnt.SI = sI - pnt.RI = rI - pnt.CI = cI - return tkn - } - - ch := src[sI] - if cfg.LineChars[rune(ch)] { - if cfg.RowChars[rune(ch)] { - rI++ - pnt.RI = rI - } - cI = 1 - s.WriteByte(ch) - sI++ - continue - } - if ch < 32 { - return nil - } - - bI := sI - qFirst := q[0] - for sI < srclen && src[sI] >= 32 && src[sI] != qFirst { - if cfg.LineChars[rune(src[sI])] { - break - } - sI++ - cI++ - } - cI-- - s.WriteString(src[bI:sI]) - } - - badSrc := src[pnt.SI:sI] - tkn := lex.Token("#BD", jsonic.TinBD, nil, badSrc) - tkn.Why = 
"unterminated_string" - pnt.SI = sI - pnt.RI = rI - pnt.CI = cI - return tkn - } - } -} - -// Defaults matches the TS Csv.defaults. Used with jsonic.UseDefaults. -var Defaults = map[string]any{ - "trim": nil, - "comment": nil, - "number": nil, - "value": nil, - "header": true, - "object": true, - "stream": nil, - "strict": true, - "field": map[string]any{ - "separation": nil, - "nonameprefix": "field~", - "empty": "", - "names": nil, - "exact": false, - }, - "record": map[string]any{ - "separators": nil, - "empty": false, - }, - "string": map[string]any{ - "quote": `"`, - "csv": nil, - }, -} - -// parseGrammarText parses grammar text and builds a GrammarSpec with Ref support. -func parseGrammarText(text string, refs map[jsonic.FuncRef]any) (*jsonic.GrammarSpec, error) { - parsed, err := jsonic.Make().Parse(text) - if err != nil { - return nil, fmt.Errorf("failed to parse grammar text: %w", err) - } - parsedMap, ok := parsed.(map[string]any) - if !ok { - return nil, fmt.Errorf("grammar text did not parse to a map") - } - gs := &jsonic.GrammarSpec{Ref: refs} - ruleMap, ok := parsedMap["rule"].(map[string]any) - if !ok { - return gs, nil - } - gs.Rule = make(map[string]*jsonic.GrammarRuleSpec, len(ruleMap)) - for name, rDef := range ruleMap { - rd, ok := rDef.(map[string]any) - if !ok { - continue - } - grs := &jsonic.GrammarRuleSpec{} - if openDef, ok := rd["open"]; ok { - grs.Open = buildGrammarAlts(openDef) - } - if closeDef, ok := rd["close"]; ok { - grs.Close = buildGrammarAlts(closeDef) - } - gs.Rule[name] = grs - } - return gs, nil -} - -func buildGrammarAlts(def any) []*jsonic.GrammarAltSpec { - arr, ok := def.([]any) - if !ok { - return nil - } - alts := make([]*jsonic.GrammarAltSpec, 0, len(arr)) - for _, item := range arr { - m, ok := item.(map[string]any) - if !ok { - alts = append(alts, &jsonic.GrammarAltSpec{}) - continue - } - ga := &jsonic.GrammarAltSpec{} - if s, ok := m["s"]; ok { - switch sv := s.(type) { - case string: - ga.S = sv - case []any: - 
strs := make([]string, len(sv)) - for i, v := range sv { - strs[i], _ = v.(string) - } - ga.S = strs - } - } - if b, ok := m["b"]; ok { - switch bv := b.(type) { - case float64: - ga.B = int(bv) - case int: - ga.B = bv - } - } - if p, ok := m["p"].(string); ok { - ga.P = p - } - if r, ok := m["r"].(string); ok { - ga.R = r - } - if a, ok := m["a"].(string); ok { - ga.A = jsonic.FuncRef(a) - } - if c, ok := m["c"]; ok { - switch cv := c.(type) { - case string: - ga.C = cv - case map[string]any: - ga.C = cv - } - } - if n, ok := m["n"].(map[string]any); ok { - ga.N = make(map[string]int, len(n)) - for k, v := range n { - if nv, ok := v.(float64); ok { - ga.N[k] = int(nv) - } else if nv, ok := v.(int); ok { - ga.N[k] = nv - } - } - } - if g, ok := m["g"].(string); ok { - ga.G = g - } - alts = append(alts, ga) - } - return alts -} - -func tokenStr(t *jsonic.Token) string { - if t == nil || t.IsNoToken() { - return "" - } - if t.Tin == jsonic.TinST { - if s, ok := t.Val.(string); ok { - return s - } - } - return t.Src -} - -func toBool(v any) bool { - b, _ := v.(bool) - return b -} - -func toString(v any) string { - s, _ := v.(string) - return s -} - -func boolPtr(b bool) *bool { - return &b -} - -// orderedMap maintains insertion order for JSON serialization comparison. -type orderedMap struct { - keys []string - m map[string]any -} diff --git a/go/csv_test.go b/go/csv_test.go deleted file mode 100644 index 2e14b3a..0000000 --- a/go/csv_test.go +++ /dev/null @@ -1,527 +0,0 @@ -package csv - -import ( - "encoding/json" - "fmt" - "os" - "path/filepath" - "reflect" - "testing" - - jsonic "github.com/jsonicjs/jsonic/go" -) - -// fixtureEntry represents one entry in the test manifest. 
-type fixtureEntry struct { - Name string `json:"name"` - CsvFile string `json:"csvFile,omitempty"` - Opt map[string]any `json:"opt,omitempty"` - JsonicOpt map[string]any `json:"jsonicOpt,omitempty"` - Err string `json:"err,omitempty"` -} - -func fixturesDir() string { - return filepath.Join("..", "test", "fixtures") -} - -// csvParse creates a jsonic instance with the Csv plugin and parses src. -func csvParse(src string, opts ...map[string]any) ([]any, error) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults, opts...) - - result, err := j.Parse(src) - if err != nil { - return nil, err - } - if result == nil { - return []any{}, nil - } - if arr, ok := result.([]any); ok { - return arr, nil - } - return []any{}, nil -} - -func TestFixtures(t *testing.T) { - dir := fixturesDir() - manifestPath := filepath.Join(dir, "manifest.json") - - manifestData, err := os.ReadFile(manifestPath) - if err != nil { - t.Fatalf("Failed to read manifest: %v", err) - } - - var manifest map[string]fixtureEntry - if err := json.Unmarshal(manifestData, &manifest); err != nil { - t.Fatalf("Failed to parse manifest: %v", err) - } - - for key, entry := range manifest { - t.Run(entry.Name, func(t *testing.T) { - csvFile := entry.CsvFile - if csvFile == "" { - csvFile = key - } - - csvData, err := os.ReadFile(filepath.Join(dir, csvFile+".csv")) - if err != nil { - t.Fatalf("Failed to read CSV file %s: %v", csvFile, err) - } - - result, err := parseFixture(string(csvData), entry.Opt, entry.JsonicOpt) - if err != nil { - if entry.Err != "" { - return // expected error - } - t.Fatalf("Unexpected error: %v", err) - } - - if entry.Err != "" { - t.Fatalf("Expected error %s but got none", entry.Err) - } - - expectedData, err := os.ReadFile(filepath.Join(dir, key+".json")) - if err != nil { - t.Fatalf("Failed to read expected JSON: %v", err) - } - - var expected []any - if err := json.Unmarshal(expectedData, &expected); err != nil { - t.Fatalf("Failed to parse expected JSON: %v", err) - } - - 
resultNorm := normalizeResult(result) - expectedNorm := normalizeJSON(expected) - - if !reflect.DeepEqual(resultNorm, expectedNorm) { - resultJSON, _ := json.MarshalIndent(resultNorm, "", " ") - expectedJSON, _ := json.MarshalIndent(expectedNorm, "", " ") - t.Errorf("Fixture %q mismatch:\nGot: %s\nExpected: %s", - entry.Name, string(resultJSON), string(expectedJSON)) - } - }) - } -} - -func TestPlugin(t *testing.T) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults) - - result, err := j.Parse("a,b\n1,2\n3,4") - if err != nil { - t.Fatalf("Plugin parse error: %v", err) - } - - arr, ok := result.([]any) - if !ok { - t.Fatalf("Expected []any, got %T", result) - } - - if len(arr) != 2 { - t.Fatalf("Expected 2 records, got %d", len(arr)) - } - - r0 := toMap(arr[0]) - if r0["a"] != "1" || r0["b"] != "2" { - t.Errorf("Record 0: expected {a:1,b:2}, got %v", r0) - } -} - -func TestPluginWithOptions(t *testing.T) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults, map[string]any{"object": false}) - - result, err := j.Parse("a,b\n1,2") - if err != nil { - t.Fatalf("Plugin parse error: %v", err) - } - - arr, ok := result.([]any) - if !ok { - t.Fatalf("Expected []any, got %T", result) - } - - if len(arr) != 1 { - t.Fatalf("Expected 1 record, got %d", len(arr)) - } - - inner, ok := arr[0].([]any) - if !ok { - t.Fatalf("Expected inner []any, got %T", arr[0]) - } - - if inner[0] != "1" || inner[1] != "2" { - t.Errorf("Expected [1,2], got %v", inner) - } -} - -func TestPluginEmpty(t *testing.T) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults) - - result, err := j.Parse("") - if err != nil { - t.Fatalf("Plugin parse error: %v", err) - } - - arr, ok := result.([]any) - if !ok { - t.Fatalf("Expected []any, got %T: %v", result, result) - } - - if len(arr) != 0 { - t.Errorf("Expected empty array, got %v", arr) - } -} - -func TestUsePlugin(t *testing.T) { - j := jsonic.Make() - j.Use(Csv, nil) - - result, err := j.Parse("a,b\n1,2") - if err != nil { - t.Logf("Plugin parse 
returned error (expected with basic plugin): %v", err) - } - _ = result -} - -func TestEmptyRecords(t *testing.T) { - result, _ := csvParse("a\n1\n\n2\n3\n\n\n4\n") - assertRecords(t, "empty-ignored", result, []map[string]any{ - {"a": "1"}, {"a": "2"}, {"a": "3"}, {"a": "4"}, - }) - - result2, _ := csvParse("a\n1\n\n2\n3\n\n\n4\n", - map[string]any{"record": map[string]any{"empty": true}}) - assertRecords(t, "empty-preserved", result2, []map[string]any{ - {"a": "1"}, {"a": ""}, {"a": "2"}, {"a": "3"}, - {"a": ""}, {"a": ""}, {"a": "4"}, - }) -} - -func TestHeader(t *testing.T) { - result, _ := csvParse("\na,b\nA,B") - assertRecords(t, "header-skip-leading", result, []map[string]any{ - {"a": "A", "b": "B"}, - }) - - result2, _ := csvParse("\na,b\nA,B", map[string]any{"header": false}) - assertRecords(t, "no-header", result2, []map[string]any{ - {"field~0": "a", "field~1": "b"}, - {"field~0": "A", "field~1": "B"}, - }) -} - -func TestDoubleQuotes(t *testing.T) { - tests := []struct { - input string - expected string - }{ - {`a` + "\n" + `"b"`, "b"}, - {`a` + "\n" + `"""b"`, `"b`}, - {`a` + "\n" + `"b"""`, `b"`}, - {`a` + "\n" + `"""b"""`, `"b"`}, - {`a` + "\n" + `"b""c"`, `b"c`}, - {`a` + "\n" + `"b""c""d"`, `b"c"d`}, - {`a` + "\n" + `"""""b"`, `""b`}, - {`a` + "\n" + `"b"""""`, `b""`}, - {`a` + "\n" + `"""""b"""""`, `""b""`}, - } - - for _, tt := range tests { - result, err := csvParse(tt.input) - if err != nil { - t.Errorf("Parse(%q): error: %v", tt.input, err) - continue - } - if len(result) != 1 { - t.Errorf("Parse(%q): expected 1 record, got %d", tt.input, len(result)) - continue - } - m := toMap(result[0]) - if m["a"] != tt.expected { - t.Errorf("Parse(%q): expected a=%q, got a=%q", tt.input, tt.expected, m["a"]) - } - } -} - -func TestTrim(t *testing.T) { - r1, _ := csvParse("a\n b") - assertField(t, "no-trim-leading", r1, "a", " b") - - r2, _ := csvParse("a\nb ") - assertField(t, "no-trim-trailing", r2, "a", "b ") - - r3, _ := csvParse("a\n b ") - 
assertField(t, "no-trim-both", r3, "a", " b ") - - r4, _ := csvParse("a\n b", map[string]any{"trim": true}) - assertField(t, "trim-leading", r4, "a", "b") - - r5, _ := csvParse("a\nb ", map[string]any{"trim": true}) - assertField(t, "trim-trailing", r5, "a", "b") - - r6, _ := csvParse("a\n b c ", map[string]any{"trim": true}) - assertField(t, "trim-internal", r6, "a", "b c") -} - -func TestComment(t *testing.T) { - r1, _ := csvParse("a\n# b") - assertField(t, "no-comment", r1, "a", "# b") - - r2, _ := csvParse("a\n# b", map[string]any{"comment": true}) - if len(r2) != 0 { - t.Errorf("comment-line: expected 0 records, got %d", len(r2)) - } - - r3, _ := csvParse("a\n b #c", map[string]any{"comment": true}) - assertField(t, "comment-inline", r3, "a", " b ") -} - -func TestNumber(t *testing.T) { - r1, _ := csvParse("a\n1") - assertField(t, "no-number", r1, "a", "1") - - r2, _ := csvParse("a\n1", map[string]any{"number": true}) - m := toMap(r2[0]) - if m["a"] != float64(1) { - t.Errorf("number: expected 1 (float64), got %v (%T)", m["a"], m["a"]) - } -} - -func TestValue(t *testing.T) { - r1, _ := csvParse("a\ntrue") - assertField(t, "no-value", r1, "a", "true") - - r2, _ := csvParse("a\ntrue", map[string]any{"value": true}) - m := toMap(r2[0]) - if m["a"] != true { - t.Errorf("value-true: expected true, got %v (%T)", m["a"], m["a"]) - } - - r3, _ := csvParse("a\nfalse", map[string]any{"value": true}) - m3 := toMap(r3[0]) - if m3["a"] != false { - t.Errorf("value-false: expected false, got %v (%T)", m3["a"], m3["a"]) - } - - r4, _ := csvParse("a\nnull", map[string]any{"value": true}) - m4 := toMap(r4[0]) - if m4["a"] != nil { - t.Errorf("value-null: expected nil, got %v (%T)", m4["a"], m4["a"]) - } -} - -func TestStream(t *testing.T) { - var events []string - var records []any - - j := jsonic.Make() - j.UseDefaults(Csv, Defaults, map[string]any{ - "stream": func(what string, record any) { - events = append(events, what) - if what == "record" { - records = append(records, 
record) - } - }, - }) - j.Parse("a,b\n1,2\n3,4\n5,6") - - if len(events) < 3 { - t.Fatalf("Expected at least 3 events, got %d", len(events)) - } - if events[0] != "start" { - t.Errorf("First event should be 'start', got %q", events[0]) - } - if events[len(events)-1] != "end" { - t.Errorf("Last event should be 'end', got %q", events[len(events)-1]) - } - if len(records) != 3 { - t.Errorf("Expected 3 records, got %d", len(records)) - } -} - -func TestSeparators(t *testing.T) { - result, _ := csvParse("a|b|c\nA|B|C\nAA|BB|CC", - map[string]any{"field": map[string]any{"separation": "|"}}) - assertRecords(t, "pipe", result, []map[string]any{ - {"a": "A", "b": "B", "c": "C"}, - {"a": "AA", "b": "BB", "c": "CC"}, - }) - - result2, _ := csvParse("a~~b~~c\nA~~B~~C", - map[string]any{"field": map[string]any{"separation": "~~"}}) - assertRecords(t, "multi-char", result2, []map[string]any{ - {"a": "A", "b": "B", "c": "C"}, - }) -} - -func TestRecordSeparators(t *testing.T) { - result, _ := csvParse("a,b,c%A,B,C%AA,BB,CC", - map[string]any{"record": map[string]any{"separators": "%"}}) - assertRecords(t, "record-sep", result, []map[string]any{ - {"a": "A", "b": "B", "c": "C"}, - {"a": "AA", "b": "BB", "c": "CC"}, - }) -} - -// parseFixture parses CSV with optional jsonic-level options for fixtures. 
-func parseFixture(src string, pluginOpts map[string]any, jsonicOpts map[string]any) ([]any, error) { - if len(jsonicOpts) == 0 { - return csvParse(src, pluginOpts) - } - - j := jsonic.Make() - - // Apply jsonicOpt: value.def - if valOpt, ok := jsonicOpts["value"].(map[string]any); ok { - if defMap, ok := valOpt["def"].(map[string]any); ok { - vopts := jsonic.Options{Value: &jsonic.ValueOptions{ - Def: map[string]*jsonic.ValueDef{ - "true": {Val: true}, - "false": {Val: false}, - "null": {Val: nil}, - }, - }} - for k, v := range defMap { - if v == nil { - delete(vopts.Value.Def, k) - } else if vm, ok := v.(map[string]any); ok { - vopts.Value.Def[k] = &jsonic.ValueDef{Val: vm["val"]} - } - } - j.SetOptions(vopts) - } - } - - // Apply jsonicOpt: comment.def - if cmtOpt, ok := jsonicOpts["comment"].(map[string]any); ok { - if defMap, ok := cmtOpt["def"].(map[string]any); ok { - copts := jsonic.Options{Comment: &jsonic.CommentOptions{ - Def: make(map[string]*jsonic.CommentDef), - }} - for name, v := range defMap { - if cm, ok := v.(map[string]any); ok { - def := &jsonic.CommentDef{} - if start, ok := cm["start"].(string); ok { - def.Start = start - } - if end, ok := cm["end"].(string); ok { - def.End = end - } else { - def.Line = true - } - copts.Comment.Def[name] = def - } - } - j.SetOptions(copts) - } - } - - j.UseDefaults(Csv, Defaults, pluginOpts) - - result, err := j.Parse(src) - if err != nil { - return nil, err - } - if result == nil { - return []any{}, nil - } - if arr, ok := result.([]any); ok { - return arr, nil - } - return []any{}, nil -} - -// Helpers - -func assertRecords(t *testing.T, name string, result []any, expected []map[string]any) { - t.Helper() - if len(result) != len(expected) { - t.Errorf("%s: expected %d records, got %d: %v", name, len(expected), len(result), result) - return - } - for i, exp := range expected { - m := toMap(result[i]) - for k, v := range exp { - if fmt.Sprintf("%v", m[k]) != fmt.Sprintf("%v", v) { - t.Errorf("%s: record %d, 
field %q: expected %v, got %v", name, i, k, v, m[k]) - } - } - } -} - -func assertField(t *testing.T, name string, result []any, key string, expected string) { - t.Helper() - if len(result) != 1 { - t.Errorf("%s: expected 1 record, got %d", name, len(result)) - return - } - m := toMap(result[0]) - if m[key] != expected { - t.Errorf("%s: expected %q=%q, got %q=%q", name, key, expected, key, m[key]) - } -} - -func toMap(v any) map[string]any { - switch m := v.(type) { - case map[string]any: - return m - case orderedMap: - return m.m - default: - return nil - } -} - -func normalizeResult(result []any) []any { - out := make([]any, len(result)) - for i, r := range result { - out[i] = normalizeValue(r) - } - return out -} - -func normalizeValue(v any) any { - switch val := v.(type) { - case orderedMap: - m := make(map[string]any) - for k, v := range val.m { - m[k] = normalizeValue(v) - } - return m - case map[string]any: - m := make(map[string]any) - for k, v := range val { - m[k] = normalizeValue(v) - } - return m - case []any: - out := make([]any, len(val)) - for i, v := range val { - out[i] = normalizeValue(v) - } - return out - default: - return v - } -} - -func normalizeJSON(v any) any { - switch val := v.(type) { - case []any: - out := make([]any, len(val)) - for i, item := range val { - out[i] = normalizeJSON(item) - } - return out - case map[string]any: - m := make(map[string]any) - for k, v := range val { - m[k] = normalizeJSON(v) - } - return m - default: - return v - } -} diff --git a/go/go.mod b/go/go.mod index 55ab5a2..c42f565 100644 --- a/go/go.mod +++ b/go/go.mod @@ -1,4 +1,4 @@ -module github.com/jsonicjs/csv/go +module github.com/jsonicjs/xml/go go 1.24.7 diff --git a/go/xml.go b/go/xml.go new file mode 100644 index 0000000..e743b17 --- /dev/null +++ b/go/xml.go @@ -0,0 +1,1504 @@ +// Copyright (c) 2021-2025 Richard Rodger, MIT License + +// Package xml is a Jsonic plugin that parses XML into a tree of +// elements. 
The parser supports: elements with open/close and +// self-closing tags, attributes (single and double quoted with entity +// decoding), mixed element/text content, predefined and numeric +// character entity references, namespace resolution from xmlns/xmlns:* +// declarations, comments, CDATA sections, processing instructions and +// DOCTYPE declarations. +// +// The returned tree uses `map[string]any` nodes with keys `name`, +// `localName`, optional `prefix`, optional `namespace`, `attributes` +// (map of string -> string) and `children` (array of nested elements +// or text strings). +package xml + +import ( + "encoding/binary" + "fmt" + "regexp" + "strconv" + "strings" + "unicode/utf16" + "unicode/utf8" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +const Version = "0.1.0" + +// Defaults are merged with caller-supplied options when the plugin is +// registered via jsonic.UseDefaults. +// +// Option keys: +// +// namespaces bool resolve xmlns / xmlns:* into prefix / +// localName / namespace fields on every +// element. Default: true. +// entities bool decode the five predefined entities and +// numeric character references in text and +// attribute values. Default: true. +// customEntities map[string]string extra named entities to recognise. +// strictEntities bool enforce XML 1.0 §4.1: every named entity +// reference must resolve to a declared +// entity. Default: true. When false, +// references to unknown names are left +// as-is in the output. +// embed bool when true, keep Jsonic's JSON/JSONIC +// grammar in place and splice an XML +// literal alternate into the `val` rule +// so `` can appear wherever +// Jsonic expects a value. When false +// (default) the parser is reconfigured +// as a pure-XML parser. +var Defaults = map[string]any{ + "namespaces": true, + "entities": true, + "customEntities": map[string]string{}, + "strictEntities": true, + "embed": false, +} + +// Xml is the Jsonic plugin entry point. 
Register via: +// +// j := jsonic.Make() +// j.UseDefaults(xml.Xml, xml.Defaults) +// result, err := j.Parse(src) +func Xml(j *jsonic.Jsonic, options map[string]any) error { + // Guard against re-invocation: Use() re-runs plugins on SetOptions calls. + if j.Decoration("xml-init") != nil { + return nil + } + j.Decorate("xml-init", true) + + namespacesOn := toBool(options["namespaces"], true) + entitiesOn := toBool(options["entities"], true) + customEntities := toStringMap(options["customEntities"]) + strictEntities := toBool(options["strictEntities"], true) + embed := toBool(options["embed"], false) + + decode, declared := buildEntityDecoder(entitiesOn, customEntities) + + // Reserve #XIG (ignored) and #XOP/#XCL/#XSC (tag tokens) so they have + // stable tins before the grammar references them. The tins are then + // passed to the tag matcher by closure. + xigTin := j.Token("#XIG", "") + xopTin := j.Token("#XOP", "") + xclTin := j.Token("#XCL", "") + xscTin := j.Token("#XSC", "") + + if !embed { + // Register a dummy fixed token bound to a character that cannot + // legally appear in XML source (ASCII SOH). This keeps the + // lexer's internal `FixedSorted` list non-empty, which in turn + // disables an otherwise-hardcoded fallback that still ends text + // tokens on `{ } [ ] : ,` even when those symbols have been + // removed from the fixed token map. Without this, XML text + // content containing a comma would be truncated at the comma. + // In embed mode the JSON structural tokens remain in place, so + // the dummy is not needed. + soh := "\x01" + _ = j.Token("#XDUM", soh) + } + + // Shared options installed in both modes: the custom matcher, the + // text-end character `<`, and the XML-specific error templates. 
+ j.SetOptions(jsonic.Options{ + Lex: &jsonic.LexOptions{ + Match: map[string]*jsonic.MatchSpec{ + "xmltag": {Order: 100_000, Make: buildXmlTagMatcher(decode, declared, entitiesOn, strictEntities, embed, xigTin, xopTin, xclTin, xscTin)}, + }, + }, + Ender: []string{"<"}, + Error: map[string]string{ + "xml_mismatched_tag": "closing tag does not match opening tag <$openname>", + "xml_invalid_tag": "invalid tag: $fsrc", + "xml_unterminated": "unterminated $kind", + "comment_double_dash": "comment body cannot contain \"--\"", + "cdata_terminator_in_text": "character data cannot contain \"]]>\"", + "pi_target_invalid": "processing instruction target is missing or invalid", + "lt_in_attr_value": "\"<\" is not allowed in an attribute value", + "bad_entity_ref": "malformed entity reference (need &name; or &#NNN; or &#xHHH;)", + "duplicate_attribute": "duplicate attribute name in tag", + "invalid_xml_char": "illegal control character in XML data", + "reserved_namespace": "invalid use of a reserved namespace prefix or URI", + "unbound_prefix": "element or attribute uses an undeclared namespace prefix", + "undeclared_entity": "reference to undeclared entity", + }, + Hint: map[string]string{ + "xml_mismatched_tag": "Each opening tag must be paired with a matching closing tag.\nExpected but found .", + "xml_invalid_tag": "The tag syntax is not valid XML.", + "xml_unterminated": "The $kind starting at this position is not terminated.", + "comment_double_dash": "XML 1.0 disallows \"--\" inside a comment body.", + "cdata_terminator_in_text": "The literal \"]]>\" must only appear as the end of a CDATA section.", + "pi_target_invalid": "A processing instruction must start with a Name; the XML declaration is the special case.", + "lt_in_attr_value": "Use the entity reference < to include \"<\" in an attribute value.", + "bad_entity_ref": "Replace literal \"&\" with &, or terminate the entity reference with \";\".", + "duplicate_attribute": "Each attribute name in an open tag must be 
unique.", + "invalid_xml_char": "Only #x9, #xA, #xD and code points >= #x20 are legal XML characters.", + "reserved_namespace": "The \"xml\" prefix is fixed to " + xmlNSURI + "; the \"xmlns\" prefix cannot be redeclared, and neither URI may be bound to any other prefix or as the default namespace.", + "unbound_prefix": "Declare the prefix with xmlns:prefix=\"...\" on this element or one of its ancestors.", + "undeclared_entity": "Declare the entity in the DOCTYPE internal subset, add it to the customEntities option, or set strictEntities: false to allow unresolved references through.", + }, + }) + + if !embed { + // Pure XML mode: reconfigure the parser so Jsonic's own value + // grammar is unreachable and all lexers other than our tag + // matcher are quiescent. + // + // Note: we deliberately do NOT install a Text.Modify hook + // here. While the root element is open the custom matcher + // itself emits the text tokens (with entity decoding and + // well-formedness checks); Jsonic's text matcher only sees + // whitespace before and after the root element where no + // decoding is needed. + j.SetOptions(jsonic.Options{ + Rule: &jsonic.RuleOptions{ + Start: "xml", + Exclude: "jsonic,imp", + }, + Fixed: &jsonic.FixedOptions{Token: map[string]*string{ + "#OB": nil, "#CB": nil, "#OS": nil, "#CS": nil, + "#CL": nil, "#CA": nil, + }}, + Number: &jsonic.NumberOptions{Lex: boolPtr(false)}, + Value: &jsonic.ValueOptions{Lex: boolPtr(false)}, + String: &jsonic.StringOptions{Lex: boolPtr(false)}, + Comment: &jsonic.CommentOptions{Lex: boolPtr(false)}, + Space: &jsonic.SpaceOptions{Lex: boolPtr(false)}, + Line: &jsonic.LineOptions{Lex: boolPtr(false)}, + }) + } + + // IGNORE set: drop #XIG (comments, PIs, DOCTYPE) along with the + // default members so any of them is skipped by the parser. In + // embed mode this preserves all default ignored tokens; in pure + // mode the SP/LN/CM tokens are never produced (we disabled their + // lexers), but keeping them here is harmless. 
+ j.SetTokenSet("IGNORE", []jsonic.Tin{ + j.Token("#SP", ""), j.Token("#LN", ""), j.Token("#CM", ""), xigTin, + }) + + // Grammar declarations. Mirror the TypeScript grammar exactly. + refs := map[jsonic.FuncRef]any{ + "@xml-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if r.Child == nil || r.Child == jsonic.NoRule || r.Child.Node == nil { + return + } + // The Go parser follows the Next chain forward from the root + // rule to find the final result holder, so the current rule's + // node is what the caller will see. Set it (and the original + // root's node via the Prev chain as well for safety). + r.Node = r.Child.Node + root := firstRule(r) + root.Node = r.Child.Node + // Mark the document as having seen its root so the + // @no-root-yet condition rejects any subsequent attempt + // to push a second root element (XML 1.0 §2.1). + ctx.U["rootSeen"] = true + if namespacesOn { + if el, ok := r.Node.(map[string]any); ok { + if code := resolveNamespaces(el, nil); code != "" { + ctx.ParseErr = &jsonic.Token{ + Name: "#BD", Tin: jsonic.TinBD, + Err: code, Why: code, Src: code, + } + } + } + } + }), + + "@no-root-yet": jsonic.AltCond(func(_ *jsonic.Rule, ctx *jsonic.Context) bool { + seen, _ := ctx.U["rootSeen"].(bool) + return !seen + }), + + "@element-open": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + v := r.O0.Val.(map[string]any) + name := v["name"].(string) + attrs := v["attributes"].(map[string]any) + r.Node = map[string]any{ + "name": name, + "localName": name, + "attributes": applyAttrDefaults(attrs, name, ctx), + "children": []any{}, + } + }), + + "@element-selfclose": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + v := r.O0.Val.(map[string]any) + name := v["name"].(string) + attrs := v["attributes"].(map[string]any) + r.Node = map[string]any{ + "name": name, + "localName": name, + "attributes": applyAttrDefaults(attrs, name, ctx), + "children": []any{}, + } + }), + + "@element-close": 
jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + el, _ := r.Node.(map[string]any) + openName, _ := el["name"].(string) + closeName, _ := r.C0.Val.(string) + if openName != closeName { + // The Go parser's top-level error handling reports parse + // errors under a single "unexpected" code, so encode our + // specific error code into the token's `Src`: that string + // is substituted into the error detail via $fsrc and will + // appear in err.Error() for consumers (and tests) that + // want to key on the specific cause. + r.C0.Src = "xml_mismatched_tag: does not match <" + openName + ">" + if r.C0.Use == nil { + r.C0.Use = map[string]any{} + } + r.C0.Use["openname"] = openName + r.C0.Err = "xml_mismatched_tag" + ctx.ParseErr = r.C0 + } + }), + + "@child-text": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + el, _ := r.Node.(map[string]any) + children, _ := el["children"].([]any) + el["children"] = append(children, r.O0.Val) + r.U["done"] = true + }), + + "@child-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if done, _ := r.U["done"].(bool); done { + return + } + if r.Child == nil || r.Child == jsonic.NoRule || r.Child.Node == nil { + return + } + el, ok := r.Node.(map[string]any) + if !ok { + return + } + children, _ := el["children"].([]any) + el["children"] = append(children, r.Child.Node) + }), + + "@element-is-selfclosed": jsonic.AltCond(func(r *jsonic.Rule, ctx *jsonic.Context) bool { + v, _ := r.U["selfclose"].(int) + return v == 1 + }), + } + + gs := &jsonic.GrammarSpec{ + Ref: refs, + Rule: map[string]*jsonic.GrammarRuleSpec{ + "xml": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#ZZ"}, + {S: "#TX", R: "xml"}, + {P: "element", C: "@no-root-yet"}, + }, + Close: []*jsonic.GrammarAltSpec{ + {S: "#ZZ"}, + {S: "#TX", R: "xml"}, + }, + }, + "element": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#XSC", A: "@element-selfclose", U: map[string]any{"selfclose": 1}}, + {S: "#XOP", P: "content", A: "@element-open"}, + }, + 
Close: []*jsonic.GrammarAltSpec{ + {C: "@element-is-selfclosed"}, + {S: "#XCL", A: "@element-close"}, + }, + }, + "content": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#XCL", B: 1}, + {P: "child"}, + }, + Close: []*jsonic.GrammarAltSpec{ + {S: "#XCL", B: 1}, + {R: "content"}, + }, + }, + "child": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#TX", A: "@child-text"}, + {S: "#XOP", B: 1, P: "element"}, + {S: "#XSC", B: 1, P: "element"}, + }, + }, + }, + } + if err := j.Grammar(gs); err != nil { + return fmt.Errorf("xml: apply grammar: %w", err) + } + + if embed { + // Splice XML literals into the Jsonic `val` rule. When the + // parser is looking for a value and sees `#XOP` or `#XSC`, + // push the `element` rule (backtracking by 1 so element.open + // can read the same token and dispatch). + j.Rule("val", func(rs *jsonic.RuleSpec) { + rs.Open = append(rs.Open, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{xopTin}}, + B: 1, P: "element", G: "xml", + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{xscTin}}, + B: 1, P: "element", G: "xml", + }, + ) + }) + + // In embed mode the top-level wrapper is Jsonic's `val` rule, + // so the @xml-bc hook that copies the root element to + // ctx.root().node is not invoked. Resolve namespaces instead + // when the element rule closes directly under a val rule. + if namespacesOn { + j.Rule("element", func(rs *jsonic.RuleSpec) { + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if r.Parent != nil && r.Parent != jsonic.NoRule && + r.Parent.Name == "val" { + if el, ok := r.Node.(map[string]any); ok { + resolveNamespaces(el, nil) + } + } + }) + }) + } + } + + return nil +} + +// dtdEntities reads the per-parse DOCTYPE-declared entity map (set +// by the DOCTYPE matcher path). Returns nil if none have been +// registered yet. 
+func dtdEntities(lex *jsonic.Lex) map[string]string { + if lex == nil || lex.Ctx == nil || lex.Ctx.U == nil { + return nil + } + m, _ := lex.Ctx.U["dtdEntities"].(map[string]string) + return m +} + +// dtdAttrDefaults reads the per-parse DOCTYPE-supplied attribute +// default map keyed by element name (set by the DOCTYPE matcher +// path). Returns nil if none have been registered yet. +func dtdAttrDefaults(ctx *jsonic.Context) map[string]map[string]string { + if ctx == nil || ctx.U == nil { + return nil + } + m, _ := ctx.U["dtdAttrDefaults"].(map[string]map[string]string) + return m +} + +// applyAttrDefaults merges in DOCTYPE-supplied default attribute +// values for any attribute missing from the parsed element instance. +// Returns the original map if no defaults apply. +func applyAttrDefaults( + attrs map[string]any, elemName string, ctx *jsonic.Context, +) map[string]any { + all := dtdAttrDefaults(ctx) + if all == nil { + return attrs + } + defaults, ok := all[elemName] + if !ok { + return attrs + } + for k, v := range defaults { + if _, present := attrs[k]; !present { + attrs[k] = v + } + } + return attrs +} + +// parseDoctypeAttlists scans a DOCTYPE internal-subset body and +// extracts every `` default +// attribute value, keyed by element name and attribute name. Both +// literal defaults and `#FIXED "value"` defaults are returned; +// `#REQUIRED` and `#IMPLIED` declarations contribute nothing because +// they have no default value. +func parseDoctypeAttlists(body string) map[string]map[string]string { + skipSpace := func(s int) int { + for s < len(body) && isSpace(body[s]) { + s++ + } + return s + } + out := map[string]map[string]string{} + + i := 0 + for i < len(body) { + idx := strings.Index(body[i:], "= len(body) { + break + } + if body[j] == '>' { + j++ + break + } + attrName, attrEnd, ok := readName(body, j) + if !ok { + j++ + continue + } + j = attrEnd + j = skipSpace(j) + + // Skip AttType. 
+ if j < len(body) && body[j] == '(' { + close := strings.Index(body[j:], ")") + if close < 0 { + j = len(body) + break + } + j = j + close + 1 + } else if strings.HasPrefix(body[j:], "NOTATION") { + j += len("NOTATION") + j = skipSpace(j) + if j < len(body) && body[j] == '(' { + close := strings.Index(body[j:], ")") + if close < 0 { + j = len(body) + break + } + j = j + close + 1 + } + } else { + for j < len(body) && body[j] >= 'A' && body[j] <= 'Z' { + j++ + } + } + j = skipSpace(j) + + // DefaultDecl. + if strings.HasPrefix(body[j:], "#REQUIRED") { + j += len("#REQUIRED") + continue + } + if strings.HasPrefix(body[j:], "#IMPLIED") { + j += len("#IMPLIED") + continue + } + if strings.HasPrefix(body[j:], "#FIXED") { + j += len("#FIXED") + j = skipSpace(j) + } + if j < len(body) && (body[j] == '"' || body[j] == '\'') { + quote := body[j] + j++ + valStart := j + for j < len(body) && body[j] != quote { + j++ + } + if j >= len(body) { + break + } + value := body[valStart:j] + if out[elemName] == nil { + out[elemName] = map[string]string{} + } + out[elemName][attrName] = value + j++ + } + } + i = j + } + return out +} + +// parseDoctypeEntities scans a DOCTYPE internal-subset body and +// extracts every internal general entity declaration of the form +// `` (or single-quoted). Parameter entity +// declarations (``) and external entity +// declarations (`` etc.) are skipped, as +// are `") + if end < 0 { + break + } + i = j + end + 1 + continue + } + // Read the entity name. + name, after, ok := readName(body, j) + if !ok { + i = j + 1 + continue + } + j = after + for j < len(body) && isSpace(body[j]) { + j++ + } + // Quoted value -> internal entity. SYSTEM/PUBLIC -> skip. 
+ if j < len(body) && (body[j] == '"' || body[j] == '\'') { + quote := body[j] + j++ + valStart := j + for j < len(body) && body[j] != quote { + j++ + } + if j >= len(body) { + break + } + out[name] = body[valStart:j] + j++ + } + end := strings.Index(body[j:], ">") + if end < 0 { + break + } + i = j + end + 1 + } + return out +} + +// xmlDepth reads the per-parse XML nesting counter from the lex context. +// Returns 0 if not set. +func xmlDepth(lex *jsonic.Lex) int { + if lex == nil || lex.Ctx == nil { + return 0 + } + if lex.Ctx.U == nil { + lex.Ctx.U = map[string]any{} + return 0 + } + v, _ := lex.Ctx.U["xmlDepth"].(int) + return v +} + +// setXmlDepth writes the XML nesting counter, clamping at zero. +func setXmlDepth(lex *jsonic.Lex, d int) { + if lex == nil || lex.Ctx == nil { + return + } + if lex.Ctx.U == nil { + lex.Ctx.U = map[string]any{} + } + if d < 0 { + d = 0 + } + lex.Ctx.U["xmlDepth"] = d +} + +// DecodeBOM detects a byte-order mark at the start of `src` and, when +// the input is encoded as UTF-16 LE/BE or UTF-32 LE/BE, returns a +// transcoded UTF-8 string. UTF-8 BOMs are returned with the BOM bytes +// stripped. For input without a recognised BOM, the original string +// is returned unchanged. 
+// +// Use this when feeding XML files of unknown encoding into the +// parser: +// +// body, _ := os.ReadFile(path) +// doc, err := j.Parse(xml.DecodeBOM(string(body))) +func DecodeBOM(src string) string { + b := []byte(src) + n := len(b) + switch { + case n >= 4 && b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xfe && b[3] == 0xff: + return decodeUTF32(b[4:], binary.BigEndian) + case n >= 4 && b[0] == 0xff && b[1] == 0xfe && b[2] == 0x00 && b[3] == 0x00: + return decodeUTF32(b[4:], binary.LittleEndian) + case n >= 2 && b[0] == 0xfe && b[1] == 0xff: + return decodeUTF16(b[2:], binary.BigEndian) + case n >= 2 && b[0] == 0xff && b[1] == 0xfe: + return decodeUTF16(b[2:], binary.LittleEndian) + case n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf: + return string(b[3:]) + } + return src +} + +func decodeUTF16(b []byte, order binary.ByteOrder) string { + if len(b)%2 != 0 { + b = b[:len(b)-1] + } + units := make([]uint16, len(b)/2) + for i := range units { + units[i] = order.Uint16(b[i*2:]) + } + return string(utf16.Decode(units)) +} + +func decodeUTF32(b []byte, order binary.ByteOrder) string { + if len(b)%4 != 0 { + b = b[:len(b)-(len(b)%4)] + } + out := make([]rune, len(b)/4) + for i := range out { + out[i] = rune(order.Uint32(b[i*4:])) + } + return string(out) +} + +// firstRule walks back through Prev links to find the originating rule +// instance (matches the root rule used by the parser as the result +// holder). +func firstRule(r *jsonic.Rule) *jsonic.Rule { + cur := r + for cur.Prev != nil && cur.Prev != jsonic.NoRule { + cur = cur.Prev + } + return cur +} + +// predefinedEntities is the five XML-predefined entities. +var predefinedEntities = map[string]string{ + "amp": "&", + "lt": "<", + "gt": ">", + "quot": "\"", + "apos": "'", +} + +// entityRE matches a single entity reference: named, decimal numeric, or +// hexadecimal numeric. (?:...) would be ideal but the Go stdlib regexp +// supports named groups; this uses plain groups for portability. 
+var entityRE = regexp.MustCompile(`&(#x[0-9a-fA-F]+|#[0-9]+|[A-Za-z_][A-Za-z0-9_]*);`) + +// EntityDecoder decodes XML entity references in `s`. The optional +// `dtd` map supplies general entity declarations parsed from the +// DOCTYPE internal subset; values are recursively expanded with +// cycle detection. +type EntityDecoder func(s string, dtd map[string]string) string + +// buildEntityDecoder returns a function that decodes the five +// predefined entities, numeric character references, any +// caller-supplied custom entities, and per-parse DTD entities. +// When `enabled` is false the function is an identity. The second +// return value is the merged set of always-declared names used for +// strict-entity validation in the matcher. +func buildEntityDecoder( + enabled bool, custom map[string]string, +) (EntityDecoder, map[string]string) { + base := make(map[string]string, len(predefinedEntities)+len(custom)) + for k, v := range predefinedEntities { + base[k] = v + } + for k, v := range custom { + base[k] = v + } + if !enabled { + return func(s string, _ map[string]string) string { return s }, base + } + var expand func(s string, dtd map[string]string, seen map[string]bool) string + expand = func(s string, dtd map[string]string, seen map[string]bool) string { + if !strings.Contains(s, "&") { + return s + } + return entityRE.ReplaceAllStringFunc(s, func(match string) string { + ref := match[1 : len(match)-1] + if ref[0] == '#' { + var code int64 + var err error + if len(ref) > 1 && (ref[1] == 'x' || ref[1] == 'X') { + code, err = strconv.ParseInt(ref[2:], 16, 32) + } else { + code, err = strconv.ParseInt(ref[1:], 10, 32) + } + if err != nil { + return match + } + return string(rune(code)) + } + if v, ok := base[ref]; ok { + return v + } + if dtd != nil { + if v, ok := dtd[ref]; ok { + if seen[ref] { + // Recursive reference; break the cycle. 
+ return match + } + seen[ref] = true + out := expand(v, dtd, seen) + delete(seen, ref) + return out + } + } + return match + }) + } + return func(s string, dtd map[string]string) string { + return expand(s, dtd, map[string]bool{}) + }, base +} + +// buildXmlTagMatcher returns a MakeLexMatcher that recognises every +// top-level XML `<...>` construct at the current lex position. On a +// successful match it consumes the full construct and emits exactly +// one of: +// +// #XOP val = {"name":..., "attributes":...} +// #XSC val = {"name":..., "attributes":...} +// #XCL val = name (string) +// #XIG | | (ignored) +// #TX val = cdata body (verbatim, no entity decoding) +func buildXmlTagMatcher( + decode EntityDecoder, + declared map[string]string, + entitiesOn bool, + strict bool, + embed bool, + xigTin, xopTin, xclTin, xscTin jsonic.Tin, +) jsonic.MakeLexMatcher { + _ = embed // embed flag is no longer needed for text-handling + return func(_ *jsonic.LexConfig, _ *jsonic.Options) jsonic.LexMatcher { + return func(lex *jsonic.Lex, _ *jsonic.Rule) *jsonic.Token { + pnt := lex.Cursor() + src := lex.Src + srclen := len(src) + sI := pnt.SI + + // Strip a UTF-8 byte-order mark at the very start of input. + if sI == 0 && srclen >= 3 && + src[0] == 0xef && src[1] == 0xbb && src[2] == 0xbf { + pnt.SI = 3 + return nil + } + + // Inside an open XML element (depth > 0), consume + // characters up to the next `<` as a single #TX text + // token. Validates well-formedness of character data: + // rejects "]]>" and bare/malformed entity references. 
+ if sI < srclen && src[sI] != '<' { + if depth := xmlDepth(lex); depth > 0 { + i := sI + for i < srclen && src[i] != '<' { + i++ + } + if i == sI { + return nil + } + raw := src[sI:i] + if code := checkChars(raw); code != "" { + return lex.Bad(code) + } + if strings.Contains(raw, "]]>") { + return lex.Bad("cdata_terminator_in_text") + } + if code := checkEntityRefs(raw, dtdEntities(lex), declared, strict); code != "" { + return lex.Bad(code) + } + // §2.11 end-of-line normalisation. + normalised := normaliseLineEndings(raw) + var val any = normalised + if entitiesOn { + val = decode(normalised, dtdEntities(lex)) + } + tkn := lex.Token("#TX", jsonic.TinTX, val, raw) + advance(pnt, sI, i) + return tkn + } + } + + if sI >= srclen || src[sI] != '<' { + return nil + } + + // Comment: + if strings.HasPrefix(src[sI:], "") + if end < 0 { + return lex.Bad("unterminated_comment") + } + bodyStart := sI + 4 + bodyEnd := bodyStart + end + body := src[bodyStart:bodyEnd] + // WF: "--" must not occur in a comment body. + if strings.Contains(body, "--") { + return lex.Bad("comment_double_dash") + } + if code := checkChars(body); code != "" { + return lex.Bad(code) + } + finish := bodyEnd + 3 + tsrc := src[sI:finish] + tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // CDATA: + if strings.HasPrefix(src[sI:], "") + if end < 0 { + return lex.Bad("unterminated_cdata") + } + finish := body + end + 3 + text := src[body : body+end] + if code := checkChars(text); code != "" { + return lex.Bad(code) + } + tsrc := src[sI:finish] + // §2.11 line-end normalisation applies to CDATA too. + tkn := lex.Token("#TX", jsonic.TinTX, normaliseLineEndings(text), tsrc) + advance(pnt, sI, finish) + return tkn + } + + // DOCTYPE: (allows a single level of [] subset) + if strings.HasPrefix(src[sI:], "` inside an + // entity value or attribute default cannot terminate + // the subset prematurely. 
+ if ch == '"' || ch == '\'' { + i++ + for i < srclen && src[i] != ch { + i++ + } + if i < srclen { + i++ + } + continue + } + if ch == '[' { + if depth == 0 { + subsetStart = i + 1 + } + depth++ + } else if ch == ']' { + depth-- + if depth == 0 { + subsetEnd = i + } + } else if ch == '>' && depth <= 0 { + break + } + i++ + } + if i >= srclen { + return lex.Bad("unterminated_doctype") + } + finish := i + 1 + // Extract internal-subset declarations and stash them + // on the per-parse context. The matcher's text / + // attribute paths and the element actions read these + // back via lex.Ctx.U. + if subsetStart >= 0 && subsetEnd > subsetStart && lex.Ctx != nil { + subset := src[subsetStart:subsetEnd] + if lex.Ctx.U == nil { + lex.Ctx.U = map[string]any{} + } + if found := parseDoctypeEntities(subset); len(found) > 0 { + existing, _ := lex.Ctx.U["dtdEntities"].(map[string]string) + if existing == nil { + existing = map[string]string{} + } + for k, v := range found { + existing[k] = v + } + lex.Ctx.U["dtdEntities"] = existing + } + if found := parseDoctypeAttlists(subset); len(found) > 0 { + existing, _ := lex.Ctx.U["dtdAttrDefaults"].(map[string]map[string]string) + if existing == nil { + existing = map[string]map[string]string{} + } + for elem, defs := range found { + if existing[elem] == nil { + existing[elem] = map[string]string{} + } + for k, v := range defs { + existing[elem][k] = v + } + } + lex.Ctx.U["dtdAttrDefaults"] = existing + } + } + tsrc := src[sI:finish] + tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // Processing instruction: + if sI+1 < srclen && src[sI+1] == '?' { + end := strings.Index(src[sI+2:], "?>") + if end < 0 { + return lex.Bad("unterminated_pi") + } + bodyEnd := sI + 2 + end + // WF: PI target must be a Name. 
+ _, after, ok := readName(src, sI+2) + if !ok || after > bodyEnd { + return lex.Bad("pi_target_invalid") + } + if after < bodyEnd && !isSpace(src[after]) { + return lex.Bad("pi_target_invalid") + } + if code := checkChars(src[sI+2 : bodyEnd]); code != "" { + return lex.Bad(code) + } + finish := bodyEnd + 2 + tsrc := src[sI:finish] + tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // Closing tag: + if sI+1 < srclen && src[sI+1] == '/' { + name, after, ok := readName(src, sI+2) + // WF: empty close tag `` is invalid. + if !ok { + return lex.Bad("xml_invalid_tag") + } + i := after + for i < srclen && isSpace(src[i]) { + i++ + } + if i >= srclen || src[i] != '>' { + return lex.Bad("xml_invalid_tag") + } + finish := i + 1 + tsrc := src[sI:finish] + tkn := lex.Token("#XCL", xclTin, name, tsrc) + advance(pnt, sI, finish) + setXmlDepth(lex, xmlDepth(lex)-1) + return tkn + } + + // Opening or self-close tag: + name, after, ok := readName(src, sI+1) + if !ok { + return nil + } + i := after + attrs := map[string]any{} + + for { + wsStart := i + for i < srclen && isSpace(src[i]) { + i++ + } + if i >= srclen { + return lex.Bad("xml_invalid_tag") + } + + // End of tag. + if src[i] == '>' { + finish := i + 1 + tsrc := src[sI:finish] + val := map[string]any{"name": name, "attributes": attrs} + tkn := lex.Token("#XOP", xopTin, val, tsrc) + advance(pnt, sI, finish) + setXmlDepth(lex, xmlDepth(lex)+1) + return tkn + } + if src[i] == '/' && i+1 < srclen && src[i+1] == '>' { + finish := i + 2 + tsrc := src[sI:finish] + val := map[string]any{"name": name, "attributes": attrs} + tkn := lex.Token("#XSC", xscTin, val, tsrc) + advance(pnt, sI, finish) + // #XSC is an instantly-closed element; depth unchanged. + return tkn + } + + // Attributes must be separated by whitespace. + if wsStart == i { + return lex.Bad("xml_invalid_tag") + } + + // Attribute name. 
+ attrName, attrEnd, ok := readName(src, i) + if !ok { + return lex.Bad("xml_invalid_tag") + } + i = attrEnd + + for i < srclen && isSpace(src[i]) { + i++ + } + if i >= srclen || src[i] != '=' { + return lex.Bad("xml_invalid_tag") + } + i++ + for i < srclen && isSpace(src[i]) { + i++ + } + + if i >= srclen { + return lex.Bad("xml_invalid_tag") + } + quote := src[i] + if quote != '"' && quote != '\'' { + return lex.Bad("xml_invalid_tag") + } + i++ + valStart := i + // Per the XML 1.0 spec, attribute values cannot contain + // a literal `<`. Scanning lets us also validate entity + // references in the value below. + for i < srclen && src[i] != quote { + if src[i] == '<' { + return lex.Bad("lt_in_attr_value") + } + i++ + } + if i >= srclen { + return lex.Bad("xml_invalid_tag") + } + raw := src[valStart:i] + i++ // consume closing quote + + if code := checkChars(raw); code != "" { + return lex.Bad(code) + } + if code := checkEntityRefs(raw, dtdEntities(lex), declared, strict); code != "" { + return lex.Bad(code) + } + if _, ok := attrs[attrName]; ok { + return lex.Bad("duplicate_attribute") + } + // §3.3.3 attribute-value normalisation: TAB/LF/CR/CRLF + // all collapse to a single SPACE. Without DTD attribute + // types, all attributes are treated as CDATA-typed + // (no further whitespace collapsing or trimming). + normalised := normaliseAttrWhitespace(raw) + attrs[attrName] = decode(normalised, dtdEntities(lex)) + } + } + } +} + +// §2.11 End-of-line handling: any literal CR or CR-LF is normalised +// to a single LF before parsing proceeds. Applies to character data +// and CDATA section bodies. 
func normaliseLineEndings(s string) string {
	// Fast path: without a CR there is nothing to rewrite.
	if strings.IndexByte(s, '\r') < 0 {
		return s
	}
	out := make([]byte, 0, len(s))
	for pos := 0; pos < len(s); pos++ {
		if s[pos] != '\r' {
			out = append(out, s[pos])
			continue
		}
		out = append(out, '\n')
		// A CR-LF pair yields one LF: swallow the LF that follows.
		if pos+1 < len(s) && s[pos+1] == '\n' {
			pos++
		}
	}
	return string(out)
}

// §3.3.3 attribute-value normalisation for CDATA-typed attributes:
// TAB / LF / CR / CRLF all collapse to a single SPACE.
func normaliseAttrWhitespace(s string) string {
	if strings.IndexAny(s, "\t\n\r") < 0 {
		return s
	}
	out := make([]byte, 0, len(s))
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		if ch != '\t' && ch != '\n' && ch != '\r' {
			out = append(out, ch)
			continue
		}
		out = append(out, ' ')
		// CR-LF counts as one line break, hence one SPACE.
		if ch == '\r' && pos+1 < len(s) && s[pos+1] == '\n' {
			pos++
		}
	}
	return string(out)
}

// checkChars validates that every byte in `s` is a legal XML 1.0 Char.
// Returns "invalid_xml_char" on the first illegal byte, "" if all
// bytes are legal. Only the C0 control band is checked here; the full
// Char production (which excludes #xFFFE/#xFFFF and unpaired
// surrogates) is not enforced.
func checkChars(s string) string {
	for pos := 0; pos < len(s); pos++ {
		switch c := s[pos]; {
		case c >= 0x20, c == 0x09, c == 0x0a, c == 0x0d:
			// Legal: TAB, LF, CR, or anything outside the C0 band.
		default:
			return "invalid_xml_char"
		}
	}
	return ""
}

// checkEntityRefs validates that every `&` in `s` begins a well-formed
// entity reference. Returns "" on success, otherwise an error code
// suitable for lex.Bad(). The `dtd` map supplies DOCTYPE-declared
// entity names; `declared` adds names that are always declared
// (typically the predefined and caller-supplied entities). When
// `strict` is true, references to unknown names trigger
// "undeclared_entity"; otherwise the syntactic check still runs but
// unknown names pass through.
+// +// Well-formed forms: +// +// &name; - name must start with a NameStartChar +// &#nnnn; - decimal numeric character reference +// &#xhhhh; - hexadecimal numeric character reference +func checkEntityRefs(s string, dtd, declared map[string]string, strict bool) string { + for i := 0; i < len(s); i++ { + if s[i] != '&' { + continue + } + semi := strings.IndexByte(s[i+1:], ';') + if semi < 0 { + return "bad_entity_ref" + } + semi += i + 1 + ref := s[i+1 : semi] + if len(ref) == 0 { + return "bad_entity_ref" + } + if ref[0] == '#' { + if len(ref) < 2 { + return "bad_entity_ref" + } + var digits string + var hex bool + if ref[1] == 'x' || ref[1] == 'X' { + hex = true + digits = ref[2:] + } else { + digits = ref[1:] + } + if len(digits) == 0 { + return "bad_entity_ref" + } + for _, d := range digits { + if hex { + if !((d >= '0' && d <= '9') || (d >= 'a' && d <= 'f') || (d >= 'A' && d <= 'F')) { + return "bad_entity_ref" + } + } else { + if !(d >= '0' && d <= '9') { + return "bad_entity_ref" + } + } + } + } else { + // Entity name must be a Name. Use rune-aware checks so + // non-ASCII names (Unicode XML 1.0 NameStartChar / NameChar + // blocks) are accepted. + r0, sz := utf8.DecodeRuneInString(ref) + if r0 == utf8.RuneError && sz <= 1 { + return "bad_entity_ref" + } + if !isNameStartRune(r0) { + return "bad_entity_ref" + } + j := sz + for j < len(ref) { + r, sz := utf8.DecodeRuneInString(ref[j:]) + if r == utf8.RuneError && sz <= 1 { + return "bad_entity_ref" + } + if !isNameCharRune(r) { + return "bad_entity_ref" + } + j += sz + } + // §4.1: in strict mode the named entity must resolve. 
+ if strict { + if _, ok := declared[ref]; !ok { + if _, ok := dtd[ref]; !ok { + return "undeclared_entity" + } + } + } + } + i = semi + } + return "" +} + +// xmlScope tracks state inherited down an XML tree: +// +// ns - prefix -> namespace URI (XML Namespaces 1.0) +// space - active xml:space value (XML 1.0 §2.10) +// lang - active xml:lang value (XML 1.0 §2.12) +// +// `space` and `lang` are recorded on each element only when they +// are non-default, so plain documents don't sprout extra fields. +type xmlScope struct { + ns map[string]string + space string + lang string +} + +// Reserved namespace URIs (Namespaces in XML 1.0 §2). +const ( + xmlNSURI = "http://www.w3.org/XML/1998/namespace" + xmlnsNSURI = "http://www.w3.org/2000/xmlns/" +) + +// resolveNamespaces annotates `element` (and its descendants) with +// `prefix`, `localName`, `namespace`, `space` and `lang` fields +// resolved from xmlns / xmlns:* / xml:space / xml:lang attributes +// in scope. Returns "" on success or an error code on the first +// reserved-prefix or unbound-prefix violation. +func resolveNamespaces(element map[string]any, scope map[string]string) string { + // Pre-bind the reserved xml prefix so xml:space / xml:lang + // qualify without an explicit declaration. 
+ ns := make(map[string]string, len(scope)+1) + for k, v := range scope { + ns[k] = v + } + ns["xml"] = xmlNSURI + return resolveScope(element, xmlScope{ns: ns, space: "default", lang: ""}) +} + +func resolveScope(element map[string]any, scope xmlScope) string { + local := xmlScope{ + ns: make(map[string]string, len(scope.ns)+4), + space: scope.space, + lang: scope.lang, + } + for k, v := range scope.ns { + local.ns[k] = v + } + if attrs, ok := element["attributes"].(map[string]any); ok { + for k, v := range attrs { + s, _ := v.(string) + switch { + case k == "xmlns": + if s == xmlNSURI || s == xmlnsNSURI { + return "reserved_namespace" + } + local.ns[""] = s + case strings.HasPrefix(k, "xmlns:"): + prefix := k[6:] + switch prefix { + case "xml": + if s != xmlNSURI { + return "reserved_namespace" + } + case "xmlns": + return "reserved_namespace" + default: + if s == xmlNSURI || s == xmlnsNSURI { + return "reserved_namespace" + } + } + local.ns[prefix] = s + case k == "xml:space": + local.space = s + case k == "xml:lang": + local.lang = s + default: + if colon := strings.Index(k, ":"); colon > 0 { + ap := k[:colon] + if ap != "xmlns" { + if _, ok := local.ns[ap]; !ok { + return "unbound_prefix" + } + } + } + } + } + } + + name, _ := element["name"].(string) + if idx := strings.Index(name, ":"); idx >= 0 { + prefix := name[:idx] + element["prefix"] = prefix + element["localName"] = name[idx+1:] + if uri, ok := local.ns[prefix]; ok { + element["namespace"] = uri + } else { + return "unbound_prefix" + } + } else { + element["localName"] = name + if uri, ok := local.ns[""]; ok { + element["namespace"] = uri + } + } + + if local.space != "default" { + element["space"] = local.space + } + if local.lang != "" { + element["lang"] = local.lang + } + + children, _ := element["children"].([]any) + for _, c := range children { + if ce, ok := c.(map[string]any); ok { + if err := resolveScope(ce, local); err != "" { + return err + } + } + } + return "" +} + +// --- helpers --- + 
+func advance(pnt *jsonic.Point, from, to int) { + pnt.SI = to + pnt.CI += to - from +} + +// XML 1.0 Fifth Edition NameStartChar (§2.3 [4]): ASCII letters, +// underscore, colon and a long list of Unicode letter / ideograph +// blocks. Single-byte fast path for the common ASCII case. +func isNameStartByte(ch byte) bool { + return (ch >= 'A' && ch <= 'Z') || + (ch >= 'a' && ch <= 'z') || + ch == '_' || ch == ':' +} + +// Backwards-compat alias used by sites that only need to peek at the +// next byte (entity ref check, etc.). +func isNameStart(ch byte) bool { return isNameStartByte(ch) } + +func isNameStartRune(r rune) bool { + if r < 0x80 { + return isNameStartByte(byte(r)) + } + return (r >= 0xc0 && r <= 0xd6) || + (r >= 0xd8 && r <= 0xf6) || + (r >= 0xf8 && r <= 0x2ff) || + (r >= 0x370 && r <= 0x37d) || + (r >= 0x37f && r <= 0x1fff) || + (r >= 0x200c && r <= 0x200d) || + (r >= 0x2070 && r <= 0x218f) || + (r >= 0x2c00 && r <= 0x2fef) || + (r >= 0x3001 && r <= 0xd7ff) || + (r >= 0xf900 && r <= 0xfdcf) || + (r >= 0xfdf0 && r <= 0xfffd) || + (r >= 0x10000 && r <= 0xeffff) +} + +func isNameCharByte(ch byte) bool { + return isNameStartByte(ch) || + (ch >= '0' && ch <= '9') || + ch == '-' || ch == '.' +} + +func isNameChar(ch byte) bool { return isNameCharByte(ch) } + +func isNameCharRune(r rune) bool { + if r < 0x80 { + return isNameCharByte(byte(r)) + } + if isNameStartRune(r) { + return true + } + return r == 0xb7 || + (r >= 0x300 && r <= 0x36f) || + (r >= 0x203f && r <= 0x2040) +} + +// readName reads an XML Name starting at `start` from `src`. Returns +// the name, the byte index after the name, and ok=false if the byte +// at `start` does not begin a NameStartChar (including ASCII or any +// of the Unicode ranges in §2.3 [4]). 
+func readName(src string, start int) (name string, end int, ok bool) { + if start >= len(src) { + return "", start, false + } + r, sz := utf8.DecodeRuneInString(src[start:]) + if r == utf8.RuneError && sz <= 1 { + return "", start, false + } + if !isNameStartRune(r) { + return "", start, false + } + i := start + sz + for i < len(src) { + r, sz := utf8.DecodeRuneInString(src[i:]) + if r == utf8.RuneError && sz <= 1 { + break + } + if !isNameCharRune(r) { + break + } + i += sz + } + return src[start:i], i, true +} + +func isSpace(ch byte) bool { + return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' +} + +func boolPtr(b bool) *bool { return &b } + +func toBool(v any, def bool) bool { + if v == nil { + return def + } + b, ok := v.(bool) + if !ok { + return def + } + return b +} + +func toStringMap(v any) map[string]string { + out := map[string]string{} + switch m := v.(type) { + case map[string]string: + for k, vv := range m { + out[k] = vv + } + case map[string]any: + for k, vv := range m { + if s, ok := vv.(string); ok { + out[k] = s + } + } + } + return out +} diff --git a/go/xml_test.go b/go/xml_test.go new file mode 100644 index 0000000..be18e7e --- /dev/null +++ b/go/xml_test.go @@ -0,0 +1,374 @@ +package xml + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// specEntry represents one row of a TSV spec file. +type specEntry struct { + File string + Line int + Name string + Input string // Escape-decoded XML source. + Expected string // Raw cell: JSON text, or "ERROR" / "ERROR:code". + Opts string // Raw JSON (may be empty). +} + +// specDir returns the absolute path to the shared TSV spec directory. +func specDir() string { + return filepath.Join("..", "test", "spec") +} + +// loadSpec reads a TSV spec file into a slice of specEntry. Comment and +// blank lines are skipped. 
Escapes in the `input` column are decoded +// via unescapeInput; the `expected` and `opts` columns are left raw so +// JSON's own escape rules are honoured by the downstream JSON parser. +func loadSpec(t *testing.T, path string) []specEntry { + t.Helper() + f, err := os.Open(path) + if err != nil { + t.Fatalf("open %s: %v", path, err) + } + defer f.Close() + + var out []specEntry + scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 1<<20), 1<<20) + lineNo := 0 + for scanner.Scan() { + lineNo++ + line := scanner.Text() + if line == "" || strings.HasPrefix(line, "#") { + continue + } + cols := strings.Split(line, "\t") + if len(cols) < 3 { + t.Fatalf("%s:%d: expected at least 3 tab-separated columns, got %d", path, lineNo, len(cols)) + } + entry := specEntry{ + File: filepath.Base(path), + Line: lineNo, + Name: cols[0], + Input: unescapeInput(cols[1]), + Expected: cols[2], + } + if len(cols) >= 4 { + entry.Opts = cols[3] + } + out = append(out, entry) + } + if err := scanner.Err(); err != nil { + t.Fatalf("read %s: %v", path, err) + } + return out +} + +// unescapeInput decodes the escape sequences used in the `input` +// column of the TSV spec: \n (LF), \r (CR), \t (TAB), \\ (backslash). +// Any other `\x` sequence is left intact so XML escapes like `\d` are +// not accidentally rewritten. +func unescapeInput(s string) string { + if !strings.Contains(s, `\`) { + return s + } + var b strings.Builder + b.Grow(len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\\' && i+1 < len(s) { + switch s[i+1] { + case 'n': + b.WriteByte('\n') + i++ + continue + case 'r': + b.WriteByte('\r') + i++ + continue + case 't': + b.WriteByte('\t') + i++ + continue + case '\\': + b.WriteByte('\\') + i++ + continue + } + } + b.WriteByte(c) + } + return b.String() +} + +// parseOpts decodes the optional options JSON into a map suitable for +// jsonic.UseDefaults. Empty strings produce an empty map. 
+func parseOpts(t *testing.T, entry specEntry) map[string]any { + t.Helper() + if strings.TrimSpace(entry.Opts) == "" { + return map[string]any{} + } + var out map[string]any + if err := json.Unmarshal([]byte(entry.Opts), &out); err != nil { + t.Fatalf("%s:%d: parse opts %q: %v", entry.File, entry.Line, entry.Opts, err) + } + return out +} + +// parseExpected decodes the expected cell: either a JSON document or +// an `ERROR` / `ERROR:code` marker. +func parseExpected(t *testing.T, entry specEntry) (wantErr bool, errCode string, wantJSON any) { + t.Helper() + raw := entry.Expected + if strings.HasPrefix(raw, "ERROR") { + rest := strings.TrimPrefix(raw, "ERROR") + rest = strings.TrimPrefix(rest, ":") + return true, rest, nil + } + if err := json.Unmarshal([]byte(raw), &wantJSON); err != nil { + t.Fatalf("%s:%d: parse expected JSON %q: %v", entry.File, entry.Line, raw, err) + } + return false, "", wantJSON +} + +// runSpecFile is the workhorse: it loads one spec file and runs each +// row as its own sub-test. 
+func runSpecFile(t *testing.T, path string) { + entries := loadSpec(t, path) + if len(entries) == 0 { + t.Fatalf("%s: no spec entries loaded", path) + } + for _, entry := range entries { + entry := entry + t.Run(entry.Name, func(t *testing.T) { + opts := parseOpts(t, entry) + wantErr, errCode, wantVal := parseExpected(t, entry) + + j := jsonic.Make() + if err := j.UseDefaults(Xml, Defaults, opts); err != nil { + t.Fatalf("plugin init: %v", err) + } + got, err := j.Parse(entry.Input) + + if wantErr { + if err == nil { + t.Fatalf("expected parse error, got result %v", got) + } + if errCode != "" && !strings.Contains(err.Error(), errCode) { + t.Fatalf("expected error code %q, got %q", errCode, err.Error()) + } + return + } + if err != nil { + t.Fatalf("unexpected parse error: %v", err) + } + + // Round-trip the got value through JSON for type normalisation + // so `[]any` vs concrete slice types compare cleanly against + // values decoded from the spec via json.Unmarshal. + gotJSON, err := json.Marshal(got) + if err != nil { + t.Fatalf("marshal got: %v", err) + } + var gotVal any + if err := json.Unmarshal(gotJSON, &gotVal); err != nil { + t.Fatalf("unmarshal got: %v", err) + } + if !reflect.DeepEqual(gotVal, wantVal) { + wantPretty, _ := json.Marshal(wantVal) + t.Fatalf("\nwant: %s\ngot : %s", wantPretty, gotJSON) + } + }) + } +} + +func TestBasicSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "basic.tsv")) } +func TestAttributesSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "attributes.tsv")) } +func TestEntitiesSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "entities.tsv")) } +func TestNamespacesSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "namespaces.tsv")) } +func TestStructureSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "structure.tsv")) } +func TestErrorsSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "errors.tsv")) } +func TestW3CSpec(t *testing.T) { runSpecFile(t, 
filepath.Join(specDir(), "w3c.tsv")) } + +// --- XML literals embedded in Jsonic source -------------------------------- +// +// With `embed: true` the plugin extends Jsonic's own grammar so a literal +// XML element can appear anywhere a Jsonic value is expected. These tests +// exercise that integration: plain Jsonic documents still parse, a lone +// XML literal parses as an element, XML literals inside maps and lists +// parse in place, text with JSON-syntax characters (commas, colons) is +// preserved, and namespaces resolve across the embedded tree. + +func embedParser(t *testing.T) *jsonic.Jsonic { + t.Helper() + j := jsonic.Make() + if err := j.UseDefaults(Xml, Defaults, map[string]any{"embed": true}); err != nil { + t.Fatalf("UseDefaults: %v", err) + } + return j +} + +func TestEmbedPlainJsonicStillWorks(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`{a:1, b:"two"}`) + if err != nil { + t.Fatalf("parse: %v", err) + } + m, ok := got.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", got) + } + if m["a"] != float64(1) || m["b"] != "two" { + t.Fatalf("plain jsonic: got %v", m) + } +} + +func TestEmbedXmlAsTopLevelValue(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`hello`) + if err != nil { + t.Fatalf("parse: %v", err) + } + el, ok := got.(map[string]any) + if !ok { + t.Fatalf("expected map, got %T", got) + } + if el["name"] != "a" { + t.Fatalf("name: got %v", el["name"]) + } + children, _ := el["children"].([]any) + if len(children) != 1 || children[0] != "hello" { + t.Fatalf("children: got %v", children) + } +} + +func TestEmbedXmlInsideJsonicMap(t *testing.T) { + j := embedParser(t) + src := `{ + title: "order-42", + payload: + Widget + Gadget + , +}` + got, err := j.Parse(src) + if err != nil { + t.Fatalf("parse: %v", err) + } + m := got.(map[string]any) + if m["title"] != "order-42" { + t.Fatalf("title: got %v", m["title"]) + } + payload := m["payload"].(map[string]any) + if payload["name"] != "order" { + 
t.Fatalf("payload.name: got %v", payload["name"]) + } + if a, _ := payload["attributes"].(map[string]any); a["id"] != "42" { + t.Fatalf("payload.attributes.id: got %v", a["id"]) + } + children, _ := payload["children"].([]any) + var items []map[string]any + for _, c := range children { + if cm, ok := c.(map[string]any); ok && cm["name"] == "item" { + items = append(items, cm) + } + } + if len(items) != 2 { + t.Fatalf("expected 2 item elements, got %d", len(items)) + } + if a, _ := items[0]["attributes"].(map[string]any); a["qty"] != "2" { + t.Fatalf("item[0].qty: got %v", a["qty"]) + } + if ch, _ := items[0]["children"].([]any); len(ch) != 1 || ch[0] != "Widget" { + t.Fatalf("item[0].children: got %v", ch) + } + if a, _ := items[1]["attributes"].(map[string]any); a["qty"] != "1" { + t.Fatalf("item[1].qty: got %v", a["qty"]) + } +} + +func TestEmbedXmlTextPreservesJsonSyntaxChars(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`Hello, World!`) + if err != nil { + t.Fatalf("parse: %v", err) + } + children, _ := got.(map[string]any)["children"].([]any) + if len(children) != 1 || children[0] != "Hello, World!" 
{ + t.Fatalf("children: got %v", children) + } + + got2, err := j.Parse(`key: value`) + if err != nil { + t.Fatalf("parse: %v", err) + } + children2, _ := got2.(map[string]any)["children"].([]any) + if len(children2) != 1 || children2[0] != "key: value" { + t.Fatalf("children2: got %v", children2) + } +} + +func TestEmbedMultipleXmlInsideJsonicList(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`[, x, ]`) + if err != nil { + t.Fatalf("parse: %v", err) + } + arr, ok := got.([]any) + if !ok || len(arr) != 3 { + t.Fatalf("expected 3-element list, got %v", got) + } + if arr[0].(map[string]any)["name"] != "a" { + t.Fatalf("arr[0]: %v", arr[0]) + } + if ch, _ := arr[1].(map[string]any)["children"].([]any); len(ch) != 1 || ch[0] != "x" { + t.Fatalf("arr[1].children: %v", ch) + } + if a, _ := arr[2].(map[string]any)["attributes"].(map[string]any); a["x"] != "1" { + t.Fatalf("arr[2].attributes.x: %v", a) + } +} + +func TestEmbedXmlNamespacesResolve(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`{doc: }`) + if err != nil { + t.Fatalf("parse: %v", err) + } + doc := got.(map[string]any)["doc"].(map[string]any) + if doc["namespace"] != "http://e.example" { + t.Fatalf("doc.namespace: %v", doc["namespace"]) + } + children, _ := doc["children"].([]any) + if len(children) != 1 { + t.Fatalf("expected 1 child, got %d", len(children)) + } + child := children[0].(map[string]any) + if child["namespace"] != "http://e.example" { + t.Fatalf("child.namespace: %v", child["namespace"]) + } +} + +// TestSpecDirExists is a sanity check that the shared test/spec folder is +// reachable from the Go test working directory. +func TestSpecDirExists(t *testing.T) { + info, err := os.Stat(specDir()) + if err != nil { + t.Fatalf("spec dir: %v", err) + } + if !info.IsDir() { + t.Fatalf("%s is not a directory", specDir()) + } +} + +// Compile-time assertion that specEntry stringifies meaningfully in +// error messages (keeps `fmt` import stable if trimmed elsewhere). 
+var _ = fmt.Sprintf diff --git a/go/xmlconf_test.go b/go/xmlconf_test.go new file mode 100644 index 0000000..b19019c --- /dev/null +++ b/go/xmlconf_test.go @@ -0,0 +1,142 @@ +package xml + +import ( + "os" + "path/filepath" + "strings" + "testing" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// Exercise the parser against the W3C XML Conformance Test Suite +// (xmltest, James Clark's set). The suite is not bundled with the +// repository — run `scripts/fetch-xml-suite.sh` to download it into +// `test/xmlconf/`. When the suite is absent these tests are skipped. +// +// Our parser deliberately doesn't implement every XML 1.0 well- +// formedness constraint (we don't validate character legality, resolve +// DTD-declared entities, or check for all forbidden sequences such as +// `--` inside comments), so the goal of these tests is not 100% +// conformance. Instead each test records how many documents parsed as +// expected and fails only if that count regresses below a stable +// floor. The numbers below were measured against the current parser +// and will move upward as conformance improves. + +const ( + // Minimum `valid/sa/*.xml` documents that must parse without error + // (out of 120). The conformance runner pre-decodes BOMs and + // supports Unicode tag names, so the floor is set close to the + // total. + validSaPassFloor = 118 + + // Minimum `not-wf/sa/*.xml` documents that must be rejected. The + // parser catches structural well-formedness errors (bad tags, + // unmatched close, unterminated constructs) but does not check + // many character-level WF constraints, so this floor is set well + // below total (186) and serves as a regression guard. 
+ notWfSaRejectFloor = 30 +) + +func xmlconfRoot(t *testing.T) string { + t.Helper() + root := filepath.Join("..", "test", "xmlconf") + info, err := os.Stat(filepath.Join(root, "xmltest")) + if err != nil || !info.IsDir() { + t.Skipf("W3C XML Test Suite not found at %s; run scripts/fetch-xml-suite.sh to enable this test", root) + } + return root +} + +func xmlconfFiles(t *testing.T, dir string) []string { + t.Helper() + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("read %s: %v", dir, err) + } + var out []string + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".xml") { + continue + } + out = append(out, filepath.Join(dir, e.Name())) + } + return out +} + +func xmlconfParse(src string) (any, error) { + j := jsonic.Make() + if err := j.UseDefaults(Xml, Defaults); err != nil { + return nil, err + } + // The conformance suite mixes UTF-8/16/32 encoded files. Detect + // the byte-order mark and transcode to UTF-8 so the encoding is + // transparent to the parser. + return j.Parse(DecodeBOM(src)) +} + +func TestXmlConfValidStandalone(t *testing.T) { + root := xmlconfRoot(t) + files := xmlconfFiles(t, filepath.Join(root, "xmltest", "valid", "sa")) + if len(files) == 0 { + t.Skipf("no files under xmltest/valid/sa") + } + + pass := 0 + var failures []string + for _, path := range files { + body, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + if _, perr := xmlconfParse(string(body)); perr != nil { + failures = append(failures, filepath.Base(path)+": "+ + strings.SplitN(perr.Error(), "\n", 2)[0]) + continue + } + pass++ + } + + total := len(files) + t.Logf("valid/sa: %d / %d parsed successfully", pass, total) + if pass < validSaPassFloor { + t.Errorf("valid/sa pass count %d dropped below floor %d (total %d). 
Sample failures:\n %s", + pass, validSaPassFloor, total, strings.Join(firstN(failures, 5), "\n ")) + } +} + +func TestXmlConfNotWellFormedStandalone(t *testing.T) { + root := xmlconfRoot(t) + files := xmlconfFiles(t, filepath.Join(root, "xmltest", "not-wf", "sa")) + if len(files) == 0 { + t.Skipf("no files under xmltest/not-wf/sa") + } + + rejected := 0 + var falseAccepts []string + for _, path := range files { + body, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + if _, perr := xmlconfParse(string(body)); perr != nil { + rejected++ + continue + } + falseAccepts = append(falseAccepts, filepath.Base(path)) + } + + total := len(files) + t.Logf("not-wf/sa: %d / %d rejected as expected", rejected, total) + if rejected < notWfSaRejectFloor { + t.Errorf("not-wf/sa reject count %d dropped below floor %d (total %d). Sample false accepts:\n %s", + rejected, notWfSaRejectFloor, total, strings.Join(firstN(falseAccepts, 5), "\n ")) + } +} + +func firstN(list []string, n int) []string { + if len(list) > n { + return list[:n] + } + return list +} diff --git a/package.json b/package.json index 129bded..09ec4d6 100644 --- a/package.json +++ b/package.json @@ -1,12 +1,12 @@ { - "name": "@jsonic/csv", - "version": "0.10.0", - "description": "This plugin allows the [Jsonic](https://jsonic.senecajs.org) JSON parser to support csv syntax.", - "main": "dist/csv.js", + "name": "@jsonic/xml", + "version": "0.1.0", + "description": "This plugin allows the [Jsonic](https://jsonic.senecajs.org) JSON parser to support xml syntax.", + "main": "dist/xml.js", "type": "commonjs", - "browser": "csv.min.js", - "types": "dist/csv.d.ts", - "homepage": "https://github.com/jsonicjs/csv", + "browser": "xml.min.js", + "types": "dist/xml.d.ts", + "homepage": "https://github.com/jsonicjs/xml", "keywords": [ "pattern", "matcher", @@ -17,12 +17,12 @@ "author": "Richard Rodger (http://richardrodger.com)", "repository": { "type": "git", - "url": 
"git://github.com/jsonicjs/csv.git" + "url": "git://github.com/jsonicjs/xml.git" }, "scripts": { "test": "node --enable-source-maps --test \"dist-test/*.test.js\"", "test-some": "node --enable-source-maps --test-name-pattern=\"$npm_config_pattern\" --test \"dist-test/*.test.js\"", - "test-watch": "node --test --watch dist-test/csv.test.js", + "test-watch": "node --test --watch dist-test/xml.test.js", "embed": "node embed-grammar.js", "watch": "tsc --build src test -w", "build": "node embed-grammar.js && tsc --build src test", @@ -40,7 +40,6 @@ ], "devDependencies": { "@types/node": "^25.6.0", - "csv-spectrum": "^2.0.0", "typescript": "^5.9.3" }, "peerDependencies": { diff --git a/scripts/fetch-xml-suite.sh b/scripts/fetch-xml-suite.sh new file mode 100755 index 0000000..ac6629f --- /dev/null +++ b/scripts/fetch-xml-suite.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Download the W3C XML Conformance Test Suite (xmlts, 2013-09-23 +# snapshot) and extract it into test/xmlconf/ so both the Go and +# TypeScript test runners can exercise the parser against thousands +# of real-world XML documents. +# +# The archive is owned by W3C and its contributors (Sun, OASIS, IBM, +# University of Edinburgh, Fuji Xerox, ...) and is not redistributed +# as part of this repository. Running this script is an explicit +# opt-in to download it from the W3C site. +# +# Usage: +# scripts/fetch-xml-suite.sh # default location +# scripts/fetch-xml-suite.sh /some/dir # custom destination +# +# After fetching, the conformance-driven tests are picked up +# automatically: +# go test ./go/... +# npm test +set -euo pipefail + +URL="https://www.w3.org/XML/Test/xmlts20130923.tar.gz" + +REPO_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)" +DEST="${1:-$REPO_ROOT/test/xmlconf}" + +if [ -d "$DEST" ] && [ -d "$DEST/xmltest" ]; then + echo "Suite already present at $DEST (delete the directory to re-download)." 
+ exit 0 +fi + +tmp="$(mktemp -d)" +trap 'rm -rf "$tmp"' EXIT + +echo "Fetching $URL ..." +curl -fL -o "$tmp/xmlts.tar.gz" "$URL" + +echo "Extracting to $DEST ..." +mkdir -p "$DEST" +# The archive already contains a top-level `xmlconf/` directory, so +# strip one component to land its contents directly in $DEST. +tar -xzf "$tmp/xmlts.tar.gz" -C "$DEST" --strip-components=1 + +valid=$(find "$DEST/xmltest/valid/sa" -maxdepth 1 -name '*.xml' | wc -l) +notwf=$(find "$DEST/xmltest/not-wf/sa" -maxdepth 1 -name '*.xml' | wc -l) +echo "Done. Extracted $valid standalone-valid and $notwf not-well-formed XML files." diff --git a/src/csv.ts b/src/csv.ts deleted file mode 100644 index c4eaf01..0000000 --- a/src/csv.ts +++ /dev/null @@ -1,562 +0,0 @@ -/* Copyright (c) 2021-2025 Richard Rodger, MIT License */ - -// Import Jsonic types used by plugins. -import { - Jsonic, - Rule, - RuleSpec, - Plugin, - Context, - Config, - Options, - Lex, -} from 'jsonic' - -// See defaults below for commentary. -type CsvOptions = { - trim: boolean | null - comment: boolean | null - number: boolean | null - value: boolean | null - header: boolean - object: boolean - stream: null | ((what: string, record?: Record | Error) => void) - strict: boolean - field: { - separation: null | string - nonameprefix: string - empty: any - names: undefined | string[] - exact: boolean - } - record: { - separators: null | string - empty: boolean - } - string: { - quote: string - csv: null | boolean - } -} - -// --- BEGIN EMBEDDED csv-grammar.jsonic --- -const grammarText = ` -# CSV Grammar Definition -# Parsed by a standard Jsonic instance and passed to jsonic.grammar() -# Function references (@ prefixed) are resolved against the refs map -# -# Token naming: -# #LN - line ending (removed from per-instance IGNORE set) -# #SP - whitespace (removed from per-instance IGNORE set in strict mode) -# #CA - comma / field separator -# #ZZ - end of input -# #VAL - token set: text, string, number, value literals -# -# Rules 
csv, newline, record, text are fully defined here. -# Rules list, elem, val are modified in code (strict mode defines from scratch; -# non-strict prepends to existing defaults to preserve JSON parsing). - -{ - rule: csv: open: [ - { s: '#ZZ' } - { s: '#LN' p: newline c: '@not-record-empty' } - { p: record } - ] - - rule: newline: open: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - rule: newline: close: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - - rule: record: open: [ - { p: list } - ] - rule: record: close: [ - { s: '#ZZ' } - { s: '#LN #ZZ' b: 1 } - { s: '#LN' r: '@record-close-next' } - ] - - rule: text: open: [ - { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } - { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } - { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } - { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } - {} - ] -} -` -// --- END EMBEDDED csv-grammar.jsonic --- - -// Plugin implementation. -const Csv: Plugin = (jsonic: Jsonic, options: CsvOptions) => { - // Normalize boolean options. - const strict = !!options.strict - const objres = !!options.object - const header = !!options.header - - // These may be changed below by superior options. - let trim = !!options.trim - let comment = !!options.comment - let opt_number = !!options.number - let opt_value = !!options.value - let record_empty = !!options.record?.empty - - const stream = options.stream - - // In strict mode, Jsonic field content is not parsed. - if (strict) { - if (false !== options.string.csv) { - jsonic.options({ - lex: { - match: { - stringcsv: { order: 1e5, make: buildCsvStringMatcher(options) }, - }, - }, - }) - } - jsonic.options({ - rule: { exclude: 'jsonic,imp' }, - }) - } - - // Fields may contain Jsonic content. 
- else { - if (true === options.string.csv) { - jsonic.options({ - lex: { - match: { - stringcsv: { order: 1e5, make: buildCsvStringMatcher(options) }, - }, - }, - }) - } - trim = null === options.trim ? true : trim - comment = null === options.comment ? true : comment - opt_number = null === options.number ? true : opt_number - opt_value = null === options.value ? true : opt_value - jsonic.options({ - rule: { exclude: 'imp' }, - }) - } - - // Stream rows as they are parsed, do not store in result. - if (stream) { - let parser = jsonic.internal().parser - let origStart = parser.start.bind(parser) - parser.start = (...args: any[]) => { - try { - return origStart(...args) - } catch (e: any) { - stream('error', e) - } - } - } - - let token: Record = {} - if (strict) { - // Disable JSON structure tokens - token = { - '#OB': null, - '#CB': null, - '#OS': null, - '#CS': null, - '#CL': null, - } - } - - // Custom "comma" - if (options.field.separation) { - token['#CA'] = options.field.separation - } - - // Jsonic option overrides. - let jsonicOptions: any = { - rule: { - start: 'csv', - }, - fixed: { - token, - }, - tokenSet: { - IGNORE: [ - strict ? null : undefined, // Handle #SP space - null, // Handle #LN newlines - undefined, // Still ignore #CM comments - ], - }, - number: { - lex: opt_number, - }, - value: { - lex: opt_value, - }, - comment: { - lex: comment, - }, - lex: { - emptyResult: [], - }, - line: { - single: record_empty, - chars: - null == options.record.separators - ? undefined - : options.record.separators, - rowChars: - null == options.record.separators - ? undefined - : options.record.separators, - }, - error: { - csv_extra_field: 'unexpected extra field value: $fsrc', - csv_missing_field: 'missing field', - }, - hint: { - csv_extra_field: `Row $row has too many fields (the first of which is: $fsrc). Only $len -fields per row are expected.`, - csv_missing_field: `Row $row has too few fields. 
$len fields per row are expected.`, - }, - } - - jsonic.options(jsonicOptions) - - - // Named function references for declarative grammar definition. - const refs: Record = { - - // === State actions (auto-wired by @rulename-{bo,ao,bc,ac} convention) === - - '@csv-bo': (r: Rule, ctx: Context) => { - ctx.u.recordI = 0 - stream && stream('start') - r.node = [] - }, - - '@csv-ac': (_r: Rule) => { - stream && stream('end') - }, - - '@record-bc': (r: Rule, ctx: Context) => { - let fields: string[] = ctx.u.fields || options.field.names - - if (0 === ctx.u.recordI && header) { - ctx.u.fields = undefined === r.child.node ? [] : r.child.node - } else { - let record: any = r.child.node || [] - - if (objres) { - let obj: Record = {} - let i = 0 - - if (fields) { - if (options.field.exact) { - if (record.length !== fields.length) { - return ctx.t0.bad( - record.length > fields.length - ? 'csv_extra_field' - : 'csv_missing_field', - ) - } - } - - let fI = 0 - for (; fI < fields.length; fI++) { - obj[fields[fI]] = - undefined === record[fI] ? options.field.empty : record[fI] - } - i = fI - } - - for (; i < record.length; i++) { - let field_name = options.field.nonameprefix + i - obj[field_name] = - undefined === record[i] ? options.field.empty : record[i] - } - - record = obj - } else { - for (let i = 0; i < record.length; i++) { - record[i] = - undefined === record[i] ? options.field.empty : record[i] - } - } - - if (stream) { - stream('record', record) - } else { - r.node.push(record) - } - } - - ctx.u.recordI++ - }, - - '@text-bc': (r: Rule) => { - r.parent.node = undefined === r.child.node ? r.node : r.child.node - }, - - - // === Alt actions === - - '@elem-open-empty': (r: Rule) => { - r.node.push(options.field.empty) - r.u.done = true - }, - - '@elem-close-trailing': (r: Rule) => { - r.node.push(options.field.empty) - }, - - '@text-follows': (r: Rule) => { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = (1 === r.n.text ? 
'' : r.prev.node) + r.o0.val - }, - - '@text-leads': (r: Rule) => { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + - (2 <= r.n.text || !trim ? r.o0.src : '') + - r.o1.src - }, - - '@text-end': (r: Rule) => { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') - }, - - '@text-space': (r: Rule) => { - if (strict) { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') - } - }, - - - // === Condition refs === - - '@not-record-empty': () => !record_empty, - - - // === FuncRef for dynamic rule names === - - '@record-close-next': () => record_empty ? 'record' : 'newline', - - '@text-space-push': () => strict ? '' : 'val', - } - - - // Usually [#TX, #ST, #NR, #VL] - let VAL = jsonic.tokenSet.VAL - - let { LN, CA, SP, ZZ } = jsonic.token - - // Parse embedded grammar definition using a separate standard Jsonic instance. - const grammarDef = Jsonic.make()(grammarText) - grammarDef.ref = refs - jsonic.grammar(grammarDef) - - - // Rules list, elem, val are modified in code rather than the grammar file, - // because in non-strict mode the default jsonic alternatives must be preserved - // to support embedded JSON values like [1,2] and {x:1}. - - jsonic.rule('list', (rs: RuleSpec) => { - return rs - .open([ - // If not ignoring empty fields, don't consume LN used to close empty record. - { s: [LN], b: 1 }, - ]) - // Unconditional fallback to push elem — the default jsonic list rule gates - // its elem push on prev.u.implist which CSV's record rule does not set. 
- .open([{ p: 'elem' }], { append: true }) - .close([ - // LN ends record - { s: [LN], b: 1 }, - - { s: [ZZ] }, - ]) - }) - - jsonic.rule('elem', (rs: RuleSpec) => { - return rs - .open( - [ - // An empty element - { - s: [CA], - b: 1, - a: (r: Rule) => { - r.node.push(options.field.empty) - r.u.done = true - }, - }, - ], - ) - - .close( - [ - // An empty element at the end of the line - { - s: [CA, [LN, ZZ]], - b: 1, - a: (r: Rule) => r.node.push(options.field.empty), - }, - - // LN ends record - { s: [LN], b: 1 }, - ], - ) - }) - - jsonic.rule('val', (rs: RuleSpec) => { - return rs.open( - [ - // Handle text and space concatentation - { s: [VAL, SP], b: 2, p: 'text' }, - { s: [SP], b: 1, p: 'text' }, - - // LN ends record - { s: [LN], b: 1 }, - ], - ) - }) - - // Close is called on final rule - set parent val node - jsonic.rule('text', (rs: RuleSpec) => { - rs.bc((r: Rule) => { - r.parent.node = undefined === r.child.node ? r.node : r.child.node - }) - }) -} - -// Custom CSV String matcher. -// Handles "a""b" -> "a"b" quoting wierdness. -// This is a reduced copy of the standard Jsonic string matcher. -function buildCsvStringMatcher(csvopts: CsvOptions) { - return function makeCsvStringMatcher(cfg: Config, _opts: Options) { - return function csvStringMatcher(lex: Lex) { - let quoteMap: any = { [csvopts.string.quote]: true } - - let { pnt, src } = lex - let { sI, rI, cI } = pnt - let srclen = src.length - - if (quoteMap[src[sI]]) { - const q = src[sI] // Quote character - const qI = sI - const qrI = rI - ++sI - ++cI - - let s: string[] = [] - - for (sI; sI < srclen; sI++) { - cI++ - let c = src[sI] - - // Quote char. - if (q === c) { - sI++ - cI++ - - if (q === src[sI]) { - s.push(q) - } else { - break // String finished. - } - } - - // Body part of string. 
- else { - let bI = sI - - let qc = q.charCodeAt(0) - let cc = src.charCodeAt(sI) - - while (sI < srclen && 32 <= cc && qc !== cc) { - cc = src.charCodeAt(++sI) - cI++ - } - cI-- - - if (cfg.line.chars[src[sI]]) { - if (cfg.line.rowChars[src[sI]]) { - pnt.rI = ++rI - } - - cI = 1 - s.push(src.substring(bI, sI + 1)) - } else if (cc < 32) { - pnt.sI = sI - pnt.cI = cI - return lex.bad('unprintable', sI, sI + 1) - } else { - s.push(src.substring(bI, sI)) - sI-- - } - } - } - - if (src[sI - 1] !== q || pnt.sI === sI - 1) { - pnt.rI = qrI - return lex.bad('unterminated_string', qI, sI) - } - - const tkn = lex.token( - '#ST', - s.join(''), - src.substring(pnt.sI, sI), - pnt, - ) - - pnt.sI = sI - pnt.rI = rI - pnt.cI = cI - return tkn - } - } - } -} - -// Default option values. -Csv.defaults = { - trim: null, - comment: null, - number: null, - value: null, - header: true, - object: true, - stream: null, - strict: true, - field: { - separation: null, - nonameprefix: 'field~', - empty: '', - names: undefined, - exact: false, - }, - record: { - separators: null, - empty: false, - }, - string: { - quote: '"', - csv: null, - }, -} as CsvOptions - -export { Csv, buildCsvStringMatcher } - -export type { CsvOptions } diff --git a/src/xml.ts b/src/xml.ts new file mode 100644 index 0000000..7c70ebf --- /dev/null +++ b/src/xml.ts @@ -0,0 +1,1321 @@ +/* Copyright (c) 2021-2025 Richard Rodger, MIT License */ + +// Import Jsonic types used by plugins. +import { + Jsonic, + Rule, + RuleSpec, + Plugin, + Context, + Config, + Options, + Lex, +} from 'jsonic' + +// A parsed XML element. +// +// Fields: +// name - qualified name as written in the source (e.g. "ns:tag") +// prefix - namespace prefix if any ("ns"), else undefined +// localName - local part of the qualified name ("tag") +// namespace - URI bound to the prefix/default at parse time +// attributes - attribute map, with entity references decoded. Namespace +// declarations ("xmlns", "xmlns:*") are kept here too. 
// children - mixed array of text strings and nested elements.
type XmlElement = {
  // Qualified name exactly as written in the tag.
  name: string
  // Prefix part of `name`; filled in by namespace resolution when `name`
  // contains a ':'.
  prefix?: string
  // `name` without its prefix (equal to `name` when there is no prefix).
  localName: string
  // Namespace URI in scope for this element, when one applies.
  namespace?: string
  // Effective xml:space (XML 1.0 §2.10). Present only when the
  // element or an ancestor sets xml:space to something other than
  // the default value "default" (typically "preserve").
  space?: string
  // Effective xml:lang (XML 1.0 §2.12). Present only when the
  // element or an ancestor specifies xml:lang.
  lang?: string
  attributes: Record<string, string>
  children: Array<string | XmlElement>
}

type XmlOptions = {
  // Whether to resolve namespaces (annotate elements with
  // `prefix`/`localName`/`namespace`). Default: true.
  namespaces: boolean
  // Whether to decode the five predefined entities and numeric character
  // references in text and attribute values. Default: true.
  entities: boolean
  // Additional named entities to recognise beyond the five predefined ones.
  customEntities: Record<string, string>
  // Whether to enforce XML 1.0 §4.1 — every named entity reference must
  // resolve to a declared entity (predefined, customEntities, or a DOCTYPE
  // declaration). Default: true. When set to false, references
  // to unknown names are left as-is in the output (legacy behaviour
  // useful for templating).
  strictEntities: boolean
  // Embed mode. When `false` (default), the plugin configures the parser
  // for pure-XML input: the start rule becomes `xml`, JSON structural
  // tokens are disabled, and all non-XML lexing is turned off.
  //
  // When `true`, the plugin leaves Jsonic's JSON/JSONIC rules in place
  // and adds an alternate to the `val` rule so that a literal XML
  // element (`<a>...</a>` or `<a/>`) appears wherever Jsonic
  // expects a value. The XML literal is parsed with the same element
  // grammar used in pure mode.
  embed: boolean
}

// --- BEGIN EMBEDDED xml-grammar.jsonic ---
const grammarText = `
# XML Grammar Definition (elements + attributes + mixed content)
# Parsed by a standard Jsonic instance and passed to jsonic.grammar()
# Function references (@ prefixed) are resolved against the refs map
#
# Token naming:
#   #XOP  - XML open tag, e.g. <name>
#   #XCL  - XML close tag, e.g. </name>
#   #XSC  - XML self-close tag, e.g. <name/>
#   #XIG  - comment / processing instruction / DOCTYPE (ignored)
#   #TX   - text content between tags (CDATA included)
#   #ZZ   - end of input

{
  rule: xml: open: [
    { s: '#ZZ' }
    { s: '#TX' r: xml }
    { p: element c: '@no-root-yet' }
  ]
  rule: xml: close: [
    { s: '#ZZ' }
    { s: '#TX' r: xml }
  ]

  rule: element: open: [
    { s: '#XSC' a: '@element-selfclose' u: { selfclose: 1 } }
    { s: '#XOP' p: content a: '@element-open' }
  ]
  rule: element: close: [
    { c: '@element-is-selfclosed' }
    { s: '#XCL' a: '@element-close' }
  ]

  rule: content: open: [
    { s: '#XCL' b: 1 }
    { p: child }
  ]
  rule: content: close: [
    { s: '#XCL' b: 1 }
    { r: content }
  ]

  rule: child: open: [
    { s: '#TX' a: '@child-text' }
    { s: '#XOP' b: 1 p: element }
    { s: '#XSC' b: 1 p: element }
  ]
}
`
// --- END EMBEDDED xml-grammar.jsonic ---


// Jsonic plugin entry point. Installs the XML tag matcher, the error
// templates/hints, and the grammar above; `options.embed` selects between
// pure-XML mode and XML literals embedded in Jsonic values.
const Xml: Plugin = (jsonic: Jsonic, options: XmlOptions) => {
  const embed = options.embed === true
  const decodeEntity = buildEntityDecoder(options)

  // Register custom lexer matcher. The same matcher is used in both
  // modes; in embed mode it additionally consumes text between tags so
  // Jsonic's own text/fixed lexers don't split it on `,` `:` etc.
  jsonic.options({
    lex: {
      match: {
        xmltag: {
          order: 1e5,
          make: buildXmlTagMatcher(decodeEntity, embed, options),
        },
      },
      emptyResult: undefined,
    },
    // Terminate Jsonic text at `<` so XML tag starts are not absorbed
    // into Jsonic text runs.
    ender: ['<'],
  })

  if (!embed) {
    // Pure XML mode: reconfigure the parser so Jsonic's own value
    // grammar is unreachable and all lexers other than our tag matcher
    // are quiescent.
    //
    // Note: we deliberately do NOT install a `text.modify` hook here.
    // While the root element is open the custom matcher itself emits
    // the text tokens (with entity decoding and well-formedness
    // checks); Jsonic's text matcher only sees whitespace before the
    // root and after it, where no decoding is needed.
    jsonic.options({
      rule: {
        start: 'xml',
        exclude: 'jsonic,imp',
      },
      fixed: {
        token: {
          '#OB': null, '#CB': null, '#OS': null, '#CS': null,
          '#CL': null, '#CA': null,
        },
      },
      tokenSet: {
        IGNORE: ['#SP', '#LN', '#CM', '#XIG'],
      },
      number: { lex: false },
      value: { lex: false },
      string: { lex: false },
      comment: { lex: false },
      space: { lex: false },
      line: { lex: false },
    })
  } else {
    // Embed mode: keep all of Jsonic's standard grammar. Still register
    // #XIG for comments/PIs/DOCTYPE and add it to IGNORE.
    jsonic.options({
      tokenSet: {
        IGNORE: ['#SP', '#LN', '#CM', '#XIG'],
      },
    })
  }

  // Error templates and hints are installed in both modes.
  //
  // NOTE(review): the two `xml_mismatched_tag` strings look as if they
  // have lost placeholders for the closing-tag name; they are kept as
  // found. Confirm against the intended wording.
  jsonic.options({
    error: {
      xml_mismatched_tag:
        'closing tag does not match opening tag <$openname>',
      xml_invalid_tag: 'invalid tag: $fsrc',
      xml_unterminated: 'unterminated $kind',
      // The tag matcher reports these four specific codes, so each needs
      // its own template.
      unterminated_comment: 'unterminated comment',
      unterminated_cdata: 'unterminated CDATA section',
      unterminated_doctype: 'unterminated DOCTYPE declaration',
      unterminated_pi: 'unterminated processing instruction',
      comment_double_dash: 'comment body cannot contain "--"',
      cdata_terminator_in_text: 'character data cannot contain "]]>"',
      pi_target_invalid: 'processing instruction target is missing or invalid',
      lt_in_attr_value: '"<" is not allowed in an attribute value',
      bad_entity_ref: 'malformed entity reference (need &name; or &#NNN; or &#xHHH;)',
      duplicate_attribute: 'duplicate attribute name in tag',
      invalid_xml_char: 'illegal control character in XML data',
      reserved_namespace: 'invalid use of a reserved namespace prefix or URI',
      unbound_prefix: 'element or attribute uses an undeclared namespace prefix',
      undeclared_entity: 'reference to undeclared entity',
    },
    hint: {
      xml_mismatched_tag: `Each opening tag must be paired with a matching closing tag.
Expected but found .`,
      xml_invalid_tag: `The tag syntax is not valid XML.`,
      xml_unterminated: `The $kind starting at this position is not terminated.`,
      unterminated_comment: `The comment starting at this position has no closing "-->".`,
      unterminated_cdata: `The CDATA section starting at this position has no closing "]]>".`,
      unterminated_doctype: `The DOCTYPE declaration starting at this position has no closing ">".`,
      unterminated_pi: `The processing instruction starting at this position has no closing "?>".`,
      comment_double_dash: `XML 1.0 disallows "--" inside a comment body.`,
      cdata_terminator_in_text: `The literal "]]>" must only appear as the end of a CDATA section.`,
      pi_target_invalid: `A processing instruction must start with a Name; the XML declaration is the special case.`,
      lt_in_attr_value: `Use the entity reference &lt; to include "<" in an attribute value.`,
      bad_entity_ref: `Replace literal "&" with &amp;, or terminate the entity reference with ";".`,
      duplicate_attribute: `Each attribute name in an open tag must be unique.`,
      invalid_xml_char: `Only #x9, #xA, #xD and code points >= #x20 are legal XML characters.`,
      reserved_namespace: `The "xml" prefix is fixed to ${XML_NS_URI}; the "xmlns" prefix cannot be redeclared, and neither URI may be bound to any other prefix or as the default namespace.`,
      unbound_prefix: `Declare the prefix with xmlns:prefix="..." on this element or one of its ancestors.`,
      undeclared_entity: `Declare the entity in the DOCTYPE internal subset, add it to the customEntities option, or set strictEntities: false to allow unresolved references through.`,
    },
  })

  // Shape shared by every grammar function reference below.
  type XmlRef = (r: Rule, ctx: Context) => unknown

  // Build the element node for an open or self-closing tag token.
  // `localName` starts out equal to `name`; namespace resolution refines
  // it later.
  const elementFromToken = (r: Rule, ctx: Context): void => {
    const v = r.o0.val
    r.node = {
      name: v.name,
      localName: v.name,
      attributes: applyAttrDefaults(v.attributes, v.name, ctx),
      children: [],
    }
  }

  const refs: Record<string, XmlRef> = {
    '@xml-bc': (r: Rule, ctx: Context) => {
      if (r.child && r.child.node) {
        const root = ctx.root()
        root.node = r.child.node
        // Mark the document as having seen its root so the
        // `@no-root-yet` condition gates any further attempts to
        // push a second root element.
        ctx.u.rootSeen = true
        if (options.namespaces !== false) {
          const nsErr = resolveNamespaces(root.node, {})
          if (nsErr) {
            return ctx.t0.bad(nsErr)
          }
        }
      }
    },

    // Condition: only allow the xml rule to push an `element` if the
    // document hasn't already produced a root (XML 1.0 §2.1).
    '@no-root-yet': (_r: Rule, ctx: Context) => true !== ctx.u.rootSeen,

    '@element-open': elementFromToken,

    '@element-selfclose': elementFromToken,

    // A close tag must carry the same name as the element being closed.
    '@element-close': (r: Rule, ctx: Context) => {
      const openName = r.node && r.node.name
      const closeName = r.c0.val
      if (openName !== closeName) {
        r.c0.use = { openname: openName }
        return ctx.t0.bad('xml_mismatched_tag')
      }
    },

    '@child-text': (r: Rule) => {
      r.node.children.push(r.o0.val)
      // Flag the rule so `@child-bc` does not also append a child node.
      r.u.done = true
    },

    '@child-bc': (r: Rule) => {
      if (true !== r.u.done && r.child && r.child.node) {
        r.node.children.push(r.child.node)
      }
    },

    '@element-is-selfclosed': (r: Rule) => true === !!r.u.selfclose,
  }

  // Parse embedded grammar definition and wire refs.
  const grammarDef = Jsonic.make()(grammarText)
  grammarDef.ref = refs
  jsonic.grammar(grammarDef)

  if (embed) {
    // Splice XML literals into the Jsonic `val` rule. When the parser
    // is looking for a value and sees an `#XOP` or `#XSC` token, it
    // pushes the `element` rule which builds the XML subtree. Backtrack
    // by 1 so `element.open` can read the same token and dispatch to
    // the correct branch.
    const XOP = jsonic.token('#XOP')
    const XSC = jsonic.token('#XSC')
    jsonic.rule('val', (rs: RuleSpec) => {
      return rs.open(
        [
          { s: [XOP], b: 1, p: 'element', g: 'xml' },
          { s: [XSC], b: 1, p: 'element', g: 'xml' },
        ],
      )
    })

    // In embed mode the top-level wrapper is Jsonic's `val` rule, so
    // the `@xml-bc` hook that copies the root element to `ctx.root().node`
    // is not invoked. Resolve namespaces after the full tree lands on
    // the element rule by hooking its close-state action.
    if (options.namespaces !== false) {
      jsonic.rule('element', (rs: RuleSpec) => {
        rs.bc((r: Rule, ctx: Context) => {
          if (r.node && 'object' === typeof r.node && r.parent &&
            r.parent.name === 'val') {
            // Report namespace violations the same way `@xml-bc` does in
            // pure mode; previously the error code was silently dropped.
            const nsErr = resolveNamespaces(r.node, {})
            if (nsErr) {
              return ctx.t0.bad(nsErr)
            }
          }
        })
      })
    }
  }
}


// decodeBOM converts a byte sequence (either a Node Buffer / Uint8Array
// or a Latin-1-mapped "binary" JS string where each char code is one
// byte) into a decoded Unicode string, transcoding from whichever of
// UTF-8 / UTF-16-LE / UTF-16-BE / UTF-32-LE / UTF-32-BE the byte-order
// mark indicates. UTF-8 is the default when no BOM is present, so
// non-ASCII tag names in BOM-less UTF-8 files round-trip correctly.
//
// If the caller has already decoded the input to a Unicode JS string
// (any code unit > 0xFF) the function only strips a leading U+FEFF
// and returns the input otherwise unchanged.
//
// Use this when reading XML files of unknown encoding:
//
//   const body = decodeBOM(readFileSync(path))  // Node Buffer
//   const doc = jsonic(body)
//
// Input handling:
//   - null / undefined yields the empty string;
//   - an ArrayBuffer is viewed as bytes;
//   - a string is treated as "binary" only if EVERY code unit is <= 0xFF
//     (the whole string is scanned, so a decoded document whose first
//     non-Latin-1 character appears late is not mistaken for bytes).
function decodeBOM(src: any): string {
  if (src == null) return ''

  if (typeof src === 'string') {
    let isBinary = true
    for (let i = 0; i < src.length; i++) {
      if (src.charCodeAt(i) > 0xff) { isBinary = false; break }
    }
    // Already a decoded Unicode string: strip a leading BOM character.
    if (!isBinary) {
      return src.charCodeAt(0) === 0xfeff ? src.substring(1) : src
    }
    // Binary string: convert to a byte array and reuse the buffer path.
    const bytes = new Uint8Array(src.length)
    for (let i = 0; i < src.length; i++) bytes[i] = src.charCodeAt(i)
    return decodeBOMBytes(bytes)
  }

  if (src instanceof ArrayBuffer) {
    return decodeBOMBytes(new Uint8Array(src))
  }
  // Buffer / Uint8Array / array-like.
  return decodeBOMBytes(src as Uint8Array)
}

// Pick a decoder from the byte-order mark at the start of `b`.
// The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 marks
// because the UTF-32-LE mark begins with the UTF-16-LE mark.
// Without a BOM the bytes are decoded as UTF-8.
function decodeBOMBytes(b: Uint8Array): string {
  const n = b.length
  if (n === 0) return ''

  // UTF-32 BE
  if (n >= 4 && b[0] === 0x00 && b[1] === 0x00 && b[2] === 0xfe && b[3] === 0xff) {
    return decodeUTF32(b, 4, true)
  }
  // UTF-32 LE (check before UTF-16 LE)
  if (n >= 4 && b[0] === 0xff && b[1] === 0xfe && b[2] === 0x00 && b[3] === 0x00) {
    return decodeUTF32(b, 4, false)
  }
  // UTF-16 BE
  if (n >= 2 && b[0] === 0xfe && b[1] === 0xff) {
    return decodeUTF16(b, 2, true)
  }
  // UTF-16 LE
  if (n >= 2 && b[0] === 0xff && b[1] === 0xfe) {
    return decodeUTF16(b, 2, false)
  }
  // UTF-8 BOM, then UTF-8 default
  const start = n >= 3 && b[0] === 0xef && b[1] === 0xbb && b[2] === 0xbf ? 3 : 0
  return decodeUTF8(b, start)
}

// Decode UTF-8 bytes from `start` to the end of `b`.
//
// A multi-byte sequence is accepted only when every trailing byte is a
// continuation byte (10xxxxxx) and the result is not an overlong
// encoding or above U+10FFFF. Anything else (invalid lead byte,
// truncated tail, bad continuation, overlong form) is handled by
// emitting the lead byte as a single character and advancing one
// position, so a stray high byte never swallows the bytes after it.
function decodeUTF8(b: Uint8Array, start: number): string {
  const n = b.length
  const isCont = (k: number): boolean => k < n && (b[k] & 0xc0) === 0x80

  let out = ''
  let i = start
  while (i < n) {
    const c = b[i]
    if (c < 0x80) {
      out += String.fromCharCode(c)
      i++
      continue
    }

    let cp = -1
    let advance = 1
    if ((c & 0xe0) === 0xc0) {
      if (isCont(i + 1)) {
        cp = ((c & 0x1f) << 6) | (b[i + 1] & 0x3f)
        advance = 2
        if (cp < 0x80) cp = -1 // overlong
      }
    } else if ((c & 0xf0) === 0xe0) {
      if (isCont(i + 1) && isCont(i + 2)) {
        cp = ((c & 0x0f) << 12) | ((b[i + 1] & 0x3f) << 6) | (b[i + 2] & 0x3f)
        advance = 3
        if (cp < 0x800) cp = -1 // overlong
      }
    } else if ((c & 0xf8) === 0xf0) {
      if (isCont(i + 1) && isCont(i + 2) && isCont(i + 3)) {
        cp = ((c & 0x07) << 18) |
          ((b[i + 1] & 0x3f) << 12) |
          ((b[i + 2] & 0x3f) << 6) |
          (b[i + 3] & 0x3f)
        advance = 4
        if (cp < 0x10000 || cp > 0x10ffff) cp = -1 // overlong / out of range
      }
    }

    if (cp < 0) {
      out += String.fromCharCode(c)
      i++
    } else {
      out += String.fromCodePoint(cp)
      i += advance
    }
  }
  return out
}

// Decode UTF-16 bytes from `start`; `big` selects big-endian byte order.
// A trailing odd byte is ignored. Code units are converted in bounded
// chunks because spreading a very large array into a single
// String.fromCharCode call exceeds the engine's argument limit.
function decodeUTF16(b: Uint8Array, start: number, big: boolean): string {
  const CHUNK = 0x4000
  const hiOff = big ? 0 : 1
  const loOff = 1 - hiOff
  let out = ''
  let units: number[] = []
  for (let i = start; i + 1 < b.length; i += 2) {
    units.push((b[i + hiOff] << 8) | b[i + loOff])
    if (units.length === CHUNK) {
      out += String.fromCharCode(...units)
      units = []
    }
  }
  if (units.length > 0) out += String.fromCharCode(...units)
  return out
}

// Decode UTF-32 bytes from `start`; `big` selects big-endian byte order.
// Trailing bytes that do not form a full 4-byte unit are ignored. Values
// above U+10FFFF are not code points; they become U+FFFD instead of
// making String.fromCodePoint throw.
function decodeUTF32(b: Uint8Array, start: number, big: boolean): string {
  let out = ''
  for (let i = start; i + 3 < b.length; i += 4) {
    const b0 = b[i], b1 = b[i + 1], b2 = b[i + 2], b3 = b[i + 3]
    const cp = (big
      ? (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
      : (b3 << 24) | (b2 << 16) | (b1 << 8) | b0) >>> 0
    out += cp > 0x10ffff ? '\ufffd' : String.fromCodePoint(cp)
  }
  return out
}


// The five predefined XML entities.
const predefinedEntities: Record<string, string> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
}

// Build an entity decoder. The plugin-time entity map (predefined +
// customEntities) is closed over; per-parse entities declared in the
// DOCTYPE internal subset are passed in via the optional `dtd`
// argument and recursively expanded with cycle detection.
//
// Returned function signature:
//   decode(src, dtd?) -> string
// where `dtd` is a per-parse map { name -> raw value } that the
// matcher pulls from `lex.ctx.u.dtdEntities`.
// Implementation notes:
//   - Entity maps are consulted with an own-property check, so names
//     such as "constructor" or "toString" are never resolved through
//     Object.prototype.
//   - The reference pattern accepts any run of characters other than
//     "&", ";" and whitespace as a candidate name; only names present in
//     one of the maps are replaced, so Unicode entity names expand while
//     everything else is returned untouched.
//   - NOTE(review): expansion is recursive with cycle detection but has
//     no size limit; nested DTD entities from untrusted input can grow
//     the output exponentially.
function buildEntityDecoder(options: XmlOptions): DecodeEntitiesFn {
  const baseEntities: Record<string, string> = {
    ...predefinedEntities,
    ...(options?.customEntities || {}),
  }
  const entityRE = /&(#x[0-9a-fA-F]+|#[0-9]+|[^&;#\s][^&;\s]*);/g

  // Own-property lookup; undefined when `key` is absent.
  const lookup = (map: Record<string, string>, key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined

  // Replace every resolvable reference in `src`. `seen` holds the names
  // of the DTD entities currently being expanded, to break cycles.
  function expand(
    src: string,
    dtd: Record<string, string>,
    seen: Set<string>,
  ): string {
    if (src.indexOf('&') < 0) return src
    return src.replace(entityRE, (match: string, ref: string) => {
      if (ref[0] === '#') {
        const code =
          ref[1] === 'x'
            ? parseInt(ref.substring(2), 16)
            : parseInt(ref.substring(1), 10)
        if (isNaN(code)) return match
        try {
          return String.fromCodePoint(code)
        } catch {
          // Not a valid code point: leave the reference as written.
          return match
        }
      }
      // Predefined / option-supplied entities take precedence over
      // anything declared in the DTD (matches the XML 1.0 rule that
      // the five predefined entities are always available).
      const base = lookup(baseEntities, ref)
      if (undefined !== base) return base

      const raw = lookup(dtd, ref)
      if (undefined !== raw) {
        if (seen.has(ref)) {
          // Recursive entity reference is a WF violation. Keep the
          // unexpanded text so the cycle is broken here.
          return match
        }
        seen.add(ref)
        const out = expand(raw, dtd, seen)
        seen.delete(ref)
        return out
      }
      return match
    })
  }

  function decodeEntities(src: string, dtd?: Record<string, string>): string {
    return expand(src, dtd || {}, new Set<string>())
  }
  // Expose the plugin-time entity map so callers can test whether a name
  // is declared.
  return Object.assign(decodeEntities, { declared: baseEntities })
}

// Callable entity decoder plus the map of entities known at plugin time.
type DecodeEntitiesFn = ((src: string, dtd?: Record<string, string>) => string) & {
  declared: Record<string, string>
}

// Parse the body of a DOCTYPE declaration (the text between the `[`
// and `]` of the internal subset) and extract every `<!ATTLIST ...>`
// declaration's default attribute values, keyed by element name and
// attribute name.
Both literal defaults and `#FIXED "value"` defaults +// are returned; `#REQUIRED` and `#IMPLIED` declarations contribute +// nothing because they have no default value. +// +// Used by the matcher's element actions to fill in attributes that +// were not present on the element instance. +function parseDoctypeAttlists(body: string): Record> { + const isSpace = (ch: string) => + ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' + const isUpperAscii = (ch: string) => + ch >= 'A' && ch <= 'Z' + const skipSpace = (s: number): number => { + while (s < body.length && isSpace(body[s])) s++ + return s + } + const out: Record> = {} + + let i = 0 + while (i < body.length) { + const idx = body.indexOf('' or EOF. + while (j < body.length) { + j = skipSpace(j) + if (j >= body.length) break + if (body[j] === '>') { j++; break } + + const attrName = readNameInBody(body, j) + if (!attrName) { j++; continue } + j = attrName.end + j = skipSpace(j) + + // Skip AttType: enumeration `( ... )`, `NOTATION ( ... )`, or + // a bare type identifier (CDATA, ID, IDREF, IDREFS, NMTOKEN, + // NMTOKENS, ENTITY, ENTITIES). + if (body[j] === '(') { + const close = body.indexOf(')', j) + if (close < 0) { j = body.length; break } + j = close + 1 + } else if (body.startsWith('NOTATION', j)) { + j += 'NOTATION'.length + j = skipSpace(j) + if (body[j] === '(') { + const close = body.indexOf(')', j) + if (close < 0) { j = body.length; break } + j = close + 1 + } + } else { + while (j < body.length && isUpperAscii(body[j])) j++ + } + j = skipSpace(j) + + // DefaultDecl. 
+ if (body.startsWith('#REQUIRED', j)) { + j += '#REQUIRED'.length + continue + } + if (body.startsWith('#IMPLIED', j)) { + j += '#IMPLIED'.length + continue + } + if (body.startsWith('#FIXED', j)) { + j += '#FIXED'.length + j = skipSpace(j) + } + if (body[j] === '"' || body[j] === "'") { + const quote = body[j] + j++ + const valStart = j + while (j < body.length && body[j] !== quote) j++ + if (j >= body.length) break + const value = body.substring(valStart, j) + if (!out[elemName.name]) out[elemName.name] = {} + out[elemName.name][attrName.name] = value + j++ + } + } + i = j + } + return out +} + +// applyAttrDefaults merges in DOCTYPE-supplied default attribute +// values (``) for any attribute +// missing from the parsed element instance. Returns the original +// attributes object if no defaults apply. +function applyAttrDefaults( + attrs: Record, + elemName: string, + ctx: Context, +): Record { + const defaults = ctx?.u?.dtdAttrDefaults?.[elemName] + if (!defaults) return attrs + const out = { ...attrs } + for (const k of Object.keys(defaults)) { + if (!Object.prototype.hasOwnProperty.call(out, k)) { + out[k] = defaults[k] + } + } + return out +} + +// readNameInBody is a free-function counterpart to the matcher's +// `readName` closure used by the DTD parsers, which run before the +// matcher closure has been instantiated. +function readNameInBody(s: string, start: number): { name: string; end: number } | null { + if (start >= s.length) return null + const cp0 = s.codePointAt(start)! + if (!isNameStartCP(cp0)) return null + let i = start + (cp0 > 0xffff ? 2 : 1) + while (i < s.length) { + const cp = s.codePointAt(i)! + if (!isNameCharCP(cp)) break + i += cp > 0xffff ? 2 : 1 + } + return { name: s.substring(start, i), end: i } +} + +// Parse the body of a DOCTYPE declaration (the text between the `[` +// and `]` of the internal subset) and extract every internal general +// entity declaration ``. 
Parameter entity +// declarations (``) and external entity +// declarations (`` etc.) are recognised +// but skipped. Other declarations (` { + const ents: Record = {} + const isSpace = (ch: string) => + ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' + const isNm = (ch: string) => isNameCharCP(ch.charCodeAt(0)) + + let i = 0 + while (i < body.length) { + const idx = body.indexOf('', j) + i = end < 0 ? body.length : end + 1 + continue + } + // Read name. + if (j >= body.length || !isNameStartCP(body.charCodeAt(j))) { + i = j + 1 + continue + } + const nameStart = j + j++ + while (j < body.length && isNm(body[j])) j++ + const name = body.substring(nameStart, j) + while (j < body.length && isSpace(body[j])) j++ + // Quoted entity value -> internal entity. + if (body[j] === '"' || body[j] === "'") { + const quote = body[j] + j++ + const valStart = j + while (j < body.length && body[j] !== quote) j++ + if (j >= body.length) break + ents[name] = body.substring(valStart, j) + j++ + } + // External entity (SYSTEM / PUBLIC) - skip; we don't fetch. + const end = body.indexOf('>', j) + i = end < 0 ? body.length : end + 1 + } + return ents +} + + +// Build a lexer matcher that recognises all top-level XML constructs +// starting with `<`. In embed mode the matcher also claims any text +// between an open tag and its matching close tag so that Jsonic's own +// text/fixed matchers don't split XML character data on JSON-syntax +// characters (`,`, `:`, etc.). 
+// +// Emits one of: +// -> #XOP val = { name, attributes } +// -> #XSC val = { name, attributes } +// -> #XCL val = name +// -> #XIG (parser ignores) +// -> #XIG (parser ignores) +// -> #XIG (parser ignores) +// -> #TX (verbatim text, no entity decoding) +function buildXmlTagMatcher( + decodeEntity: DecodeEntitiesFn, + embed: boolean, + options: XmlOptions, +) { + const strict = options.strictEntities !== false + const declared = decodeEntity.declared + // Backwards-compatible single-char predicates retained for sites that + // only need a simple character class check (e.g. peek before reading + // a name). Multi-byte / surrogate pair handling is in `readName` / + // `cpAt` below. + const isNameStart = (ch: string) => isNameStartCP(ch.codePointAt(0)!) + const isNameChar = (ch: string) => isNameCharCP(ch.codePointAt(0)!) + const isSpace = (ch: string) => + ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' + + // Read an XML Name starting at `start`. Returns the name and the + // index after it, or null if the character at `start` is not a + // valid NameStartChar. Handles UTF-16 surrogate pairs so non-BMP + // code points are treated as single characters. Typed `any` so the + // matcher's `lex.src` (declared as the boxed `String` upstream) + // can be passed in without a cast. + function readName(src: any, start: number): { name: string; end: number } | null { + if (start >= src.length) return null + const cp0 = src.codePointAt(start)! + if (!isNameStartCP(cp0)) return null + let i = start + (cp0 > 0xffff ? 2 : 1) + while (i < src.length) { + const cp = src.codePointAt(i)! + if (!isNameCharCP(cp)) break + i += cp > 0xffff ? 2 : 1 + } + return { name: String(src).substring(start, i), end: i } + } + + // Validate and decode a run of character data (non-CDATA). 
Enforces + // the XML 1.0 well-formedness constraints applicable to text: + // - every code point must be a legal XML Char (no C0 controls + // other than tab, newline, carriage return); + // - the literal sequence "]]>" must not appear in character data; + // - every "&" must start a well-formed entity reference. + // Returns either { val: string } on success or { err: string } if a + // WF constraint is violated. Pure decoding (without validation) is + // also available for CDATA bodies via decodeEntity(). + function processText( + raw: string, + dtd: Record, + ): { val?: string; err?: string } { + const ctrlErr = checkChars(raw) + if (ctrlErr) return { err: ctrlErr } + if (raw.indexOf(']]>') >= 0) { + return { err: 'cdata_terminator_in_text' } + } + const ampErr = checkEntityRefs(raw, dtd, declared, strict) + if (ampErr) return { err: ampErr } + // §2.11: normalise CR LF and lone CR to LF before downstream processing. + const normalised = normaliseLineEndings(raw) + return { + val: options.entities !== false ? decodeEntity(normalised, dtd) : normalised, + } + } + + return function makeXmlTagMatcher(_cfg: Config, _opts: Options) { + return function xmlTagMatcher(lex: Lex) { + const { pnt, src } = lex + const sI = pnt.sI + + // Strip a UTF-8 byte-order mark at the very start of input. + // After decoding, a UTF-8 BOM appears as a single U+FEFF + // character; some toolchains pass through the raw bytes + // (EF BB BF) as three separate Latin-1 code units. + if (sI === 0 && src.length > 0) { + if (src.charCodeAt(0) === 0xfeff) { + pnt.sI = 1 + return undefined + } + if (src.length >= 3 && + src.charCodeAt(0) === 0xef && + src.charCodeAt(1) === 0xbb && + src.charCodeAt(2) === 0xbf) { + pnt.sI = 3 + return undefined + } + } + + // Inside an open XML element (depth > 0), consume characters up + // to the next `<` as a single #TX text token so that Jsonic's + // own matchers don't reinterpret commas/colons/etc. 
as JSON + // separators in embed mode, and so we can apply XML text + // validation in pure mode too. + if (sI < src.length && src[sI] !== '<') { + const depth = (lex.ctx?.u?.xmlDepth | 0) || 0 + if (depth > 0) { + let i = sI + while (i < src.length && src[i] !== '<') i++ + if (i === sI) return undefined + const raw = src.substring(sI, i) + const dtd = (lex.ctx?.u?.dtdEntities) || {} + const result = processText(raw, dtd) + if (result.err) { + return lex.bad(result.err, sI, i) + } + const tkn = lex.token('#TX', result.val, raw, pnt) + pnt.sI = i + pnt.cI += i - sI + return tkn + } + } + + if (sI >= src.length || src[sI] !== '<') return undefined + + // Comment: + if (src.startsWith('', sI + 4) + if (endIdx === -1) { + return lex.bad('unterminated_comment', sI, src.length) + } + const body = src.substring(sI + 4, endIdx) + // WF constraint: "--" must not occur in a comment body. + if (body.indexOf('--') >= 0) { + return lex.bad('comment_double_dash', sI, endIdx + 3) + } + if (checkChars(body)) { + return lex.bad('invalid_xml_char', sI, endIdx + 3) + } + const end = endIdx + 3 + const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // CDATA: + if (src.startsWith('', sI + 9) + if (endIdx === -1) { + return lex.bad('unterminated_cdata', sI, src.length) + } + const end = endIdx + 3 + const text = src.substring(sI + 9, endIdx) + if (checkChars(text)) { + return lex.bad('invalid_xml_char', sI, end) + } + // §2.11 line-end normalisation applies to CDATA too. + const tkn = lex.token('#TX', normaliseLineEndings(text), src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // DOCTYPE: + if (src.startsWith('` inside an + // entity value or attribute default cannot terminate the + // subset prematurely. 
+ if (ch === '"' || ch === "'") { + i++ + while (i < src.length && src[i] !== ch) i++ + if (i < src.length) i++ + continue + } + if (ch === '[') { + if (depth === 0) subsetStart = i + 1 + depth++ + } else if (ch === ']') { + depth-- + if (depth === 0) subsetEnd = i + } else if (ch === '>' && depth <= 0) break + i++ + } + if (i >= src.length) { + return lex.bad('unterminated_doctype', sI, src.length) + } + const end = i + 1 + // Extract internal-subset declarations and stash them on + // the per-parse context. The matcher's text/attribute paths + // and the element actions read these back via lex.ctx.u. + if (subsetStart >= 0 && subsetEnd > subsetStart && lex.ctx) { + const u: any = lex.ctx.u || (lex.ctx.u = {}) + const subset = src.substring(subsetStart, subsetEnd) + const ents = parseDoctypeEntities(subset) + if (Object.keys(ents).length > 0) { + u.dtdEntities = { ...(u.dtdEntities || {}), ...ents } + } + const atts = parseDoctypeAttlists(subset) + if (Object.keys(atts).length > 0) { + const merged = { ...(u.dtdAttrDefaults || {}) } + for (const elem of Object.keys(atts)) { + merged[elem] = { ...(merged[elem] || {}), ...atts[elem] } + } + u.dtdAttrDefaults = merged + } + } + const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // Processing instruction: + if (src[sI + 1] === '?') { + const endIdx = src.indexOf('?>', sI + 2) + if (endIdx === -1) { + return lex.bad('unterminated_pi', sI, src.length) + } + // WF constraint: PI target must be a Name (and not empty). + const piTargetRes = readName(src, sI + 2) + if (piTargetRes == null || piTargetRes.end > endIdx) { + return lex.bad('pi_target_invalid', sI, endIdx + 2) + } + const i = piTargetRes.end + // After the target, only whitespace then content is allowed. 
+ if (i < endIdx && !isSpace(src[i])) { + return lex.bad('pi_target_invalid', sI, endIdx + 2) + } + if (checkChars(src.substring(sI + 2, endIdx))) { + return lex.bad('invalid_xml_char', sI, endIdx + 2) + } + const end = endIdx + 2 + const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // Closing tag: + if (src[sI + 1] === '/') { + const nameRes = readName(src, sI + 2) + // WF: empty close tag `` is invalid. + if (nameRes == null) { + return lex.bad('xml_invalid_tag', sI, Math.min(src.length, sI + 3)) + } + const name = nameRes.name + let i = nameRes.end + while (i < src.length && isSpace(src[i])) i++ + if (src[i] !== '>') { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + const end = i + 1 + const tkn = lex.token('#XCL', name, src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + if (lex.ctx) { + const u: any = lex.ctx.u || (lex.ctx.u = {}) + u.xmlDepth = Math.max(0, (u.xmlDepth | 0) - 1) + } + return tkn + } + + // Opening or self-close tag: + const elemNameRes = readName(src, sI + 1) + if (elemNameRes == null) return undefined + const name = elemNameRes.name + let i = elemNameRes.end + const attributes: Record = {} + + while (true) { + const wsStart = i + while (i < src.length && isSpace(src[i])) i++ + if (i >= src.length) { + return lex.bad('xml_invalid_tag', sI, src.length) + } + + if (src[i] === '>') { + const end = i + 1 + const tkn = lex.token('#XOP', { name, attributes }, src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + if (lex.ctx) { + const u: any = lex.ctx.u || (lex.ctx.u = {}) + u.xmlDepth = (u.xmlDepth | 0) + 1 + } + return tkn + } + if (src[i] === '/' && src[i + 1] === '>') { + const end = i + 2 + const tkn = lex.token('#XSC', { name, attributes }, src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + // #XSC is an instantly-closed element, so depth is unchanged. 
+ return tkn + } + + if (wsStart === i) { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + + const attrNameRes = readName(src, i) + if (attrNameRes == null) { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + const attrName = attrNameRes.name + i = attrNameRes.end + + while (i < src.length && isSpace(src[i])) i++ + if (src[i] !== '=') { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + i++ + while (i < src.length && isSpace(src[i])) i++ + + const quote = src[i] + if (quote !== '"' && quote !== "'") { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + i++ + const valStart = i + // Per the XML 1.0 spec, attribute values cannot contain a + // literal `<`. Tracking the position lets us also validate + // entity references in the value. + while (i < src.length && src[i] !== quote) { + if (src[i] === '<') { + return lex.bad('lt_in_attr_value', sI, i + 1) + } + i++ + } + if (i >= src.length) { + return lex.bad('xml_invalid_tag', sI, src.length) + } + const rawVal = src.substring(valStart, i) + i++ + + const charErr = checkChars(rawVal) + if (charErr) { + return lex.bad(charErr, valStart, i) + } + const dtd = (lex.ctx?.u?.dtdEntities) || {} + const ampErr = checkEntityRefs(rawVal, dtd, declared, strict) + if (ampErr) { + return lex.bad(ampErr, valStart, i) + } + if (Object.prototype.hasOwnProperty.call(attributes, attrName)) { + return lex.bad('duplicate_attribute', sI, i) + } + // §3.3.3 attribute-value normalisation: literal whitespace + // (TAB, LF, CR, CRLF) becomes a single SPACE before any + // entity references are decoded. We do not have DTD-supplied + // attribute types, so all attributes are treated as CDATA- + // typed (no further whitespace collapsing or trimming). + const normalised = normaliseAttrWhitespace(rawVal) + attributes[attrName] = decodeEntity(normalised, dtd) + } + } + } +} + + +// §2.11 End-of-line handling: any literal CR (#xD) or CR-LF +// (#xD #xA) is normalised to a single LF (#xA) before parsing +// proceeds. 
// Applies to character data, CDATA section bodies, and is
// the precondition for §3.3.3 attribute-value normalisation.
//
// Returns `s` itself when it contains no carriage return.
function normaliseLineEndings(s: string): string {
  return s.indexOf('\r') >= 0 ? s.replace(/\r\n?/g, '\n') : s
}

// Attribute-value whitespace normalisation (§3.3.3) for CDATA-typed
// attributes, which is what every attribute is when no DTD types are
// known. Each TAB, LF, CR or CRLF becomes exactly one SPACE; existing
// spaces are left alone, runs are not collapsed, nothing is trimmed.
function normaliseAttrWhitespace(s: string): string {
  const needsWork = /[\r\n\t]/.test(s)
  if (needsWork) {
    return s.replace(/\r\n?|[\t\n]/g, ' ')
  }
  return s
}

// NameStartChar test (XML 1.0 Fifth Edition §2.3 [4]): may the code
// point `cp` begin an element / attribute / entity / PI-target name?
// The ranges are walked in ascending order, one band per guard.
function isNameStartCP(cp: number): boolean {
  // ASCII: ':' '_' 'A'-'Z' 'a'-'z'
  if (cp < 0x80) {
    if (cp === 0x3a || cp === 0x5f) return true
    if (cp >= 0x41 && cp <= 0x5a) return true
    return cp >= 0x61 && cp <= 0x7a
  }
  // #xC0-#x2FF without the multiplication (#xD7) and division (#xF7) signs
  if (cp <= 0x2ff) {
    return cp >= 0xc0 && (cp <= 0xd6 || cp >= 0xd8) && (cp <= 0xf6 || cp >= 0xf8)
  }
  // #x370-#x1FFF without the Greek question mark (#x37E)
  if (cp <= 0x1fff) {
    return cp >= 0x370 && (cp <= 0x37d || cp >= 0x37f)
  }
  if (cp <= 0x200d) return cp >= 0x200c
  if (cp <= 0x218f) return cp >= 0x2070
  if (cp <= 0x2fef) return cp >= 0x2c00
  if (cp <= 0xd7ff) return cp >= 0x3001
  if (cp <= 0xfdcf) return cp >= 0xf900
  if (cp <= 0xfffd) return cp >= 0xfdf0
  // Supplementary planes
  return cp >= 0x10000 && cp <= 0xeffff
}

// NameChar test (XML 1.0 §2.3 [4a]): everything NameStartChar allows,
// plus '-', '.', the digits, the middle dot, and two further ranges.
function isNameCharCP(cp: number): boolean {
  if (isNameStartCP(cp)) return true
  if (cp >= 0x30 && cp <= 0x39) return true // '0'-'9'
  switch (cp) {
    case 0x2d: // '-'
    case 0x2e: // '.'
    case 0xb7: // middle dot
      return true
  }
  if (cp >= 0x300 && cp <= 0x36f) return true
  return cp >= 0x203f && cp <= 0x2040
}

// Validate that every code unit in `s` is a legal XML 1.0 Char.
// Returns 'invalid_xml_char' on the first illegal character, '' if all
// characters are legal.
Only the C0 control band is checked here; the +// full Char production (which excludes #xFFFE/#xFFFF and unpaired +// surrogates) is not enforced. +function checkChars(s: string): string { + for (let i = 0; i < s.length; i++) { + const c = s.charCodeAt(i) + if (c < 0x20 && c !== 0x09 && c !== 0x0a && c !== 0x0d) { + return 'invalid_xml_char' + } + } + return '' +} + +// Validate entity references in a run of character data. Returns an +// error code on the first malformed reference, or '' if every `&` +// in the input is part of a well-formed reference. The `dtd` map +// supplies DOCTYPE-declared entity names; `extra` adds named +// entities to consider declared (typically the predefined and +// caller-supplied entities). When `strict` is true, references to +// unknown names trigger `bad_entity_ref`; when false (legacy mode), +// the syntactic check still runs but unknown names pass through. +// +// Well-formed forms: +// &name; — name must start with a NameStartChar +// &#nnnn; — decimal numeric character reference +// &#xhhhh; — hexadecimal numeric character reference +function checkEntityRefs( + s: string, + dtd?: Record, + extra?: Record, + strict?: boolean, +): string { + for (let i = 0; i < s.length; i++) { + if (s[i] !== '&') continue + const semi = s.indexOf(';', i + 1) + if (semi < 0) return 'bad_entity_ref' + const ref = s.substring(i + 1, semi) + if (ref.length === 0) return 'bad_entity_ref' + if (ref[0] === '#') { + if (ref.length < 2) return 'bad_entity_ref' + const digits = ref[1] === 'x' || ref[1] === 'X' + ? ref.substring(2) + : ref.substring(1) + if (digits.length === 0) return 'bad_entity_ref' + const valid = ref[1] === 'x' || ref[1] === 'X' + ? /^[0-9a-fA-F]+$/.test(digits) + : /^[0-9]+$/.test(digits) + if (!valid) return 'bad_entity_ref' + } else { + // Entity name must be a Name (NameStartChar followed by NameChars). 
+ let j = 0 + const startCP = ref.codePointAt(0) + if (startCP === undefined || !isNameStartCP(startCP)) { + return 'bad_entity_ref' + } + j += startCP > 0xffff ? 2 : 1 + while (j < ref.length) { + const cp = ref.codePointAt(j)! + if (!isNameCharCP(cp)) return 'bad_entity_ref' + j += cp > 0xffff ? 2 : 1 + } + // §4.1: in strict mode the named entity must resolve. + if (strict && + !(extra && Object.prototype.hasOwnProperty.call(extra, ref)) && + !(dtd && Object.prototype.hasOwnProperty.call(dtd, ref))) { + return 'undeclared_entity' + } + } + i = semi + } + return '' +} + + +// Resolve namespaces on an element tree. Walks the tree once, +// maintaining three kinds of inherited state: +// +// ns - prefix → namespace URI (empty key = default ns), per +// XML Namespaces 1.0 +// space - active xml:space value ('default' or 'preserve'), +// inherited per XML 1.0 §2.10 +// lang - active xml:lang value, inherited per XML 1.0 §2.12 +// +// `space` and `lang` are recorded on each element only when they are +// non-default (so plain documents don't sprout extra fields). +type XmlScope = { + ns: Record + space: string + lang: string +} + +// Per Namespaces in XML 1.0 §2 "Reserved prefixes and namespace names": +// the xml prefix is bound to the URI below and may be used implicitly. +const XML_NS_URI = 'http://www.w3.org/XML/1998/namespace' +// The xmlns prefix is reserved and must never be declared. +const XMLNS_NS_URI = 'http://www.w3.org/2000/xmlns/' + +function resolveNamespaces( + element: XmlElement, scope: Record, +): string { + // Pre-bind the xml prefix to its reserved URI so xml:lang / xml:space + // qualify correctly without an explicit declaration. + return resolveScope(element, { + ns: { ...scope, xml: XML_NS_URI }, + space: 'default', + lang: '', + }) +} + +// Returns '' on success or an XML namespace error code on the first +// violation (reserved-prefix misuse, unbound prefix).
On error the +// tree may be partly annotated; callers should treat that as undefined. +function resolveScope(element: XmlElement, scope: XmlScope): string { + const ns = { ...scope.ns } + let space = scope.space + let lang = scope.lang + + for (const key of Object.keys(element.attributes || {})) { + const val = element.attributes[key] + if (key === 'xmlns') { + if (val === XML_NS_URI || val === XMLNS_NS_URI) { + return 'reserved_namespace' + } + ns[''] = val + } else if (key.startsWith('xmlns:')) { + const prefix = key.substring(6) + if (prefix === 'xml') { + if (val !== XML_NS_URI) return 'reserved_namespace' + } else if (prefix === 'xmlns') { + return 'reserved_namespace' + } else if (val === XML_NS_URI || val === XMLNS_NS_URI) { + return 'reserved_namespace' + } + ns[prefix] = val + } else if (key === 'xml:space') { + space = val + } else if (key === 'xml:lang') { + lang = val + } else { + // Attribute name namespace check. + const colon = key.indexOf(':') + if (colon > 0) { + const ap = key.substring(0, colon) + if (ap === 'xmlns') { + // already handled above + } else if (!Object.prototype.hasOwnProperty.call(ns, ap)) { + return 'unbound_prefix' + } + } + } + } + + const colonIdx = element.name.indexOf(':') + if (colonIdx >= 0) { + const prefix = element.name.substring(0, colonIdx) + element.prefix = prefix + element.localName = element.name.substring(colonIdx + 1) + if (Object.prototype.hasOwnProperty.call(ns, prefix)) { + element.namespace = ns[prefix] + } else { + return 'unbound_prefix' + } + } else { + element.localName = element.name + if (ns['']) { + element.namespace = ns[''] + } + } + + if (space !== 'default') (element as any).space = space + if (lang !== '') (element as any).lang = lang + + const childScope: XmlScope = { ns, space, lang } + for (const child of element.children) { + if (child && 'object' === typeof child) { + const err = resolveScope(child, childScope) + if (err) return err + } + } + return '' +} + + +Xml.defaults = { + namespaces: 
true, + entities: true, + customEntities: {}, + strictEntities: true, + embed: false, +} as XmlOptions + +export { Xml, decodeBOM } + +export type { XmlOptions, XmlElement } diff --git a/test/csv.test.ts b/test/csv.test.ts deleted file mode 100644 index 1e19bb2..0000000 --- a/test/csv.test.ts +++ /dev/null @@ -1,392 +0,0 @@ -/* Copyright (c) 2021-2024 Richard Rodger and other contributors, MIT License */ - -import { describe, test } from 'node:test' -import assert from 'node:assert' -import { readFileSync } from 'node:fs' -import { join } from 'node:path' - -import Util from 'util' - -import { Jsonic } from 'jsonic' -import { Csv } from '../dist/csv' - -const Spectrum = require('csv-spectrum') - -const fixturesDir = join(__dirname, '..', 'test', 'fixtures') -const manifest = JSON.parse( - readFileSync(join(fixturesDir, 'manifest.json'), 'utf8'), -) - -describe('csv', () => { - test('empty-records', async () => { - // ignored by default - - const jo = Jsonic.make().use(Csv) - assert.deepEqual(jo('\n'), []) - assert.deepEqual(jo('a\n1\n\n2\n3\n\n\n4\n'), [ - { a: '1' }, - { a: '2' }, - { a: '3' }, - { a: '4' }, - ]) - - const ja = Jsonic.make().use(Csv, { object: false }) - assert.deepEqual(ja('\n'), []) - assert.deepEqual(ja('a\n1\n\n2\n3\n\n\n4\n'), [['1'], ['2'], ['3'], ['4']]) - - // start and end also ignored - - assert.deepEqual(jo('\r\na,b\r\nA,B\r\n'), [{ a: 'A', b: 'B' }]) - assert.deepEqual(jo('\r\n\r\na,b\r\nA,B\r\n\r\n'), [{ a: 'A', b: 'B' }]) - assert.deepEqual(ja('\r\na,b\r\nA,B\r\n'), [['A', 'B']]) - assert.deepEqual(ja('\r\n\r\na,b\r\nA,B\r\n\r\n'), [['A', 'B']]) - - // with option, empty creates record - - const jon = Jsonic.make().use(Csv, { record: { empty: true } }) - assert.deepEqual(jon('\n'), []) - assert.deepEqual(jon('a\n1\n\n2\n3\n\n\n4\n'), [ - { a: '1' }, - { a: '' }, - { a: '2' }, - { a: '3' }, - { a: '' }, - { a: '' }, - { a: '4' }, - ]) - - // with comments - - const joc = Jsonic.make().use(Csv, { comment: true }) - // 
console.log(joc('a#X\n1\n#Y\n2\n3\n\n#Z\n4\n#Q')) - assert.deepEqual(joc('a#X\n1\n#Y\n2\n3\n\n#Z\n4\n#Q'), [ - { a: '1' }, - { a: '2' }, - { a: '3' }, - { a: '4' }, - ]) - - const jocn = Jsonic.make().use(Csv, { - comment: true, - record: { empty: true }, - }) - assert.deepEqual(jocn('a#X\n1\n#Y\n2\n3\n\n#Z\n4\n#Q'), [ - { a: '1' }, - { a: '' }, - { a: '2' }, - { a: '3' }, - { a: '' }, - { a: '' }, - { a: '4' }, - ]) - }) - - test('header', async () => { - const jo = Jsonic.make().use(Csv) - assert.deepEqual(jo('\n'), []) - assert.deepEqual(jo('\na,b\nA,B'), [{ a: 'A', b: 'B' }]) - - const ja = Jsonic.make().use(Csv, { object: false }) - assert.deepEqual(ja('\n'), []) - assert.deepEqual(ja('\na,b\nA,B'), [['A', 'B']]) - - const jon = Jsonic.make().use(Csv, { header: false }) - assert.deepEqual(jon('\n'), []) - assert.deepEqual(jon('\na,b\nA,B'), [ - { - 'field~0': 'a', - 'field~1': 'b', - }, - { - 'field~0': 'A', - 'field~1': 'B', - }, - ]) - - const jan = Jsonic.make().use(Csv, { header: false, object: false }) - assert.deepEqual(jan('\n'), []) - assert.deepEqual(jan('\na,b\nA,B'), [ - ['a', 'b'], - ['A', 'B'], - ]) - - const jonf = Jsonic.make().use(Csv, { - header: false, - field: { names: ['a', 'b'] }, - }) - assert.deepEqual(jonf('\n'), []) - assert.deepEqual(jonf('\na,b\nA,B'), [ - { - a: 'a', - b: 'b', - }, - { - a: 'A', - b: 'B', - }, - ]) - }) - - test('comma', async () => { - const jo = Jsonic.make().use(Csv) - - assert.deepEqual(jo('\na'), []) - assert.deepEqual(jo('a\n1,'), [{ a: '1', 'field~1': '' }]) - assert.deepEqual(jo('a\n,1'), [{ a: '', 'field~1': '1' }]) - assert.deepEqual(jo('a,b\n1,2,'), [{ a: '1', b: '2', 'field~2': '' }]) - assert.deepEqual(jo('a,b\n,1,2'), [{ a: '', b: '1', 'field~2': '2' }]) - - assert.deepEqual(jo('a\n1,\n'), [{ a: '1', 'field~1': '' }]) - assert.deepEqual(jo('a\n,1\n'), [{ a: '', 'field~1': '1' }]) - assert.deepEqual(jo('a,b\n1,2,\n'), [{ a: '1', b: '2', 'field~2': '' }]) - assert.deepEqual(jo('a,b\n,1,2\n'), [{ a: '', 
b: '1', 'field~2': '2' }]) - assert.deepEqual(jo('\na\n'), []) - - const ja = Jsonic.make().use(Csv, { object: false }) - - assert.deepEqual(ja('a\n1,'), [['1', '']]) - assert.deepEqual(ja('a\n,1'), [['', '1']]) - assert.deepEqual(ja('a,b\n1,2,'), [['1', '2', '']]) - assert.deepEqual(ja('a,b\n,1,2'), [['', '1', '2']]) - assert.deepEqual(ja('\n1'), []) - }) - - test('separators', async () => { - const jd = Jsonic.make().use(Csv, { - field: { - separation: '|', - }, - }) - - assert.deepEqual(jd('a|b|c\nA|B|C\nAA|BB|CC'), [ - { a: 'A', b: 'B', c: 'C' }, - { a: 'AA', b: 'BB', c: 'CC' }, - ]) - - const jD = Jsonic.make().use(Csv, { - field: { - separation: '~~', - }, - }) - - assert.deepEqual(jD('a~~b~~c\nA~~B~~C\nAA~~BB~~CC'), [ - { a: 'A', b: 'B', c: 'C' }, - { a: 'AA', b: 'BB', c: 'CC' }, - ]) - - const jn = Jsonic.make().use(Csv, { - record: { - separators: '%', - }, - }) - - assert.deepEqual(jn('a,b,c%A,B,C%AA,BB,CC'), [ - { a: 'A', b: 'B', c: 'C' }, - { a: 'AA', b: 'BB', c: 'CC' }, - ]) - }) - - test('double-quote', async () => { - const j = Jsonic.make().use(Csv) - - assert.deepEqual(j('a\n"b"'), [{ a: 'b' }]) - - assert.deepEqual(j('a\n"""b"'), [{ a: '"b' }]) - assert.deepEqual(j('a\n"b"""'), [{ a: 'b"' }]) - assert.deepEqual(j('a\n"""b"""'), [{ a: '"b"' }]) - assert.deepEqual(j('a\n"b""c"'), [{ a: 'b"c' }]) - - assert.deepEqual(j('a\n"b""c""d"'), [{ a: 'b"c"d' }]) - assert.deepEqual(j('a\n"b""c""d""e"'), [{ a: 'b"c"d"e' }]) - - assert.deepEqual(j('a\n"""b"'), [{ a: '"b' }]) - assert.deepEqual(j('a\n"b"""'), [{ a: 'b"' }]) - assert.deepEqual(j('a\n"""b"""'), [{ a: '"b"' }]) - - assert.deepEqual(j('a\n"""""b"'), [{ a: '""b' }]) - assert.deepEqual(j('a\n"b"""""'), [{ a: 'b""' }]) - assert.deepEqual(j('a\n"""""b"""""'), [{ a: '""b""' }]) - }) - - test('trim', async () => { - const j = Jsonic.make().use(Csv) - - assert.deepEqual(j('a\n b'), [{ a: ' b' }]) - assert.deepEqual(j('a\nb '), [{ a: 'b ' }]) - assert.deepEqual(j('a\n b '), [{ a: ' b ' }]) - 
assert.deepEqual(j('a\n b '), [{ a: ' b ' }]) - assert.deepEqual(j('a\n \tb \t '), [{ a: ' \tb \t ' }]) - - assert.deepEqual(j('a\n b c'), [{ a: ' b c' }]) - assert.deepEqual(j('a\nb c '), [{ a: 'b c ' }]) - assert.deepEqual(j('a\n b c '), [{ a: ' b c ' }]) - assert.deepEqual(j('a\n b c '), [{ a: ' b c ' }]) - assert.deepEqual(j('a\n \tb c \t '), [{ a: ' \tb c \t ' }]) - - const jt = Jsonic.make().use(Csv, { trim: true }) - - assert.deepEqual(jt('a\n b'), [{ a: 'b' }]) - assert.deepEqual(jt('a\nb '), [{ a: 'b' }]) - assert.deepEqual(jt('a\n b '), [{ a: 'b' }]) - assert.deepEqual(jt('a\n b '), [{ a: 'b' }]) - assert.deepEqual(jt('a\n \tb \t '), [{ a: 'b' }]) - - assert.deepEqual(jt('a\n b c'), [{ a: 'b c' }]) - assert.deepEqual(jt('a\nb c '), [{ a: 'b c' }]) - assert.deepEqual(jt('a\n b c '), [{ a: 'b c' }]) - assert.deepEqual(jt('a\n b c '), [{ a: 'b c' }]) - assert.deepEqual(jt('a\n \tb c \t '), [{ a: 'b c' }]) - }) - - test('comment', async () => { - const j = Jsonic.make().use(Csv) - assert.deepEqual(j('a\n# b'), [{ a: '# b' }]) - assert.deepEqual(j('a\n b #c'), [{ a: ' b #c' }]) - - const jc = Jsonic.make().use(Csv, { comment: true }) - assert.deepEqual(jc('a\n# b'), []) - assert.deepEqual(jc('a\n b #c'), [{ a: ' b ' }]) - - const jt = Jsonic.make().use(Csv, { strict: false }) - assert.deepEqual(jt('a\n# b'), []) - assert.deepEqual(jt('a\n b '), [{ a: 'b' }]) - }) - - test('number', async () => { - const j = Jsonic.make().use(Csv) - assert.deepEqual(j('a\n1'), [{ a: '1' }]) - assert.deepEqual(j('a\n1e2'), [{ a: '1e2' }]) - - const jn = Jsonic.make().use(Csv, { number: true }) - assert.deepEqual(jn('a\n1'), [{ a: 1 }]) - assert.deepEqual(jn('a\n1e2'), [{ a: 100 }]) - - const jt = Jsonic.make().use(Csv, { strict: false }) - assert.deepEqual(jt('a\n1'), [{ a: 1 }]) - assert.deepEqual(jt('a\n1e2'), [{ a: 100 }]) - }) - - test('value', async () => { - const j = Jsonic.make().use(Csv) - assert.deepEqual(j('a\ntrue'), [{ a: 'true' }]) - assert.deepEqual(j('a\nfalse'), 
[{ a: 'false' }]) - assert.deepEqual(j('a\nnull'), [{ a: 'null' }]) - - const jv = Jsonic.make().use(Csv, { value: true }) - assert.deepEqual(jv('a\ntrue'), [{ a: true }]) - assert.deepEqual(jv('a\nfalse'), [{ a: false }]) - assert.deepEqual(jv('a\nnull'), [{ a: null }]) - }) - - test('stream', () => { - return new Promise((resolve) => { - let tmp: any = {} - let data: any[] - const j = Jsonic.make().use(Csv, { - stream: (what: string, record?: any[]) => { - if ('start' === what) { - data = [] - tmp.start = Date.now() - } else if ('record' === what) { - data.push(record) - } else if ('end' === what) { - tmp.end = Date.now() - - assert.deepEqual(data, [ - { a: '1', b: '2' }, - { a: '3', b: '4' }, - { a: '5', b: '6' }, - ]) - - assert.ok(tmp.start <= tmp.end) - - resolve() - } - }, - }) - - j('a,b\n1,2\n3,4\n5,6') - }) - }) - - test('unstrict', async () => { - const j = Jsonic.make().use(Csv, { strict: false }) - let d0 = j(`a,b,c -true,[1,2],{x:{y:"q\\"w"}} - x , 'y\\'y', "z\\"z" -`) - assert.deepEqual(d0, [ - { - a: true, - b: [1, 2], - c: { - x: { - y: 'q"w', - }, - }, - }, - { - a: 'x', - b: "y'y", - c: 'z"z', - }, - ]) - - assert.throws(() => j('a\n{x:1}y'), /unexpected/) - }) - - test('spectrum', async () => { - const j = Jsonic.make().use(Csv) - const tests = await Util.promisify(Spectrum)() - for (let i = 0; i < tests.length; i++) { - let test = tests[i] - let name = test.name - let json = JSON.parse(test.json.toString()) - let csv = test.csv.toString() - let res = j(csv) - let testname = name + ' ' + (i + 1) + '/' + tests.length - - // Broken test, reenable when fixed - if (5 === i) { - continue - } - - assert.deepEqual({ [testname]: res }, { [testname]: json }) - } - }) - - test('fixtures', async () => { - const csv = Jsonic.make().use(Csv) - for (const [key, entry] of Object.entries(manifest) as [string, any][]) { - const name: string = entry.name - - let parser = csv - if (entry.opt) { - let j = entry.jsonicOpt ? 
Jsonic.make(entry.jsonicOpt) : Jsonic.make() - parser = j.use(Csv, entry.opt) - } - const csvFile = entry.csvFile || key - const raw = readFileSync(join(fixturesDir, csvFile + '.csv'), 'utf8') - - if (entry.err) { - try { - parser(raw) - assert.fail('Expected error ' + entry.err + ' for fixture: ' + name) - } catch (e: any) { - assert.deepEqual(entry.err, e.code) - } - } else { - try { - const expected = JSON.parse( - readFileSync(join(fixturesDir, key + '.json'), 'utf8'), - ) - const out = parser(raw) - assert.deepEqual(out, expected) - } catch (e: any) { - console.error('FIXTURE: ' + name) - throw e - } - } - } - }) -}) diff --git a/test/fixtures/basic-array.json b/test/fixtures/basic-array.json deleted file mode 100644 index e8a1b12..0000000 --- a/test/fixtures/basic-array.json +++ /dev/null @@ -1 +0,0 @@ -[["1","2"],["3","4"]] diff --git a/test/fixtures/basic-noheader-names.json b/test/fixtures/basic-noheader-names.json deleted file mode 100644 index 7df9a6f..0000000 --- a/test/fixtures/basic-noheader-names.json +++ /dev/null @@ -1 +0,0 @@ -[{"x":"a","y":"b"},{"x":"1","y":"2"},{"x":"3","y":"4"}] diff --git a/test/fixtures/basic-noheader.json b/test/fixtures/basic-noheader.json deleted file mode 100644 index 86c86e2..0000000 --- a/test/fixtures/basic-noheader.json +++ /dev/null @@ -1 +0,0 @@ -[{"field~0":"a","field~1":"b"},{"field~0":"1","field~1":"2"},{"field~0":"3","field~1":"4"}] diff --git a/test/fixtures/basic.csv b/test/fixtures/basic.csv deleted file mode 100644 index 0099ae9..0000000 --- a/test/fixtures/basic.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -1,2 -3,4 diff --git a/test/fixtures/basic.json b/test/fixtures/basic.json deleted file mode 100644 index 8db32a8..0000000 --- a/test/fixtures/basic.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1","b":"2"},{"a":"3","b":"4"}] diff --git a/test/fixtures/comment-empty.csv b/test/fixtures/comment-empty.csv deleted file mode 100644 index 28a3f77..0000000 --- a/test/fixtures/comment-empty.csv +++ /dev/null @@ -1,8 +0,0 
@@ -a -1 -#comment -2 -3 - -#another comment -4 diff --git a/test/fixtures/comment-empty.json b/test/fixtures/comment-empty.json deleted file mode 100644 index ebc5b1c..0000000 --- a/test/fixtures/comment-empty.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":""},{"a":"2"},{"a":"3"},{"a":""},{"a":""},{"a":"4"}] diff --git a/test/fixtures/comment-inline.csv b/test/fixtures/comment-inline.csv deleted file mode 100644 index f927089..0000000 --- a/test/fixtures/comment-inline.csv +++ /dev/null @@ -1,3 +0,0 @@ -a#X -1 - b #c diff --git a/test/fixtures/comment-inline.json b/test/fixtures/comment-inline.json deleted file mode 100644 index 8a46826..0000000 --- a/test/fixtures/comment-inline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":" b "}] diff --git a/test/fixtures/comment-line.csv b/test/fixtures/comment-line.csv deleted file mode 100644 index 82875ca..0000000 --- a/test/fixtures/comment-line.csv +++ /dev/null @@ -1,5 +0,0 @@ -a -1 -#this is a comment -2 -3 diff --git a/test/fixtures/comment-line.json b/test/fixtures/comment-line.json deleted file mode 100644 index 071af2a..0000000 --- a/test/fixtures/comment-line.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":"2"},{"a":"3"}] diff --git a/test/fixtures/crlf.csv b/test/fixtures/crlf.csv deleted file mode 100644 index 4ba71dc..0000000 --- a/test/fixtures/crlf.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -A,B -C,D diff --git a/test/fixtures/crlf.json b/test/fixtures/crlf.json deleted file mode 100644 index c2872a6..0000000 --- a/test/fixtures/crlf.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B"},{"a":"C","b":"D"}] diff --git a/test/fixtures/empty-fields.csv b/test/fixtures/empty-fields.csv deleted file mode 100644 index 0970345..0000000 --- a/test/fixtures/empty-fields.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -1, -,1 -1,2, -,1,2 diff --git a/test/fixtures/empty-fields.json b/test/fixtures/empty-fields.json deleted file mode 100644 index 1da5613..0000000 --- a/test/fixtures/empty-fields.json +++ /dev/null @@ -1 +0,0 
@@ -[{"a":"1","b":""},{"a":"","b":"1"},{"a":"1","b":"2","field~2":""},{"a":"","b":"1","field~2":"2"}] diff --git a/test/fixtures/empty-records-default.json b/test/fixtures/empty-records-default.json deleted file mode 100644 index 7561320..0000000 --- a/test/fixtures/empty-records-default.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":"2"},{"a":"3"},{"a":"4"}] diff --git a/test/fixtures/empty-records.csv b/test/fixtures/empty-records.csv deleted file mode 100644 index bbeb6f4..0000000 --- a/test/fixtures/empty-records.csv +++ /dev/null @@ -1,8 +0,0 @@ -a -1 - -2 -3 - - -4 diff --git a/test/fixtures/empty-records.json b/test/fixtures/empty-records.json deleted file mode 100644 index ebc5b1c..0000000 --- a/test/fixtures/empty-records.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":""},{"a":"2"},{"a":"3"},{"a":""},{"a":""},{"a":"4"}] diff --git a/test/fixtures/happy.csv b/test/fixtures/happy.csv deleted file mode 100644 index 89a52c0..0000000 --- a/test/fixtures/happy.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -1,B,true -2,BB,false diff --git a/test/fixtures/happy.json b/test/fixtures/happy.json deleted file mode 100644 index c0ec104..0000000 --- a/test/fixtures/happy.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "a": "1", - "b": "B", - "c": "true" - }, - { - "a": "2", - "b": "BB", - "c": "false" - } -] diff --git a/test/fixtures/leading-newline.csv b/test/fixtures/leading-newline.csv deleted file mode 100644 index d555e5e..0000000 --- a/test/fixtures/leading-newline.csv +++ /dev/null @@ -1,3 +0,0 @@ - -a,b -A,B diff --git a/test/fixtures/leading-newline.json b/test/fixtures/leading-newline.json deleted file mode 100644 index e265c5e..0000000 --- a/test/fixtures/leading-newline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B"}] diff --git a/test/fixtures/manifest.json b/test/fixtures/manifest.json deleted file mode 100644 index b7f0b22..0000000 --- a/test/fixtures/manifest.json +++ /dev/null @@ -1,872 +0,0 @@ -{ - "happy": { - "name": "happy" - }, - "quote": { - 
"name": "quote" - }, - "notrim": { - "name": "notrim" - }, - "trim": { - "name": "trim", - "csvFile": "notrim", - "opt": { - "trim": true - } - }, - "papa-one-row": { - "name": "papa-One row", - "opt": { - "header": false, - "object": false - } - }, - "papa-two-rows": { - "name": "papa-Two rows", - "opt": { - "header": false, - "object": false - } - }, - "papa-three-rows": { - "name": "papa-Three rows", - "opt": { - "header": false, - "object": false - } - }, - "papa-whitespace-at-edges-of-unquoted-field": { - "name": "papa-Whitespace at edges of unquoted field", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field": { - "name": "papa-Quoted field", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-extra-whitespace-on-edges": { - "name": "papa-Quoted field with extra whitespace on edges", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-delimiter": { - "name": "papa-Quoted field with delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-line-break": { - "name": "papa-Quoted field with line break", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-fields-with-line-breaks": { - "name": "papa-Quoted fields with line breaks", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break": { - "name": "papa-Quoted fields at end of row with delimiter and line break", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-escaped-quotes": { - "name": "papa-Quoted field with escaped quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-escaped-quotes-at-boundaries": { - "name": "papa-Quoted field with escaped quotes at boundaries", - "opt": { - "header": false, - "object": false - } - }, - "papa-unquoted-field-with-quotes-at-end-of-field": { - "name": "papa-Unquoted field with quotes at end of 
field", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-quotes-around-delimiter": { - "name": "papa-Quoted field with quotes around delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-quotes-on-right-side-of-delimiter": { - "name": "papa-Quoted field with quotes on right side of delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-quotes-on-left-side-of-delimiter": { - "name": "papa-Quoted field with quotes on left side of delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too": { - "name": "papa-Quoted field with 5 quotes in a row and a delimiter in there: too", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-whitespace-around-quotes": { - "name": "papa-Quoted field with whitespace around quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-misplaced-quotes-in-data-not-as-opening-quotes": { - "name": "papa-Misplaced quotes in data: not as opening quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-has-no-closing-quote": { - "name": "papa-Quoted field has no closing quote", - "opt": { - "header": false, - "object": false - }, - "err": "unterminated_string" - }, - "papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer": { - "name": "papa-Quoted field has invalid trailing quote after delimiter with a valid closer", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-invalid-trailing-quote-after-delimiter": { - "name": "papa-Quoted field has invalid trailing quote after delimiter", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-invalid-trailing-quote-before-delimiter": { - "name": "papa-Quoted field has invalid trailing quote before 
delimiter", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-invalid-trailing-quote-after-new-line": { - "name": "papa-Quoted field has invalid trailing quote after new line", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-valid-trailing-quote-via-delimiter": { - "name": "papa-Quoted field has valid trailing quote via delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-has-valid-trailing-quote-via-n": { - "name": "papa-Quoted field has valid trailing quote via \\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-has-valid-trailing-quote-via-eof": { - "name": "papa-Quoted field has valid trailing quote via EOF", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote": { - "name": "papa-Quoted field contains delimiters and \\n with valid trailing quote", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-starts-with-quoted-field": { - "name": "papa-Line starts with quoted field", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-starts-with-unquoted-empty-field": { - "name": "papa-Line starts with unquoted empty field", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-ends-with-quoted-field": { - "name": "papa-Line ends with quoted field", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n": { - "name": "papa-Line ends with quoted field: first field of next line is empty, \\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes": { - "name": "papa-Quoted field at end of row (but not at EOF) has quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-quoted-field-at-eof-is-empty": { - 
"name": "papa-Empty quoted field at EOF is empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-multiple-consecutive-empty-fields": { - "name": "papa-Multiple consecutive empty fields", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-input-string": { - "name": "papa-Empty input string", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-the-delimiter-2-empty-fields": { - "name": "papa-Input is just the delimiter (2 empty fields)", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-empty-fields": { - "name": "papa-Input is just empty fields", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-a-string-a-single-field": { - "name": "papa-Input is just a string (a single field)", - "opt": { - "header": false, - "object": false - } - }, - "papa-commented-line-at-beginning": { - "name": "papa-Commented line at beginning", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-commented-line-in-middle": { - "name": "papa-Commented line in middle", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-commented-line-at-end": { - "name": "papa-Commented line at end", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-two-comment-lines-consecutively": { - "name": "papa-Two comment lines consecutively", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-two-comment-lines-consecutively-at-end-of-file": { - "name": "papa-Two comment lines consecutively at end of file", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-three-comment-lines-consecutively-at-beginning-of-file": { - "name": "papa-Three comment lines consecutively at beginning of file", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-entire-file-is-comment-lines": { - "name": "papa-Entire file is 
comment lines", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-comment-with-non-default-character": { - "name": "papa-Comment with non-default character", - "opt": { - "header": false, - "object": false, - "comment": true - }, - "jsonicOpt": { - "comment": { - "def": { - "hash": { - "start": "!" - } - } - } - } - }, - "papa-bad-comments-value-specified": { - "name": "papa-Bad comments value specified", - "opt": { - "header": false, - "object": false - } - }, - "papa-multi-character-comment-string": { - "name": "papa-Multi-character comment string", - "opt": { - "header": false, - "object": false, - "comment": true - }, - "jsonicOpt": { - "comment": { - "def": { - "hash": { - "start": "=N(" - } - } - } - } - }, - "papa-input-with-only-a-commented-line": { - "name": "papa-Input with only a commented line", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-jsonic-input-with-only-a-commented-line-and-blank-line-after": { - "name": "papa-jsonic-Input with only a commented line and blank line after", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-input-with-only-a-commented-line-without-comments-enabled": { - "name": "papa-Input with only a commented line: without comments enabled", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-without-comments-with-line-starting-with-whitespace": { - "name": "papa-Input without comments with line starting with whitespace", - "opt": { - "header": false, - "object": false - } - }, - "papa-multiple-rows-one-column-no-delimiter-found": { - "name": "papa-Multiple rows: one column (no delimiter found)", - "opt": { - "header": false, - "object": false - } - }, - "papa-jsonic-one-column-input-with-empty-fields": { - "name": "papa-jsonic-One column input with empty fields", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-two-rows-just-r": { - "name": "papa-Two 
rows: just \\r", - "opt": { - "header": false, - "object": false - } - }, - "papa-two-rows-r-n": { - "name": "papa-Two rows: \\r\\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-r-n": { - "name": "papa-Quoted field with \\r\\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-r": { - "name": "papa-Quoted field with \\r", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-n": { - "name": "papa-Quoted field with \\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter": { - "name": "papa-Quoted fields with spaces between closing quote and next delimiter", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line": { - "name": "papa-Quoted fields with spaces between closing quote and next new line", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-quoted-fields-with-spaces-after-closing-quote": { - "name": "papa-Quoted fields with spaces after closing quote", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-misplaced-quotes-in-data-twice-not-as-opening-quotes": { - "name": "papa-Misplaced quotes in data twice: not as opening quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-header-row-with-one-row-of-data": { - "name": "papa-Header row with one row of data", - "opt": { - "header": true - } - }, - "papa-header-row-only": { - "name": "papa-Header row only" - }, - "papa-row-with-too-few-fields": { - "name": "papa-Row with too few fields", - "opt": { - "field": { - "exact": true - } - }, - "err": "csv_missing_field" - }, - "papa-row-with-too-many-fields": { - "name": "papa-Row with too many fields", - "opt": { - "field": { - "exact": true - } - }, - "err": "csv_extra_field" - }, - 
"papa-row-with-enough-fields-but-blank-field-in-the-begining": { - "name": "papa-Row with enough fields but blank field in the begining", - "opt": { - "header": false, - "object": false - } - }, - "papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers": { - "name": "papa-Row with enough fields but blank field in the begining using headers" - }, - "papa-row-with-enough-fields-but-blank-field-at-end": { - "name": "papa-Row with enough fields but blank field at end" - }, - "papa-tab-delimiter": { - "name": "papa-Tab delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "\t" - } - } - }, - "papa-pipe-delimiter": { - "name": "papa-Pipe delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "|" - } - } - }, - "papa-ascii-30-delimiter": { - "name": "papa-ASCII 30 delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "\u001e" - } - } - }, - "papa-ascii-31-delimiter": { - "name": "papa-ASCII 31 delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "\u001f" - } - } - }, - "papa-multi-character-delimiter": { - "name": "papa-Multi-character delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": ", " - } - } - }, - "papa-multi-character-delimiter-length-2-with-quoted-field": { - "name": "papa-Multi-character delimiter (length 2) with quoted field", - "opt": { - "header": false, - "object": false, - "field": { - "separation": ", " - } - } - }, - "papa-dynamic-typing-converts-boolean-literals": { - "name": "papa-Dynamic typing converts boolean literals", - "opt": { - "header": false, - "object": false, - "value": true - }, - "jsonicOpt": { - "value": { - "def": { - "TRUE": { - "val": true - }, - "FALSE": { - "val": false - } - } - } - } - }, - "papa-dynamic-typing-doesn-t-convert-other-types": { - "name": "papa-Dynamic typing doesn't convert other types", - "opt": { - "header": false, 
- "object": false, - "value": true - }, - "jsonicOpt": { - "value": { - "def": { - "null": null - } - } - } - }, - "papa-jsonic-blank-line-at-beginning": { - "name": "papa-jsonic-Blank line at beginning", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-jsonic-blank-line-in-middle": { - "name": "papa-jsonic-Blank line in middle", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-jsonic-blank-lines-at-end": { - "name": "papa-jsonic-Blank lines at end", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-jsonic-blank-line-in-middle-with-whitespace": { - "name": "papa-jsonic-Blank line in middle with whitespace", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-first-field-of-a-line-is-empty": { - "name": "papa-First field of a line is empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-last-field-of-a-line-is-empty": { - "name": "papa-Last field of a line is empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-other-fields-are-empty": { - "name": "papa-Other fields are empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-input-string-2": { - "name": "papa-Empty input string 2", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-the-delimiter-2-empty-fields-2": { - "name": "papa-Input is just the delimiter (2 empty fields) 2", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-a-string-a-single-field-2": { - "name": "papa-Input is just a string (a single field) 2", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-lines": { - "name": "papa-Empty lines", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-skip-empty-lines": { - "name": "papa-Skip empty lines", - "opt": { - "header": 
false, - "object": false - } - }, - "papa-skip-empty-lines-with-newline-at-end-of-input": { - "name": "papa-Skip empty lines: with newline at end of input", - "opt": { - "header": false, - "object": false - } - }, - "papa-skip-empty-lines-with-empty-input": { - "name": "papa-Skip empty lines: with empty input", - "opt": { - "header": false, - "object": false - } - }, - "papa-skip-empty-lines-with-first-line-only-whitespace": { - "name": "papa-Skip empty lines: with first line only whitespace", - "opt": { - "header": false, - "object": false - } - }, - "papa-single-quote-as-quote-character": { - "name": "papa-Single quote as quote character", - "opt": { - "header": false, - "object": false, - "string": { - "quote": "'" - } - } - }, - "papa-custom-escape-character-in-the-middle": { - "name": "papa-Custom escape character in the middle", - "opt": { - "header": false, - "object": false, - "string": { - "csv": false - } - } - }, - "papa-custom-escape-character-at-the-end": { - "name": "papa-Custom escape character at the end", - "opt": { - "header": false, - "object": false, - "string": { - "csv": false - } - } - }, - "papa-header-row-with-preceding-comment": { - "name": "papa-Header row with preceding comment", - "opt": { - "comment": true - } - }, - "papa-carriage-return-in-header-inside-quotes-with-line-feed-endings": { - "name": "papa-Carriage return in header inside quotes: with line feed endings", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-r-n-endings-uses-r-n-linebreak": { - "name": "papa-Using \\r\\n endings uses \\r\\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-n-endings-uses-n-linebreak": { - "name": "papa-Using \\n endings uses \\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak": { - "name": "papa-Using \\r\\n endings with \\r\\n in header field uses \\r\\n linebreak", - "opt": { - "header": false, - 
"object": false - } - }, - "papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak": { - "name": "papa-Using \\r\\n endings with \\n in header field uses \\r\\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak": { - "name": "papa-Using \\r\\n endings with \\n in header field with skip empty lines uses \\r\\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak": { - "name": "papa-Using \\n endings with \\r\\n in header field uses \\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-reserved-regex-character-as-quote-character": { - "name": "papa-Using reserved regex character | as quote character", - "opt": { - "header": false, - "object": false, - "string": { - "quote": "|" - } - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter": { - "name": "papa-Quoted fields with spaces between closing quote and next delimiter and contains delimiter", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline": { - "name": "papa-Quoted fields with spaces between closing quote and newline and contains newline", - "opt": { - "header": false, - "object": false, - "trim": true - } - } -} diff --git a/test/fixtures/multi-char-separator.csv b/test/fixtures/multi-char-separator.csv deleted file mode 100644 index a2f41fb..0000000 --- a/test/fixtures/multi-char-separator.csv +++ /dev/null @@ -1,3 +0,0 @@ -a~~b~~c -A~~B~~C -AA~~BB~~CC diff --git a/test/fixtures/multi-char-separator.json b/test/fixtures/multi-char-separator.json deleted file mode 100644 index f4668e4..0000000 --- a/test/fixtures/multi-char-separator.json +++ /dev/null @@ -1 +0,0 @@ 
-[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/multirow.csv b/test/fixtures/multirow.csv deleted file mode 100644 index b532031..0000000 --- a/test/fixtures/multirow.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -A,B,C -AA,BB,CC -AAA,BBB,CCC diff --git a/test/fixtures/multirow.json b/test/fixtures/multirow.json deleted file mode 100644 index 2128c0b..0000000 --- a/test/fixtures/multirow.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"},{"a":"AAA","b":"BBB","c":"CCC"}] diff --git a/test/fixtures/notrim.csv b/test/fixtures/notrim.csv deleted file mode 100644 index 155dee7..0000000 --- a/test/fixtures/notrim.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b,c -1 , 2 , 3 - 11 , 22 , 33 -4 , 5 , 6 - 44 , 55 , 66 diff --git a/test/fixtures/notrim.json b/test/fixtures/notrim.json deleted file mode 100644 index 50f6466..0000000 --- a/test/fixtures/notrim.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "a": "1 ", - "b": " 2 ", - "c": " 3" - }, - { - "a": " 11 ", - "b": " 22 ", - "c": " 33 " - }, - { - "a": "4\t", - "b": "\t5\t", - "c": "\t6" - }, - { - "a": "\t44\t", - "b": "\t\t55\t\t\t", - "c": "\t66\t" - } -] diff --git a/test/fixtures/number.csv b/test/fixtures/number.csv deleted file mode 100644 index f4f3001..0000000 --- a/test/fixtures/number.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -1,2.5 -1e2,abc diff --git a/test/fixtures/number.json b/test/fixtures/number.json deleted file mode 100644 index dcd2454..0000000 --- a/test/fixtures/number.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":1,"b":2.5},{"a":100,"b":"abc"}] diff --git a/test/fixtures/papa-ascii-30-delimiter.csv b/test/fixtures/papa-ascii-30-delimiter.csv deleted file mode 100644 index 0024b0a..0000000 --- a/test/fixtures/papa-ascii-30-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -abc -def \ No newline at end of file diff --git a/test/fixtures/papa-ascii-30-delimiter.json b/test/fixtures/papa-ascii-30-delimiter.json deleted file mode 100644 index 
d0b6253..0000000 --- a/test/fixtures/papa-ascii-30-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-ascii-31-delimiter.csv b/test/fixtures/papa-ascii-31-delimiter.csv deleted file mode 100644 index ee8afcf..0000000 --- a/test/fixtures/papa-ascii-31-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -abc -def \ No newline at end of file diff --git a/test/fixtures/papa-ascii-31-delimiter.json b/test/fixtures/papa-ascii-31-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-ascii-31-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-bad-comments-value-specified.csv b/test/fixtures/papa-bad-comments-value-specified.csv deleted file mode 100644 index 164bb60..0000000 --- a/test/fixtures/papa-bad-comments-value-specified.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -5comment -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-bad-comments-value-specified.json b/test/fixtures/papa-bad-comments-value-specified.json deleted file mode 100644 index c8c02a3..0000000 --- a/test/fixtures/papa-bad-comments-value-specified.json +++ /dev/null @@ -1,15 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "5comment" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.csv b/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.csv deleted file mode 100644 index cbcc8a5..0000000 --- a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a","b" -"c","d" -"e","f" -"g","h" -"i","j" \ No newline at end of file diff --git a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.json b/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.json deleted 
file mode 100644 index 8e09d82..0000000 --- a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\r\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-comment-with-non-default-character.csv b/test/fixtures/papa-comment-with-non-default-character.csv deleted file mode 100644 index 6bacc65..0000000 --- a/test/fixtures/papa-comment-with-non-default-character.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -!Comment goes here -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-comment-with-non-default-character.json b/test/fixtures/papa-comment-with-non-default-character.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-comment-with-non-default-character.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-commented-line-at-beginning.csv b/test/fixtures/papa-commented-line-at-beginning.csv deleted file mode 100644 index 9d5dd4e..0000000 --- a/test/fixtures/papa-commented-line-at-beginning.csv +++ /dev/null @@ -1,2 +0,0 @@ -# Comment! 
-a,b,c \ No newline at end of file diff --git a/test/fixtures/papa-commented-line-at-beginning.json b/test/fixtures/papa-commented-line-at-beginning.json deleted file mode 100644 index f47beff..0000000 --- a/test/fixtures/papa-commented-line-at-beginning.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-commented-line-at-end.csv b/test/fixtures/papa-commented-line-at-end.csv deleted file mode 100644 index 42497fd..0000000 --- a/test/fixtures/papa-commented-line-at-end.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,true,false -# Comment \ No newline at end of file diff --git a/test/fixtures/papa-commented-line-at-end.json b/test/fixtures/papa-commented-line-at-end.json deleted file mode 100644 index 2cb707c..0000000 --- a/test/fixtures/papa-commented-line-at-end.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "true", - "false" - ] -] diff --git a/test/fixtures/papa-commented-line-in-middle.csv b/test/fixtures/papa-commented-line-in-middle.csv deleted file mode 100644 index 53df74c..0000000 --- a/test/fixtures/papa-commented-line-in-middle.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -# Comment -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-commented-line-in-middle.json b/test/fixtures/papa-commented-line-in-middle.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-commented-line-in-middle.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-custom-escape-character-at-the-end.csv b/test/fixtures/papa-custom-escape-character-at-the-end.csv deleted file mode 100644 index 69ea0dd..0000000 --- a/test/fixtures/papa-custom-escape-character-at-the-end.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,"c\"d\"" \ No newline at end of file diff --git a/test/fixtures/papa-custom-escape-character-at-the-end.json b/test/fixtures/papa-custom-escape-character-at-the-end.json deleted file mode 100644 index e4033d0..0000000 --- 
a/test/fixtures/papa-custom-escape-character-at-the-end.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c\"d\"" - ] -] diff --git a/test/fixtures/papa-custom-escape-character-in-the-middle.csv b/test/fixtures/papa-custom-escape-character-in-the-middle.csv deleted file mode 100644 index b37a6fd..0000000 --- a/test/fixtures/papa-custom-escape-character-in-the-middle.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,"c\"d\"f" \ No newline at end of file diff --git a/test/fixtures/papa-custom-escape-character-in-the-middle.json b/test/fixtures/papa-custom-escape-character-in-the-middle.json deleted file mode 100644 index 85cd7d5..0000000 --- a/test/fixtures/papa-custom-escape-character-in-the-middle.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c\"d\"f" - ] -] diff --git a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.csv b/test/fixtures/papa-dynamic-typing-converts-boolean-literals.csv deleted file mode 100644 index d36c135..0000000 --- a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.csv +++ /dev/null @@ -1 +0,0 @@ -true,false,T,F,TRUE,FALSE,True,False \ No newline at end of file diff --git a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.json b/test/fixtures/papa-dynamic-typing-converts-boolean-literals.json deleted file mode 100644 index 0c34701..0000000 --- a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - true, - false, - "T", - "F", - true, - false, - "True", - "False" - ] -] diff --git a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.csv b/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.csv deleted file mode 100644 index 7e39851..0000000 --- a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -undefined,null,[ -var,float,if \ No newline at end of file diff --git a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.json 
b/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.json deleted file mode 100644 index 160d79c..0000000 --- a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "A", - "B", - "C" - ], - [ - "undefined", - "null", - "[" - ], - [ - "var", - "float", - "if" - ] -] diff --git a/test/fixtures/papa-empty-input-string-2.csv b/test/fixtures/papa-empty-input-string-2.csv deleted file mode 100644 index e69de29..0000000 diff --git a/test/fixtures/papa-empty-input-string-2.json b/test/fixtures/papa-empty-input-string-2.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-empty-input-string-2.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-empty-input-string.csv b/test/fixtures/papa-empty-input-string.csv deleted file mode 100644 index e69de29..0000000 diff --git a/test/fixtures/papa-empty-input-string.json b/test/fixtures/papa-empty-input-string.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-empty-input-string.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-empty-lines.csv b/test/fixtures/papa-empty-lines.csv deleted file mode 100644 index 4247d50..0000000 --- a/test/fixtures/papa-empty-lines.csv +++ /dev/null @@ -1,5 +0,0 @@ - -a,b,c - -d,e,f - diff --git a/test/fixtures/papa-empty-lines.json b/test/fixtures/papa-empty-lines.json deleted file mode 100644 index 963e246..0000000 --- a/test/fixtures/papa-empty-lines.json +++ /dev/null @@ -1,15 +0,0 @@ -[ - [], - [ - "a", - "b", - "c" - ], - [], - [ - "d", - "e", - "f" - ], - [] -] diff --git a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.csv b/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.csv deleted file mode 100644 index 2d02206..0000000 --- a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,"" -a,b,"" \ No newline at end of file diff --git a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.json 
b/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.json deleted file mode 100644 index 98b5299..0000000 --- a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "" - ], - [ - "a", - "b", - "" - ] -] diff --git a/test/fixtures/papa-entire-file-is-comment-lines.csv b/test/fixtures/papa-entire-file-is-comment-lines.csv deleted file mode 100644 index f77b825..0000000 --- a/test/fixtures/papa-entire-file-is-comment-lines.csv +++ /dev/null @@ -1,3 +0,0 @@ -#comment1 -#comment2 -#comment3 \ No newline at end of file diff --git a/test/fixtures/papa-entire-file-is-comment-lines.json b/test/fixtures/papa-entire-file-is-comment-lines.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-entire-file-is-comment-lines.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-first-field-of-a-line-is-empty.csv b/test/fixtures/papa-first-field-of-a-line-is-empty.csv deleted file mode 100644 index df89dba..0000000 --- a/test/fixtures/papa-first-field-of-a-line-is-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,c -,e,f \ No newline at end of file diff --git a/test/fixtures/papa-first-field-of-a-line-is-empty.json b/test/fixtures/papa-first-field-of-a-line-is-empty.json deleted file mode 100644 index 7ab352a..0000000 --- a/test/fixtures/papa-first-field-of-a-line-is-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-header-row-only.csv b/test/fixtures/papa-header-row-only.csv deleted file mode 100644 index 8ae723e..0000000 --- a/test/fixtures/papa-header-row-only.csv +++ /dev/null @@ -1 +0,0 @@ -A,B,C \ No newline at end of file diff --git a/test/fixtures/papa-header-row-only.json b/test/fixtures/papa-header-row-only.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-header-row-only.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git 
a/test/fixtures/papa-header-row-with-one-row-of-data.csv b/test/fixtures/papa-header-row-with-one-row-of-data.csv deleted file mode 100644 index fea02ce..0000000 --- a/test/fixtures/papa-header-row-with-one-row-of-data.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C -a,b,c \ No newline at end of file diff --git a/test/fixtures/papa-header-row-with-one-row-of-data.json b/test/fixtures/papa-header-row-with-one-row-of-data.json deleted file mode 100644 index 79cd368..0000000 --- a/test/fixtures/papa-header-row-with-one-row-of-data.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - { - "A": "a", - "B": "b", - "C": "c" - } -] diff --git a/test/fixtures/papa-header-row-with-preceding-comment.csv b/test/fixtures/papa-header-row-with-preceding-comment.csv deleted file mode 100644 index 37801e2..0000000 --- a/test/fixtures/papa-header-row-with-preceding-comment.csv +++ /dev/null @@ -1,3 +0,0 @@ -#Comment -a,b -c,d diff --git a/test/fixtures/papa-header-row-with-preceding-comment.json b/test/fixtures/papa-header-row-with-preceding-comment.json deleted file mode 100644 index 8812637..0000000 --- a/test/fixtures/papa-header-row-with-preceding-comment.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - { - "a": "c", - "b": "d" - } -] diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.csv b/test/fixtures/papa-input-is-just-a-string-a-single-field-2.csv deleted file mode 100644 index 7203e92..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.csv +++ /dev/null @@ -1 +0,0 @@ -Abc def \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.json b/test/fixtures/papa-input-is-just-a-string-a-single-field-2.json deleted file mode 100644 index a5f44c8..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.json +++ /dev/null @@ -1,5 +0,0 @@ -[ - [ - "Abc def" - ] -] diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field.csv b/test/fixtures/papa-input-is-just-a-string-a-single-field.csv deleted 
file mode 100644 index 7203e92..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field.csv +++ /dev/null @@ -1 +0,0 @@ -Abc def \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field.json b/test/fixtures/papa-input-is-just-a-string-a-single-field.json deleted file mode 100644 index a5f44c8..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field.json +++ /dev/null @@ -1,5 +0,0 @@ -[ - [ - "Abc def" - ] -] diff --git a/test/fixtures/papa-input-is-just-empty-fields.csv b/test/fixtures/papa-input-is-just-empty-fields.csv deleted file mode 100644 index f6f13f5..0000000 --- a/test/fixtures/papa-input-is-just-empty-fields.csv +++ /dev/null @@ -1,2 +0,0 @@ -,, -,,, \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-empty-fields.json b/test/fixtures/papa-input-is-just-empty-fields.json deleted file mode 100644 index 46e6f81..0000000 --- a/test/fixtures/papa-input-is-just-empty-fields.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [ - "", - "", - "" - ], - [ - "", - "", - "", - "" - ] -] diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.csv b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.csv deleted file mode 100644 index 41622b4..0000000 --- a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.csv +++ /dev/null @@ -1 +0,0 @@ -, \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.json b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.json deleted file mode 100644 index 3a9e22f..0000000 --- a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - [ - "", - "" - ] -] diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.csv b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.csv deleted file mode 100644 index 41622b4..0000000 --- 
a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.csv +++ /dev/null @@ -1 +0,0 @@ -, \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.json b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.json deleted file mode 100644 index 3a9e22f..0000000 --- a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - [ - "", - "" - ] -] diff --git a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.csv b/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.csv deleted file mode 100644 index 65b570c..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.csv +++ /dev/null @@ -1 +0,0 @@ -#commented line \ No newline at end of file diff --git a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.json b/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.json deleted file mode 100644 index 66808d4..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.json +++ /dev/null @@ -1,5 +0,0 @@ -[ - [ - "#commented line" - ] -] diff --git a/test/fixtures/papa-input-with-only-a-commented-line.csv b/test/fixtures/papa-input-with-only-a-commented-line.csv deleted file mode 100644 index 65b570c..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line.csv +++ /dev/null @@ -1 +0,0 @@ -#commented line \ No newline at end of file diff --git a/test/fixtures/papa-input-with-only-a-commented-line.json b/test/fixtures/papa-input-with-only-a-commented-line.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.csv b/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.csv deleted file mode 100644 
index 2395318..0000000 --- a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.csv +++ /dev/null @@ -1,3 +0,0 @@ -a - b -c \ No newline at end of file diff --git a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.json b/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.json deleted file mode 100644 index d4c6aea..0000000 --- a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.json +++ /dev/null @@ -1,11 +0,0 @@ -[ - [ - "a" - ], - [ - " b" - ], - [ - "c" - ] -] diff --git a/test/fixtures/papa-jsonic-blank-line-at-beginning.csv b/test/fixtures/papa-jsonic-blank-line-at-beginning.csv deleted file mode 100644 index 155c206..0000000 --- a/test/fixtures/papa-jsonic-blank-line-at-beginning.csv +++ /dev/null @@ -1,3 +0,0 @@ - -a,b,c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-jsonic-blank-line-at-beginning.json b/test/fixtures/papa-jsonic-blank-line-at-beginning.json deleted file mode 100644 index 3c9bfa4..0000000 --- a/test/fixtures/papa-jsonic-blank-line-at-beginning.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [], - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.csv b/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.csv deleted file mode 100644 index 4b98566..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.json b/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.json deleted file mode 100644 index d7f6c55..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.json +++ /dev/null @@ -1,15 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - " " - ], - [ - "d", - "e", - "f" - ] -] diff --git 
a/test/fixtures/papa-jsonic-blank-line-in-middle.csv b/test/fixtures/papa-jsonic-blank-line-in-middle.csv deleted file mode 100644 index c02e652..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-jsonic-blank-line-in-middle.json b/test/fixtures/papa-jsonic-blank-line-in-middle.json deleted file mode 100644 index 281e7bc..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-jsonic-blank-lines-at-end.csv b/test/fixtures/papa-jsonic-blank-lines-at-end.csv deleted file mode 100644 index e076fcb..0000000 --- a/test/fixtures/papa-jsonic-blank-lines-at-end.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -d,e,f - diff --git a/test/fixtures/papa-jsonic-blank-lines-at-end.json b/test/fixtures/papa-jsonic-blank-lines-at-end.json deleted file mode 100644 index dfbca40..0000000 --- a/test/fixtures/papa-jsonic-blank-lines-at-end.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ], - [] -] diff --git a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.csv b/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.csv deleted file mode 100644 index 9355ec2..0000000 --- a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.csv +++ /dev/null @@ -1 +0,0 @@ -#commented line diff --git a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.json b/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.csv 
b/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.csv deleted file mode 100644 index 30b315b..0000000 --- a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.csv +++ /dev/null @@ -1,7 +0,0 @@ -a -b - - -c -d -e diff --git a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.json b/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.json deleted file mode 100644 index 42ccc45..0000000 --- a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.json +++ /dev/null @@ -1,19 +0,0 @@ -[ - [ - "a" - ], - [ - "b" - ], - [], - [], - [ - "c" - ], - [ - "d" - ], - [ - "e" - ] -] diff --git a/test/fixtures/papa-last-field-of-a-line-is-empty.csv b/test/fixtures/papa-last-field-of-a-line-is-empty.csv deleted file mode 100644 index 81e726d..0000000 --- a/test/fixtures/papa-last-field-of-a-line-is-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b, -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-last-field-of-a-line-is-empty.json b/test/fixtures/papa-last-field-of-a-line-is-empty.json deleted file mode 100644 index bf859ad..0000000 --- a/test/fixtures/papa-last-field-of-a-line-is-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.csv b/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.csv deleted file mode 100644 index 59e9ea6..0000000 --- a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -,e,f -,"h","i" -,"k","l" \ No newline at end of file diff --git a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.json b/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.json deleted file mode 100644 index 6174c3e..0000000 --- 
a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "", - "e", - "f" - ], - [ - "", - "h", - "i" - ], - [ - "", - "k", - "l" - ] -] diff --git a/test/fixtures/papa-line-ends-with-quoted-field.csv b/test/fixtures/papa-line-ends-with-quoted-field.csv deleted file mode 100644 index abc5889..0000000 --- a/test/fixtures/papa-line-ends-with-quoted-field.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -d,e,f -"g","h","i" -"j","k","l" \ No newline at end of file diff --git a/test/fixtures/papa-line-ends-with-quoted-field.json b/test/fixtures/papa-line-ends-with-quoted-field.json deleted file mode 100644 index 815c73e..0000000 --- a/test/fixtures/papa-line-ends-with-quoted-field.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ], - [ - "g", - "h", - "i" - ], - [ - "j", - "k", - "l" - ] -] diff --git a/test/fixtures/papa-line-starts-with-quoted-field.csv b/test/fixtures/papa-line-starts-with-quoted-field.csv deleted file mode 100644 index 35700a8..0000000 --- a/test/fixtures/papa-line-starts-with-quoted-field.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,c -"d",e,f \ No newline at end of file diff --git a/test/fixtures/papa-line-starts-with-quoted-field.json b/test/fixtures/papa-line-starts-with-quoted-field.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-line-starts-with-quoted-field.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-line-starts-with-unquoted-empty-field.csv b/test/fixtures/papa-line-starts-with-unquoted-empty-field.csv deleted file mode 100644 index ac81806..0000000 --- a/test/fixtures/papa-line-starts-with-unquoted-empty-field.csv +++ /dev/null @@ -1,2 +0,0 @@ -,b,c -"d",e,f \ No newline at end of file diff --git a/test/fixtures/papa-line-starts-with-unquoted-empty-field.json 
b/test/fixtures/papa-line-starts-with-unquoted-empty-field.json deleted file mode 100644 index 7079a92..0000000 --- a/test/fixtures/papa-line-starts-with-unquoted-empty-field.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.csv b/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.csv deleted file mode 100644 index 4f93485..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.csv +++ /dev/null @@ -1 +0,0 @@ -A,B "B",C \ No newline at end of file diff --git a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.json b/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.json deleted file mode 100644 index de57929..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B \"B\"", - "C" - ] -] diff --git a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.csv b/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.csv deleted file mode 100644 index 2fc1844..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B",C -D,E",F \ No newline at end of file diff --git a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.json b/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.json deleted file mode 100644 index eeff7d3..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "B\"", - "C" - ], - [ - "D", - "E\"", - "F" - ] -] diff --git a/test/fixtures/papa-multi-character-comment-string.csv b/test/fixtures/papa-multi-character-comment-string.csv deleted file mode 100644 index 149256a..0000000 --- a/test/fixtures/papa-multi-character-comment-string.csv +++ /dev/null @@ -1,3 +0,0 
@@ -a,b,c -=N(Comment) -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-multi-character-comment-string.json b/test/fixtures/papa-multi-character-comment-string.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-multi-character-comment-string.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.csv b/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.csv deleted file mode 100644 index 037f991..0000000 --- a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.csv +++ /dev/null @@ -1 +0,0 @@ -a, b, "c, e", d \ No newline at end of file diff --git a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.json b/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.json deleted file mode 100644 index 441bdb9..0000000 --- a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.json +++ /dev/null @@ -1,8 +0,0 @@ -[ - [ - "a", - "b", - "c, e", - "d" - ] -] diff --git a/test/fixtures/papa-multi-character-delimiter.csv b/test/fixtures/papa-multi-character-delimiter.csv deleted file mode 100644 index 5f2665b..0000000 --- a/test/fixtures/papa-multi-character-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -a, b, c \ No newline at end of file diff --git a/test/fixtures/papa-multi-character-delimiter.json b/test/fixtures/papa-multi-character-delimiter.json deleted file mode 100644 index f47beff..0000000 --- a/test/fixtures/papa-multi-character-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-multiple-consecutive-empty-fields.csv b/test/fixtures/papa-multiple-consecutive-empty-fields.csv deleted file mode 100644 index 6b5f5c6..0000000 --- a/test/fixtures/papa-multiple-consecutive-empty-fields.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,,,c,d -,,e,,,f \ No newline at 
end of file diff --git a/test/fixtures/papa-multiple-consecutive-empty-fields.json b/test/fixtures/papa-multiple-consecutive-empty-fields.json deleted file mode 100644 index f46e882..0000000 --- a/test/fixtures/papa-multiple-consecutive-empty-fields.json +++ /dev/null @@ -1,18 +0,0 @@ -[ - [ - "a", - "b", - "", - "", - "c", - "d" - ], - [ - "", - "", - "e", - "", - "", - "f" - ] -] diff --git a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.csv b/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.csv deleted file mode 100644 index 0fec236..0000000 --- a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.csv +++ /dev/null @@ -1,5 +0,0 @@ -a -b -c -d -e \ No newline at end of file diff --git a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.json b/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.json deleted file mode 100644 index 40c4651..0000000 --- a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "a" - ], - [ - "b" - ], - [ - "c" - ], - [ - "d" - ], - [ - "e" - ] -] diff --git a/test/fixtures/papa-one-row.csv b/test/fixtures/papa-one-row.csv deleted file mode 100644 index 341e344..0000000 --- a/test/fixtures/papa-one-row.csv +++ /dev/null @@ -1 +0,0 @@ -A,b,c \ No newline at end of file diff --git a/test/fixtures/papa-one-row.json b/test/fixtures/papa-one-row.json deleted file mode 100644 index a462c67..0000000 --- a/test/fixtures/papa-one-row.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-other-fields-are-empty.csv b/test/fixtures/papa-other-fields-are-empty.csv deleted file mode 100644 index 528105e..0000000 --- a/test/fixtures/papa-other-fields-are-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,,c -,, \ No newline at end of file diff --git a/test/fixtures/papa-other-fields-are-empty.json b/test/fixtures/papa-other-fields-are-empty.json deleted file mode 100644 index 0490600..0000000 
--- a/test/fixtures/papa-other-fields-are-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "", - "c" - ], - [ - "", - "", - "" - ] -] diff --git a/test/fixtures/papa-pipe-delimiter.csv b/test/fixtures/papa-pipe-delimiter.csv deleted file mode 100644 index 224ccfe..0000000 --- a/test/fixtures/papa-pipe-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a|b|c -d|e|f \ No newline at end of file diff --git a/test/fixtures/papa-pipe-delimiter.json b/test/fixtures/papa-pipe-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-pipe-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.csv b/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.csv deleted file mode 100644 index c5e50ef..0000000 --- a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,"c""c""" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.json b/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.json deleted file mode 100644 index 426ccb4..0000000 --- a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c\"c\"" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.csv b/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.csv deleted file mode 100644 index 2029076..0000000 --- a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,c -d,e,f" \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.json 
b/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.json deleted file mode 100644 index e0176ec..0000000 --- a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - [ - "a", - "b,c\nd,e,f" - ] -] diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer.csv deleted file mode 100644 index 25dc50f..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer.csv +++ /dev/null @@ -1,2 +0,0 @@ -"a,"b,c" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter.csv deleted file mode 100644 index 3e8fdd3..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,"c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-new-line.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-new-line.csv deleted file mode 100644 index 42aef64..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-new-line.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,c -d"e,f,g \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-before-delimiter.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-before-delimiter.csv deleted file mode 100644 index 0a9d31d..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-before-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b"c,d -e,f,g \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-no-closing-quote.csv 
b/test/fixtures/papa-quoted-field-has-no-closing-quote.csv deleted file mode 100644 index c9d316a..0000000 --- a/test/fixtures/papa-quoted-field-has-no-closing-quote.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.csv b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.csv deleted file mode 100644 index b2f34ae..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b",c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.json b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.csv b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.csv deleted file mode 100644 index 7c3866f..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,c -d,e,"f" \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.json b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.csv b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.csv deleted file mode 100644 index 05aca5b..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.csv +++ 
/dev/null @@ -1,2 +0,0 @@ -a,b,"c" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.json b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.csv b/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.csv deleted file mode 100644 index 01d8e62..0000000 --- a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.csv +++ /dev/null @@ -1 +0,0 @@ -"1","cnonce="""",nc=""""","2" \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.json b/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.json deleted file mode 100644 index 9da6a71..0000000 --- a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "1", - "cnonce=\"\",nc=\"\"", - "2" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-delimiter.csv b/test/fixtures/papa-quoted-field-with-delimiter.csv deleted file mode 100644 index 9382cff..0000000 --- a/test/fixtures/papa-quoted-field-with-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,"B,B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-delimiter.json b/test/fixtures/papa-quoted-field-with-delimiter.json deleted file mode 100644 index 2bc5490..0000000 --- a/test/fixtures/papa-quoted-field-with-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B,B", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.csv 
b/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.csv deleted file mode 100644 index fbbb934..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.csv +++ /dev/null @@ -1 +0,0 @@ -A,"""B""",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.json b/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.json deleted file mode 100644 index 3cdf91b..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "\"B\"", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes.csv b/test/fixtures/papa-quoted-field-with-escaped-quotes.csv deleted file mode 100644 index 00dec07..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes.csv +++ /dev/null @@ -1 +0,0 @@ -A,"B""B""B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes.json b/test/fixtures/papa-quoted-field-with-escaped-quotes.json deleted file mode 100644 index 25b324e..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\"B\"B", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.csv b/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.csv deleted file mode 100644 index 96c142a..0000000 --- a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.csv +++ /dev/null @@ -1 +0,0 @@ -A," B ",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.json b/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.json deleted file mode 100644 index 4b42a7c..0000000 --- a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - " B ", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-line-break.csv 
b/test/fixtures/papa-quoted-field-with-line-break.csv deleted file mode 100644 index c98c6ec..0000000 --- a/test/fixtures/papa-quoted-field-with-line-break.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,"B -B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-line-break.json b/test/fixtures/papa-quoted-field-with-line-break.json deleted file mode 100644 index cc396ac..0000000 --- a/test/fixtures/papa-quoted-field-with-line-break.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\nB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-n.csv b/test/fixtures/papa-quoted-field-with-n.csv deleted file mode 100644 index c98c6ec..0000000 --- a/test/fixtures/papa-quoted-field-with-n.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,"B -B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-n.json b/test/fixtures/papa-quoted-field-with-n.json deleted file mode 100644 index cc396ac..0000000 --- a/test/fixtures/papa-quoted-field-with-n.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\nB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.csv b/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.csv deleted file mode 100644 index a72e39d..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,""",""",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.json b/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.json deleted file mode 100644 index d80f7d4..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "\",\"", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.csv b/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.csv deleted file mode 100644 index 4bade34..0000000 --- 
a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,""",",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.json b/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.json deleted file mode 100644 index dc50e4a..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "\",", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.csv b/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.csv deleted file mode 100644 index f0256e9..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,",""",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.json b/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.json deleted file mode 100644 index c174154..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - ",\"", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-r-n.csv b/test/fixtures/papa-quoted-field-with-r-n.csv deleted file mode 100644 index 1ad0a44..0000000 --- a/test/fixtures/papa-quoted-field-with-r-n.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,"B -B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-r-n.json b/test/fixtures/papa-quoted-field-with-r-n.json deleted file mode 100644 index 242a6dc..0000000 --- a/test/fixtures/papa-quoted-field-with-r-n.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\r\nB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-r.csv b/test/fixtures/papa-quoted-field-with-r.csv deleted file mode 100644 index eeb695a..0000000 --- a/test/fixtures/papa-quoted-field-with-r.csv +++ /dev/null @@ -1 +0,0 
@@ -A,"B B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-r.json b/test/fixtures/papa-quoted-field-with-r.json deleted file mode 100644 index d70ec72..0000000 --- a/test/fixtures/papa-quoted-field-with-r.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\rB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.csv b/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.csv deleted file mode 100644 index 1053b10..0000000 --- a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.csv +++ /dev/null @@ -1 +0,0 @@ -A, "B" ,C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.json b/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.json deleted file mode 100644 index 6525263..0000000 --- a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - " \"B\" ", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field.csv b/test/fixtures/papa-quoted-field.csv deleted file mode 100644 index 6eadde4..0000000 --- a/test/fixtures/papa-quoted-field.csv +++ /dev/null @@ -1 +0,0 @@ -A,"B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field.json b/test/fixtures/papa-quoted-field.json deleted file mode 100644 index f860470..0000000 --- a/test/fixtures/papa-quoted-field.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.csv b/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.csv deleted file mode 100644 index b96a397..0000000 --- a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,"c,c -c" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.json 
b/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.json deleted file mode 100644 index e1b467d..0000000 --- a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c,c\nc" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-line-breaks.csv b/test/fixtures/papa-quoted-fields-with-line-breaks.csv deleted file mode 100644 index 88a3644..0000000 --- a/test/fixtures/papa-quoted-fields-with-line-breaks.csv +++ /dev/null @@ -1,4 +0,0 @@ -A,"B -B","C -C -C" \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-line-breaks.json b/test/fixtures/papa-quoted-fields-with-line-breaks.json deleted file mode 100644 index c396f92..0000000 --- a/test/fixtures/papa-quoted-fields-with-line-breaks.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\nB", - "C\nC\nC" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.csv b/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.csv deleted file mode 100644 index 87f53ab..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,"B" ,C,"D" -E,F,"G" ,"H" -Q,W,"E" ,R \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.json b/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.json deleted file mode 100644 index dbcf350..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - [ - "A", - "B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ], - [ - "Q", - "W", - "E", - "R" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.csv deleted file mode 100644 index 
3621fb5..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,"c -" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.json deleted file mode 100644 index a811254..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c\n" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.csv deleted file mode 100644 index e63c960..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,",B" ,C,D -E,F,G,H \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.json deleted file mode 100644 index e5958d5..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - [ - "A", - ",B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.csv deleted file mode 100644 index 3d789d1..0000000 --- 
a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,"B" ,C,D -E,F,"G" ,H \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.json deleted file mode 100644 index 420b102..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - [ - "A", - "B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.csv deleted file mode 100644 index 9e3acdf..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,"D" -E,F,G,"H" -Q,W,E,R \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.json deleted file mode 100644 index dbcf350..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - [ - "A", - "B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ], - [ - "Q", - "W", - "E", - "R" - ] -] diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.csv b/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.csv deleted file mode 100644 index 7573f21..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C -a,b, \ No newline at end of file diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.json 
b/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.json deleted file mode 100644 index 3c5cd20..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - { - "A": "a", - "B": "b", - "C": "" - } -] diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.csv b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.csv deleted file mode 100644 index aa16fa0..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -,b1,c1 -,b2,c2 \ No newline at end of file diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.json b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.json deleted file mode 100644 index bceacae..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "A": "", - "B": "b1", - "C": "c1" - }, - { - "A": "", - "B": "b2", - "C": "c2" - } -] diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.csv b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.csv deleted file mode 100644 index b50700a..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -,b1,c1 -a2,b2,c2 \ No newline at end of file diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.json b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.json deleted file mode 100644 index 433465c..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "A", - "B", - "C" - ], - [ - "", - "b1", - "c1" - ], - [ - "a2", - "b2", - "c2" - ] -] diff 
--git a/test/fixtures/papa-row-with-too-few-fields.csv b/test/fixtures/papa-row-with-too-few-fields.csv deleted file mode 100644 index 8b51576..0000000 --- a/test/fixtures/papa-row-with-too-few-fields.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C -a,b \ No newline at end of file diff --git a/test/fixtures/papa-row-with-too-many-fields.csv b/test/fixtures/papa-row-with-too-many-fields.csv deleted file mode 100644 index 4f38565..0000000 --- a/test/fixtures/papa-row-with-too-many-fields.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -a,b,c,d,e -f,g,h \ No newline at end of file diff --git a/test/fixtures/papa-single-quote-as-quote-character.csv b/test/fixtures/papa-single-quote-as-quote-character.csv deleted file mode 100644 index 01ec509..0000000 --- a/test/fixtures/papa-single-quote-as-quote-character.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,'c,d' \ No newline at end of file diff --git a/test/fixtures/papa-single-quote-as-quote-character.json b/test/fixtures/papa-single-quote-as-quote-character.json deleted file mode 100644 index 948722a..0000000 --- a/test/fixtures/papa-single-quote-as-quote-character.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c,d" - ] -] diff --git a/test/fixtures/papa-skip-empty-lines-with-empty-input.csv b/test/fixtures/papa-skip-empty-lines-with-empty-input.csv deleted file mode 100644 index e69de29..0000000 diff --git a/test/fixtures/papa-skip-empty-lines-with-empty-input.json b/test/fixtures/papa-skip-empty-lines-with-empty-input.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-empty-input.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.csv b/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.csv deleted file mode 100644 index 5e73edc..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.csv +++ /dev/null @@ -1,2 +0,0 @@ - -a,b,c \ No newline at end of file diff --git 
a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.json b/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.json deleted file mode 100644 index 1c206a1..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - [ - " " - ], - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.csv b/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.csv deleted file mode 100644 index 29a42cc..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f diff --git a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.json b/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-skip-empty-lines.csv b/test/fixtures/papa-skip-empty-lines.csv deleted file mode 100644 index 68eacbe..0000000 --- a/test/fixtures/papa-skip-empty-lines.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-skip-empty-lines.json b/test/fixtures/papa-skip-empty-lines.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-skip-empty-lines.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-tab-delimiter.csv b/test/fixtures/papa-tab-delimiter.csv deleted file mode 100644 index 34caaac..0000000 --- a/test/fixtures/papa-tab-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a b c -d e f \ No newline at end of file diff --git a/test/fixtures/papa-tab-delimiter.json b/test/fixtures/papa-tab-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- 
a/test/fixtures/papa-tab-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.csv b/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.csv deleted file mode 100644 index 3d5f757..0000000 --- a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.csv +++ /dev/null @@ -1,4 +0,0 @@ -#comment1 -#comment2 -#comment3 -a,b,c \ No newline at end of file diff --git a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.json b/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.json deleted file mode 100644 index f47beff..0000000 --- a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-three-rows.csv b/test/fixtures/papa-three-rows.csv deleted file mode 100644 index a7c9397..0000000 --- a/test/fixtures/papa-three-rows.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,b,c -d,E,f -G,h,i \ No newline at end of file diff --git a/test/fixtures/papa-three-rows.json b/test/fixtures/papa-three-rows.json deleted file mode 100644 index ce1c97e..0000000 --- a/test/fixtures/papa-three-rows.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ], - [ - "G", - "h", - "i" - ] -] diff --git a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.csv b/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.csv deleted file mode 100644 index 9d0af26..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -#comment1 -#comment2 \ No newline at end of file diff --git a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.json b/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.json deleted file mode 100644 
index f47beff..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-two-comment-lines-consecutively.csv b/test/fixtures/papa-two-comment-lines-consecutively.csv deleted file mode 100644 index 2d63911..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -#comment1 -#comment2 -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-two-comment-lines-consecutively.json b/test/fixtures/papa-two-comment-lines-consecutively.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-two-rows-just-r.csv b/test/fixtures/papa-two-rows-just-r.csv deleted file mode 100644 index da7eef8..0000000 --- a/test/fixtures/papa-two-rows-just-r.csv +++ /dev/null @@ -1 +0,0 @@ -A,b,c d,E,f \ No newline at end of file diff --git a/test/fixtures/papa-two-rows-just-r.json b/test/fixtures/papa-two-rows-just-r.json deleted file mode 100644 index 142ca69..0000000 --- a/test/fixtures/papa-two-rows-just-r.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ] -] diff --git a/test/fixtures/papa-two-rows-r-n.csv b/test/fixtures/papa-two-rows-r-n.csv deleted file mode 100644 index 5a60fa0..0000000 --- a/test/fixtures/papa-two-rows-r-n.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,b,c -d,E,f \ No newline at end of file diff --git a/test/fixtures/papa-two-rows-r-n.json b/test/fixtures/papa-two-rows-r-n.json deleted file mode 100644 index 142ca69..0000000 --- a/test/fixtures/papa-two-rows-r-n.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ] -] diff --git a/test/fixtures/papa-two-rows.csv b/test/fixtures/papa-two-rows.csv deleted file mode 100644 index 
ea46d36..0000000 --- a/test/fixtures/papa-two-rows.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,b,c -d,E,f \ No newline at end of file diff --git a/test/fixtures/papa-two-rows.json b/test/fixtures/papa-two-rows.json deleted file mode 100644 index 142ca69..0000000 --- a/test/fixtures/papa-two-rows.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ] -] diff --git a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.csv b/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.csv deleted file mode 100644 index a420a04..0000000 --- a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.csv +++ /dev/null @@ -1 +0,0 @@ -A,B",C \ No newline at end of file diff --git a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.json b/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.json deleted file mode 100644 index b1ca572..0000000 --- a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\"", - "C" - ] -] diff --git a/test/fixtures/papa-using-n-endings-uses-n-linebreak.csv b/test/fixtures/papa-using-n-endings-uses-n-linebreak.csv deleted file mode 100644 index 2e11fcd..0000000 --- a/test/fixtures/papa-using-n-endings-uses-n-linebreak.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-n-endings-uses-n-linebreak.json b/test/fixtures/papa-using-n-endings-uses-n-linebreak.json deleted file mode 100644 index c670851..0000000 --- a/test/fixtures/papa-using-n-endings-uses-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.csv b/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.csv deleted file mode 100644 index abd33f3..0000000 --- 
a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.json b/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.json deleted file mode 100644 index 8e09d82..0000000 --- a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\r\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.csv b/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.csv deleted file mode 100644 index 3a6870e..0000000 --- a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.json deleted file mode 100644 index c670851..0000000 --- a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.csv b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.csv deleted file mode 100644 index 607dafe..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.json deleted file mode 100644 index 
fed2e68..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.csv b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.csv deleted file mode 100644 index a6be125..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.json deleted file mode 100644 index fed2e68..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.csv b/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.csv deleted file mode 100644 index 2c0ca0b..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.json deleted file mode 100644 index 8e09d82..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 
+0,0 @@ -[ - [ - "a\r\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.csv b/test/fixtures/papa-using-reserved-regex-character-as-quote-character.csv deleted file mode 100644 index a43df71..0000000 --- a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.csv +++ /dev/null @@ -1,6 +0,0 @@ -|a -a|,b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.json b/test/fixtures/papa-using-reserved-regex-character-as-quote-character.json deleted file mode 100644 index fed2e68..0000000 --- a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.csv b/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.csv deleted file mode 100644 index 64954bc..0000000 --- a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.csv +++ /dev/null @@ -1 +0,0 @@ -a, b ,c \ No newline at end of file diff --git a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.json b/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.json deleted file mode 100644 index a165586..0000000 --- a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "\tb ", - "c" - ] -] diff --git a/test/fixtures/pipe-separator.csv b/test/fixtures/pipe-separator.csv deleted file mode 100644 index e8d53ce..0000000 --- a/test/fixtures/pipe-separator.csv +++ /dev/null @@ -1,3 +0,0 @@ -a|b|c -A|B|C -AA|BB|CC diff --git a/test/fixtures/pipe-separator.json b/test/fixtures/pipe-separator.json deleted file mode 100644 index f4668e4..0000000 --- a/test/fixtures/pipe-separator.json +++ /dev/null @@ -1 
+0,0 @@ -[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/quote.csv b/test/fixtures/quote.csv deleted file mode 100644 index 57a9c05..0000000 --- a/test/fixtures/quote.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -"1","B","true" -"2","B""B","false" diff --git a/test/fixtures/quote.json b/test/fixtures/quote.json deleted file mode 100644 index da71f01..0000000 --- a/test/fixtures/quote.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "a": "1", - "b": "B", - "c": "true" - }, - { - "a": "2", - "b": "B\"B", - "c": "false" - } -] diff --git a/test/fixtures/quoted-escape.csv b/test/fixtures/quoted-escape.csv deleted file mode 100644 index 8c30b29..0000000 --- a/test/fixtures/quoted-escape.csv +++ /dev/null @@ -1,6 +0,0 @@ -a -"""b" -"b""" -"""b""" -"b""c" -"b""c""d" diff --git a/test/fixtures/quoted-escape.json b/test/fixtures/quoted-escape.json deleted file mode 100644 index 0d2bdd2..0000000 --- a/test/fixtures/quoted-escape.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"\"b"},{"a":"b\""},{"a":"\"b\""},{"a":"b\"c"},{"a":"b\"c\"d"}] diff --git a/test/fixtures/quoted-newline.csv b/test/fixtures/quoted-newline.csv deleted file mode 100644 index 734a55c..0000000 --- a/test/fixtures/quoted-newline.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -"line1 -line2",simple -"hello","world -!" 
diff --git a/test/fixtures/quoted-newline.json b/test/fixtures/quoted-newline.json deleted file mode 100644 index c3c8894..0000000 --- a/test/fixtures/quoted-newline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"line1\nline2","b":"simple"},{"a":"hello","b":"world\n!"}] diff --git a/test/fixtures/quoted-simple.csv b/test/fixtures/quoted-simple.csv deleted file mode 100644 index bc880e6..0000000 --- a/test/fixtures/quoted-simple.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -"hello","world" -"foo",bar diff --git a/test/fixtures/quoted-simple.json b/test/fixtures/quoted-simple.json deleted file mode 100644 index 031e677..0000000 --- a/test/fixtures/quoted-simple.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"hello","b":"world"},{"a":"foo","b":"bar"}] diff --git a/test/fixtures/record-separator.csv b/test/fixtures/record-separator.csv deleted file mode 100644 index 830e627..0000000 --- a/test/fixtures/record-separator.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,c%A,B,C%AA,BB,CC \ No newline at end of file diff --git a/test/fixtures/record-separator.json b/test/fixtures/record-separator.json deleted file mode 100644 index f4668e4..0000000 --- a/test/fixtures/record-separator.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/trailing-newline.csv b/test/fixtures/trailing-newline.csv deleted file mode 100644 index 9255cff..0000000 --- a/test/fixtures/trailing-newline.csv +++ /dev/null @@ -1,3 +0,0 @@ -a -1 -2 diff --git a/test/fixtures/trailing-newline.json b/test/fixtures/trailing-newline.json deleted file mode 100644 index 94d1615..0000000 --- a/test/fixtures/trailing-newline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":"2"}] diff --git a/test/fixtures/trim.csv b/test/fixtures/trim.csv deleted file mode 100644 index 7998755..0000000 --- a/test/fixtures/trim.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b - hello , world - foo ,bar diff --git a/test/fixtures/trim.json b/test/fixtures/trim.json deleted file mode 100644 index 
f8f2ee7..0000000 --- a/test/fixtures/trim.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "a": "1", - "b": "2", - "c": "3" - }, - { - "a": "11", - "b": "22", - "c": "33" - }, - { - "a": "4", - "b": "5", - "c": "6" - }, - { - "a": "44", - "b": "55", - "c": "66" - } -] diff --git a/test/fixtures/value.csv b/test/fixtures/value.csv deleted file mode 100644 index 97c34a5..0000000 --- a/test/fixtures/value.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -true,false,null -hello,true,1 diff --git a/test/fixtures/value.json b/test/fixtures/value.json deleted file mode 100644 index 22ed72b..0000000 --- a/test/fixtures/value.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":true,"b":false,"c":null},{"a":"hello","b":true,"c":"1"}] diff --git a/test/quick.js b/test/quick.js index 63047f3..490e3f1 100644 --- a/test/quick.js +++ b/test/quick.js @@ -1,136 +1,12 @@ -const { Jsonic } = require('@jsonic/jsonic-next') -const { Debug } = require('@jsonic/jsonic-next/debug') -const { Csv } = require('..') +const { Jsonic } = require('jsonic') +const { Xml } = require('../dist/xml') -const tlog = [] - -// const c0 = Jsonic.make() -// .use(Debug,{trace:true}) -// .use(Csv,{comment:true,object:false,header:false}) - -// const u0 = Jsonic.make() -// // .use(Debug,{trace:true}) -// .use(Csv,{ -// strict:false, -// }) - -const csv = Jsonic.make() - .use(Debug, { trace: true }) - .use(Csv, { - // line: {empty:true}, - // header: false, - // object: false, - // trim: true, - // value: true, - // comment: true, - // record: { empty: true } - }) - // .sub({lex:(t)=>console.log(t)}) - .sub({ lex: (t) => tlog.push(t) }) - -// console.log(csv.options.tokenSet) -// console.log(csv.internal().config.lex.match) - -// console.log(csv(`a,b -// 1,2,`,{xlog:-1})) +const xml = Jsonic.make().use(Xml) console.log( - csv( - `a -,1`, - { xlog: -1 }, + JSON.stringify( + xml('hello'), + null, + 2, ), ) - -// console.log(csv(`a,b -// 1, 2 -// 11 ,{22 -// 3 3, "a" -// `,{xlog:-1})) - -// console.log(csv(`a,b -// 1,2 -// 3,"x""y" -// 
4,5 -// `,{xlog:-1})) - -// console.log(csv(`a,b -// 1, 2 3 -// 4, 5 6 -// 7, 8 9 -// 10, 11 12 13 -// `,{xlog:-1})) - -// const u0 = Jsonic.make() -// .use(Debug,{trace:true}) -// .use(Csv, {strict:false}) - -// console.dir(u0(`a,b -// 1 , 2 -// `),{depth:null}) - -// console.dir(u0(`a,b,c -// true,[1,2],{x:{y:"q\\"w"}} -// null,'Q\\r\\nA',1e2 -// `),{depth:null}) - -// console.log(c0(`a,b,c -// 1 , 2 , 3 -// 11 , 22 , 33 -// 4\t,\t5\t,\t6 -// \t44\t,\t\t55\t\t\t,\t6\t -// `)) - -// console.log(c0(`a,b,c,d,e,f -// 1 ,2 , 3 ,4 5 , 6 7,8 9 0 -// `)) - -// console.log(c0(`a,b -// "x"y,z`)) - -// console.log(u0(`a -// b `)) - -// console.log(c0(` -// 1`)) - -// console.log(c0('')) - -// console.log(c0(`#foo -// #bar -// 1,2 -// #a -// #b - -// 3,4 - -// #c - -// `)) - -// console.log(csv(`a,b -// A,B -// #X - -// AA,BB`)) - -// console.log(csv(` -// #X -// #XX -// a,b -// #Y -// #YY -// A,B -// #Z -// #ZZ -// `)) - -// console.log(csv('\n')) - -// console.log(csv('a,b\nA,"""B"')) - -// console.log(csv('true')) - -// console.log(csv('\na\n')) - -// console.log(tlog) diff --git a/test/spec/attributes.tsv b/test/spec/attributes.tsv new file mode 100644 index 0000000..7e39629 --- /dev/null +++ b/test/spec/attributes.tsv @@ -0,0 +1,18 @@ +# name input expected opts +# Attribute handling: double quotes, single quotes, entities, spacing, +# mixed names. See basic.tsv for the spec header / escape rules. 
+ +attr-one {"name":"a","localName":"a","attributes":{"x":"1"},"children":[]} +attr-two {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} +attr-with-text text {"name":"a","localName":"a","attributes":{"x":"hello world"},"children":["text"]} +attr-single-quote {"name":"a","localName":"a","attributes":{"x":"value"},"children":[]} +attr-single-quote-with-dq {"name":"a","localName":"a","attributes":{"x":"says \"hi\""},"children":[]} +attr-extra-spaces {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} +attr-newlines {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} +attr-name-dash {"name":"a","localName":"a","attributes":{"data-x":"1"},"children":[]} +attr-name-dot {"name":"a","localName":"a","attributes":{"v.2":"ok"},"children":[]} +attr-empty-value {"name":"a","localName":"a","attributes":{"x":""},"children":[]} +attr-mixed-quotes {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} +attr-tab-normalised {"name":"a","localName":"a","attributes":{"x":"line1 line2"},"children":[]} +attr-newline-normalised {"name":"a","localName":"a","attributes":{"x":"line1 line2"},"children":[]} +attr-crlf-normalised {"name":"a","localName":"a","attributes":{"x":"line1 line2"},"children":[]} diff --git a/test/spec/basic.tsv b/test/spec/basic.tsv new file mode 100644 index 0000000..ef11529 --- /dev/null +++ b/test/spec/basic.tsv @@ -0,0 +1,31 @@ +# name input expected opts +# ----------------------------------------------------------------------------- +# Each row is one XML parse test. +# name - unique test identifier +# input - XML source. Escapes: \n (LF) \r (CR) \t (TAB) \\ (backslash) +# expected - JSON encoding of the parsed result, OR the literal token +# "ERROR" (optionally followed by ":code") to assert a parse +# error. The JSON is parsed as-is (standard JSON escapes). +# opts - optional JSON object of plugin options; empty for defaults. 
+# ----------------------------------------------------------------------------- + +empty-element {"name":"a","localName":"a","attributes":{},"children":[]} +self-closing {"name":"a","localName":"a","attributes":{},"children":[]} +self-closing-space
{"name":"br","localName":"br","attributes":{},"children":[]} +text-simple
hello {"name":"a","localName":"a","attributes":{},"children":["hello"]} +text-whitespace hello world {"name":"greet","localName":"greet","attributes":{},"children":["hello world"]} +text-only-spaces

hello world

{"name":"p","localName":"p","attributes":{},"children":[" hello world "]} +nested-empty {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[]}]} +nested-text x {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":["x"]}]} +deeply-nested x {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[{"name":"c","localName":"c","attributes":{},"children":["x"]}]}]} +multiple-children-selfclose {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[]},{"name":"c","localName":"c","attributes":{},"children":[]}]} +multiple-children-text 12 {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":["1"]},{"name":"c","localName":"c","attributes":{},"children":["2"]}]} +mixed-content helloinnerworld {"name":"a","localName":"a","attributes":{},"children":["hello",{"name":"b","localName":"b","attributes":{},"children":["inner"]},"world"]} +multiline \n 1\n 2\n {"name":"root","localName":"root","attributes":{},"children":["\n ",{"name":"a","localName":"a","attributes":{},"children":["1"]},"\n ",{"name":"b","localName":"b","attributes":{},"children":["2"]},"\n"]} +tag-name-dash x {"name":"a-b","localName":"a-b","attributes":{},"children":["x"]} +tag-name-dot x {"name":"a.b","localName":"a.b","attributes":{},"children":["x"]} +tag-name-underscore x {"name":"a_b","localName":"a_b","attributes":{},"children":["x"]} +tag-name-unicode-thai <เจมส์>x {"name":"เจมส์","localName":"เจมส์","attributes":{},"children":["x"]} +tag-name-unicode-greek <Ωmega>x {"name":"Ωmega","localName":"Ωmega","attributes":{},"children":["x"]} +text-crlf-normalised line1\r\nline2 {"name":"a","localName":"a","attributes":{},"children":["line1\nline2"]} +text-cr-normalised line1\rline2 
{"name":"a","localName":"a","attributes":{},"children":["line1\nline2"]} diff --git a/test/spec/dtd-attlist.tsv b/test/spec/dtd-attlist.tsv new file mode 100644 index 0000000..f1ef44e --- /dev/null +++ b/test/spec/dtd-attlist.tsv @@ -0,0 +1,16 @@ +# name input expected opts +# DOCTYPE-supplied default attribute values via . The +# plugin parses every declaration in the internal subset +# and, when an element instance does not carry an attribute, fills +# in the declared default value. +# +# #REQUIRED and #IMPLIED contribute nothing because they have no +# default value; #FIXED "value" and bare quoted defaults are honoured. + +attlist-basic-default ]> {"name":"doc","localName":"doc","attributes":{"x":"default"},"children":[]} +attlist-default-overridden ]> {"name":"doc","localName":"doc","attributes":{"x":"custom"},"children":[]} +attlist-multiple-defaults ]> {"name":"doc","localName":"doc","attributes":{"a":"A","b":"B"},"children":[]} +attlist-fixed ]> {"name":"doc","localName":"doc","attributes":{"lang":"en"},"children":[]} +attlist-enumeration-default ]> {"name":"doc","localName":"doc","attributes":{"x":"b"},"children":[]} +attlist-required-and-implied-no-default ]> {"name":"doc","localName":"doc","attributes":{"y":"Y"},"children":[]} +attlist-applies-per-element ]> {"name":"root","localName":"root","attributes":{},"children":[{"name":"a","localName":"a","attributes":{"x":"AX"},"children":[]},{"name":"b","localName":"b","attributes":{"x":"BX"},"children":[]}]} diff --git a/test/spec/dtd-entities.tsv b/test/spec/dtd-entities.tsv new file mode 100644 index 0000000..f7e0c4e --- /dev/null +++ b/test/spec/dtd-entities.tsv @@ -0,0 +1,16 @@ +# name input expected opts +# DOCTYPE-declared general internal entities. The plugin parses +# `` declarations from the internal subset, +# stores them per-parse, and uses them to resolve `&name;` in text +# and attribute values (recursively, with cycle detection). 
+ +dtd-entity-basic-text ]>&foo; {"name":"doc","localName":"doc","attributes":{},"children":["bar"]} +dtd-entity-mixed-text ]>hello &x;! {"name":"doc","localName":"doc","attributes":{},"children":["hello world!"]} +dtd-entity-in-attribute ]> {"name":"doc","localName":"doc","attributes":{"a":"hello world"},"children":[]} +dtd-entity-single-quoted ]>&x; {"name":"doc","localName":"doc","attributes":{},"children":["plain"]} +dtd-entity-numeric-ref-in-value ]>&x; {"name":"doc","localName":"doc","attributes":{},"children":["A"]} +dtd-entity-recursive ]>&b; {"name":"d","localName":"d","attributes":{},"children":["BBAAABB"]} +dtd-entity-multi-decl ]>&a;&b;&c; {"name":"d","localName":"d","attributes":{},"children":["123"]} +dtd-entity-parameter-ignored ]>&g; {"name":"d","localName":"d","attributes":{},"children":["G"]} +dtd-entity-external-ignored ]>&g; {"name":"d","localName":"d","attributes":{},"children":["ok"]} +dtd-entity-predefined-overrides ]>& {"name":"d","localName":"d","attributes":{},"children":["&"]} diff --git a/test/spec/entities.tsv b/test/spec/entities.tsv new file mode 100644 index 0000000..7e6aa97 --- /dev/null +++ b/test/spec/entities.tsv @@ -0,0 +1,22 @@ +# name input expected opts +# Entity references: predefined (amp lt gt quot apos), numeric (decimal and +# hex), unknown (passed through), and user-supplied custom entities. 
+ +pre-amp & {"name":"a","localName":"a","attributes":{},"children":["&"]} +pre-lt < {"name":"a","localName":"a","attributes":{},"children":["<"]} +pre-gt > {"name":"a","localName":"a","attributes":{},"children":[">"]} +pre-quot " {"name":"a","localName":"a","attributes":{},"children":["\""]} +pre-apos ' {"name":"a","localName":"a","attributes":{},"children":["'"]} +pre-all-in-one &<>"' {"name":"a","localName":"a","attributes":{},"children":["&<>\"'"]} +pre-in-text Tom & Jerry {"name":"a","localName":"a","attributes":{},"children":["Tom & Jerry"]} +num-dec-single A {"name":"a","localName":"a","attributes":{},"children":["A"]} +num-dec-multi AB {"name":"a","localName":"a","attributes":{},"children":["AB"]} +num-hex-single A {"name":"a","localName":"a","attributes":{},"children":["A"]} +num-hex-multi AB {"name":"a","localName":"a","attributes":{},"children":["AB"]} +num-hex-astral 😀 {"name":"a","localName":"a","attributes":{},"children":["\uD83D\uDE00"]} +entity-in-attr {"name":"a","localName":"a","attributes":{"title":"Tom & Jerry"},"children":[]} +num-in-attr {"name":"a","localName":"a","attributes":{"v":"AB"},"children":[]} +unknown-rejected &unknown; ERROR:undeclared_entity +unknown-passthrough-lenient &unknown; {"name":"a","localName":"a","attributes":{},"children":["&unknown;"]} {"strictEntities":false} +custom-entity © 2025 all rights {"name":"a","localName":"a","attributes":{},"children":["© 2025\u00a0all rights"]} {"customEntities":{"nbsp":"\u00a0","copy":"©"}} +entities-disabled & {"name":"a","localName":"a","attributes":{},"children":["&"]} {"entities":false} diff --git a/test/spec/errors.tsv b/test/spec/errors.tsv new file mode 100644 index 0000000..e6dee36 --- /dev/null +++ b/test/spec/errors.tsv @@ -0,0 +1,16 @@ +# name input expected opts +# Inputs that must raise a parse error. `expected` uses the literal token +# "ERROR" (optionally ":code" for a specific error code) to indicate that +# parsing must fail. 
+ +mismatched-close ERROR:xml_mismatched_tag +unterminated-comment {"name":"a","localName":"a","attributes":{},"children":[]} +comment-in-element hello {"name":"a","localName":"a","attributes":{},"children":["hello"]} +comment-around-child {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[]}]} +pi-xml-decl {"name":"a","localName":"a","attributes":{},"children":[]} +pi-xml-stylesheet {"name":"root","localName":"root","attributes":{},"children":[]} +doctype-simple {"name":"html","localName":"html","attributes":{},"children":[]} +doctype-system hi {"name":"note","localName":"note","attributes":{},"children":[{"name":"body","localName":"body","attributes":{},"children":["hi"]}]} +doctype-internal-subset ]> {"name":"a","localName":"a","attributes":{},"children":[]} +cdata-basic & raw text]]> {"name":"a","localName":"a","attributes":{},"children":[" & raw text"]} +cdata-with-newlines {"name":"a","localName":"a","attributes":{},"children":["line1\nline2"]} +cdata-no-entity-decode {"name":"a","localName":"a","attributes":{},"children":["&"]} diff --git a/test/spec/w3c.tsv b/test/spec/w3c.tsv new file mode 100644 index 0000000..f167342 --- /dev/null +++ b/test/spec/w3c.tsv @@ -0,0 +1,20 @@ +# name input expected opts +# Standardised / real-world XML parse cases. Inputs include examples taken +# or adapted from W3C conformance documents (XML 1.0 appendix, XHTML, +# Atom, SVG, SOAP, RSS) and canonical "not well-formed" counterexamples. +# Document structure is verified; full specification conformance is not. + +xml-decl-basic {"name":"doc","localName":"doc","attributes":{},"children":[]} +xml-decl-standalone {"name":"doc","localName":"doc","attributes":{},"children":[]} +xmltest-valid-001 \nHello, World! 
{"name":"doc","localName":"doc","attributes":{},"children":["Hello, World!"]} +xmltest-valid-attr {"name":"doc","localName":"doc","attributes":{"attr1":"value1","attr2":"value2"},"children":[]} +xmltest-valid-nested text {"name":"doc","localName":"doc","attributes":{},"children":[{"name":"child1","localName":"child1","attributes":{},"children":[]},{"name":"child2","localName":"child2","attributes":{},"children":[{"name":"nested","localName":"nested","attributes":{},"children":["text"]}]}]} +atom-entry Exampleurn:uuid:1 {"name":"entry","localName":"entry","namespace":"http://www.w3.org/2005/Atom","attributes":{"xmlns":"http://www.w3.org/2005/Atom"},"children":[{"name":"title","localName":"title","namespace":"http://www.w3.org/2005/Atom","attributes":{},"children":["Example"]},{"name":"id","localName":"id","namespace":"http://www.w3.org/2005/Atom","attributes":{},"children":["urn:uuid:1"]}]} +soap-envelope Apples {"name":"soap:Envelope","prefix":"soap","localName":"Envelope","namespace":"http://schemas.xmlsoap.org/soap/envelope/","attributes":{"xmlns:soap":"http://schemas.xmlsoap.org/soap/envelope/"},"children":[{"name":"soap:Body","prefix":"soap","localName":"Body","namespace":"http://schemas.xmlsoap.org/soap/envelope/","attributes":{},"children":[{"name":"m:GetPrice","prefix":"m","localName":"GetPrice","namespace":"https://example.com","attributes":{"xmlns:m":"https://example.com"},"children":[{"name":"m:Item","prefix":"m","localName":"Item","namespace":"https://example.com","attributes":{},"children":["Apples"]}]}]}]} +svg-rect {"name":"svg","localName":"svg","namespace":"http://www.w3.org/2000/svg","attributes":{"xmlns":"http://www.w3.org/2000/svg"},"children":[{"name":"rect","localName":"rect","namespace":"http://www.w3.org/2000/svg","attributes":{"x":"0","y":"0","width":"10","height":"10"},"children":[]}]} +rss-channel Examplehttps://e.example/ 
{"name":"rss","localName":"rss","attributes":{"version":"2.0"},"children":[{"name":"channel","localName":"channel","attributes":{},"children":[{"name":"title","localName":"title","attributes":{},"children":["Example"]},{"name":"link","localName":"link","attributes":{},"children":["https://e.example/"]}]}]} +xhtml-paragraph

Hello bold world.

{"name":"p","localName":"p","attributes":{"class":"greeting"},"children":["Hello ",{"name":"em","localName":"em","attributes":{},"children":["bold"]}," world."]} +notes-document \n\nToveJaniDon't forget me & cheers {"name":"note","localName":"note","attributes":{},"children":[{"name":"to","localName":"to","attributes":{},"children":["Tove"]},{"name":"from","localName":"from","attributes":{},"children":["Jani"]},{"name":"body","localName":"body","attributes":{},"children":["Don't forget me & cheers"]}]} +not-wf-no-close ERROR +not-wf-unclosed-nested ERROR +not-wf-stray-close ERROR diff --git a/test/spec/xmlspace-lang.tsv b/test/spec/xmlspace-lang.tsv new file mode 100644 index 0000000..fd46c5c --- /dev/null +++ b/test/spec/xmlspace-lang.tsv @@ -0,0 +1,11 @@ +# name input expected opts +# xml:space and xml:lang are special attributes (XML 1.0 §2.10 / §2.12) +# whose values are inherited down the tree. The plugin annotates each +# element with `space` (when not the default) and `lang` (when set). 
+ +xml-space-preserve hi {"name":"a","localName":"a","attributes":{"xml:space":"preserve"},"space":"preserve","children":[" hi "]} +xml-space-default-not-annotated x {"name":"a","localName":"a","attributes":{"xml:space":"default"},"children":["x"]} +xml-space-inherited x {"name":"a","localName":"a","attributes":{"xml:space":"preserve"},"space":"preserve","children":[{"name":"b","localName":"b","attributes":{},"space":"preserve","children":["x"]}]} +xml-lang-set hi {"name":"a","localName":"a","attributes":{"xml:lang":"en"},"lang":"en","children":["hi"]} +xml-lang-inherited bonjour {"name":"a","localName":"a","attributes":{"xml:lang":"fr"},"lang":"fr","children":[{"name":"b","localName":"b","attributes":{},"lang":"fr","children":["bonjour"]}]} +xml-lang-overridden {"name":"a","localName":"a","attributes":{"xml:lang":"en"},"lang":"en","children":[{"name":"b","localName":"b","attributes":{"xml:lang":"fr"},"lang":"fr","children":[]}]} diff --git a/test/xml.test.ts b/test/xml.test.ts new file mode 100644 index 0000000..14c9761 --- /dev/null +++ b/test/xml.test.ts @@ -0,0 +1,297 @@ +/* Copyright (c) 2021-2025 Richard Rodger and other contributors, MIT License */ + +import { describe, test } from 'node:test' +import assert from 'node:assert' +import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs' +import { join } from 'node:path' + +import { Jsonic } from 'jsonic' +import { Xml, decodeBOM } from '../dist/xml' + +// --------------------------------------------------------------------------- +// Shared TSV spec runner +// +// Test cases are defined in tab-separated value files under test/spec/*.tsv. +// Each non-comment row is: +// nameinputexpectedopts +// - `input` uses the escape set \n \r \t \\ +// - `expected` is raw JSON (standard JSON escapes apply) or the literal +// token ERROR / ERROR:code for expected parse failures. +// - `opts` is optional JSON for plugin options. +// The same files drive the Go test suite in go/xml_test.go. 
+// ---------------------------------------------------------------------------
+
+// At runtime this test file is loaded from `dist-test/`, so hop up one
+// level to reach the shared spec directory in the project root.
+const specDir = join(__dirname, '..', 'test', 'spec')
+
+// One data row of a spec file. `file` and `line` (1-based) are kept so
+// assertion messages can point back at the TSV source. `input` is already
+// unescaped; `expected` and `opts` are kept as raw text and parsed later.
+type SpecRow = {
+  file: string
+  line: number
+  name: string
+  input: string
+  expected: string
+  opts: string
+}
+
+// Read `file` from the spec directory and return its data rows.
+// Empty lines and lines starting with `#` are skipped. Throws an Error
+// naming the file and line when a row has fewer than three columns.
+function loadSpec(file: string): SpecRow[] {
+  const path = join(specDir, file)
+  const body = readFileSync(path, 'utf8')
+  const rows: SpecRow[] = []
+  // Accept both LF and CRLF line endings. Splitting on '\n' alone leaves a
+  // trailing '\r' on the last column of every row when the fixtures are
+  // checked out with CRLF conversion, which breaks `ERROR:code` matching
+  // and makes blank lines look like malformed rows.
+  const lines = body.split(/\r?\n/)
+  for (let i = 0; i < lines.length; i++) {
+    const raw = lines[i]
+    if (raw === '' || raw.startsWith('#')) continue
+    const cols = raw.split('\t')
+    if (cols.length < 3) {
+      throw new Error(`${file}:${i + 1}: expected >=3 tab-separated columns`)
+    }
+    rows.push({
+      file,
+      line: i + 1,
+      name: cols[0],
+      input: unescapeInput(cols[1]),
+      expected: cols[2],
+      // The fourth column is optional; an absent column means "no options".
+      opts: cols[3] ?? '',
+    })
+  }
+  return rows
+}
+
+// Decode the escape sequences used in the spec `input` column. Keeps
+// the behaviour identical to the Go loader so the two language test
+// suites exercise the exact same XML text.
+// Recognised escapes are \n, \r, \t and \\; a backslash followed by any
+// other character, or a backslash at the very end, is copied through
+// unchanged.
+function unescapeInput(s: string): string {
+  // Fast path: nothing to decode.
+  if (!s.includes('\\')) return s
+  let out = ''
+  for (let i = 0; i < s.length; i++) {
+    const c = s[i]
+    if (c === '\\' && i + 1 < s.length) {
+      const n = s[i + 1]
+      // Each recognised escape consumes two input characters (hence i++).
+      if (n === 'n') { out += '\n'; i++; continue }
+      if (n === 'r') { out += '\r'; i++; continue }
+      if (n === 't') { out += '\t'; i++; continue }
+      if (n === '\\') { out += '\\'; i++; continue }
+    }
+    out += c
+  }
+  return out
+}
+
+// Register one `describe` group for `file`, with one test per spec row.
+// Rows whose expected column starts with ERROR must make the parser throw;
+// all other rows must parse to a value structurally equal to the expected
+// JSON.
+function runSpec(file: string) {
+  const rows = loadSpec(file)
+  describe(file, () => {
+    for (const row of rows) {
+      test(row.name, () => {
+        const opts = row.opts.trim() === '' ? undefined : JSON.parse(row.opts)
+        const jx = opts ? Jsonic.make().use(Xml, opts) : Jsonic.make().use(Xml)
+
+        if (row.expected.startsWith('ERROR')) {
+          // `ERROR` alone accepts any failure; `ERROR:code` also requires
+          // the code to appear in the error message.
+          const code = row.expected.slice(5).replace(/^:/, '')
+          assert.throws(
+            () => jx(row.input),
+            // Jsonic wraps codes as `jsonic/<code>`; a substring match on
+            // the bare code covers both the wrapped and the plain form.
+            (err: Error) => code === '' || err.message.includes(code),
+            `${row.file}:${row.line}: expected error ${row.expected}`,
+          )
+          return
+        }
+
+        const got = jx(row.input)
+        const want = JSON.parse(row.expected)
+        // Round-trip `got` through JSON so ordering of keys does not affect
+        // structural comparison (deepEqual is already order-insensitive for
+        // objects, but this also strips undefined fields cleanly).
+        assert.deepEqual(
+          JSON.parse(JSON.stringify(got)),
+          want,
+          `${row.file}:${row.line}: ${row.name}`,
+        )
+      })
+    }
+  })
+}
+
+// Auto-discover every .tsv under test/spec and run it. Keeping this
+// driven by directory contents means adding a new spec file never
+// requires editing the TypeScript test code.
+for (const file of readdirSync(specDir)) {
+  if (file.endsWith('.tsv')) runSpec(file)
+}
+
+
+// ---------------------------------------------------------------------------
+// XML embedded in Jsonic source
+//
+// With `embed: true` the plugin extends Jsonic's own grammar so a literal
+// XML element can appear anywhere a Jsonic value is expected. The outer
+// document is parsed by standard Jsonic; the XML subtree is built by the
+// plugin's element grammar.
+// ---------------------------------------------------------------------------
+
+// NOTE(review): in this copy of the patch the XML markup inside the string
+// literals below appears to have been stripped (for example the input that
+// is expected to produce element `a` reads just 'hello', and one literal is
+// split across a line break). Confirm the literals against the original
+// test file before relying on them.
+describe('xml-embedded-in-jsonic', () => {
+  // Ordinary Jsonic map and list sources must still parse to plain values
+  // when the plugin is installed with `embed: true`.
+  test('plain Jsonic is unaffected by embed mode', () => {
+    const j = Jsonic.make().use(Xml, { embed: true })
+    assert.deepEqual(j('{a:1, b:"two"}'), { a: 1, b: 'two' })
+    assert.deepEqual(j('[1, 2, 3]'), [1, 2, 3])
+  })
+
+  // A source consisting only of an XML element yields the element object
+  // (name, localName, attributes, children) as the parse result.
+  test('XML literal as the top-level value', () => {
+    const j = Jsonic.make().use(Xml, { embed: true })
+    assert.deepEqual(j('hello'), {
+      name: 'a',
+      localName: 'a',
+      attributes: {},
+      children: ['hello'],
+    })
+    assert.deepEqual(j('
'), {
+      name: 'br',
+      localName: 'br',
+      attributes: {},
+      children: [],
+    })
+  })
+
+  // The XML element is the value of the `payload` key; the sibling `title`
+  // key is parsed as ordinary Jsonic.
+  test('XML literal as a value inside a Jsonic map', () => {
+    const j = Jsonic.make().use(Xml, { embed: true })
+    const src =
+      '{\n' +
+      ' title: "order-42",\n' +
+      ' payload: \n' +
+      ' Widget\n' +
+      ' Gadget\n' +
+      ' ,\n' +
+      '}'
+    const result = j(src) as any
+    assert.equal(result.title, 'order-42')
+    const payload = result.payload
+    assert.equal(payload.name, 'order')
+    assert.equal(payload.attributes.id, '42')
+    // Keep only element children named `item`; string children (such as
+    // whitespace between the items) are filtered out.
+    const items = payload.children.filter(
+      (c: any) => typeof c === 'object' && c.name === 'item',
+    )
+    assert.equal(items.length, 2)
+    assert.equal(items[0].attributes.qty, '2')
+    assert.equal(items[0].children[0], 'Widget')
+    assert.equal(items[1].attributes.qty, '1')
+    assert.equal(items[1].children[0], 'Gadget')
+  })
+
+  test('XML literal preserves comma and colon in text', () => {
+    // Without embed-mode text handling, Jsonic's lexer would split this
+    // text on the comma and reject the fragment. The custom matcher
+    // claims the run when depth > 0, so it arrives as a single child.
+    const j = Jsonic.make().use(Xml, { embed: true })
+    assert.deepEqual(j('Hello, World!'), {
+      name: 'a',
+      localName: 'a',
+      attributes: {},
+      children: ['Hello, World!'],
+    })
+    assert.deepEqual(j('key: value'), {
+      name: 'a',
+      localName: 'a',
+      attributes: {},
+      children: ['key: value'],
+    })
+  })
+
+  // Three XML elements as list items: the first two are checked by name,
+  // the second also by its text child, the third by its `x` attribute.
+  test('multiple XML literals inside a Jsonic list', () => {
+    const j = Jsonic.make().use(Xml, { embed: true })
+    const result = j('[, x, ]') as any[]
+    assert.equal(result.length, 3)
+    assert.equal(result[0].name, 'a')
+    assert.equal(result[1].name, 'b')
+    assert.deepEqual(result[1].children, ['x'])
+    assert.equal(result[2].attributes.x, '1')
+  })
+
+  // Both the embedded element and its first child must report the same
+  // namespace URI.
+  test('XML literal with namespaces resolves correctly', () => {
+    const j = Jsonic.make().use(Xml, { embed: true })
+    const result = j(
+      '{doc: }',
+    ) as any
+    assert.equal(result.doc.namespace, 'http://e.example')
+    assert.equal(result.doc.children[0].namespace, 'http://e.example')
+  })
+})
+
+
+// ---------------------------------------------------------------------------
+// W3C XML Conformance Test Suite (xmltest subset)
+//
+// Exercised when the suite has been fetched to `test/xmlconf/` via
+// `scripts/fetch-xml-suite.sh`. Skipped otherwise. Mirrors the Go test
+// in go/xmlconf_test.go: counts valid/sa documents that parse and
+// not-wf/sa documents that are correctly rejected, requiring each
+// count to stay above a regression floor. Current parser numbers are
+// ~116/120 valid and ~39/186 not-wf rejected.
+//
+// The floors defined below must not exceed these figures; a floor above
+// the current count makes the guard fail on an unchanged parser.
+// ---------------------------------------------------------------------------
+
+// Root of the downloaded suite, resolved relative to the compiled test
+// file's directory.
+const xmlconfRoot = join(__dirname, '..', 'test', 'xmlconf')
+// True only when the `xmltest` subdirectory exists under that root.
+const xmlconfAvailable = existsSync(join(xmlconfRoot, 'xmltest'))
+
+// Regression guards; raise once parser coverage improves.
+// Regression floors: each conformance test fails when its count drops
+// below the value here. A floor has to sit at or below what the parser
+// currently achieves (the section header above records ~116/120 valid and
+// ~39/186 not-wf rejected); a floor above the current count would fail on
+// every run with the suite present instead of guarding against regressions.
+const VALID_SA_PASS_FLOOR = 110
+const NOT_WF_SA_REJECT_FLOOR = 30
+
+// List the regular files ending in `.xml` directly inside `dir`, as full
+// paths. Returns an empty list when `dir` does not exist.
+function xmlconfFiles(dir: string): string[] {
+  if (!existsSync(dir)) return []
+  return readdirSync(dir)
+    .filter((n) => n.endsWith('.xml'))
+    .filter((n) => statSync(join(dir, n)).isFile())
+    .map((n) => join(dir, n))
+}
+
+describe('w3c-xml-conformance', { skip: !xmlconfAvailable }, () => {
+  // Parse every valid/sa document and require the number of successful
+  // parses to stay at or above VALID_SA_PASS_FLOOR.
+  test('valid/sa documents parse', () => {
+    const files = xmlconfFiles(join(xmlconfRoot, 'xmltest', 'valid', 'sa'))
+    assert.ok(files.length > 0, 'no valid/sa files')
+    const parser = Jsonic.make().use(Xml)
+    let pass = 0
+    const failures: string[] = []
+    for (const path of files) {
+      // Read as a Buffer and let decodeBOM choose the encoding via
+      // the BOM (default UTF-8). This lets the same runner handle the
+      // suite's UTF-8 files (with or without BOM) and the few UTF-16
+      // / UTF-32 documents.
+      const body = decodeBOM(readFileSync(path))
+      try {
+        parser(body)
+        pass++
+      } catch (err) {
+        // Keep only the first line of the parser's message for the report.
+        const msg = (err as Error).message.split('\n', 1)[0]
+        // Paths come from path.join, which uses the platform separator, so
+        // split on either separator to get the bare file name everywhere.
+        failures.push(`${path.split(/[\\/]/).slice(-1)[0]}: ${msg}`)
+      }
+    }
+    console.log(` valid/sa: ${pass} / ${files.length} parsed successfully`)
+    assert.ok(
+      pass >= VALID_SA_PASS_FLOOR,
+      `valid/sa pass count ${pass} dropped below floor ${VALID_SA_PASS_FLOOR}. Sample failures:\n ${failures.slice(0, 5).join('\n ')}`,
+    )
+  })
+
+  // Parse every not-wf/sa document and require the number of documents
+  // that throw to stay at or above NOT_WF_SA_REJECT_FLOOR.
+  test('not-wf/sa documents are rejected', () => {
+    const files = xmlconfFiles(join(xmlconfRoot, 'xmltest', 'not-wf', 'sa'))
+    assert.ok(files.length > 0, 'no not-wf/sa files')
+    const parser = Jsonic.make().use(Xml)
+    let rejected = 0
+    const falseAccepts: string[] = []
+    for (const path of files) {
+      const body = decodeBOM(readFileSync(path))
+      try {
+        parser(body)
+        // Parsing succeeded on a document that is not well-formed.
+        falseAccepts.push(path.split(/[\\/]/).slice(-1)[0])
+      } catch {
+        rejected++
+      }
+    }
+    console.log(` not-wf/sa: ${rejected} / ${files.length} rejected as expected`)
+    assert.ok(
+      rejected >= NOT_WF_SA_REJECT_FLOOR,
+      `not-wf/sa reject count ${rejected} dropped below floor ${NOT_WF_SA_REJECT_FLOOR}. Sample false accepts:\n ${falseAccepts.slice(0, 5).join('\n ')}`,
+    )
+  })
+})
diff --git a/xml-grammar.jsonic b/xml-grammar.jsonic
new file mode 100644
index 0000000..64e3c77
--- /dev/null
+++ b/xml-grammar.jsonic
@@ -0,0 +1,47 @@
+# XML Grammar Definition (elements + attributes + mixed content)
+# Parsed by a standard Jsonic instance and passed to jsonic.grammar()
+# Function references (@ prefixed) are resolved against the refs map
+#
+# Token naming:
+# #XOP - XML open tag, e.g. <a>
+# #XCL - XML close tag, e.g. </a>
+# #XSC - XML self-close tag, e.g. <a/>
+# #XIG - comment / processing instruction / DOCTYPE (ignored)
+# #TX - text content between tags (CDATA included)
+# #ZZ - end of input
+
+# NOTE(review): the single-letter keys in each alternate are Jsonic rule
+# fields; presumably s = token to match, p = rule to push, r = rule to
+# replace with, b = number of tokens to backtrack, a = action reference,
+# c = condition reference, u = custom rule properties. Confirm against the
+# Jsonic grammar documentation.
+{
+  # xml: document level. Accepts end of input, text (then xml again), or
+  # an element guarded by the '@no-root-yet' condition.
+  rule: xml: open: [
+    { s: '#ZZ' }
+    { s: '#TX' r: xml }
+    { p: element c: '@no-root-yet' }
+  ]
+  # After the element: end of input, or text followed by xml again.
+  rule: xml: close: [
+    { s: '#ZZ' }
+    { s: '#TX' r: xml }
+  ]
+
+  # element: a self-close tag (flagged with selfclose: 1), or an open tag
+  # followed by content.
+  rule: element: open: [
+    { s: '#XSC' a: '@element-selfclose' u: { selfclose: 1 } }
+    { s: '#XOP' p: content a: '@element-open' }
+  ]
+  # A self-closed element matches no further token (first alternate);
+  # otherwise the matching close tag is required.
+  rule: element: close: [
+    { c: '@element-is-selfclosed' }
+    { s: '#XCL' a: '@element-close' }
+  ]
+
+  # content: the children of an element. A close tag is matched with b: 1
+  # in both lists (presumably handed back for element: close to consume);
+  # anything else is parsed as a child, then content repeats.
+  rule: content: open: [
+    { s: '#XCL' b: 1 }
+    { p: child }
+  ]
+  rule: content: close: [
+    { s: '#XCL' b: 1 }
+    { r: content }
+  ]
+
+  # child: one text run, or a nested element. The open / self-close tag is
+  # matched with b: 1 before the element rule is pushed.
+  rule: child: open: [
+    { s: '#TX' a: '@child-text' }
+    { s: '#XOP' b: 1 p: element }
+    { s: '#XSC' b: 1 p: element }
+  ]
+}