From 4ab99d96e34994fa7ca28c922e71daf74de81657 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 18 Apr 2026 22:36:54 +0000 Subject: [PATCH 01/15] Pivot plugin to simple XML element parser Replace the CSV grammar and plugin with an XML element-only variant. Parses a single rooted XML document into `{ name, children }` where `children` is an array of strings (text nodes) and nested elements. Handles open/close tags, self-closing tags (`<name/>`), nested and mixed content, and reports mismatched close tags as an error. A custom lexer matcher tokenizes `<name>`, `</name>`, and `<name/>` as single tokens; whitespace and JSON structural tokens are disabled so text is preserved verbatim between tags. --- csv-grammar.jsonic | 52 ----- embed-grammar.js | 39 +--- package.json | 19 +- src/csv.ts | 562 --------------------------------------------- src/xml.ts | 229 ++++++++++++++++++ test/csv.test.ts | 392 ------------------------------- test/quick.js | 138 +---------- test/xml.test.ts | 157 +++++++++++++ xml-grammar.jsonic | 41 ++++ 9 files changed, 448 insertions(+), 1181 deletions(-) delete mode 100644 csv-grammar.jsonic delete mode 100644 src/csv.ts create mode 100644 src/xml.ts delete mode 100644 test/csv.test.ts create mode 100644 test/xml.test.ts create mode 100644 xml-grammar.jsonic diff --git a/csv-grammar.jsonic b/csv-grammar.jsonic deleted file mode 100644 index b7c599b..0000000 --- a/csv-grammar.jsonic +++ /dev/null @@ -1,52 +0,0 @@ -# CSV Grammar Definition -# Parsed by a standard Jsonic instance and passed to jsonic.grammar() -# Function references (@ prefixed) are resolved against the refs map -# -# Token naming: -# #LN - line ending (removed from per-instance IGNORE set) -# #SP - whitespace (removed from per-instance IGNORE set in strict mode) -# #CA - comma / field separator -# #ZZ - end of input -# #VAL - token set: text, string, number, value literals -# -# Rules csv, newline, record, text are fully defined here. 
-# Rules list, elem, val are modified in code (strict mode defines from scratch; -# non-strict prepends to existing defaults to preserve JSON parsing). - -{ - rule: csv: open: [ - { s: '#ZZ' } - { s: '#LN' p: newline c: '@not-record-empty' } - { p: record } - ] - - rule: newline: open: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - rule: newline: close: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - - rule: record: open: [ - { p: list } - ] - rule: record: close: [ - { s: '#ZZ' } - { s: '#LN #ZZ' b: 1 } - { s: '#LN' r: '@record-close-next' } - ] - - rule: text: open: [ - { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } - { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } - { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } - { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } - {} - ] -} diff --git a/embed-grammar.js b/embed-grammar.js index 499715e..92b91f2 100644 --- a/embed-grammar.js +++ b/embed-grammar.js @@ -1,17 +1,16 @@ #!/usr/bin/env node -// Embed csv-grammar.jsonic into TypeScript and Go source files. +// Embed xml-grammar.jsonic into TypeScript source files. 
// Run via: npm run embed (or: node embed-grammar.js) const fs = require('fs') const path = require('path') -const GRAMMAR_FILE = path.join(__dirname, 'csv-grammar.jsonic') -const TS_FILE = path.join(__dirname, 'src', 'csv.ts') -const GO_FILE = path.join(__dirname, 'go', 'csv.go') +const GRAMMAR_FILE = path.join(__dirname, 'xml-grammar.jsonic') +const TS_FILE = path.join(__dirname, 'src', 'xml.ts') -const BEGIN = '// --- BEGIN EMBEDDED csv-grammar.jsonic ---' -const END = '// --- END EMBEDDED csv-grammar.jsonic ---' +const BEGIN = '// --- BEGIN EMBEDDED xml-grammar.jsonic ---' +const END = '// --- END EMBEDDED xml-grammar.jsonic ---' const grammar = fs.readFileSync(GRAMMAR_FILE, 'utf8') @@ -43,32 +42,4 @@ function embedTS() { console.log('Embedded grammar into', TS_FILE) } -// --- Go embedding --- -function embedGo() { - let src = fs.readFileSync(GO_FILE, 'utf8') - const startIdx = src.indexOf(BEGIN) - const endIdx = src.indexOf(END) - if (startIdx === -1 || endIdx === -1) { - console.error('Go markers not found in', GO_FILE) - process.exit(1) - } - - if (grammar.includes('`')) { - console.error('Grammar contains backticks, incompatible with Go raw strings') - process.exit(1) - } - - const replacement = - BEGIN + - '\nconst grammarText = `\n' + - grammar + - '`\n' + - END - - src = src.substring(0, startIdx) + replacement + src.substring(endIdx + END.length) - fs.writeFileSync(GO_FILE, src) - console.log('Embedded grammar into', GO_FILE) -} - embedTS() -embedGo() diff --git a/package.json b/package.json index 129bded..09ec4d6 100644 --- a/package.json +++ b/package.json @@ -1,12 +1,12 @@ { - "name": "@jsonic/csv", - "version": "0.10.0", - "description": "This plugin allows the [Jsonic](https://jsonic.senecajs.org) JSON parser to support csv syntax.", - "main": "dist/csv.js", + "name": "@jsonic/xml", + "version": "0.1.0", + "description": "This plugin allows the [Jsonic](https://jsonic.senecajs.org) JSON parser to support xml syntax.", + "main": "dist/xml.js", 
"type": "commonjs", - "browser": "csv.min.js", - "types": "dist/csv.d.ts", - "homepage": "https://github.com/jsonicjs/csv", + "browser": "xml.min.js", + "types": "dist/xml.d.ts", + "homepage": "https://github.com/jsonicjs/xml", "keywords": [ "pattern", "matcher", @@ -17,12 +17,12 @@ "author": "Richard Rodger (http://richardrodger.com)", "repository": { "type": "git", - "url": "git://github.com/jsonicjs/csv.git" + "url": "git://github.com/jsonicjs/xml.git" }, "scripts": { "test": "node --enable-source-maps --test \"dist-test/*.test.js\"", "test-some": "node --enable-source-maps --test-name-pattern=\"$npm_config_pattern\" --test \"dist-test/*.test.js\"", - "test-watch": "node --test --watch dist-test/csv.test.js", + "test-watch": "node --test --watch dist-test/xml.test.js", "embed": "node embed-grammar.js", "watch": "tsc --build src test -w", "build": "node embed-grammar.js && tsc --build src test", @@ -40,7 +40,6 @@ ], "devDependencies": { "@types/node": "^25.6.0", - "csv-spectrum": "^2.0.0", "typescript": "^5.9.3" }, "peerDependencies": { diff --git a/src/csv.ts b/src/csv.ts deleted file mode 100644 index c4eaf01..0000000 --- a/src/csv.ts +++ /dev/null @@ -1,562 +0,0 @@ -/* Copyright (c) 2021-2025 Richard Rodger, MIT License */ - -// Import Jsonic types used by plugins. -import { - Jsonic, - Rule, - RuleSpec, - Plugin, - Context, - Config, - Options, - Lex, -} from 'jsonic' - -// See defaults below for commentary. 
-type CsvOptions = { - trim: boolean | null - comment: boolean | null - number: boolean | null - value: boolean | null - header: boolean - object: boolean - stream: null | ((what: string, record?: Record | Error) => void) - strict: boolean - field: { - separation: null | string - nonameprefix: string - empty: any - names: undefined | string[] - exact: boolean - } - record: { - separators: null | string - empty: boolean - } - string: { - quote: string - csv: null | boolean - } -} - -// --- BEGIN EMBEDDED csv-grammar.jsonic --- -const grammarText = ` -# CSV Grammar Definition -# Parsed by a standard Jsonic instance and passed to jsonic.grammar() -# Function references (@ prefixed) are resolved against the refs map -# -# Token naming: -# #LN - line ending (removed from per-instance IGNORE set) -# #SP - whitespace (removed from per-instance IGNORE set in strict mode) -# #CA - comma / field separator -# #ZZ - end of input -# #VAL - token set: text, string, number, value literals -# -# Rules csv, newline, record, text are fully defined here. -# Rules list, elem, val are modified in code (strict mode defines from scratch; -# non-strict prepends to existing defaults to preserve JSON parsing). 
- -{ - rule: csv: open: [ - { s: '#ZZ' } - { s: '#LN' p: newline c: '@not-record-empty' } - { p: record } - ] - - rule: newline: open: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - rule: newline: close: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - - rule: record: open: [ - { p: list } - ] - rule: record: close: [ - { s: '#ZZ' } - { s: '#LN #ZZ' b: 1 } - { s: '#LN' r: '@record-close-next' } - ] - - rule: text: open: [ - { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } - { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } - { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } - { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } - {} - ] -} -` -// --- END EMBEDDED csv-grammar.jsonic --- - -// Plugin implementation. -const Csv: Plugin = (jsonic: Jsonic, options: CsvOptions) => { - // Normalize boolean options. - const strict = !!options.strict - const objres = !!options.object - const header = !!options.header - - // These may be changed below by superior options. - let trim = !!options.trim - let comment = !!options.comment - let opt_number = !!options.number - let opt_value = !!options.value - let record_empty = !!options.record?.empty - - const stream = options.stream - - // In strict mode, Jsonic field content is not parsed. - if (strict) { - if (false !== options.string.csv) { - jsonic.options({ - lex: { - match: { - stringcsv: { order: 1e5, make: buildCsvStringMatcher(options) }, - }, - }, - }) - } - jsonic.options({ - rule: { exclude: 'jsonic,imp' }, - }) - } - - // Fields may contain Jsonic content. - else { - if (true === options.string.csv) { - jsonic.options({ - lex: { - match: { - stringcsv: { order: 1e5, make: buildCsvStringMatcher(options) }, - }, - }, - }) - } - trim = null === options.trim ? 
true : trim - comment = null === options.comment ? true : comment - opt_number = null === options.number ? true : opt_number - opt_value = null === options.value ? true : opt_value - jsonic.options({ - rule: { exclude: 'imp' }, - }) - } - - // Stream rows as they are parsed, do not store in result. - if (stream) { - let parser = jsonic.internal().parser - let origStart = parser.start.bind(parser) - parser.start = (...args: any[]) => { - try { - return origStart(...args) - } catch (e: any) { - stream('error', e) - } - } - } - - let token: Record = {} - if (strict) { - // Disable JSON structure tokens - token = { - '#OB': null, - '#CB': null, - '#OS': null, - '#CS': null, - '#CL': null, - } - } - - // Custom "comma" - if (options.field.separation) { - token['#CA'] = options.field.separation - } - - // Jsonic option overrides. - let jsonicOptions: any = { - rule: { - start: 'csv', - }, - fixed: { - token, - }, - tokenSet: { - IGNORE: [ - strict ? null : undefined, // Handle #SP space - null, // Handle #LN newlines - undefined, // Still ignore #CM comments - ], - }, - number: { - lex: opt_number, - }, - value: { - lex: opt_value, - }, - comment: { - lex: comment, - }, - lex: { - emptyResult: [], - }, - line: { - single: record_empty, - chars: - null == options.record.separators - ? undefined - : options.record.separators, - rowChars: - null == options.record.separators - ? undefined - : options.record.separators, - }, - error: { - csv_extra_field: 'unexpected extra field value: $fsrc', - csv_missing_field: 'missing field', - }, - hint: { - csv_extra_field: `Row $row has too many fields (the first of which is: $fsrc). Only $len -fields per row are expected.`, - csv_missing_field: `Row $row has too few fields. $len fields per row are expected.`, - }, - } - - jsonic.options(jsonicOptions) - - - // Named function references for declarative grammar definition. 
- const refs: Record = { - - // === State actions (auto-wired by @rulename-{bo,ao,bc,ac} convention) === - - '@csv-bo': (r: Rule, ctx: Context) => { - ctx.u.recordI = 0 - stream && stream('start') - r.node = [] - }, - - '@csv-ac': (_r: Rule) => { - stream && stream('end') - }, - - '@record-bc': (r: Rule, ctx: Context) => { - let fields: string[] = ctx.u.fields || options.field.names - - if (0 === ctx.u.recordI && header) { - ctx.u.fields = undefined === r.child.node ? [] : r.child.node - } else { - let record: any = r.child.node || [] - - if (objres) { - let obj: Record = {} - let i = 0 - - if (fields) { - if (options.field.exact) { - if (record.length !== fields.length) { - return ctx.t0.bad( - record.length > fields.length - ? 'csv_extra_field' - : 'csv_missing_field', - ) - } - } - - let fI = 0 - for (; fI < fields.length; fI++) { - obj[fields[fI]] = - undefined === record[fI] ? options.field.empty : record[fI] - } - i = fI - } - - for (; i < record.length; i++) { - let field_name = options.field.nonameprefix + i - obj[field_name] = - undefined === record[i] ? options.field.empty : record[i] - } - - record = obj - } else { - for (let i = 0; i < record.length; i++) { - record[i] = - undefined === record[i] ? options.field.empty : record[i] - } - } - - if (stream) { - stream('record', record) - } else { - r.node.push(record) - } - } - - ctx.u.recordI++ - }, - - '@text-bc': (r: Rule) => { - r.parent.node = undefined === r.child.node ? r.node : r.child.node - }, - - - // === Alt actions === - - '@elem-open-empty': (r: Rule) => { - r.node.push(options.field.empty) - r.u.done = true - }, - - '@elem-close-trailing': (r: Rule) => { - r.node.push(options.field.empty) - }, - - '@text-follows': (r: Rule) => { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = (1 === r.n.text ? '' : r.prev.node) + r.o0.val - }, - - '@text-leads': (r: Rule) => { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? 
'' : r.prev.node) + - (2 <= r.n.text || !trim ? r.o0.src : '') + - r.o1.src - }, - - '@text-end': (r: Rule) => { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') - }, - - '@text-space': (r: Rule) => { - if (strict) { - let v = 1 === r.n.text ? r : r.prev - r.node = v.node = - (1 === r.n.text ? '' : r.prev.node) + (!trim ? r.o0.src : '') - } - }, - - - // === Condition refs === - - '@not-record-empty': () => !record_empty, - - - // === FuncRef for dynamic rule names === - - '@record-close-next': () => record_empty ? 'record' : 'newline', - - '@text-space-push': () => strict ? '' : 'val', - } - - - // Usually [#TX, #ST, #NR, #VL] - let VAL = jsonic.tokenSet.VAL - - let { LN, CA, SP, ZZ } = jsonic.token - - // Parse embedded grammar definition using a separate standard Jsonic instance. - const grammarDef = Jsonic.make()(grammarText) - grammarDef.ref = refs - jsonic.grammar(grammarDef) - - - // Rules list, elem, val are modified in code rather than the grammar file, - // because in non-strict mode the default jsonic alternatives must be preserved - // to support embedded JSON values like [1,2] and {x:1}. - - jsonic.rule('list', (rs: RuleSpec) => { - return rs - .open([ - // If not ignoring empty fields, don't consume LN used to close empty record. - { s: [LN], b: 1 }, - ]) - // Unconditional fallback to push elem — the default jsonic list rule gates - // its elem push on prev.u.implist which CSV's record rule does not set. 
- .open([{ p: 'elem' }], { append: true }) - .close([ - // LN ends record - { s: [LN], b: 1 }, - - { s: [ZZ] }, - ]) - }) - - jsonic.rule('elem', (rs: RuleSpec) => { - return rs - .open( - [ - // An empty element - { - s: [CA], - b: 1, - a: (r: Rule) => { - r.node.push(options.field.empty) - r.u.done = true - }, - }, - ], - ) - - .close( - [ - // An empty element at the end of the line - { - s: [CA, [LN, ZZ]], - b: 1, - a: (r: Rule) => r.node.push(options.field.empty), - }, - - // LN ends record - { s: [LN], b: 1 }, - ], - ) - }) - - jsonic.rule('val', (rs: RuleSpec) => { - return rs.open( - [ - // Handle text and space concatentation - { s: [VAL, SP], b: 2, p: 'text' }, - { s: [SP], b: 1, p: 'text' }, - - // LN ends record - { s: [LN], b: 1 }, - ], - ) - }) - - // Close is called on final rule - set parent val node - jsonic.rule('text', (rs: RuleSpec) => { - rs.bc((r: Rule) => { - r.parent.node = undefined === r.child.node ? r.node : r.child.node - }) - }) -} - -// Custom CSV String matcher. -// Handles "a""b" -> "a"b" quoting wierdness. -// This is a reduced copy of the standard Jsonic string matcher. -function buildCsvStringMatcher(csvopts: CsvOptions) { - return function makeCsvStringMatcher(cfg: Config, _opts: Options) { - return function csvStringMatcher(lex: Lex) { - let quoteMap: any = { [csvopts.string.quote]: true } - - let { pnt, src } = lex - let { sI, rI, cI } = pnt - let srclen = src.length - - if (quoteMap[src[sI]]) { - const q = src[sI] // Quote character - const qI = sI - const qrI = rI - ++sI - ++cI - - let s: string[] = [] - - for (sI; sI < srclen; sI++) { - cI++ - let c = src[sI] - - // Quote char. - if (q === c) { - sI++ - cI++ - - if (q === src[sI]) { - s.push(q) - } else { - break // String finished. - } - } - - // Body part of string. 
- else { - let bI = sI - - let qc = q.charCodeAt(0) - let cc = src.charCodeAt(sI) - - while (sI < srclen && 32 <= cc && qc !== cc) { - cc = src.charCodeAt(++sI) - cI++ - } - cI-- - - if (cfg.line.chars[src[sI]]) { - if (cfg.line.rowChars[src[sI]]) { - pnt.rI = ++rI - } - - cI = 1 - s.push(src.substring(bI, sI + 1)) - } else if (cc < 32) { - pnt.sI = sI - pnt.cI = cI - return lex.bad('unprintable', sI, sI + 1) - } else { - s.push(src.substring(bI, sI)) - sI-- - } - } - } - - if (src[sI - 1] !== q || pnt.sI === sI - 1) { - pnt.rI = qrI - return lex.bad('unterminated_string', qI, sI) - } - - const tkn = lex.token( - '#ST', - s.join(''), - src.substring(pnt.sI, sI), - pnt, - ) - - pnt.sI = sI - pnt.rI = rI - pnt.cI = cI - return tkn - } - } - } -} - -// Default option values. -Csv.defaults = { - trim: null, - comment: null, - number: null, - value: null, - header: true, - object: true, - stream: null, - strict: true, - field: { - separation: null, - nonameprefix: 'field~', - empty: '', - names: undefined, - exact: false, - }, - record: { - separators: null, - empty: false, - }, - string: { - quote: '"', - csv: null, - }, -} as CsvOptions - -export { Csv, buildCsvStringMatcher } - -export type { CsvOptions } diff --git a/src/xml.ts b/src/xml.ts new file mode 100644 index 0000000..e9dcdc2 --- /dev/null +++ b/src/xml.ts @@ -0,0 +1,229 @@ +/* Copyright (c) 2021-2025 Richard Rodger, MIT License */ + +// Import Jsonic types used by plugins. +import { + Jsonic, + Rule, + RuleSpec, + Plugin, + Context, + Config, + Options, + Lex, +} from 'jsonic' + +// A parsed XML element: a tag name and an array of children. +// Children are either strings (text nodes) or nested Element objects. +type XmlElement = { + name: string + children: Array +} + +type XmlOptions = { + // Reserved for future options. 
+} + +// --- BEGIN EMBEDDED xml-grammar.jsonic --- +const grammarText = ` +# XML Grammar Definition (simple element-only version) +# Parsed by a standard Jsonic instance and passed to jsonic.grammar() +# Function references (@ prefixed) are resolved against the refs map +# +# Token naming: +# #XOP - XML open tag, e.g. +# #XCL - XML close tag, e.g. +# #XSC - XML self-close tag, e.g. +# #TX - text content between tags +# #ZZ - end of input + +{ + rule: xml: open: [ + { s: '#ZZ' } + { p: element } + ] + + rule: element: open: [ + { s: '#XSC' a: '@element-selfclose' u: { selfclose: 1 } } + { s: '#XOP' p: content a: '@element-open' } + ] + rule: element: close: [ + { c: '@element-is-selfclosed' } + { s: '#XCL' a: '@element-close' } + ] + + rule: content: open: [ + { s: '#XCL' b: 1 } + { p: child } + ] + rule: content: close: [ + { s: '#XCL' b: 1 } + { r: content } + ] + + rule: child: open: [ + { s: '#TX' a: '@child-text' } + { s: '#XOP' b: 1 p: element } + { s: '#XSC' b: 1 p: element } + ] +} +` +// --- END EMBEDDED xml-grammar.jsonic --- + + +const Xml: Plugin = (jsonic: Jsonic, _options: XmlOptions) => { + // Register custom lexer matcher for XML tags so that `<name>`, `</name>`, + // and `<name/>` are each recognised as a single token with the tag name + // as the token value. + jsonic.options({ + lex: { + match: { + xmltag: { order: 1e5, make: buildXmlTagMatcher() }, + }, + emptyResult: undefined, + }, + // Terminate text at `<` so tag starts are not absorbed into text runs. + ender: ['<'], + rule: { + start: 'xml', + // Strip out JSON rules so XML input is not reinterpreted. + exclude: 'jsonic,imp', + }, + // Disable JSON structural fixed tokens. + fixed: { + token: { + '#OB': null, + '#CB': null, + '#OS': null, + '#CS': null, + '#CL': null, + '#CA': null, + }, + }, + // Disable number, value, and string lexing so XML text content is + // always a plain string. 
+ number: { lex: false }, + value: { lex: false }, + string: { lex: false }, + comment: { lex: false }, + // Treat whitespace and newlines as part of text content rather than + // as separate tokens so text between tags is preserved verbatim. + space: { lex: false }, + line: { lex: false }, + error: { + xml_mismatched_tag: + 'closing tag does not match opening tag <$openname>', + }, + hint: { + xml_mismatched_tag: `Each opening tag must be paired with a matching closing tag. +Expected but found .`, + }, + }) + + const refs: Record = { + // Propagate the parsed root element up to the xml rule so it becomes + // the final parse result. + '@xml-bc': (r: Rule) => { + if (r.child && r.child.node) { + r.node = r.child.node + } + }, + + // Initialise the element node when the opening tag `<name>` is matched. + '@element-open': (r: Rule) => { + r.node = { name: r.o0.val, children: [] } + }, + + // Self-closing tag `<name/>` - no children. + '@element-selfclose': (r: Rule) => { + r.node = { name: r.o0.val, children: [] } + }, + + // Verify that `</name>` matches the opening `<name>`. + '@element-close': (r: Rule, ctx: Context) => { + const openName = r.node && r.node.name + const closeName = r.c0.val + if (openName !== closeName) { + r.c0.use = { openname: openName } + return ctx.t0.bad('xml_mismatched_tag') + } + }, + + // Text node - push the text value onto the enclosing element's + // children array. The content/child rules inherit `r.node` from the + // parent element, so `r.node.children` is the enclosing element's + // child list. + '@child-text': (r: Rule) => { + r.node.children.push(r.o0.val) + r.u.done = true + }, + + // After the child rule returns (either from a text match above or + // from a nested `element` push), copy the nested element node into + // the parent element's children. Text was already pushed in open. 
+ '@child-bc': (r: Rule) => { + if (true !== r.u.done && r.child && r.child.node) { + r.node.children.push(r.child.node) + } + }, + + // Condition: close of element is trivially met when it was a + // self-closing tag (`<name/>`) with no separate close tag to match. + '@element-is-selfclosed': (r: Rule) => true === !!r.u.selfclose, + } + + // Parse embedded grammar definition using a separate standard Jsonic + // instance, then wire refs and apply. + const grammarDef = Jsonic.make()(grammarText) + grammarDef.ref = refs + jsonic.grammar(grammarDef) +} + + +// Build a lexer matcher that recognises XML tags as single tokens. +// Emits one of: +// #XOP for `<name>` (val = name) +// #XCL for `</name>` (val = name) +// #XSC for `<name/>` (val = name) +function buildXmlTagMatcher() { + const nameRE = `[A-Za-z_][A-Za-z0-9_\\-\\.:]*` + const openRE = new RegExp('^<(' + nameRE + ')\\s*(\\/?)>') + const closeRE = new RegExp('^<\\/(' + nameRE + ')\\s*>') + + return function makeXmlTagMatcher(_cfg: Config, _opts: Options) { + return function xmlTagMatcher(lex: Lex) { + const { pnt, src } = lex + const sI = pnt.sI + if (src[sI] !== '<') return undefined + + const rest = src.substring(sI) + + // Closing tag: </name> + if (src[sI + 1] === '/') { + const m = rest.match(closeRE) + if (!m) return undefined + const len = m[0].length + const tkn = lex.token('#XCL', m[1], m[0], pnt) + pnt.sI += len + pnt.cI += len + return tkn + } + + // Opening or self-close tag: <name> or <name/> + const m = rest.match(openRE) + if (!m) return undefined + const len = m[0].length + const selfClose = m[2] === '/' + const tkn = lex.token(selfClose ? 
'#XSC' : '#XOP', m[1], m[0], pnt) + pnt.sI += len + pnt.cI += len + return tkn + } + } +} + + +Xml.defaults = {} as XmlOptions + +export { Xml } + +export type { XmlOptions, XmlElement } diff --git a/test/csv.test.ts b/test/csv.test.ts deleted file mode 100644 index 1e19bb2..0000000 --- a/test/csv.test.ts +++ /dev/null @@ -1,392 +0,0 @@ -/* Copyright (c) 2021-2024 Richard Rodger and other contributors, MIT License */ - -import { describe, test } from 'node:test' -import assert from 'node:assert' -import { readFileSync } from 'node:fs' -import { join } from 'node:path' - -import Util from 'util' - -import { Jsonic } from 'jsonic' -import { Csv } from '../dist/csv' - -const Spectrum = require('csv-spectrum') - -const fixturesDir = join(__dirname, '..', 'test', 'fixtures') -const manifest = JSON.parse( - readFileSync(join(fixturesDir, 'manifest.json'), 'utf8'), -) - -describe('csv', () => { - test('empty-records', async () => { - // ignored by default - - const jo = Jsonic.make().use(Csv) - assert.deepEqual(jo('\n'), []) - assert.deepEqual(jo('a\n1\n\n2\n3\n\n\n4\n'), [ - { a: '1' }, - { a: '2' }, - { a: '3' }, - { a: '4' }, - ]) - - const ja = Jsonic.make().use(Csv, { object: false }) - assert.deepEqual(ja('\n'), []) - assert.deepEqual(ja('a\n1\n\n2\n3\n\n\n4\n'), [['1'], ['2'], ['3'], ['4']]) - - // start and end also ignored - - assert.deepEqual(jo('\r\na,b\r\nA,B\r\n'), [{ a: 'A', b: 'B' }]) - assert.deepEqual(jo('\r\n\r\na,b\r\nA,B\r\n\r\n'), [{ a: 'A', b: 'B' }]) - assert.deepEqual(ja('\r\na,b\r\nA,B\r\n'), [['A', 'B']]) - assert.deepEqual(ja('\r\n\r\na,b\r\nA,B\r\n\r\n'), [['A', 'B']]) - - // with option, empty creates record - - const jon = Jsonic.make().use(Csv, { record: { empty: true } }) - assert.deepEqual(jon('\n'), []) - assert.deepEqual(jon('a\n1\n\n2\n3\n\n\n4\n'), [ - { a: '1' }, - { a: '' }, - { a: '2' }, - { a: '3' }, - { a: '' }, - { a: '' }, - { a: '4' }, - ]) - - // with comments - - const joc = Jsonic.make().use(Csv, { comment: true }) - // 
console.log(joc('a#X\n1\n#Y\n2\n3\n\n#Z\n4\n#Q')) - assert.deepEqual(joc('a#X\n1\n#Y\n2\n3\n\n#Z\n4\n#Q'), [ - { a: '1' }, - { a: '2' }, - { a: '3' }, - { a: '4' }, - ]) - - const jocn = Jsonic.make().use(Csv, { - comment: true, - record: { empty: true }, - }) - assert.deepEqual(jocn('a#X\n1\n#Y\n2\n3\n\n#Z\n4\n#Q'), [ - { a: '1' }, - { a: '' }, - { a: '2' }, - { a: '3' }, - { a: '' }, - { a: '' }, - { a: '4' }, - ]) - }) - - test('header', async () => { - const jo = Jsonic.make().use(Csv) - assert.deepEqual(jo('\n'), []) - assert.deepEqual(jo('\na,b\nA,B'), [{ a: 'A', b: 'B' }]) - - const ja = Jsonic.make().use(Csv, { object: false }) - assert.deepEqual(ja('\n'), []) - assert.deepEqual(ja('\na,b\nA,B'), [['A', 'B']]) - - const jon = Jsonic.make().use(Csv, { header: false }) - assert.deepEqual(jon('\n'), []) - assert.deepEqual(jon('\na,b\nA,B'), [ - { - 'field~0': 'a', - 'field~1': 'b', - }, - { - 'field~0': 'A', - 'field~1': 'B', - }, - ]) - - const jan = Jsonic.make().use(Csv, { header: false, object: false }) - assert.deepEqual(jan('\n'), []) - assert.deepEqual(jan('\na,b\nA,B'), [ - ['a', 'b'], - ['A', 'B'], - ]) - - const jonf = Jsonic.make().use(Csv, { - header: false, - field: { names: ['a', 'b'] }, - }) - assert.deepEqual(jonf('\n'), []) - assert.deepEqual(jonf('\na,b\nA,B'), [ - { - a: 'a', - b: 'b', - }, - { - a: 'A', - b: 'B', - }, - ]) - }) - - test('comma', async () => { - const jo = Jsonic.make().use(Csv) - - assert.deepEqual(jo('\na'), []) - assert.deepEqual(jo('a\n1,'), [{ a: '1', 'field~1': '' }]) - assert.deepEqual(jo('a\n,1'), [{ a: '', 'field~1': '1' }]) - assert.deepEqual(jo('a,b\n1,2,'), [{ a: '1', b: '2', 'field~2': '' }]) - assert.deepEqual(jo('a,b\n,1,2'), [{ a: '', b: '1', 'field~2': '2' }]) - - assert.deepEqual(jo('a\n1,\n'), [{ a: '1', 'field~1': '' }]) - assert.deepEqual(jo('a\n,1\n'), [{ a: '', 'field~1': '1' }]) - assert.deepEqual(jo('a,b\n1,2,\n'), [{ a: '1', b: '2', 'field~2': '' }]) - assert.deepEqual(jo('a,b\n,1,2\n'), [{ a: '', 
b: '1', 'field~2': '2' }]) - assert.deepEqual(jo('\na\n'), []) - - const ja = Jsonic.make().use(Csv, { object: false }) - - assert.deepEqual(ja('a\n1,'), [['1', '']]) - assert.deepEqual(ja('a\n,1'), [['', '1']]) - assert.deepEqual(ja('a,b\n1,2,'), [['1', '2', '']]) - assert.deepEqual(ja('a,b\n,1,2'), [['', '1', '2']]) - assert.deepEqual(ja('\n1'), []) - }) - - test('separators', async () => { - const jd = Jsonic.make().use(Csv, { - field: { - separation: '|', - }, - }) - - assert.deepEqual(jd('a|b|c\nA|B|C\nAA|BB|CC'), [ - { a: 'A', b: 'B', c: 'C' }, - { a: 'AA', b: 'BB', c: 'CC' }, - ]) - - const jD = Jsonic.make().use(Csv, { - field: { - separation: '~~', - }, - }) - - assert.deepEqual(jD('a~~b~~c\nA~~B~~C\nAA~~BB~~CC'), [ - { a: 'A', b: 'B', c: 'C' }, - { a: 'AA', b: 'BB', c: 'CC' }, - ]) - - const jn = Jsonic.make().use(Csv, { - record: { - separators: '%', - }, - }) - - assert.deepEqual(jn('a,b,c%A,B,C%AA,BB,CC'), [ - { a: 'A', b: 'B', c: 'C' }, - { a: 'AA', b: 'BB', c: 'CC' }, - ]) - }) - - test('double-quote', async () => { - const j = Jsonic.make().use(Csv) - - assert.deepEqual(j('a\n"b"'), [{ a: 'b' }]) - - assert.deepEqual(j('a\n"""b"'), [{ a: '"b' }]) - assert.deepEqual(j('a\n"b"""'), [{ a: 'b"' }]) - assert.deepEqual(j('a\n"""b"""'), [{ a: '"b"' }]) - assert.deepEqual(j('a\n"b""c"'), [{ a: 'b"c' }]) - - assert.deepEqual(j('a\n"b""c""d"'), [{ a: 'b"c"d' }]) - assert.deepEqual(j('a\n"b""c""d""e"'), [{ a: 'b"c"d"e' }]) - - assert.deepEqual(j('a\n"""b"'), [{ a: '"b' }]) - assert.deepEqual(j('a\n"b"""'), [{ a: 'b"' }]) - assert.deepEqual(j('a\n"""b"""'), [{ a: '"b"' }]) - - assert.deepEqual(j('a\n"""""b"'), [{ a: '""b' }]) - assert.deepEqual(j('a\n"b"""""'), [{ a: 'b""' }]) - assert.deepEqual(j('a\n"""""b"""""'), [{ a: '""b""' }]) - }) - - test('trim', async () => { - const j = Jsonic.make().use(Csv) - - assert.deepEqual(j('a\n b'), [{ a: ' b' }]) - assert.deepEqual(j('a\nb '), [{ a: 'b ' }]) - assert.deepEqual(j('a\n b '), [{ a: ' b ' }]) - 
assert.deepEqual(j('a\n b '), [{ a: ' b ' }]) - assert.deepEqual(j('a\n \tb \t '), [{ a: ' \tb \t ' }]) - - assert.deepEqual(j('a\n b c'), [{ a: ' b c' }]) - assert.deepEqual(j('a\nb c '), [{ a: 'b c ' }]) - assert.deepEqual(j('a\n b c '), [{ a: ' b c ' }]) - assert.deepEqual(j('a\n b c '), [{ a: ' b c ' }]) - assert.deepEqual(j('a\n \tb c \t '), [{ a: ' \tb c \t ' }]) - - const jt = Jsonic.make().use(Csv, { trim: true }) - - assert.deepEqual(jt('a\n b'), [{ a: 'b' }]) - assert.deepEqual(jt('a\nb '), [{ a: 'b' }]) - assert.deepEqual(jt('a\n b '), [{ a: 'b' }]) - assert.deepEqual(jt('a\n b '), [{ a: 'b' }]) - assert.deepEqual(jt('a\n \tb \t '), [{ a: 'b' }]) - - assert.deepEqual(jt('a\n b c'), [{ a: 'b c' }]) - assert.deepEqual(jt('a\nb c '), [{ a: 'b c' }]) - assert.deepEqual(jt('a\n b c '), [{ a: 'b c' }]) - assert.deepEqual(jt('a\n b c '), [{ a: 'b c' }]) - assert.deepEqual(jt('a\n \tb c \t '), [{ a: 'b c' }]) - }) - - test('comment', async () => { - const j = Jsonic.make().use(Csv) - assert.deepEqual(j('a\n# b'), [{ a: '# b' }]) - assert.deepEqual(j('a\n b #c'), [{ a: ' b #c' }]) - - const jc = Jsonic.make().use(Csv, { comment: true }) - assert.deepEqual(jc('a\n# b'), []) - assert.deepEqual(jc('a\n b #c'), [{ a: ' b ' }]) - - const jt = Jsonic.make().use(Csv, { strict: false }) - assert.deepEqual(jt('a\n# b'), []) - assert.deepEqual(jt('a\n b '), [{ a: 'b' }]) - }) - - test('number', async () => { - const j = Jsonic.make().use(Csv) - assert.deepEqual(j('a\n1'), [{ a: '1' }]) - assert.deepEqual(j('a\n1e2'), [{ a: '1e2' }]) - - const jn = Jsonic.make().use(Csv, { number: true }) - assert.deepEqual(jn('a\n1'), [{ a: 1 }]) - assert.deepEqual(jn('a\n1e2'), [{ a: 100 }]) - - const jt = Jsonic.make().use(Csv, { strict: false }) - assert.deepEqual(jt('a\n1'), [{ a: 1 }]) - assert.deepEqual(jt('a\n1e2'), [{ a: 100 }]) - }) - - test('value', async () => { - const j = Jsonic.make().use(Csv) - assert.deepEqual(j('a\ntrue'), [{ a: 'true' }]) - assert.deepEqual(j('a\nfalse'), 
[{ a: 'false' }]) - assert.deepEqual(j('a\nnull'), [{ a: 'null' }]) - - const jv = Jsonic.make().use(Csv, { value: true }) - assert.deepEqual(jv('a\ntrue'), [{ a: true }]) - assert.deepEqual(jv('a\nfalse'), [{ a: false }]) - assert.deepEqual(jv('a\nnull'), [{ a: null }]) - }) - - test('stream', () => { - return new Promise((resolve) => { - let tmp: any = {} - let data: any[] - const j = Jsonic.make().use(Csv, { - stream: (what: string, record?: any[]) => { - if ('start' === what) { - data = [] - tmp.start = Date.now() - } else if ('record' === what) { - data.push(record) - } else if ('end' === what) { - tmp.end = Date.now() - - assert.deepEqual(data, [ - { a: '1', b: '2' }, - { a: '3', b: '4' }, - { a: '5', b: '6' }, - ]) - - assert.ok(tmp.start <= tmp.end) - - resolve() - } - }, - }) - - j('a,b\n1,2\n3,4\n5,6') - }) - }) - - test('unstrict', async () => { - const j = Jsonic.make().use(Csv, { strict: false }) - let d0 = j(`a,b,c -true,[1,2],{x:{y:"q\\"w"}} - x , 'y\\'y', "z\\"z" -`) - assert.deepEqual(d0, [ - { - a: true, - b: [1, 2], - c: { - x: { - y: 'q"w', - }, - }, - }, - { - a: 'x', - b: "y'y", - c: 'z"z', - }, - ]) - - assert.throws(() => j('a\n{x:1}y'), /unexpected/) - }) - - test('spectrum', async () => { - const j = Jsonic.make().use(Csv) - const tests = await Util.promisify(Spectrum)() - for (let i = 0; i < tests.length; i++) { - let test = tests[i] - let name = test.name - let json = JSON.parse(test.json.toString()) - let csv = test.csv.toString() - let res = j(csv) - let testname = name + ' ' + (i + 1) + '/' + tests.length - - // Broken test, reenable when fixed - if (5 === i) { - continue - } - - assert.deepEqual({ [testname]: res }, { [testname]: json }) - } - }) - - test('fixtures', async () => { - const csv = Jsonic.make().use(Csv) - for (const [key, entry] of Object.entries(manifest) as [string, any][]) { - const name: string = entry.name - - let parser = csv - if (entry.opt) { - let j = entry.jsonicOpt ? 
Jsonic.make(entry.jsonicOpt) : Jsonic.make() - parser = j.use(Csv, entry.opt) - } - const csvFile = entry.csvFile || key - const raw = readFileSync(join(fixturesDir, csvFile + '.csv'), 'utf8') - - if (entry.err) { - try { - parser(raw) - assert.fail('Expected error ' + entry.err + ' for fixture: ' + name) - } catch (e: any) { - assert.deepEqual(entry.err, e.code) - } - } else { - try { - const expected = JSON.parse( - readFileSync(join(fixturesDir, key + '.json'), 'utf8'), - ) - const out = parser(raw) - assert.deepEqual(out, expected) - } catch (e: any) { - console.error('FIXTURE: ' + name) - throw e - } - } - } - }) -}) diff --git a/test/quick.js b/test/quick.js index 63047f3..490e3f1 100644 --- a/test/quick.js +++ b/test/quick.js @@ -1,136 +1,12 @@ -const { Jsonic } = require('@jsonic/jsonic-next') -const { Debug } = require('@jsonic/jsonic-next/debug') -const { Csv } = require('..') +const { Jsonic } = require('jsonic') +const { Xml } = require('../dist/xml') -const tlog = [] - -// const c0 = Jsonic.make() -// .use(Debug,{trace:true}) -// .use(Csv,{comment:true,object:false,header:false}) - -// const u0 = Jsonic.make() -// // .use(Debug,{trace:true}) -// .use(Csv,{ -// strict:false, -// }) - -const csv = Jsonic.make() - .use(Debug, { trace: true }) - .use(Csv, { - // line: {empty:true}, - // header: false, - // object: false, - // trim: true, - // value: true, - // comment: true, - // record: { empty: true } - }) - // .sub({lex:(t)=>console.log(t)}) - .sub({ lex: (t) => tlog.push(t) }) - -// console.log(csv.options.tokenSet) -// console.log(csv.internal().config.lex.match) - -// console.log(csv(`a,b -// 1,2,`,{xlog:-1})) +const xml = Jsonic.make().use(Xml) console.log( - csv( - `a -,1`, - { xlog: -1 }, + JSON.stringify( + xml('hello'), + null, + 2, ), ) - -// console.log(csv(`a,b -// 1, 2 -// 11 ,{22 -// 3 3, "a" -// `,{xlog:-1})) - -// console.log(csv(`a,b -// 1,2 -// 3,"x""y" -// 4,5 -// `,{xlog:-1})) - -// console.log(csv(`a,b -// 1, 2 3 -// 4, 5 6 -// 7, 8 
9 -// 10, 11 12 13 -// `,{xlog:-1})) - -// const u0 = Jsonic.make() -// .use(Debug,{trace:true}) -// .use(Csv, {strict:false}) - -// console.dir(u0(`a,b -// 1 , 2 -// `),{depth:null}) - -// console.dir(u0(`a,b,c -// true,[1,2],{x:{y:"q\\"w"}} -// null,'Q\\r\\nA',1e2 -// `),{depth:null}) - -// console.log(c0(`a,b,c -// 1 , 2 , 3 -// 11 , 22 , 33 -// 4\t,\t5\t,\t6 -// \t44\t,\t\t55\t\t\t,\t6\t -// `)) - -// console.log(c0(`a,b,c,d,e,f -// 1 ,2 , 3 ,4 5 , 6 7,8 9 0 -// `)) - -// console.log(c0(`a,b -// "x"y,z`)) - -// console.log(u0(`a -// b `)) - -// console.log(c0(` -// 1`)) - -// console.log(c0('')) - -// console.log(c0(`#foo -// #bar -// 1,2 -// #a -// #b - -// 3,4 - -// #c - -// `)) - -// console.log(csv(`a,b -// A,B -// #X - -// AA,BB`)) - -// console.log(csv(` -// #X -// #XX -// a,b -// #Y -// #YY -// A,B -// #Z -// #ZZ -// `)) - -// console.log(csv('\n')) - -// console.log(csv('a,b\nA,"""B"')) - -// console.log(csv('true')) - -// console.log(csv('\na\n')) - -// console.log(tlog) diff --git a/test/xml.test.ts b/test/xml.test.ts new file mode 100644 index 0000000..7f341fa --- /dev/null +++ b/test/xml.test.ts @@ -0,0 +1,157 @@ +/* Copyright (c) 2021-2025 Richard Rodger and other contributors, MIT License */ + +import { describe, test } from 'node:test' +import assert from 'node:assert' + +import { Jsonic } from 'jsonic' +import { Xml } from '../dist/xml' + +describe('xml', () => { + test('empty-element', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx(''), { name: 'a', children: [] }) + }) + + test('self-closing-element', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx(''), { name: 'a', children: [] }) + assert.deepEqual(jx('
'), { name: 'br', children: [] }) + }) + + test('text-content', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx('
hello'), { + name: 'a', + children: ['hello'], + }) + assert.deepEqual(jx('hello world'), { + name: 'greet', + children: ['hello world'], + }) + }) + + test('nested-elements', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx(''), { + name: 'a', + children: [{ name: 'b', children: [] }], + }) + assert.deepEqual(jx('x'), { + name: 'a', + children: [{ name: 'b', children: ['x'] }], + }) + }) + + test('deeply-nested', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx('x'), { + name: 'a', + children: [ + { + name: 'b', + children: [{ name: 'c', children: ['x'] }], + }, + ], + }) + }) + + test('multiple-children', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx(''), { + name: 'a', + children: [ + { name: 'b', children: [] }, + { name: 'c', children: [] }, + ], + }) + assert.deepEqual(jx('12'), { + name: 'a', + children: [ + { name: 'b', children: ['1'] }, + { name: 'c', children: ['2'] }, + ], + }) + }) + + test('mixed-content', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx('helloinnerworld'), { + name: 'a', + children: [ + 'hello', + { name: 'b', children: ['inner'] }, + 'world', + ], + }) + }) + + test('tag-name-variants', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx('x'), { + name: 'a-b', + children: ['x'], + }) + assert.deepEqual(jx('x'), { + name: 'a.b', + children: ['x'], + }) + assert.deepEqual(jx('x'), { + name: 'a_b', + children: ['x'], + }) + assert.deepEqual(jx('x'), { + name: 'ns:a', + children: ['x'], + }) + }) + + test('mismatched-tag', () => { + const jx = Jsonic.make().use(Xml) + assert.throws(() => jx(''), /xml_mismatched_tag|mismatched/i) + }) + + test('multiline-content', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx('\n 1\n 2\n'), + { + name: 'root', + children: [ + '\n ', + { name: 'a', children: ['1'] }, + '\n ', + { name: 'b', children: ['2'] }, + '\n', + ], + }, + ) + }) + + test('preserves-whitespace-text', () => { + const jx = 
Jsonic.make().use(Xml) + assert.deepEqual(jx('

hello world

'), { + name: 'p', + children: [' hello world '], + }) + }) + + test('deeply-nested-and-siblings', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx('x'), + { + name: 'a', + children: [ + { + name: 'b', + children: [ + { name: 'c', children: [] }, + { name: 'd', children: ['x'] }, + ], + }, + { name: 'e', children: [] }, + ], + }, + ) + }) +}) diff --git a/xml-grammar.jsonic b/xml-grammar.jsonic new file mode 100644 index 0000000..03edbec --- /dev/null +++ b/xml-grammar.jsonic @@ -0,0 +1,41 @@ +# XML Grammar Definition (simple element-only version) +# Parsed by a standard Jsonic instance and passed to jsonic.grammar() +# Function references (@ prefixed) are resolved against the refs map +# +# Token naming: +# #XOP - XML open tag, e.g. +# #XCL - XML close tag, e.g. +# #XSC - XML self-close tag, e.g. +# #TX - text content between tags +# #ZZ - end of input + +{ + rule: xml: open: [ + { s: '#ZZ' } + { p: element } + ] + + rule: element: open: [ + { s: '#XSC' a: '@element-selfclose' u: { selfclose: 1 } } + { s: '#XOP' p: content a: '@element-open' } + ] + rule: element: close: [ + { c: '@element-is-selfclosed' } + { s: '#XCL' a: '@element-close' } + ] + + rule: content: open: [ + { s: '#XCL' b: 1 } + { p: child } + ] + rule: content: close: [ + { s: '#XCL' b: 1 } + { r: content } + ] + + rule: child: open: [ + { s: '#TX' a: '@child-text' } + { s: '#XOP' b: 1 p: element } + { s: '#XSC' b: 1 p: element } + ] +} From 52c1aa8ee60b96703e1fc2f8b39100e1335fe9d6 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 18 Apr 2026 22:56:03 +0000 Subject: [PATCH 02/15] Add attributes, entities, namespaces, comments, CDATA, PI support Extend the simple element parser to cover the commonly used parts of the XML specification beyond bare elements. 
Lexer changes: - The custom tag matcher now parses attributes (including single- and double-quoted values with entity decoding) and recognises comments (), CDATA sections (), processing instructions (, including the XML declaration) and DOCTYPE declarations with optional internal subsets. Comments, PIs and DOCTYPEs are emitted as an #XIG token and dropped via IGNORE; CDATA is emitted verbatim as #TX with no entity processing. - A text modifier decodes the five predefined entities (amp, lt, gt, quot, apos) plus numeric character references (&#N; and &#xN;) from text nodes; attribute values are decoded inline. Data structure changes: - Each element now has `attributes`, `localName` and optional `prefix`/`namespace` fields in addition to `name` and `children`. - A post-parse walk resolves namespace URIs from xmlns/xmlns:* declarations across nested scopes with proper inheritance and override semantics. Options: - `namespaces` (default true) - enable namespace resolution - `entities` (default true) - enable entity decoding - `customEntities` - additional named entities Grammar: - `xml` rule skips whitespace text nodes between the document prolog (declaration, DOCTYPE, comments) and the root element, and after the root element, so real-world documents with blank lines parse cleanly. --- src/xml.ts | 387 ++++++++++++++++++++++++++++++++++++++------ test/xml.test.ts | 396 ++++++++++++++++++++++++++++++++++----------- xml-grammar.jsonic | 14 +- 3 files changed, 652 insertions(+), 145 deletions(-) diff --git a/src/xml.ts b/src/xml.ts index e9dcdc2..4eac2a6 100644 --- a/src/xml.ts +++ b/src/xml.ts @@ -4,7 +4,6 @@ import { Jsonic, Rule, - RuleSpec, Plugin, Context, Config, @@ -12,35 +11,60 @@ import { Lex, } from 'jsonic' -// A parsed XML element: a tag name and an array of children. -// Children are either strings (text nodes) or nested Element objects. +// A parsed XML element. +// +// Fields: +// name - qualified name as written in the source (e.g. 
"ns:tag") +// prefix - namespace prefix if any ("ns"), else undefined +// localName - local part of the qualified name ("tag") +// namespace - URI bound to the prefix/default at parse time +// attributes - attribute map, with entity references decoded. Namespace +// declarations ("xmlns", "xmlns:*") are kept here too. +// children - mixed array of text strings and nested elements. type XmlElement = { name: string + prefix?: string + localName: string + namespace?: string + attributes: Record children: Array } type XmlOptions = { - // Reserved for future options. + // Whether to resolve namespaces (annotate elements with + // `prefix`/`localName`/`namespace`). Default: true. + namespaces: boolean + // Whether to decode the five predefined entities and numeric character + // references in text and attribute values. Default: true. + entities: boolean + // Additional named entities to recognise beyond the five predefined ones. + customEntities: Record } // --- BEGIN EMBEDDED xml-grammar.jsonic --- const grammarText = ` -# XML Grammar Definition (simple element-only version) +# XML Grammar Definition (elements + attributes + mixed content) # Parsed by a standard Jsonic instance and passed to jsonic.grammar() # Function references (@ prefixed) are resolved against the refs map # # Token naming: -# #XOP - XML open tag, e.g. +# #XOP - XML open tag, e.g. # #XCL - XML close tag, e.g. -# #XSC - XML self-close tag, e.g. -# #TX - text content between tags +# #XSC - XML self-close tag, e.g. 
+# #XIG - comment / processing instruction / DOCTYPE (ignored) +# #TX - text content between tags (CDATA included) # #ZZ - end of input { rule: xml: open: [ { s: '#ZZ' } + { s: '#TX' r: xml } { p: element } ] + rule: xml: close: [ + { s: '#ZZ' } + { s: '#TX' r: xml } + ] rule: element: open: [ { s: '#XSC' a: '@element-selfclose' u: { selfclose: 1 } } @@ -70,14 +94,21 @@ const grammarText = ` // --- END EMBEDDED xml-grammar.jsonic --- -const Xml: Plugin = (jsonic: Jsonic, _options: XmlOptions) => { - // Register custom lexer matcher for XML tags so that ``, ``, - // and `` are each recognised as a single token with the tag name - // as the token value. +const Xml: Plugin = (jsonic: Jsonic, options: XmlOptions) => { + const decodeEntity = buildEntityDecoder(options) + + // Register custom lexer matchers. + // + // The XML tag matcher handles any `<...>` construct: elements (open, + // close, self-closing) with attributes, comments, CDATA, processing + // instructions and DOCTYPE declarations. + // + // A text modifier decodes entity references (`&` etc.) in text + // nodes. Attribute values are decoded inside the tag matcher. jsonic.options({ lex: { match: { - xmltag: { order: 1e5, make: buildXmlTagMatcher() }, + xmltag: { order: 1e5, make: buildXmlTagMatcher(decodeEntity) }, }, emptyResult: undefined, }, @@ -99,6 +130,13 @@ const Xml: Plugin = (jsonic: Jsonic, _options: XmlOptions) => { '#CA': null, }, }, + // Comments and processing instructions are emitted as a dedicated + // #XIG token and skipped by the parser via the IGNORE set. Keep the + // default IGNORE members so that whichever lexers happen to produce + // #SP/#LN/#CM still get skipped. + tokenSet: { + IGNORE: ['#SP', '#LN', '#CM', '#XIG'], + }, // Disable number, value, and string lexing so XML text content is // always a plain string. 
number: { lex: false }, @@ -109,36 +147,68 @@ const Xml: Plugin = (jsonic: Jsonic, _options: XmlOptions) => { // as separate tokens so text between tags is preserved verbatim. space: { lex: false }, line: { lex: false }, + // Decode entity references in text nodes. + text: { + modify: (val: any) => + 'string' === typeof val && options.entities !== false + ? decodeEntity(val) + : val, + }, error: { xml_mismatched_tag: 'closing tag does not match opening tag <$openname>', + xml_invalid_tag: 'invalid tag: $fsrc', + xml_unterminated: 'unterminated $kind', }, hint: { xml_mismatched_tag: `Each opening tag must be paired with a matching closing tag. Expected but found .`, + xml_invalid_tag: `The tag syntax is not valid XML.`, + xml_unterminated: `The $kind starting at this position is not terminated.`, }, }) const refs: Record = { // Propagate the parsed root element up to the xml rule so it becomes - // the final parse result. - '@xml-bc': (r: Rule) => { + // the final parse result. The xml rule uses `r: xml` to skip leading + // and trailing whitespace text, which creates a chain of rule + // instances. The root is the first one; walk the rule chain back to + // it so the final result is stored on the root rule's node. + '@xml-bc': (r: Rule, ctx: Context) => { if (r.child && r.child.node) { - r.node = r.child.node + const root = ctx.root() + root.node = r.child.node + if (options.namespaces !== false) { + resolveNamespaces(root.node, {}) + } } }, - // Initialise the element node when the opening tag `` is matched. + // Initialise the element node when the opening tag `` is + // matched. The tag token's value carries both the name and the + // parsed attribute map. '@element-open': (r: Rule) => { - r.node = { name: r.o0.val, children: [] } + const v = r.o0.val + r.node = { + name: v.name, + localName: v.name, + attributes: v.attributes, + children: [], + } }, - // Self-closing tag `` - no children. + // Self-closing tag `` - no children. 
'@element-selfclose': (r: Rule) => { - r.node = { name: r.o0.val, children: [] } + const v = r.o0.val + r.node = { + name: v.name, + localName: v.name, + attributes: v.attributes, + children: [], + } }, - // Verify that `` matches the opening ``. + // Verify that `` matches the opening ``. '@element-close': (r: Rule, ctx: Context) => { const openName = r.node && r.node.name const closeName = r.c0.val @@ -179,15 +249,63 @@ Expected but found .`, } -// Build a lexer matcher that recognises XML tags as single tokens. -// Emits one of: -// #XOP for `` (val = name) -// #XCL for `` (val = name) -// #XSC for `` (val = name) -function buildXmlTagMatcher() { - const nameRE = `[A-Za-z_][A-Za-z0-9_\\-\\.:]*` - const openRE = new RegExp('^<(' + nameRE + ')\\s*(\\/?)>') - const closeRE = new RegExp('^<\\/(' + nameRE + ')\\s*>') +// The five predefined XML entities. +const predefinedEntities: Record = { + amp: '&', + lt: '<', + gt: '>', + quot: '"', + apos: "'", +} + +// Build an entity-decoding function. Decodes the five predefined +// entities, numeric character references (`&#NN;` decimal and `&#xNN;` +// hex), plus any user-supplied custom entities. +function buildEntityDecoder(options: XmlOptions) { + const entities = { + ...predefinedEntities, + ...(options?.customEntities || {}), + } + const entityRE = /&(#x[0-9a-fA-F]+|#[0-9]+|[A-Za-z_][A-Za-z0-9_]*);/g + + return function decodeEntities(src: string): string { + if (src.indexOf('&') < 0) return src + return src.replace(entityRE, (match, ref) => { + if (ref[0] === '#') { + const code = + ref[1] === 'x' || ref[1] === 'X' + ? parseInt(ref.substring(2), 16) + : parseInt(ref.substring(1), 10) + if (isNaN(code)) return match + try { + return String.fromCodePoint(code) + } catch { + return match + } + } + return undefined !== entities[ref] ? 
entities[ref] : match + }) + } +} + + +// Build a lexer matcher that recognises all top-level XML constructs +// starting with `<`: +// -> #XOP val = { name, attributes } +// -> #XSC val = { name, attributes } +// -> #XCL val = name +// -> #XIG (parser ignores) +// -> #XIG (parser ignores) +// -> #XIG (parser ignores) +// -> #TX (verbatim text, no entity decoding) +function buildXmlTagMatcher( + decodeEntity: (src: string) => string, +) { + const isNameStart = (ch: string) => + /[A-Za-z_:]/.test(ch) + const isNameChar = (ch: string) => + /[A-Za-z0-9_\-\.:]/.test(ch) + const isSpace = (ch: string) => ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' return function makeXmlTagMatcher(_cfg: Config, _opts: Options) { return function xmlTagMatcher(lex: Lex) { @@ -195,34 +313,209 @@ function buildXmlTagMatcher() { const sI = pnt.sI if (src[sI] !== '<') return undefined - const rest = src.substring(sI) + // Comment: + if (src.startsWith('', sI + 4) + if (endIdx === -1) { + return lex.bad('unterminated_comment', sI, src.length) + } + const end = endIdx + 3 + const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // CDATA: + if (src.startsWith('', sI + 9) + if (endIdx === -1) { + return lex.bad('unterminated_cdata', sI, src.length) + } + const end = endIdx + 3 + const text = src.substring(sI + 9, endIdx) + const tkn = lex.token('#TX', text, src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // DOCTYPE: + if (src.startsWith('' && depth <= 0) break + i++ + } + if (i >= src.length) { + return lex.bad('unterminated_doctype', sI, src.length) + } + const end = i + 1 + const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // Processing instruction: (including decl) + if (src[sI + 1] === '?') { + const endIdx = src.indexOf('?>', sI + 2) + if (endIdx === -1) { + return 
lex.bad('unterminated_pi', sI, src.length) + } + const end = endIdx + 2 + const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } // Closing tag: if (src[sI + 1] === '/') { - const m = rest.match(closeRE) - if (!m) return undefined - const len = m[0].length - const tkn = lex.token('#XCL', m[1], m[0], pnt) - pnt.sI += len - pnt.cI += len + let i = sI + 2 + const nameStart = i + if (i >= src.length || !isNameStart(src[i])) return undefined + i++ + while (i < src.length && isNameChar(src[i])) i++ + const name = src.substring(nameStart, i) + while (i < src.length && isSpace(src[i])) i++ + if (src[i] !== '>') { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + const end = i + 1 + const tkn = lex.token('#XCL', name, src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI return tkn } - // Opening or self-close tag: or - const m = rest.match(openRE) - if (!m) return undefined - const len = m[0].length - const selfClose = m[2] === '/' - const tkn = lex.token(selfClose ? '#XSC' : '#XOP', m[1], m[0], pnt) - pnt.sI += len - pnt.cI += len - return tkn + // Opening or self-close tag: + let i = sI + 1 + const nameStart = i + if (i >= src.length || !isNameStart(src[i])) return undefined + i++ + while (i < src.length && isNameChar(src[i])) i++ + const name = src.substring(nameStart, i) + const attributes: Record = {} + + // Parse zero or more attributes. + while (true) { + const wsStart = i + while (i < src.length && isSpace(src[i])) i++ + + if (i >= src.length) { + return lex.bad('xml_invalid_tag', sI, src.length) + } + + // End of tag. 
+ if (src[i] === '>') { + const end = i + 1 + const tkn = lex.token('#XOP', { name, attributes }, src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + if (src[i] === '/' && src[i + 1] === '>') { + const end = i + 2 + const tkn = lex.token('#XSC', { name, attributes }, src.substring(sI, end), pnt) + pnt.sI = end + pnt.cI += end - sI + return tkn + } + + // An attribute must follow, preceded by whitespace. + if (wsStart === i) { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + + // Attribute name. + const attrStart = i + if (!isNameStart(src[i])) { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + i++ + while (i < src.length && isNameChar(src[i])) i++ + const attrName = src.substring(attrStart, i) + + while (i < src.length && isSpace(src[i])) i++ + if (src[i] !== '=') { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + i++ + while (i < src.length && isSpace(src[i])) i++ + + const quote = src[i] + if (quote !== '"' && quote !== "'") { + return lex.bad('xml_invalid_tag', sI, i + 1) + } + i++ + const valStart = i + while (i < src.length && src[i] !== quote) i++ + if (i >= src.length) { + return lex.bad('xml_invalid_tag', sI, src.length) + } + const rawVal = src.substring(valStart, i) + i++ // consume closing quote + + attributes[attrName] = decodeEntity(rawVal) + } + } + } +} + + +// Resolve namespaces on an element tree. Walks the tree maintaining a +// scope map of `prefix` -> `namespace URI`. The empty string key is the +// default namespace. Mutates each element to add `prefix`, `localName` +// and `namespace` where applicable. +function resolveNamespaces( + element: XmlElement, + scope: Record, +) { + const localScope: Record = { ...scope } + + // Apply xmlns bindings from this element's attributes. 
+ for (const key of Object.keys(element.attributes || {})) { + const val = element.attributes[key] + if (key === 'xmlns') { + localScope[''] = val + } else if (key.startsWith('xmlns:')) { + localScope[key.substring(6)] = val + } + } + + const colonIdx = element.name.indexOf(':') + if (colonIdx >= 0) { + const prefix = element.name.substring(0, colonIdx) + element.prefix = prefix + element.localName = element.name.substring(colonIdx + 1) + if (localScope[prefix]) { + element.namespace = localScope[prefix] + } + } else { + element.localName = element.name + if (localScope['']) { + element.namespace = localScope[''] + } + } + + for (const child of element.children) { + if (child && 'object' === typeof child) { + resolveNamespaces(child, localScope) } } } -Xml.defaults = {} as XmlOptions +Xml.defaults = { + namespaces: true, + entities: true, + customEntities: {}, +} as XmlOptions export { Xml } diff --git a/test/xml.test.ts b/test/xml.test.ts index 7f341fa..8c4d7f7 100644 --- a/test/xml.test.ts +++ b/test/xml.test.ts @@ -6,103 +6,85 @@ import assert from 'node:assert' import { Jsonic } from 'jsonic' import { Xml } from '../dist/xml' +// Build a plain element literal in the shape the parser emits. Optional +// namespace / prefix fields are only present when actually resolved. +function elem( + name: string, + children: any[] = [], + attributes: Record = {}, + extras: Record = {}, +) { + const out: any = { + name, + localName: extras.localName ?? name, + attributes, + children, + } + if (extras.prefix) out.prefix = extras.prefix + if (extras.namespace) out.namespace = extras.namespace + return out +} + describe('xml', () => { test('empty-element', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), { name: 'a', children: [] }) + assert.deepEqual(jx(''), elem('a')) }) test('self-closing-element', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), { name: 'a', children: [] }) - assert.deepEqual(jx('
'), { name: 'br', children: [] }) + assert.deepEqual(jx('
'), elem('a')) + assert.deepEqual(jx('
'), elem('br')) }) test('text-content', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('
hello'), { - name: 'a', - children: ['hello'], - }) - assert.deepEqual(jx('hello world'), { - name: 'greet', - children: ['hello world'], - }) + assert.deepEqual(jx('hello'), elem('a', ['hello'])) + assert.deepEqual( + jx('hello world'), + elem('greet', ['hello world']), + ) }) test('nested-elements', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), { - name: 'a', - children: [{ name: 'b', children: [] }], - }) - assert.deepEqual(jx('x'), { - name: 'a', - children: [{ name: 'b', children: ['x'] }], - }) + assert.deepEqual(jx(''), elem('a', [elem('b')])) + assert.deepEqual(jx('x'), elem('a', [elem('b', ['x'])])) }) test('deeply-nested', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('x'), { - name: 'a', - children: [ - { - name: 'b', - children: [{ name: 'c', children: ['x'] }], - }, - ], - }) + assert.deepEqual( + jx('x'), + elem('a', [elem('b', [elem('c', ['x'])])]), + ) }) test('multiple-children', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), { - name: 'a', - children: [ - { name: 'b', children: [] }, - { name: 'c', children: [] }, - ], - }) - assert.deepEqual(jx('12'), { - name: 'a', - children: [ - { name: 'b', children: ['1'] }, - { name: 'c', children: ['2'] }, - ], - }) + assert.deepEqual( + jx(''), + elem('a', [elem('b'), elem('c')]), + ) + assert.deepEqual( + jx('12'), + elem('a', [elem('b', ['1']), elem('c', ['2'])]), + ) }) test('mixed-content', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('helloinnerworld'), { - name: 'a', - children: [ - 'hello', - { name: 'b', children: ['inner'] }, - 'world', - ], - }) + assert.deepEqual( + jx('helloinnerworld'), + elem('a', ['hello', elem('b', ['inner']), 'world']), + ) }) test('tag-name-variants', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('x'), { - name: 'a-b', - children: ['x'], - }) - assert.deepEqual(jx('x'), { - name: 'a.b', - children: ['x'], - }) - assert.deepEqual(jx('x'), { - name: 'a_b', - children: ['x'], 
- }) - assert.deepEqual(jx('x'), { - name: 'ns:a', - children: ['x'], - }) + assert.deepEqual(jx('x'), elem('a-b', ['x'])) + assert.deepEqual(jx('x'), elem('a.b', ['x'])) + assert.deepEqual(jx('x'), elem('a_b', ['x'])) }) test('mismatched-tag', () => { @@ -114,44 +96,270 @@ describe('xml', () => { const jx = Jsonic.make().use(Xml) assert.deepEqual( jx('\n 1\n 2\n'), - { - name: 'root', - children: [ - '\n ', - { name: 'a', children: ['1'] }, - '\n ', - { name: 'b', children: ['2'] }, - '\n', - ], - }, + elem('root', [ + '\n ', + elem('a', ['1']), + '\n ', + elem('b', ['2']), + '\n', + ]), ) }) test('preserves-whitespace-text', () => { const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('

hello world

'), { - name: 'p', - children: [' hello world '], + assert.deepEqual( + jx('

hello world

'), + elem('p', [' hello world ']), + ) + }) + + test('attributes-basic', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(''), + elem('a', [], { x: '1' }), + ) + assert.deepEqual( + jx(''), + elem('a', [], { x: '1', y: '2' }), + ) + assert.deepEqual( + jx('text'), + elem('a', ['text'], { x: 'hello world' }), + ) + }) + + test('attributes-single-quote', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(``), + elem('a', [], { x: 'value' }), + ) + assert.deepEqual( + jx(``), + elem('a', [], { x: 'it says "hi"' }), + ) + }) + + test('attributes-spacing-variants', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(''), + elem('a', [], { x: '1', y: '2' }), + ) + assert.deepEqual( + jx(''), + elem('a', [], { x: '1', y: '2' }), + ) + }) + + test('attributes-with-dashes-and-dots', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(''), + elem('a', [], { 'data-x': '1', 'v.2': 'ok' }), + ) + }) + + test('entities-predefined-in-text', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx('&<>"''), + elem('a', [`&<>"'`]), + ) + assert.deepEqual( + jx('Tom & Jerry'), + elem('a', ['Tom & Jerry']), + ) + }) + + test('entities-numeric-references', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx('AB'), elem('a', ['AB'])) + assert.deepEqual(jx('AB'), elem('a', ['AB'])) + assert.deepEqual(jx('😀'), elem('a', ['\u{1F600}'])) + }) + + test('entities-in-attribute-values', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(''), + elem('a', [], { title: 'Tom & Jerry' }), + ) + assert.deepEqual( + jx(''), + elem('a', [], { v: 'AB' }), + ) + }) + + test('entities-unknown-passthrough', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx('&unknown;'), + elem('a', ['&unknown;']), + ) + }) + + test('entities-custom', () => { + const jx = Jsonic.make().use(Xml, { + customEntities: { nbsp: '\u00a0', copy: '\u00a9' }, }) + assert.deepEqual( + jx('© 2025 all 
rights'), + elem('a', ['\u00a9 2025\u00a0all rights']), + ) + }) + + test('entities-disabled', () => { + const jx = Jsonic.make().use(Xml, { entities: false }) + assert.deepEqual(jx('&'), elem('a', ['&'])) + }) + + test('comments-ignored', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual(jx(''), elem('a')) + assert.deepEqual( + jx('hello'), + elem('a', ['hello']), + ) + assert.deepEqual( + jx(''), + elem('a', [elem('b')]), + ) + }) + + test('processing-instructions-ignored', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(''), + elem('a'), + ) + assert.deepEqual( + jx(''), + elem('root'), + ) + }) + + test('doctype-ignored', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(''), + elem('html'), + ) + assert.deepEqual( + jx( + 'hi', + ), + elem('note', [elem('body', ['hi'])]), + ) + }) + + test('doctype-with-internal-subset', () => { + const jx = Jsonic.make().use(Xml) + assert.deepEqual( + jx(']>'), + elem('a'), + ) }) - test('deeply-nested-and-siblings', () => { + test('cdata-section', () => { const jx = Jsonic.make().use(Xml) assert.deepEqual( - jx('x'), - { - name: 'a', - children: [ - { - name: 'b', - children: [ - { name: 'c', children: [] }, - { name: 'd', children: ['x'] }, - ], - }, - { name: 'e', children: [] }, - ], - }, + jx(' & raw text]]>'), + elem('a', [' & raw text']), ) }) + + test('namespaces-default', () => { + const jx = Jsonic.make().use(Xml) + const result = jx('') + assert.deepEqual(result, { + name: 'a', + localName: 'a', + namespace: 'http://example.com', + attributes: { xmlns: 'http://example.com' }, + children: [ + { + name: 'b', + localName: 'b', + namespace: 'http://example.com', + attributes: {}, + children: [], + }, + ], + }) + }) + + test('namespaces-prefixed', () => { + const jx = Jsonic.make().use(Xml) + const result = jx( + 'body', + ) + assert.deepEqual(result, { + name: 'root', + localName: 'root', + attributes: { 'xmlns:x': 'http://x.example' }, + children: [ + { + name: 
'x:a', + prefix: 'x', + localName: 'a', + namespace: 'http://x.example', + attributes: { 'x:k': 'v' }, + children: ['body'], + }, + ], + }) + }) + + test('namespaces-inherited-scope', () => { + const jx = Jsonic.make().use(Xml) + const result = jx( + '', + ) + assert.equal(result.children[0].namespace, 'http://p.example') + assert.equal(result.children[0].children[0].namespace, 'http://p.example') + }) + + test('namespaces-override-in-child', () => { + const jx = Jsonic.make().use(Xml) + const result = jx( + '', + ) + assert.equal(result.namespace, 'A') + assert.equal(result.children[0].namespace, 'B') + assert.equal(result.children[0].children[0].namespace, 'B') + assert.equal(result.children[1].namespace, 'A') + }) + + test('namespaces-disabled', () => { + const jx = Jsonic.make().use(Xml, { namespaces: false }) + const result = jx('') + assert.equal(result.namespace, undefined) + assert.equal(result.prefix, undefined) + }) + + test('full-document', () => { + const jx = Jsonic.make().use(Xml) + const src = ` + + + + Tove + Jani + Reminder + Don't forget me this weekend! & cheers + ]]> +` + const result = jx(src) + assert.equal(result.name, 'note') + assert.equal(result.attributes.lang, 'en') + const childElems = result.children.filter((c: any) => 'object' === typeof c) + assert.equal(childElems.length, 5) + assert.equal(childElems[0].name, 'to') + assert.equal(childElems[0].children[0], 'Tove') + assert.equal(childElems[3].children[0], "Don't forget me this weekend! 
& cheers") + assert.equal(childElems[4].children[0], '') + }) }) diff --git a/xml-grammar.jsonic b/xml-grammar.jsonic index 03edbec..f15d151 100644 --- a/xml-grammar.jsonic +++ b/xml-grammar.jsonic @@ -1,19 +1,25 @@ -# XML Grammar Definition (simple element-only version) +# XML Grammar Definition (elements + attributes + mixed content) # Parsed by a standard Jsonic instance and passed to jsonic.grammar() # Function references (@ prefixed) are resolved against the refs map # # Token naming: -# #XOP - XML open tag, e.g. +# #XOP - XML open tag, e.g. # #XCL - XML close tag, e.g. -# #XSC - XML self-close tag, e.g. -# #TX - text content between tags +# #XSC - XML self-close tag, e.g. +# #XIG - comment / processing instruction / DOCTYPE (ignored) +# #TX - text content between tags (CDATA included) # #ZZ - end of input { rule: xml: open: [ { s: '#ZZ' } + { s: '#TX' r: xml } { p: element } ] + rule: xml: close: [ + { s: '#ZZ' } + { s: '#TX' r: xml } + ] rule: element: open: [ { s: '#XSC' a: '@element-selfclose' u: { selfclose: 1 } } From af94f69d5759890233b5abb9e28e0a69dd40b2a9 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 18 Apr 2026 23:48:22 +0000 Subject: [PATCH 03/15] Add Go port, shared TSV specs, and Jsonic-embedded-XML test - go/xml.go: Go port of the XML plugin, with the same data shape and feature set as the TypeScript version (elements + attributes + mixed content, predefined + numeric entity decoding, namespace resolution, comments/CDATA/PI/DOCTYPE handling). Goes through jsonic/go's declarative GrammarSpec with auto-wired @xml-bc / @child-bc state actions. - go/xml_test.go: Go test suite driven by the shared TSV spec files plus an explicit jsonic-embedded-XML test case. - test/spec/*.tsv: shared parse fixtures with four columns (name, input, expected, opts). Input uses escape sequences (\n \r \t \\); expected is raw JSON or `ERROR`/`ERROR:code`; opts is optional plugin options JSON. 
Splits cases across basic, attributes, entities, namespaces, structure, errors, and a w3c spec of standardised/real-world XML documents (Atom, SOAP, SVG, RSS, XHTML, DOCTYPEs, not-well-formed). - test/xml.test.ts: the TypeScript test suite now auto-discovers and runs every TSV spec file, and adds the jsonic-embedded-XML test case. - Remove the leftover CSV Go package, CSV docs, CSV fixtures, and coverage artifact from the original repo layout. --- coverage/lcov.info | 340 ------- doc/csv-go.md | 264 ------ doc/csv-ts.md | 286 ------ go/csv.go | 780 ---------------- go/csv_test.go | 527 ----------- go/go.mod | 2 +- go/xml.go | 630 +++++++++++++ go/xml_test.go | 289 ++++++ test/fixtures/basic-array.json | 1 - test/fixtures/basic-noheader-names.json | 1 - test/fixtures/basic-noheader.json | 1 - test/fixtures/basic.csv | 3 - test/fixtures/basic.json | 1 - test/fixtures/comment-empty.csv | 8 - test/fixtures/comment-empty.json | 1 - test/fixtures/comment-inline.csv | 3 - test/fixtures/comment-inline.json | 1 - test/fixtures/comment-line.csv | 5 - test/fixtures/comment-line.json | 1 - test/fixtures/crlf.csv | 3 - test/fixtures/crlf.json | 1 - test/fixtures/empty-fields.csv | 5 - test/fixtures/empty-fields.json | 1 - test/fixtures/empty-records-default.json | 1 - test/fixtures/empty-records.csv | 8 - test/fixtures/empty-records.json | 1 - test/fixtures/happy.csv | 3 - test/fixtures/happy.json | 12 - test/fixtures/leading-newline.csv | 3 - test/fixtures/leading-newline.json | 1 - test/fixtures/manifest.json | 872 ------------------ test/fixtures/multi-char-separator.csv | 3 - test/fixtures/multi-char-separator.json | 1 - test/fixtures/multirow.csv | 4 - test/fixtures/multirow.json | 1 - test/fixtures/notrim.csv | 5 - test/fixtures/notrim.json | 22 - test/fixtures/number.csv | 3 - test/fixtures/number.json | 1 - test/fixtures/papa-ascii-30-delimiter.csv | 2 - test/fixtures/papa-ascii-30-delimiter.json | 12 - test/fixtures/papa-ascii-31-delimiter.csv | 2 - 
test/fixtures/papa-ascii-31-delimiter.json | 12 - .../papa-bad-comments-value-specified.csv | 3 - .../papa-bad-comments-value-specified.json | 15 - ...r-inside-quotes-with-line-feed-endings.csv | 6 - ...-inside-quotes-with-line-feed-endings.json | 22 - ...apa-comment-with-non-default-character.csv | 3 - ...pa-comment-with-non-default-character.json | 12 - .../papa-commented-line-at-beginning.csv | 2 - .../papa-commented-line-at-beginning.json | 7 - test/fixtures/papa-commented-line-at-end.csv | 2 - test/fixtures/papa-commented-line-at-end.json | 7 - .../papa-commented-line-in-middle.csv | 3 - .../papa-commented-line-in-middle.json | 12 - ...apa-custom-escape-character-at-the-end.csv | 1 - ...pa-custom-escape-character-at-the-end.json | 7 - ...-custom-escape-character-in-the-middle.csv | 1 - ...custom-escape-character-in-the-middle.json | 7 - ...namic-typing-converts-boolean-literals.csv | 1 - ...amic-typing-converts-boolean-literals.json | 12 - ...mic-typing-doesn-t-convert-other-types.csv | 3 - ...ic-typing-doesn-t-convert-other-types.json | 17 - test/fixtures/papa-empty-input-string-2.csv | 0 test/fixtures/papa-empty-input-string-2.json | 1 - test/fixtures/papa-empty-input-string.csv | 0 test/fixtures/papa-empty-input-string.json | 1 - test/fixtures/papa-empty-lines.csv | 5 - test/fixtures/papa-empty-lines.json | 15 - ...apa-empty-quoted-field-at-eof-is-empty.csv | 2 - ...pa-empty-quoted-field-at-eof-is-empty.json | 12 - .../papa-entire-file-is-comment-lines.csv | 3 - .../papa-entire-file-is-comment-lines.json | 1 - .../papa-first-field-of-a-line-is-empty.csv | 2 - .../papa-first-field-of-a-line-is-empty.json | 12 - test/fixtures/papa-header-row-only.csv | 1 - test/fixtures/papa-header-row-only.json | 1 - .../papa-header-row-with-one-row-of-data.csv | 2 - .../papa-header-row-with-one-row-of-data.json | 7 - ...papa-header-row-with-preceding-comment.csv | 3 - ...apa-header-row-with-preceding-comment.json | 6 - ...nput-is-just-a-string-a-single-field-2.csv | 1 - 
...put-is-just-a-string-a-single-field-2.json | 5 - ...-input-is-just-a-string-a-single-field.csv | 1 - ...input-is-just-a-string-a-single-field.json | 5 - .../papa-input-is-just-empty-fields.csv | 2 - .../papa-input-is-just-empty-fields.json | 13 - ...is-just-the-delimiter-2-empty-fields-2.csv | 1 - ...s-just-the-delimiter-2-empty-fields-2.json | 6 - ...t-is-just-the-delimiter-2-empty-fields.csv | 1 - ...-is-just-the-delimiter-2-empty-fields.json | 6 - ...ommented-line-without-comments-enabled.csv | 1 - ...mmented-line-without-comments-enabled.json | 5 - .../papa-input-with-only-a-commented-line.csv | 1 - ...papa-input-with-only-a-commented-line.json | 1 - ...nts-with-line-starting-with-whitespace.csv | 3 - ...ts-with-line-starting-with-whitespace.json | 11 - .../papa-jsonic-blank-line-at-beginning.csv | 3 - .../papa-jsonic-blank-line-at-beginning.json | 13 - ...c-blank-line-in-middle-with-whitespace.csv | 3 - ...-blank-line-in-middle-with-whitespace.json | 15 - .../papa-jsonic-blank-line-in-middle.csv | 3 - .../papa-jsonic-blank-line-in-middle.json | 13 - .../papa-jsonic-blank-lines-at-end.csv | 3 - .../papa-jsonic-blank-lines-at-end.json | 13 - ...-a-commented-line-and-blank-line-after.csv | 1 - ...a-commented-line-and-blank-line-after.json | 1 - ...nic-one-column-input-with-empty-fields.csv | 7 - ...ic-one-column-input-with-empty-fields.json | 19 - .../papa-last-field-of-a-line-is-empty.csv | 2 - .../papa-last-field-of-a-line-is-empty.json | 12 - ...ld-first-field-of-next-line-is-empty-n.csv | 4 - ...d-first-field-of-next-line-is-empty-n.json | 22 - .../papa-line-ends-with-quoted-field.csv | 4 - .../papa-line-ends-with-quoted-field.json | 22 - .../papa-line-starts-with-quoted-field.csv | 2 - .../papa-line-starts-with-quoted-field.json | 12 - ...-line-starts-with-unquoted-empty-field.csv | 2 - ...line-starts-with-unquoted-empty-field.json | 12 - ...d-quotes-in-data-not-as-opening-quotes.csv | 1 - ...-quotes-in-data-not-as-opening-quotes.json | 7 - 
...es-in-data-twice-not-as-opening-quotes.csv | 2 - ...s-in-data-twice-not-as-opening-quotes.json | 12 - .../papa-multi-character-comment-string.csv | 3 - .../papa-multi-character-comment-string.json | 12 - ...r-delimiter-length-2-with-quoted-field.csv | 1 - ...-delimiter-length-2-with-quoted-field.json | 8 - .../papa-multi-character-delimiter.csv | 1 - .../papa-multi-character-delimiter.json | 7 - ...papa-multiple-consecutive-empty-fields.csv | 2 - ...apa-multiple-consecutive-empty-fields.json | 18 - ...ple-rows-one-column-no-delimiter-found.csv | 5 - ...le-rows-one-column-no-delimiter-found.json | 17 - test/fixtures/papa-one-row.csv | 1 - test/fixtures/papa-one-row.json | 7 - test/fixtures/papa-other-fields-are-empty.csv | 2 - .../fixtures/papa-other-fields-are-empty.json | 12 - test/fixtures/papa-pipe-delimiter.csv | 2 - test/fixtures/papa-pipe-delimiter.json | 12 - ...t-end-of-row-but-not-at-eof-has-quotes.csv | 2 - ...-end-of-row-but-not-at-eof-has-quotes.json | 12 - ...miters-and-n-with-valid-trailing-quote.csv | 2 - ...iters-and-n-with-valid-trailing-quote.json | 6 - ...te-after-delimiter-with-a-valid-closer.csv | 2 - ...invalid-trailing-quote-after-delimiter.csv | 2 - ...-invalid-trailing-quote-after-new-line.csv | 2 - ...nvalid-trailing-quote-before-delimiter.csv | 2 - ...papa-quoted-field-has-no-closing-quote.csv | 2 - ...has-valid-trailing-quote-via-delimiter.csv | 2 - ...as-valid-trailing-quote-via-delimiter.json | 12 - ...field-has-valid-trailing-quote-via-eof.csv | 2 - ...ield-has-valid-trailing-quote-via-eof.json | 12 - ...d-field-has-valid-trailing-quote-via-n.csv | 2 - ...-field-has-valid-trailing-quote-via-n.json | 12 - ...-in-a-row-and-a-delimiter-in-there-too.csv | 1 - ...in-a-row-and-a-delimiter-in-there-too.json | 7 - .../papa-quoted-field-with-delimiter.csv | 1 - .../papa-quoted-field-with-delimiter.json | 7 - ...ield-with-escaped-quotes-at-boundaries.csv | 1 - ...eld-with-escaped-quotes-at-boundaries.json | 7 - 
.../papa-quoted-field-with-escaped-quotes.csv | 1 - ...papa-quoted-field-with-escaped-quotes.json | 7 - ...d-field-with-extra-whitespace-on-edges.csv | 1 - ...-field-with-extra-whitespace-on-edges.json | 7 - .../papa-quoted-field-with-line-break.csv | 2 - .../papa-quoted-field-with-line-break.json | 7 - test/fixtures/papa-quoted-field-with-n.csv | 2 - test/fixtures/papa-quoted-field-with-n.json | 7 - ...ted-field-with-quotes-around-delimiter.csv | 1 - ...ed-field-with-quotes-around-delimiter.json | 7 - ...-with-quotes-on-left-side-of-delimiter.csv | 1 - ...with-quotes-on-left-side-of-delimiter.json | 7 - ...with-quotes-on-right-side-of-delimiter.csv | 1 - ...ith-quotes-on-right-side-of-delimiter.json | 7 - test/fixtures/papa-quoted-field-with-r-n.csv | 2 - test/fixtures/papa-quoted-field-with-r-n.json | 7 - test/fixtures/papa-quoted-field-with-r.csv | 1 - test/fixtures/papa-quoted-field-with-r.json | 7 - ...ed-field-with-whitespace-around-quotes.csv | 1 - ...d-field-with-whitespace-around-quotes.json | 7 - test/fixtures/papa-quoted-field.csv | 1 - test/fixtures/papa-quoted-field.json | 7 - ...d-of-row-with-delimiter-and-line-break.csv | 3 - ...-of-row-with-delimiter-and-line-break.json | 12 - .../papa-quoted-fields-with-line-breaks.csv | 4 - .../papa-quoted-fields-with-line-breaks.json | 7 - ...fields-with-spaces-after-closing-quote.csv | 3 - ...ields-with-spaces-after-closing-quote.json | 20 - ...quote-and-newline-and-contains-newline.csv | 3 - ...uote-and-newline-and-contains-newline.json | 12 - ...-next-delimiter-and-contains-delimiter.csv | 2 - ...next-delimiter-and-contains-delimiter.json | 14 - ...tween-closing-quote-and-next-delimiter.csv | 2 - ...ween-closing-quote-and-next-delimiter.json | 14 - ...etween-closing-quote-and-next-new-line.csv | 3 - ...tween-closing-quote-and-next-new-line.json | 20 - ...h-enough-fields-but-blank-field-at-end.csv | 2 - ...-enough-fields-but-blank-field-at-end.json | 7 - ...nk-field-in-the-begining-using-headers.csv | 3 - 
...k-field-in-the-begining-using-headers.json | 12 - ...fields-but-blank-field-in-the-begining.csv | 3 - ...ields-but-blank-field-in-the-begining.json | 17 - .../fixtures/papa-row-with-too-few-fields.csv | 2 - .../papa-row-with-too-many-fields.csv | 3 - .../papa-single-quote-as-quote-character.csv | 1 - .../papa-single-quote-as-quote-character.json | 7 - ...papa-skip-empty-lines-with-empty-input.csv | 0 ...apa-skip-empty-lines-with-empty-input.json | 1 - ...-lines-with-first-line-only-whitespace.csv | 2 - ...lines-with-first-line-only-whitespace.json | 10 - ...pty-lines-with-newline-at-end-of-input.csv | 3 - ...ty-lines-with-newline-at-end-of-input.json | 12 - test/fixtures/papa-skip-empty-lines.csv | 3 - test/fixtures/papa-skip-empty-lines.json | 12 - test/fixtures/papa-tab-delimiter.csv | 2 - test/fixtures/papa-tab-delimiter.json | 12 - ...nes-consecutively-at-beginning-of-file.csv | 4 - ...es-consecutively-at-beginning-of-file.json | 7 - test/fixtures/papa-three-rows.csv | 3 - test/fixtures/papa-three-rows.json | 17 - ...ent-lines-consecutively-at-end-of-file.csv | 3 - ...nt-lines-consecutively-at-end-of-file.json | 7 - .../papa-two-comment-lines-consecutively.csv | 4 - .../papa-two-comment-lines-consecutively.json | 12 - test/fixtures/papa-two-rows-just-r.csv | 1 - test/fixtures/papa-two-rows-just-r.json | 12 - test/fixtures/papa-two-rows-r-n.csv | 2 - test/fixtures/papa-two-rows-r-n.json | 12 - test/fixtures/papa-two-rows.csv | 2 - test/fixtures/papa-two-rows.json | 12 - ...oted-field-with-quotes-at-end-of-field.csv | 1 - ...ted-field-with-quotes-at-end-of-field.json | 7 - .../papa-using-n-endings-uses-n-linebreak.csv | 5 - ...papa-using-n-endings-uses-n-linebreak.json | 22 - ...h-r-n-in-header-field-uses-n-linebreak.csv | 6 - ...-r-n-in-header-field-uses-n-linebreak.json | 22 - ...a-using-r-n-endings-uses-r-n-linebreak.csv | 5 - ...-using-r-n-endings-uses-r-n-linebreak.json | 22 - ...h-n-in-header-field-uses-r-n-linebreak.csv | 6 - 
...-n-in-header-field-uses-r-n-linebreak.json | 22 - ...th-skip-empty-lines-uses-r-n-linebreak.csv | 6 - ...h-skip-empty-lines-uses-r-n-linebreak.json | 22 - ...r-n-in-header-field-uses-r-n-linebreak.csv | 6 - ...-n-in-header-field-uses-r-n-linebreak.json | 22 - ...ved-regex-character-as-quote-character.csv | 6 - ...ed-regex-character-as-quote-character.json | 22 - ...-whitespace-at-edges-of-unquoted-field.csv | 1 - ...whitespace-at-edges-of-unquoted-field.json | 7 - test/fixtures/pipe-separator.csv | 3 - test/fixtures/pipe-separator.json | 1 - test/fixtures/quote.csv | 3 - test/fixtures/quote.json | 12 - test/fixtures/quoted-escape.csv | 6 - test/fixtures/quoted-escape.json | 1 - test/fixtures/quoted-newline.csv | 5 - test/fixtures/quoted-newline.json | 1 - test/fixtures/quoted-simple.csv | 3 - test/fixtures/quoted-simple.json | 1 - test/fixtures/record-separator.csv | 1 - test/fixtures/record-separator.json | 1 - test/fixtures/trailing-newline.csv | 3 - test/fixtures/trailing-newline.json | 1 - test/fixtures/trim.csv | 3 - test/fixtures/trim.json | 22 - test/fixtures/value.csv | 3 - test/fixtures/value.json | 1 - test/spec/attributes.tsv | 15 + test/spec/basic.tsv | 27 + test/spec/entities.tsv | 21 + test/spec/errors.tsv | 10 + test/spec/namespaces.tsv | 10 + test/spec/structure.tsv | 16 + test/spec/w3c.tsv | 20 + test/xml.test.ts | 493 +++------- 274 files changed, 1186 insertions(+), 4950 deletions(-) delete mode 100644 coverage/lcov.info delete mode 100644 doc/csv-go.md delete mode 100644 doc/csv-ts.md delete mode 100644 go/csv.go delete mode 100644 go/csv_test.go create mode 100644 go/xml.go create mode 100644 go/xml_test.go delete mode 100644 test/fixtures/basic-array.json delete mode 100644 test/fixtures/basic-noheader-names.json delete mode 100644 test/fixtures/basic-noheader.json delete mode 100644 test/fixtures/basic.csv delete mode 100644 test/fixtures/basic.json delete mode 100644 test/fixtures/comment-empty.csv delete mode 100644 
test/fixtures/comment-empty.json delete mode 100644 test/fixtures/comment-inline.csv delete mode 100644 test/fixtures/comment-inline.json delete mode 100644 test/fixtures/comment-line.csv delete mode 100644 test/fixtures/comment-line.json delete mode 100644 test/fixtures/crlf.csv delete mode 100644 test/fixtures/crlf.json delete mode 100644 test/fixtures/empty-fields.csv delete mode 100644 test/fixtures/empty-fields.json delete mode 100644 test/fixtures/empty-records-default.json delete mode 100644 test/fixtures/empty-records.csv delete mode 100644 test/fixtures/empty-records.json delete mode 100644 test/fixtures/happy.csv delete mode 100644 test/fixtures/happy.json delete mode 100644 test/fixtures/leading-newline.csv delete mode 100644 test/fixtures/leading-newline.json delete mode 100644 test/fixtures/manifest.json delete mode 100644 test/fixtures/multi-char-separator.csv delete mode 100644 test/fixtures/multi-char-separator.json delete mode 100644 test/fixtures/multirow.csv delete mode 100644 test/fixtures/multirow.json delete mode 100644 test/fixtures/notrim.csv delete mode 100644 test/fixtures/notrim.json delete mode 100644 test/fixtures/number.csv delete mode 100644 test/fixtures/number.json delete mode 100644 test/fixtures/papa-ascii-30-delimiter.csv delete mode 100644 test/fixtures/papa-ascii-30-delimiter.json delete mode 100644 test/fixtures/papa-ascii-31-delimiter.csv delete mode 100644 test/fixtures/papa-ascii-31-delimiter.json delete mode 100644 test/fixtures/papa-bad-comments-value-specified.csv delete mode 100644 test/fixtures/papa-bad-comments-value-specified.json delete mode 100644 test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.csv delete mode 100644 test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.json delete mode 100644 test/fixtures/papa-comment-with-non-default-character.csv delete mode 100644 test/fixtures/papa-comment-with-non-default-character.json delete mode 100644 
test/fixtures/papa-commented-line-at-beginning.csv delete mode 100644 test/fixtures/papa-commented-line-at-beginning.json delete mode 100644 test/fixtures/papa-commented-line-at-end.csv delete mode 100644 test/fixtures/papa-commented-line-at-end.json delete mode 100644 test/fixtures/papa-commented-line-in-middle.csv delete mode 100644 test/fixtures/papa-commented-line-in-middle.json delete mode 100644 test/fixtures/papa-custom-escape-character-at-the-end.csv delete mode 100644 test/fixtures/papa-custom-escape-character-at-the-end.json delete mode 100644 test/fixtures/papa-custom-escape-character-in-the-middle.csv delete mode 100644 test/fixtures/papa-custom-escape-character-in-the-middle.json delete mode 100644 test/fixtures/papa-dynamic-typing-converts-boolean-literals.csv delete mode 100644 test/fixtures/papa-dynamic-typing-converts-boolean-literals.json delete mode 100644 test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.csv delete mode 100644 test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.json delete mode 100644 test/fixtures/papa-empty-input-string-2.csv delete mode 100644 test/fixtures/papa-empty-input-string-2.json delete mode 100644 test/fixtures/papa-empty-input-string.csv delete mode 100644 test/fixtures/papa-empty-input-string.json delete mode 100644 test/fixtures/papa-empty-lines.csv delete mode 100644 test/fixtures/papa-empty-lines.json delete mode 100644 test/fixtures/papa-empty-quoted-field-at-eof-is-empty.csv delete mode 100644 test/fixtures/papa-empty-quoted-field-at-eof-is-empty.json delete mode 100644 test/fixtures/papa-entire-file-is-comment-lines.csv delete mode 100644 test/fixtures/papa-entire-file-is-comment-lines.json delete mode 100644 test/fixtures/papa-first-field-of-a-line-is-empty.csv delete mode 100644 test/fixtures/papa-first-field-of-a-line-is-empty.json delete mode 100644 test/fixtures/papa-header-row-only.csv delete mode 100644 test/fixtures/papa-header-row-only.json delete mode 100644 
test/fixtures/papa-header-row-with-one-row-of-data.csv delete mode 100644 test/fixtures/papa-header-row-with-one-row-of-data.json delete mode 100644 test/fixtures/papa-header-row-with-preceding-comment.csv delete mode 100644 test/fixtures/papa-header-row-with-preceding-comment.json delete mode 100644 test/fixtures/papa-input-is-just-a-string-a-single-field-2.csv delete mode 100644 test/fixtures/papa-input-is-just-a-string-a-single-field-2.json delete mode 100644 test/fixtures/papa-input-is-just-a-string-a-single-field.csv delete mode 100644 test/fixtures/papa-input-is-just-a-string-a-single-field.json delete mode 100644 test/fixtures/papa-input-is-just-empty-fields.csv delete mode 100644 test/fixtures/papa-input-is-just-empty-fields.json delete mode 100644 test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.csv delete mode 100644 test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.json delete mode 100644 test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.csv delete mode 100644 test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.json delete mode 100644 test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.csv delete mode 100644 test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.json delete mode 100644 test/fixtures/papa-input-with-only-a-commented-line.csv delete mode 100644 test/fixtures/papa-input-with-only-a-commented-line.json delete mode 100644 test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.csv delete mode 100644 test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.json delete mode 100644 test/fixtures/papa-jsonic-blank-line-at-beginning.csv delete mode 100644 test/fixtures/papa-jsonic-blank-line-at-beginning.json delete mode 100644 test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.csv delete mode 100644 test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.json delete mode 100644 
test/fixtures/papa-jsonic-blank-line-in-middle.csv delete mode 100644 test/fixtures/papa-jsonic-blank-line-in-middle.json delete mode 100644 test/fixtures/papa-jsonic-blank-lines-at-end.csv delete mode 100644 test/fixtures/papa-jsonic-blank-lines-at-end.json delete mode 100644 test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.csv delete mode 100644 test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.json delete mode 100644 test/fixtures/papa-jsonic-one-column-input-with-empty-fields.csv delete mode 100644 test/fixtures/papa-jsonic-one-column-input-with-empty-fields.json delete mode 100644 test/fixtures/papa-last-field-of-a-line-is-empty.csv delete mode 100644 test/fixtures/papa-last-field-of-a-line-is-empty.json delete mode 100644 test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.csv delete mode 100644 test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.json delete mode 100644 test/fixtures/papa-line-ends-with-quoted-field.csv delete mode 100644 test/fixtures/papa-line-ends-with-quoted-field.json delete mode 100644 test/fixtures/papa-line-starts-with-quoted-field.csv delete mode 100644 test/fixtures/papa-line-starts-with-quoted-field.json delete mode 100644 test/fixtures/papa-line-starts-with-unquoted-empty-field.csv delete mode 100644 test/fixtures/papa-line-starts-with-unquoted-empty-field.json delete mode 100644 test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.csv delete mode 100644 test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.json delete mode 100644 test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.csv delete mode 100644 test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.json delete mode 100644 test/fixtures/papa-multi-character-comment-string.csv delete mode 100644 test/fixtures/papa-multi-character-comment-string.json delete mode 100644 
test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.csv delete mode 100644 test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.json delete mode 100644 test/fixtures/papa-multi-character-delimiter.csv delete mode 100644 test/fixtures/papa-multi-character-delimiter.json delete mode 100644 test/fixtures/papa-multiple-consecutive-empty-fields.csv delete mode 100644 test/fixtures/papa-multiple-consecutive-empty-fields.json delete mode 100644 test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.csv delete mode 100644 test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.json delete mode 100644 test/fixtures/papa-one-row.csv delete mode 100644 test/fixtures/papa-one-row.json delete mode 100644 test/fixtures/papa-other-fields-are-empty.csv delete mode 100644 test/fixtures/papa-other-fields-are-empty.json delete mode 100644 test/fixtures/papa-pipe-delimiter.csv delete mode 100644 test/fixtures/papa-pipe-delimiter.json delete mode 100644 test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.csv delete mode 100644 test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.json delete mode 100644 test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.csv delete mode 100644 test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.json delete mode 100644 test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer.csv delete mode 100644 test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-new-line.csv delete mode 100644 test/fixtures/papa-quoted-field-has-invalid-trailing-quote-before-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-field-has-no-closing-quote.csv delete mode 100644 test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.csv delete mode 100644 
test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.json delete mode 100644 test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.csv delete mode 100644 test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.json delete mode 100644 test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.csv delete mode 100644 test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.json delete mode 100644 test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.csv delete mode 100644 test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.json delete mode 100644 test/fixtures/papa-quoted-field-with-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-field-with-delimiter.json delete mode 100644 test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.csv delete mode 100644 test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.json delete mode 100644 test/fixtures/papa-quoted-field-with-escaped-quotes.csv delete mode 100644 test/fixtures/papa-quoted-field-with-escaped-quotes.json delete mode 100644 test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.csv delete mode 100644 test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.json delete mode 100644 test/fixtures/papa-quoted-field-with-line-break.csv delete mode 100644 test/fixtures/papa-quoted-field-with-line-break.json delete mode 100644 test/fixtures/papa-quoted-field-with-n.csv delete mode 100644 test/fixtures/papa-quoted-field-with-n.json delete mode 100644 test/fixtures/papa-quoted-field-with-quotes-around-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-field-with-quotes-around-delimiter.json delete mode 100644 test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.json delete mode 100644 
test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.json delete mode 100644 test/fixtures/papa-quoted-field-with-r-n.csv delete mode 100644 test/fixtures/papa-quoted-field-with-r-n.json delete mode 100644 test/fixtures/papa-quoted-field-with-r.csv delete mode 100644 test/fixtures/papa-quoted-field-with-r.json delete mode 100644 test/fixtures/papa-quoted-field-with-whitespace-around-quotes.csv delete mode 100644 test/fixtures/papa-quoted-field-with-whitespace-around-quotes.json delete mode 100644 test/fixtures/papa-quoted-field.csv delete mode 100644 test/fixtures/papa-quoted-field.json delete mode 100644 test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.csv delete mode 100644 test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.json delete mode 100644 test/fixtures/papa-quoted-fields-with-line-breaks.csv delete mode 100644 test/fixtures/papa-quoted-fields-with-line-breaks.json delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.csv delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.json delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.csv delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.json delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.json delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.csv delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.json delete mode 100644 
test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.csv delete mode 100644 test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.json delete mode 100644 test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.csv delete mode 100644 test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.json delete mode 100644 test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.csv delete mode 100644 test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.json delete mode 100644 test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.csv delete mode 100644 test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.json delete mode 100644 test/fixtures/papa-row-with-too-few-fields.csv delete mode 100644 test/fixtures/papa-row-with-too-many-fields.csv delete mode 100644 test/fixtures/papa-single-quote-as-quote-character.csv delete mode 100644 test/fixtures/papa-single-quote-as-quote-character.json delete mode 100644 test/fixtures/papa-skip-empty-lines-with-empty-input.csv delete mode 100644 test/fixtures/papa-skip-empty-lines-with-empty-input.json delete mode 100644 test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.csv delete mode 100644 test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.json delete mode 100644 test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.csv delete mode 100644 test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.json delete mode 100644 test/fixtures/papa-skip-empty-lines.csv delete mode 100644 test/fixtures/papa-skip-empty-lines.json delete mode 100644 test/fixtures/papa-tab-delimiter.csv delete mode 100644 test/fixtures/papa-tab-delimiter.json delete mode 100644 test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.csv delete mode 100644 
test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.json delete mode 100644 test/fixtures/papa-three-rows.csv delete mode 100644 test/fixtures/papa-three-rows.json delete mode 100644 test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.csv delete mode 100644 test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.json delete mode 100644 test/fixtures/papa-two-comment-lines-consecutively.csv delete mode 100644 test/fixtures/papa-two-comment-lines-consecutively.json delete mode 100644 test/fixtures/papa-two-rows-just-r.csv delete mode 100644 test/fixtures/papa-two-rows-just-r.json delete mode 100644 test/fixtures/papa-two-rows-r-n.csv delete mode 100644 test/fixtures/papa-two-rows-r-n.json delete mode 100644 test/fixtures/papa-two-rows.csv delete mode 100644 test/fixtures/papa-two-rows.json delete mode 100644 test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.csv delete mode 100644 test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.json delete mode 100644 test/fixtures/papa-using-n-endings-uses-n-linebreak.csv delete mode 100644 test/fixtures/papa-using-n-endings-uses-n-linebreak.json delete mode 100644 test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.csv delete mode 100644 test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.json delete mode 100644 test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.csv delete mode 100644 test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.json delete mode 100644 test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.csv delete mode 100644 test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.json delete mode 100644 test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.csv delete mode 100644 test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.json delete mode 100644 
test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.csv delete mode 100644 test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.json delete mode 100644 test/fixtures/papa-using-reserved-regex-character-as-quote-character.csv delete mode 100644 test/fixtures/papa-using-reserved-regex-character-as-quote-character.json delete mode 100644 test/fixtures/papa-whitespace-at-edges-of-unquoted-field.csv delete mode 100644 test/fixtures/papa-whitespace-at-edges-of-unquoted-field.json delete mode 100644 test/fixtures/pipe-separator.csv delete mode 100644 test/fixtures/pipe-separator.json delete mode 100644 test/fixtures/quote.csv delete mode 100644 test/fixtures/quote.json delete mode 100644 test/fixtures/quoted-escape.csv delete mode 100644 test/fixtures/quoted-escape.json delete mode 100644 test/fixtures/quoted-newline.csv delete mode 100644 test/fixtures/quoted-newline.json delete mode 100644 test/fixtures/quoted-simple.csv delete mode 100644 test/fixtures/quoted-simple.json delete mode 100644 test/fixtures/record-separator.csv delete mode 100644 test/fixtures/record-separator.json delete mode 100644 test/fixtures/trailing-newline.csv delete mode 100644 test/fixtures/trailing-newline.json delete mode 100644 test/fixtures/trim.csv delete mode 100644 test/fixtures/trim.json delete mode 100644 test/fixtures/value.csv delete mode 100644 test/fixtures/value.json create mode 100644 test/spec/attributes.tsv create mode 100644 test/spec/basic.tsv create mode 100644 test/spec/entities.tsv create mode 100644 test/spec/errors.tsv create mode 100644 test/spec/namespaces.tsv create mode 100644 test/spec/structure.tsv create mode 100644 test/spec/w3c.tsv diff --git a/coverage/lcov.info b/coverage/lcov.info deleted file mode 100644 index b0b03ab..0000000 --- a/coverage/lcov.info +++ /dev/null @@ -1,340 +0,0 @@ -TN: -SF:csv.js -FN:7,(anonymous_0) -FN:60,(anonymous_1) -FN:137,(anonymous_2) -FN:138,(anonymous_3) -FN:154,(anonymous_4) 
-FN:160,(anonymous_5) -FN:175,(anonymous_6) -FN:188,(anonymous_7) -FN:243,(anonymous_8) -FN:255,(anonymous_9) -FN:262,(anonymous_10) -FN:273,(anonymous_11) -FN:279,(anonymous_12) -FN:290,(anonymous_13) -FN:301,(anonymous_14) -FN:312,(anonymous_15) -FN:326,(anonymous_16) -FN:337,(anonymous_17) -FN:350,(anonymous_18) -FN:359,buildCsvStringMatcher -FN:360,makeCsvStringMatcher -FN:361,csvStringMatcher -FNF:22 -FNH:20 -FNDA:137,(anonymous_0) -FNDA:0,(anonymous_1) -FNDA:137,(anonymous_2) -FNDA:216,(anonymous_3) -FNDA:208,(anonymous_4) -FNDA:137,(anonymous_5) -FNDA:137,(anonymous_6) -FNDA:457,(anonymous_7) -FNDA:137,(anonymous_8) -FNDA:137,(anonymous_9) -FNDA:30,(anonymous_10) -FNDA:13,(anonymous_11) -FNDA:137,(anonymous_12) -FNDA:137,(anonymous_13) -FNDA:28,(anonymous_14) -FNDA:63,(anonymous_15) -FNDA:52,(anonymous_16) -FNDA:0,(anonymous_17) -FNDA:71,(anonymous_18) -FNDA:132,buildCsvStringMatcher -FNDA:396,makeCsvStringMatcher -FNDA:1488,csvStringMatcher -DA:3,1 -DA:4,1 -DA:5,1 -DA:7,1 -DA:10,137 -DA:11,137 -DA:12,137 -DA:14,137 -DA:15,137 -DA:16,137 -DA:17,137 -DA:18,137 -DA:19,137 -DA:21,137 -DA:22,134 -DA:24,132 -DA:32,134 -DA:38,3 -DA:40,0 -DA:48,3 -DA:49,3 -DA:50,3 -DA:51,3 -DA:52,3 -DA:57,137 -DA:58,1 -DA:59,1 -DA:60,1 -DA:61,0 -DA:62,0 -DA:65,0 -DA:69,137 -DA:70,137 -DA:72,134 -DA:81,137 -DA:82,8 -DA:85,137 -DA:87,137 -DA:134,137 -DA:135,137 -DA:137,137 -DA:138,137 -DA:139,216 -DA:140,216 -DA:141,216 -DA:155,208 -DA:157,137 -DA:160,137 -DA:161,137 -DA:175,137 -DA:176,137 -DA:190,457 -DA:192,457 -DA:193,107 -DA:197,350 -DA:199,350 -DA:200,137 -DA:201,137 -DA:202,137 -DA:203,135 -DA:204,2 -DA:205,2 -DA:210,133 -DA:211,133 -DA:212,234 -DA:215,133 -DA:218,135 -DA:219,12 -DA:220,12 -DA:223,135 -DA:227,213 -DA:228,537 -DA:232,348 -DA:233,3 -DA:236,345 -DA:239,455 -DA:241,137 -DA:243,137 -DA:244,137 -DA:255,137 -DA:256,137 -DA:263,30 -DA:264,30 -DA:273,13 -DA:279,137 -DA:280,137 -DA:290,137 -DA:291,137 -DA:303,28 -DA:304,28 -DA:314,63 -DA:315,63 -DA:328,52 -DA:329,52 
-DA:338,0 -DA:339,0 -DA:340,0 -DA:351,71 -DA:355,1 -DA:360,132 -DA:361,396 -DA:362,1488 -DA:363,1488 -DA:364,1488 -DA:365,1488 -DA:366,1488 -DA:367,102 -DA:368,102 -DA:369,102 -DA:370,102 -DA:371,102 -DA:372,102 -DA:374,102 -DA:375,291 -DA:376,291 -DA:378,291 -DA:379,148 -DA:380,148 -DA:381,148 -DA:382,47 -DA:385,101 -DA:390,143 -DA:392,143 -DA:393,143 -DA:394,143 -DA:395,263 -DA:396,263 -DA:398,143 -DA:399,143 -DA:400,28 -DA:401,22 -DA:403,28 -DA:404,28 -DA:406,115 -DA:407,0 -DA:408,0 -DA:409,0 -DA:412,115 -DA:413,115 -DA:417,102 -DA:418,1 -DA:419,1 -DA:421,101 -DA:424,101 -DA:425,101 -DA:426,101 -DA:427,101 -DA:433,1 -LF:148 -LH:138 -BRDA:18,0,0,0 -BRDA:18,0,1,137 -BRDA:18,1,0,137 -BRDA:18,1,1,137 -BRDA:21,2,0,134 -BRDA:21,2,1,3 -BRDA:22,3,0,132 -BRDA:22,3,1,2 -BRDA:38,4,0,0 -BRDA:38,4,1,3 -BRDA:48,5,0,3 -BRDA:48,5,1,0 -BRDA:49,6,0,3 -BRDA:49,6,1,0 -BRDA:50,7,0,3 -BRDA:50,7,1,0 -BRDA:51,8,0,3 -BRDA:51,8,1,0 -BRDA:57,9,0,1 -BRDA:57,9,1,136 -BRDA:70,10,0,134 -BRDA:70,10,1,3 -BRDA:81,11,0,8 -BRDA:81,11,1,129 -BRDA:98,12,0,134 -BRDA:98,12,1,3 -BRDA:117,13,0,136 -BRDA:117,13,1,1 -BRDA:120,14,0,136 -BRDA:120,14,1,1 -BRDA:140,15,0,216 -BRDA:140,15,1,1 -BRDA:148,16,0,129 -BRDA:148,16,1,8 -BRDA:155,17,0,208 -BRDA:155,17,1,1 -BRDA:186,18,0,8 -BRDA:186,18,1,129 -BRDA:190,19,0,457 -BRDA:190,19,1,313 -BRDA:192,20,0,107 -BRDA:192,20,1,350 -BRDA:192,21,0,457 -BRDA:192,21,1,201 -BRDA:193,22,0,0 -BRDA:193,22,1,107 -BRDA:197,23,0,350 -BRDA:197,23,1,0 -BRDA:199,24,0,137 -BRDA:199,24,1,213 -BRDA:202,25,0,135 -BRDA:202,25,1,2 -BRDA:203,26,0,2 -BRDA:203,26,1,133 -BRDA:204,27,0,2 -BRDA:204,27,1,0 -BRDA:205,28,0,1 -BRDA:205,28,1,1 -BRDA:213,29,0,6 -BRDA:213,29,1,228 -BRDA:221,30,0,0 -BRDA:221,30,1,12 -BRDA:229,31,0,0 -BRDA:229,31,1,537 -BRDA:232,32,0,3 -BRDA:232,32,1,345 -BRDA:303,33,0,28 -BRDA:303,33,1,0 -BRDA:304,34,0,28 -BRDA:304,34,1,0 -BRDA:314,35,0,41 -BRDA:314,35,1,22 -BRDA:316,36,0,41 -BRDA:316,36,1,22 -BRDA:317,37,0,45 -BRDA:317,37,1,18 -BRDA:317,38,0,63 -BRDA:317,38,1,41 
-BRDA:328,39,0,2 -BRDA:328,39,1,50 -BRDA:330,40,0,2 -BRDA:330,40,1,50 -BRDA:330,41,0,23 -BRDA:330,41,1,29 -BRDA:338,42,0,0 -BRDA:338,42,1,0 -BRDA:339,43,0,0 -BRDA:339,43,1,0 -BRDA:341,44,0,0 -BRDA:341,44,1,0 -BRDA:341,45,0,0 -BRDA:341,45,1,0 -BRDA:344,46,0,134 -BRDA:344,46,1,3 -BRDA:351,47,0,71 -BRDA:351,47,1,0 -BRDA:366,48,0,102 -BRDA:366,48,1,1386 -BRDA:378,49,0,148 -BRDA:378,49,1,143 -BRDA:381,50,0,47 -BRDA:381,50,1,101 -BRDA:394,51,0,406 -BRDA:394,51,1,405 -BRDA:394,51,2,377 -BRDA:399,52,0,28 -BRDA:399,52,1,115 -BRDA:400,53,0,22 -BRDA:400,53,1,6 -BRDA:406,54,0,0 -BRDA:406,54,1,115 -BRDA:417,55,0,1 -BRDA:417,55,1,101 -BRDA:417,56,0,102 -BRDA:417,56,1,101 -BRF:115 -BRH:92 -end_of_record -TN: -SF:test/csv-fixtures.js -FN:381,(anonymous_0) -FN:400,(anonymous_1) -FN:647,(anonymous_2) -FN:661,(anonymous_3) -FNF:4 -FNH:4 -FNDA:1,(anonymous_0) -FNDA:1,(anonymous_1) -FNDA:1,(anonymous_2) -FNDA:1,(anonymous_3) -DA:4,1 -DA:5,1 -DA:7,1 -DA:382,1 -DA:401,1 -DA:648,1 -DA:662,1 -LF:7 -LH:7 -BRF:0 -BRH:0 -end_of_record diff --git a/doc/csv-go.md b/doc/csv-go.md deleted file mode 100644 index 9c7322f..0000000 --- a/doc/csv-go.md +++ /dev/null @@ -1,264 +0,0 @@ -# CSV plugin for Jsonic (Go) - -A Jsonic syntax plugin that parses CSV text into Go slices of maps -or slices, with support for headers, quoted fields, custom -delimiters, streaming, and strict/non-strict modes. 
- -```bash -go get github.com/jsonicjs/csv/go@latest -``` - - -## Tutorials - -### Parse a basic CSV file - -Parse CSV text with a header row into a slice of ordered maps: - -```go -package main - -import ( - "fmt" - csv "github.com/jsonicjs/csv/go" -) - -func main() { - result, _ := csv.Parse("name,age\nAlice,30\nBob,25") - fmt.Println(result) - // [{name:Alice age:30} {name:Bob age:25}] -} -``` - -### Parse CSV without headers - -Return rows as slices instead of maps, with no header row: - -```go -result, _ := csv.Parse("a,b,c\n1,2,3", csv.CsvOptions{ - Header: boolPtr(false), - Object: boolPtr(false), -}) -// [[a b c] [1 2 3]] -``` - -### Parse CSV with quoted fields - -Double-quoted fields handle commas, newlines, and escaped quotes: - -```go -result, _ := csv.Parse(`name,bio -Alice,"Likes ""cats"" and dogs" -Bob,"Line1 -Line2"`) -// [{name:Alice bio:Likes "cats" and dogs} {name:Bob bio:Line1\nLine2}] -``` - - -## How-to guides - -### Use a custom field delimiter - -Set `Field.Separation` to use a delimiter other than comma: - -```go -result, _ := csv.Parse("name\tage\nAlice\t30", csv.CsvOptions{ - Field: &csv.FieldOptions{Separation: "\t"}, -}) -// [{name:Alice age:30}] -``` - -### Enable number and value parsing - -By default in strict mode, all values are strings. 
Enable `Number` -and `Value` to parse numeric and boolean values: - -```go -result, _ := csv.Parse("a,b,c\n1,true,null", csv.CsvOptions{ - Number: boolPtr(true), - Value: boolPtr(true), -}) -// [{a:1 b:true c:}] -``` - -### Trim whitespace from fields - -Enable `Trim` to remove leading and trailing whitespace from field -values: - -```go -result, _ := csv.Parse("a , b \n 1 , 2 ", csv.CsvOptions{ - Trim: boolPtr(true), -}) -// [{a:1 b:2}] -``` - -### Stream records as they are parsed - -Use the `Stream` callback to receive records one at a time: - -```go -var records []any - -result, _ := csv.Parse("a,b\n1,2\n3,4", csv.CsvOptions{ - Stream: func(what string, record any) { - if what == "record" { - records = append(records, record) - } - }, -}) -// result is [] (empty, records were streamed) -// records contains [{a:1 b:2}, {a:3 b:4}] -``` - -### Provide explicit field names - -Set `Field.Names` when the CSV has no header row but you want -map output with named fields: - -```go -result, _ := csv.Parse("1,2,3\n4,5,6", csv.CsvOptions{ - Header: boolPtr(false), - Field: &csv.FieldOptions{Names: []string{"x", "y", "z"}}, -}) -// [{x:1 y:2 z:3} {x:4 y:5 z:6}] -``` - -### Enforce exact field counts - -Set `Field.Exact` to error when a row has more or fewer fields -than the header: - -```go -_, err := csv.Parse("a,b\n1,2,3", csv.CsvOptions{ - Field: &csv.FieldOptions{Exact: true}, -}) -// err: unexpected extra field value -``` - -### Create a reusable parser - -Use `MakeJsonic` to create a configured Jsonic instance you can -call repeatedly: - -```go -j := csv.MakeJsonic(csv.CsvOptions{ - Number: boolPtr(true), -}) - -r1, _ := j.Parse("a,b\n1,2") -r2, _ := j.Parse("x,y\n3,4") -``` - -### Enable comment lines - -Enable `Comment` to skip lines starting with `#`: - -```go -result, _ := csv.Parse("a,b\n# skip\n1,2", csv.CsvOptions{ - Comment: boolPtr(true), -}) -// [{a:1 b:2}] -``` - - -## Explanation - -### Strict vs non-strict mode - -In **strict mode** (default), the CSV 
plugin disables Jsonic's -built-in JSON parsing. All field values are treated as raw strings -unless `Number` or `Value` options are enabled. This matches the -behaviour of standard CSV parsers. - -In **non-strict mode** (`Strict: boolPtr(false)`), the plugin -preserves Jsonic's ability to parse JSON values. Fields can contain -objects, arrays, booleans, numbers, and quoted strings using Jsonic -syntax. Non-strict mode enables `Trim`, `Comment`, and `Number` by -default. - -### How quoted fields work - -The plugin includes a custom CSV string matcher that handles the -RFC 4180 double-quote escaping convention: - -- A field wrapped in double quotes can contain commas, newlines, - and quotes. -- A literal quote inside a quoted field is represented as `""`. -- For example: `"a""b"` parses to `a"b`. - - -## Reference - -### `Parse` (Function) - -```go -func Parse(src string, opts ...CsvOptions) ([]any, error) -``` - -Parse CSV text with the given options. Returns a slice of records. - -### `MakeJsonic` (Function) - -```go -func MakeJsonic(opts ...CsvOptions) *jsonic.Jsonic -``` - -Create a reusable Jsonic instance configured for CSV parsing. - -### `CsvOptions` - -```go -type CsvOptions struct { - Object *bool // Return maps (true) or slices (false). Default: true - Header *bool // First row is header. Default: true - Trim *bool // Trim whitespace. Default: nil (false strict, true non-strict) - Comment *bool // Enable # comments. Default: nil (false strict, true non-strict) - Number *bool // Parse numbers. Default: nil (false strict, true non-strict) - Value *bool // Parse true/false/null. Default: nil - Strict *bool // Strict CSV mode. Default: true - Field *FieldOptions - Record *RecordOptions - String *StringOptions - Stream StreamFunc -} -``` - -### `FieldOptions` - -```go -type FieldOptions struct { - Separation string // Field separator. Default: "," - NonamePrefix string // Prefix for unnamed extra fields. 
Default: "field~" - Empty string // Value for empty fields. Default: "" - Names []string // Explicit field names. - Exact bool // Error on field count mismatch. Default: false -} -``` - -### `RecordOptions` - -```go -type RecordOptions struct { - Separators string // Custom record separator characters. - Empty bool // Preserve empty lines as records. Default: false -} -``` - -### `StringOptions` - -```go -type StringOptions struct { - Quote string // Quote character. Default: `"` - Csv *bool // Force CSV string mode (nil=auto). -} -``` - -### `StreamFunc` - -```go -type StreamFunc func(what string, record any) -``` - -Callback for streaming CSV parsing. Called with `"start"`, `"record"`, -`"end"`, or `"error"`. diff --git a/doc/csv-ts.md b/doc/csv-ts.md deleted file mode 100644 index 2e8f9b5..0000000 --- a/doc/csv-ts.md +++ /dev/null @@ -1,286 +0,0 @@ -# CSV plugin for Jsonic (TypeScript) - -A Jsonic syntax plugin that parses CSV text into JavaScript arrays -of objects or arrays, with support for headers, quoted fields, -custom delimiters, streaming, and strict/non-strict modes. - -```bash -npm install @jsonic/csv -``` - -Requires `jsonic` >= 2 as a peer dependency. 
- - -## Tutorials - -### Parse a basic CSV file - -Parse CSV text with a header row into an array of objects: - -```typescript -import { Jsonic } from 'jsonic' -import { Csv } from '@jsonic/csv' - -const j = Jsonic.make().use(Csv) - -j("name,age\nAlice,30\nBob,25") -// [{ name: 'Alice', age: '30' }, { name: 'Bob', age: '25' }] -``` - -### Parse CSV without headers - -Return rows as arrays instead of objects, with no header row: - -```typescript -import { Jsonic } from 'jsonic' -import { Csv } from '@jsonic/csv' - -const j = Jsonic.make().use(Csv, { header: false, object: false }) - -j("a,b,c\n1,2,3") -// [['a', 'b', 'c'], ['1', '2', '3']] -``` - -### Parse CSV with quoted fields - -Double-quoted fields handle commas, newlines, and escaped quotes: - -```typescript -import { Jsonic } from 'jsonic' -import { Csv } from '@jsonic/csv' - -const j = Jsonic.make().use(Csv) - -j('name,bio\nAlice,"Likes ""cats"" and dogs"\nBob,"Line1\nLine2"') -// [ -// { name: 'Alice', bio: 'Likes "cats" and dogs' }, -// { name: 'Bob', bio: 'Line1\nLine2' } -// ] -``` - - -## How-to guides - -### Use a custom field delimiter - -Set `field.separation` to use a delimiter other than comma: - -```typescript -const j = Jsonic.make().use(Csv, { - field: { separation: '\t' } -}) - -j("name\tage\nAlice\t30") -// [{ name: 'Alice', age: '30' }] -``` - -### Enable number and value parsing - -By default in strict mode, all values are strings. 
Enable `number` -and `value` to parse numeric and boolean values: - -```typescript -const j = Jsonic.make().use(Csv, { - number: true, - value: true, -}) - -j("a,b,c\n1,true,null") -// [{ a: 1, b: true, c: null }] -``` - -### Trim whitespace from fields - -Enable `trim` to remove leading and trailing whitespace from field -values: - -```typescript -const j = Jsonic.make().use(Csv, { trim: true }) - -j("a , b \n 1 , 2 ") -// [{ a: '1', b: '2' }] -``` - -### Stream records as they are parsed - -Use the `stream` callback to receive records one at a time without -storing them all in memory: - -```typescript -const records: any[] = [] - -const j = Jsonic.make().use(Csv, { - stream: (what, record) => { - if (what === 'record') records.push(record) - }, -}) - -j("a,b\n1,2\n3,4") -// returns [] (empty, records were streamed) -// records === [{ a: '1', b: '2' }, { a: '3', b: '4' }] -``` - -### Provide explicit field names - -Set `field.names` when the CSV has no header row but you want -object output with named fields: - -```typescript -const j = Jsonic.make().use(Csv, { - header: false, - field: { names: ['x', 'y', 'z'] }, -}) - -j("1,2,3\n4,5,6") -// [{ x: '1', y: '2', z: '3' }, { x: '4', y: '5', z: '6' }] -``` - -### Enforce exact field counts - -Set `field.exact` to error when a row has more or fewer fields -than the header: - -```typescript -const j = Jsonic.make().use(Csv, { - field: { exact: true }, -}) - -// j("a,b\n1,2,3") // throws: unexpected extra field value -// j("a,b\n1") // throws: missing field -``` - -### Use non-strict mode for embedded JSON - -Disable `strict` to allow Jsonic syntax inside CSV fields, -including JSON objects, arrays, and expressions: - -```typescript -const j = Jsonic.make().use(Csv, { strict: false }) - -j("a,b\ntrue,[1,2]") -// [{ a: true, b: [1, 2] }] -``` - -### Enable comment lines - -Enable `comment` to skip lines starting with `#`: - -```typescript -const j = Jsonic.make().use(Csv, { comment: true }) - -j("a,b\n# skip this\n1,2") 
-// [{ a: '1', b: '2' }] -``` - -### Preserve empty records - -By default, blank lines are skipped. Set `record.empty` to -preserve them as empty-field records: - -```typescript -const j = Jsonic.make().use(Csv, { record: { empty: true } }) - -j("a\n1\n\n2") -// [{ a: '1' }, { a: '' }, { a: '2' }] -``` - - -## Explanation - -### Strict vs non-strict mode - -In **strict mode** (default), the CSV plugin disables Jsonic's -built-in JSON parsing. All field values are treated as raw strings -unless `number` or `value` options are enabled. This matches the -behaviour of standard CSV parsers. - -In **non-strict mode** (`strict: false`), the plugin preserves -Jsonic's ability to parse JSON values. Fields can contain objects -(`{x:1}`), arrays (`[1,2]`), booleans, numbers, and quoted strings -using Jsonic syntax. Non-strict mode enables `trim`, `comment`, and -`number` by default. - -### How quoted fields work - -The plugin includes a custom CSV string matcher that handles the -RFC 4180 double-quote escaping convention: - -- A field wrapped in double quotes can contain commas, newlines, - and quotes. -- A literal quote inside a quoted field is represented as `""`. -- For example: `"a""b"` parses to `a"b`. - - -## Reference - -### `Csv` (Plugin) - -The plugin function. Register with `Jsonic.make().use(Csv, options)`. - -### `CsvOptions` - -```typescript -type CsvOptions = { - // Trim surrounding whitespace. Default: null (false in strict, true in non-strict) - trim: boolean | null - - // Enable # line comments. Default: null (false in strict, true in non-strict) - comment: boolean | null - - // Parse numeric values. Default: null (false in strict, true in non-strict) - number: boolean | null - - // Parse value keywords (true/false/null). Default: null (false in strict, false in non-strict) - value: boolean | null - - // First row is a header row. Default: true - header: boolean - - // Return records as objects (true) or arrays (false). 
Default: true - object: boolean - - // Stream callback. Default: null - stream: null | ((what: string, record?: Record | Error) => void) - - // Strict CSV mode (disables Jsonic syntax). Default: true - strict: boolean - - field: { - // Field separator string. Default: null (uses comma) - separation: null | string - - // Prefix for unnamed extra fields. Default: 'field~' - nonameprefix: string - - // Value for empty fields. Default: '' - empty: any - - // Explicit field names (overrides header). Default: undefined - names: undefined | string[] - - // Error on field count mismatch. Default: false - exact: boolean - } - - record: { - // Custom record separator characters. Default: null - separators: null | string - - // Preserve empty lines as records. Default: false - empty: boolean - } - - string: { - // Quote character. Default: '"' - quote: string - - // Force CSV string mode (null=auto). Default: null - csv: null | boolean - } -} -``` - -### `buildCsvStringMatcher` (Function) - -Exported for advanced use. Creates the custom CSV double-quote -string matcher used internally by the plugin. diff --git a/go/csv.go b/go/csv.go deleted file mode 100644 index 05686ba..0000000 --- a/go/csv.go +++ /dev/null @@ -1,780 +0,0 @@ -/* Copyright (c) 2021-2025 Richard Rodger, MIT License */ - -package csv - -import ( - "fmt" - "strconv" - "strings" - - jsonic "github.com/jsonicjs/jsonic/go" -) - -const Version = "0.1.3" - -// --- BEGIN EMBEDDED csv-grammar.jsonic --- -const grammarText = ` -# CSV Grammar Definition -# Parsed by a standard Jsonic instance and passed to jsonic.grammar() -# Function references (@ prefixed) are resolved against the refs map -# -# Token naming: -# #LN - line ending (removed from per-instance IGNORE set) -# #SP - whitespace (removed from per-instance IGNORE set in strict mode) -# #CA - comma / field separator -# #ZZ - end of input -# #VAL - token set: text, string, number, value literals -# -# Rules csv, newline, record, text are fully defined here. 
-# Rules list, elem, val are modified in code (strict mode defines from scratch; -# non-strict prepends to existing defaults to preserve JSON parsing). - -{ - rule: csv: open: [ - { s: '#ZZ' } - { s: '#LN' p: newline c: '@not-record-empty' } - { p: record } - ] - - rule: newline: open: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - rule: newline: close: [ - { s: '#LN #LN' r: newline } - { s: '#LN' r: newline } - { s: '#ZZ' } - { r: record } - ] - - rule: record: open: [ - { p: list } - ] - rule: record: close: [ - { s: '#ZZ' } - { s: '#LN #ZZ' b: 1 } - { s: '#LN' r: '@record-close-next' } - ] - - rule: text: open: [ - { s: ['#VAL' '#SP'] b: 1 r: text n: { text: 1 } g: 'csv,space,follows' a: '@text-follows' } - { s: ['#SP' '#VAL'] r: text n: { text: 1 } g: 'csv,space,leads' a: '@text-leads' } - { s: ['#SP' '#CA #LN #ZZ'] b: 1 n: { text: 1 } g: 'csv,end' a: '@text-end' } - { s: '#SP' n: { text: 1 } g: 'csv,space' a: '@text-space' p: '@text-space-push' } - {} - ] -} -` -// --- END EMBEDDED csv-grammar.jsonic --- - -// Csv is a jsonic plugin that adds CSV parsing support. -// Options are pre-merged with Defaults by jsonic.UseDefaults. -func Csv(j *jsonic.Jsonic, options map[string]any) error { - // Guard against re-invocation: Use() re-runs plugins on SetOptions calls. 
- if j.Decoration("csv-init") != nil { - return nil - } - j.Decorate("csv-init", true) - - strict := toBool(options["strict"]) - objres := toBool(options["object"]) - header := toBool(options["header"]) - - trim := toBool(options["trim"]) - comment := toBool(options["comment"]) - opt_number := toBool(options["number"]) - opt_value := toBool(options["value"]) - - fieldOpts, _ := options["field"].(map[string]any) - recordOpts, _ := options["record"].(map[string]any) - stringOpts, _ := options["string"].(map[string]any) - - record_empty := toBool(recordOpts["empty"]) - - stream, _ := options["stream"].(func(string, any)) - - // In strict mode, Jsonic field content is not parsed. - if strict { - if stringOpts["csv"] != false { - j.SetOptions(jsonic.Options{Lex: &jsonic.LexOptions{ - Match: map[string]*jsonic.MatchSpec{ - "stringcsv": {Order: 1e5, Make: buildCsvStringMatcher(stringOpts)}, - }, - }}) - } - j.SetOptions(jsonic.Options{Rule: &jsonic.RuleOptions{Exclude: "jsonic,imp"}}) - } else { - // Fields may contain Jsonic content. - if stringOpts["csv"] == true { - j.SetOptions(jsonic.Options{Lex: &jsonic.LexOptions{ - Match: map[string]*jsonic.MatchSpec{ - "stringcsv": {Order: 1e5, Make: buildCsvStringMatcher(stringOpts)}, - }, - }}) - } - if options["trim"] == nil { - trim = true - } - if options["comment"] == nil { - comment = true - } - if options["number"] == nil { - opt_number = true - } - if options["value"] == nil { - opt_value = true - } - j.SetOptions(jsonic.Options{Rule: &jsonic.RuleOptions{Exclude: "imp"}}) - } - - fieldSep := toString(fieldOpts["separation"]) - recordSep := toString(recordOpts["separators"]) - - // Jsonic option overrides (matching TS jsonicOptions). 
- jsonicOptions := jsonic.Options{ - Rule: &jsonic.RuleOptions{Start: "csv"}, - Number: &jsonic.NumberOptions{ - Lex: boolPtr(opt_number), - }, - Value: &jsonic.ValueOptions{ - Lex: boolPtr(opt_value), - }, - Comment: &jsonic.CommentOptions{ - Lex: boolPtr(comment), - }, - Lex: &jsonic.LexOptions{ - EmptyResult: []any{}, - }, - Line: &jsonic.LineOptions{ - Single: boolPtr(record_empty), - }, - Error: map[string]string{ - "csv_extra_field": "unexpected extra field value: $fsrc", - "csv_missing_field": "missing field", - }, - Hint: map[string]string{ - "csv_extra_field": "Row $row has too many fields (the first of which is: $fsrc). Only $len\nfields per row are expected.", - "csv_missing_field": "Row $row has too few fields. $len fields per row are expected.", - }, - } - - if strict { - csvStringOpt := stringOpts["csv"] - if csvStringOpt == nil || csvStringOpt == true { - jsonicOptions.String = &jsonic.StringOptions{ - Lex: boolPtr(false), - Chars: "", - } - } - } - - if recordSep != "" { - jsonicOptions.Line.Chars = recordSep - jsonicOptions.Line.RowChars = recordSep - } - - // Fixed-token overrides: in strict mode disable JSON structural tokens - // and the ':' key separator; swap the field separator when configured. - if strict || fieldSep != "" { - jsonicOptions.Fixed = &jsonic.FixedOptions{Token: map[string]*string{}} - if strict { - jsonicOptions.Fixed.Token["#OB"] = nil - jsonicOptions.Fixed.Token["#CB"] = nil - jsonicOptions.Fixed.Token["#OS"] = nil - jsonicOptions.Fixed.Token["#CS"] = nil - jsonicOptions.Fixed.Token["#CL"] = nil - } - if fieldSep != "" { - sep := fieldSep - jsonicOptions.Fixed.Token["#CA"] = &sep - } - } - - // IGNORE set: drop #LN so row breaks are significant; in strict mode - // also drop #SP so whitespace inside fields is preserved. 
- if strict { - jsonicOptions.TokenSet = map[string][]string{"IGNORE": {"#CM"}} - } else { - jsonicOptions.TokenSet = map[string][]string{"IGNORE": {"#SP", "#CM"}} - } - - j.SetOptions(jsonicOptions) - - // Named function references for declarative grammar definition. - emptyField := toString(fieldOpts["empty"]) - nonameprefix := toString(fieldOpts["nonameprefix"]) - fieldExact := toBool(fieldOpts["exact"]) - var fieldNames []string - if names, ok := fieldOpts["names"].([]string); ok { - fieldNames = names - } else if names, ok := fieldOpts["names"].([]any); ok { - for _, n := range names { - if s, ok := n.(string); ok { - fieldNames = append(fieldNames, s) - } - } - } - - refs := map[jsonic.FuncRef]any{ - - "@csv-bo": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if ctx.Meta == nil { - ctx.Meta = make(map[string]any) - } - ctx.Meta["recordI"] = 0 - if stream != nil { - stream("start", nil) - } - r.Node = make([]any, 0) - }), - - "@csv-ac": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if stream != nil { - stream("end", nil) - } - }), - - "@record-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - recordI, _ := ctx.Meta["recordI"].(int) - var fields []string - if fs, ok := ctx.Meta["fields"].([]string); ok { - fields = fs - } - if fields == nil { - fields = fieldNames - } - - if recordI == 0 && header { - if childArr, ok := r.Child.Node.([]any); ok { - names := make([]string, len(childArr)) - for i, v := range childArr { - names[i], _ = v.(string) - } - ctx.Meta["fields"] = names - } else { - ctx.Meta["fields"] = []string{} - } - } else { - record, _ := r.Child.Node.([]any) - if record == nil { - record = []any{} - } - - if objres { - obj := make(map[string]any) - var keys []string - i := 0 - - if fields != nil { - if fieldExact && len(record) != len(fields) { - errCode := "csv_missing_field" - if len(record) > len(fields) { - errCode = "csv_extra_field" - } - ctx.ParseErr = &jsonic.Token{ - Name: "#BD", Tin: 
jsonic.TinBD, - Why: errCode, Src: errCode, - } - return - } - - for fI := 0; fI < len(fields); fI++ { - var val any = emptyField - if fI < len(record) && !jsonic.IsUndefined(record[fI]) { - val = record[fI] - } - obj[fields[fI]] = val - keys = append(keys, fields[fI]) - } - i = len(fields) - } - - for ; i < len(record); i++ { - fname := nonameprefix + strconv.Itoa(i) - val := record[i] - if jsonic.IsUndefined(val) { - val = emptyField - } - obj[fname] = val - keys = append(keys, fname) - } - - out := orderedMap{keys: keys, m: obj} - if stream != nil { - stream("record", out) - } else if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, out) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - } else { - for i := range record { - if jsonic.IsUndefined(record[i]) { - record[i] = emptyField - } - } - if stream != nil { - stream("record", record) - } else if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, record) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - } - } - ctx.Meta["recordI"] = recordI + 1 - }), - - "@text-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if !jsonic.IsUndefined(r.Child.Node) { - r.Parent.Node = r.Child.Node - } else { - r.Parent.Node = r.Node - } - }), - - "@text-follows": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - result := prev + tokenStr(r.O0) - r.Node = result - if r.N["text"] == 1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }), - - "@text-leads": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - sp := "" - if r.N["text"] >= 2 || !trim { - sp = r.O0.Src - } - result := prev + sp + r.O1.Src - r.Node = result - if r.N["text"] == 
1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }), - - "@text-end": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - sp := "" - if !trim { - sp = r.O0.Src - } - result := prev + sp - r.Node = result - if r.N["text"] == 1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - }), - - "@text-space": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if strict { - prev := "" - if r.N["text"] != 1 && r.Prev != nil && r.Prev != jsonic.NoRule { - prev, _ = r.Prev.Node.(string) - } - sp := "" - if !trim { - sp = r.O0.Src - } - result := prev + sp - r.Node = result - if r.N["text"] == 1 { - } else if r.Prev != nil && r.Prev != jsonic.NoRule { - r.Prev.Node = result - } - } - }), - - "@not-record-empty": jsonic.AltCond(func(r *jsonic.Rule, ctx *jsonic.Context) bool { - return !record_empty - }), - - "@record-close-next": func(r *jsonic.Rule, ctx *jsonic.Context) string { - if record_empty { - return "record" - } - return "newline" - }, - - "@text-space-push": func(r *jsonic.Rule, ctx *jsonic.Context) string { - if strict { - return "" - } - return "val" - }, - } - - // Parse embedded grammar definition using a separate standard Jsonic instance. - gs, err := parseGrammarText(grammarText, refs) - if err != nil { - return err - } - if err := j.Grammar(gs); err != nil { - return fmt.Errorf("failed to apply csv grammar: %w", err) - } - - // Rules list, elem, val are modified in code rather than the grammar file, - // because in non-strict mode the default jsonic alternatives must be preserved - // to support embedded JSON values like [1,2] and {x:1}. 
- - LN := j.Token("#LN") - CA := j.Token("#CA") - SP := j.Token("#SP") - ZZ := j.Token("#ZZ") - VAL := j.TokenSet("VAL") - - j.Rule("list", func(rs *jsonic.RuleSpec) { - rs.Clear() - rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { - r.Node = make([]any, 0) - }) - rs.Open = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{LN}}, B: 1}, - {P: "elem"}, - } - rs.Close = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{LN}}, B: 1}, - {S: [][]jsonic.Tin{{ZZ}}}, - } - }) - - j.Rule("elem", func(rs *jsonic.RuleSpec) { - rs.Clear() - rs.Open = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{CA}}, B: 1, - A: jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, emptyField) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - r.U["done"] = true - })}, - {P: "val"}, - } - rs.Close = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{{CA}, {LN, ZZ}}, B: 1, - A: jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, emptyField) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - })}, - {S: [][]jsonic.Tin{{CA}}, R: "elem"}, - {S: [][]jsonic.Tin{{LN}}, B: 1}, - {S: [][]jsonic.Tin{{ZZ}}}, - } - rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { - done, _ := r.U["done"].(bool) - if !done && !jsonic.IsUndefined(r.Child.Node) { - if arr, ok := r.Node.([]any); ok { - r.Node = append(arr, r.Child.Node) - if r.Parent != jsonic.NoRule && r.Parent != nil { - r.Parent.Node = r.Node - } - } - } - }) - }) - - j.Rule("val", func(rs *jsonic.RuleSpec) { - rs.Clear() - rs.AddBO(func(r *jsonic.Rule, ctx *jsonic.Context) { - r.Node = jsonic.Undefined - }) - rs.Open = []*jsonic.AltSpec{ - {S: [][]jsonic.Tin{VAL, {SP}}, B: 2, P: "text"}, - {S: [][]jsonic.Tin{{SP}}, B: 1, P: "text"}, - {S: [][]jsonic.Tin{VAL}}, - {S: [][]jsonic.Tin{{LN}}, B: 1}, - } - rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { - if 
jsonic.IsUndefined(r.Node) { - if jsonic.IsUndefined(r.Child.Node) { - if r.OS == 0 { - r.Node = jsonic.Undefined - } else { - r.Node = r.O0.ResolveVal() - } - } else { - r.Node = r.Child.Node - } - } - }) - }) - - return nil -} - -// Custom CSV String matcher factory. -// Handles "a""b" -> a"b quoting. -// Matches TS: buildCsvStringMatcher(options) returns make(cfg, opts) => matcher(lex). -func buildCsvStringMatcher(stringOpts map[string]any) jsonic.MakeLexMatcher { - quote := toString(stringOpts["quote"]) - return func(cfg *jsonic.LexConfig, opts *jsonic.Options) jsonic.LexMatcher { - return func(lex *jsonic.Lex, rule *jsonic.Rule) *jsonic.Token { - pnt := lex.Cursor() - src := lex.Src - sI := pnt.SI - srclen := len(src) - - if sI >= srclen || !strings.HasPrefix(src[sI:], quote) { - return nil - } - - // Only match when quote is at the start of a field. - if sI > 0 { - prev := rune(src[sI-1]) - _, isFixed := cfg.FixedTokens[string(prev)] - if !isFixed && !cfg.LineChars[prev] && !cfg.SpaceChars[prev] { - return nil - } - } - - q := quote - qLen := len(q) - rI := pnt.RI - cI := pnt.CI - sI += qLen - cI += qLen - - var s strings.Builder - for sI < srclen { - cI++ - if strings.HasPrefix(src[sI:], q) { - sI += qLen - cI += qLen - 1 - if sI < srclen && strings.HasPrefix(src[sI:], q) { - s.WriteString(q) - sI += qLen - cI += qLen - continue - } - val := s.String() - ssrc := src[pnt.SI:sI] - tkn := lex.Token("#ST", jsonic.TinST, val, ssrc) - pnt.SI = sI - pnt.RI = rI - pnt.CI = cI - return tkn - } - - ch := src[sI] - if cfg.LineChars[rune(ch)] { - if cfg.RowChars[rune(ch)] { - rI++ - pnt.RI = rI - } - cI = 1 - s.WriteByte(ch) - sI++ - continue - } - if ch < 32 { - return nil - } - - bI := sI - qFirst := q[0] - for sI < srclen && src[sI] >= 32 && src[sI] != qFirst { - if cfg.LineChars[rune(src[sI])] { - break - } - sI++ - cI++ - } - cI-- - s.WriteString(src[bI:sI]) - } - - badSrc := src[pnt.SI:sI] - tkn := lex.Token("#BD", jsonic.TinBD, nil, badSrc) - tkn.Why = 
"unterminated_string" - pnt.SI = sI - pnt.RI = rI - pnt.CI = cI - return tkn - } - } -} - -// Defaults matches the TS Csv.defaults. Used with jsonic.UseDefaults. -var Defaults = map[string]any{ - "trim": nil, - "comment": nil, - "number": nil, - "value": nil, - "header": true, - "object": true, - "stream": nil, - "strict": true, - "field": map[string]any{ - "separation": nil, - "nonameprefix": "field~", - "empty": "", - "names": nil, - "exact": false, - }, - "record": map[string]any{ - "separators": nil, - "empty": false, - }, - "string": map[string]any{ - "quote": `"`, - "csv": nil, - }, -} - -// parseGrammarText parses grammar text and builds a GrammarSpec with Ref support. -func parseGrammarText(text string, refs map[jsonic.FuncRef]any) (*jsonic.GrammarSpec, error) { - parsed, err := jsonic.Make().Parse(text) - if err != nil { - return nil, fmt.Errorf("failed to parse grammar text: %w", err) - } - parsedMap, ok := parsed.(map[string]any) - if !ok { - return nil, fmt.Errorf("grammar text did not parse to a map") - } - gs := &jsonic.GrammarSpec{Ref: refs} - ruleMap, ok := parsedMap["rule"].(map[string]any) - if !ok { - return gs, nil - } - gs.Rule = make(map[string]*jsonic.GrammarRuleSpec, len(ruleMap)) - for name, rDef := range ruleMap { - rd, ok := rDef.(map[string]any) - if !ok { - continue - } - grs := &jsonic.GrammarRuleSpec{} - if openDef, ok := rd["open"]; ok { - grs.Open = buildGrammarAlts(openDef) - } - if closeDef, ok := rd["close"]; ok { - grs.Close = buildGrammarAlts(closeDef) - } - gs.Rule[name] = grs - } - return gs, nil -} - -func buildGrammarAlts(def any) []*jsonic.GrammarAltSpec { - arr, ok := def.([]any) - if !ok { - return nil - } - alts := make([]*jsonic.GrammarAltSpec, 0, len(arr)) - for _, item := range arr { - m, ok := item.(map[string]any) - if !ok { - alts = append(alts, &jsonic.GrammarAltSpec{}) - continue - } - ga := &jsonic.GrammarAltSpec{} - if s, ok := m["s"]; ok { - switch sv := s.(type) { - case string: - ga.S = sv - case []any: - 
strs := make([]string, len(sv)) - for i, v := range sv { - strs[i], _ = v.(string) - } - ga.S = strs - } - } - if b, ok := m["b"]; ok { - switch bv := b.(type) { - case float64: - ga.B = int(bv) - case int: - ga.B = bv - } - } - if p, ok := m["p"].(string); ok { - ga.P = p - } - if r, ok := m["r"].(string); ok { - ga.R = r - } - if a, ok := m["a"].(string); ok { - ga.A = jsonic.FuncRef(a) - } - if c, ok := m["c"]; ok { - switch cv := c.(type) { - case string: - ga.C = cv - case map[string]any: - ga.C = cv - } - } - if n, ok := m["n"].(map[string]any); ok { - ga.N = make(map[string]int, len(n)) - for k, v := range n { - if nv, ok := v.(float64); ok { - ga.N[k] = int(nv) - } else if nv, ok := v.(int); ok { - ga.N[k] = nv - } - } - } - if g, ok := m["g"].(string); ok { - ga.G = g - } - alts = append(alts, ga) - } - return alts -} - -func tokenStr(t *jsonic.Token) string { - if t == nil || t.IsNoToken() { - return "" - } - if t.Tin == jsonic.TinST { - if s, ok := t.Val.(string); ok { - return s - } - } - return t.Src -} - -func toBool(v any) bool { - b, _ := v.(bool) - return b -} - -func toString(v any) string { - s, _ := v.(string) - return s -} - -func boolPtr(b bool) *bool { - return &b -} - -// orderedMap maintains insertion order for JSON serialization comparison. -type orderedMap struct { - keys []string - m map[string]any -} diff --git a/go/csv_test.go b/go/csv_test.go deleted file mode 100644 index 2e14b3a..0000000 --- a/go/csv_test.go +++ /dev/null @@ -1,527 +0,0 @@ -package csv - -import ( - "encoding/json" - "fmt" - "os" - "path/filepath" - "reflect" - "testing" - - jsonic "github.com/jsonicjs/jsonic/go" -) - -// fixtureEntry represents one entry in the test manifest. 
-type fixtureEntry struct { - Name string `json:"name"` - CsvFile string `json:"csvFile,omitempty"` - Opt map[string]any `json:"opt,omitempty"` - JsonicOpt map[string]any `json:"jsonicOpt,omitempty"` - Err string `json:"err,omitempty"` -} - -func fixturesDir() string { - return filepath.Join("..", "test", "fixtures") -} - -// csvParse creates a jsonic instance with the Csv plugin and parses src. -func csvParse(src string, opts ...map[string]any) ([]any, error) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults, opts...) - - result, err := j.Parse(src) - if err != nil { - return nil, err - } - if result == nil { - return []any{}, nil - } - if arr, ok := result.([]any); ok { - return arr, nil - } - return []any{}, nil -} - -func TestFixtures(t *testing.T) { - dir := fixturesDir() - manifestPath := filepath.Join(dir, "manifest.json") - - manifestData, err := os.ReadFile(manifestPath) - if err != nil { - t.Fatalf("Failed to read manifest: %v", err) - } - - var manifest map[string]fixtureEntry - if err := json.Unmarshal(manifestData, &manifest); err != nil { - t.Fatalf("Failed to parse manifest: %v", err) - } - - for key, entry := range manifest { - t.Run(entry.Name, func(t *testing.T) { - csvFile := entry.CsvFile - if csvFile == "" { - csvFile = key - } - - csvData, err := os.ReadFile(filepath.Join(dir, csvFile+".csv")) - if err != nil { - t.Fatalf("Failed to read CSV file %s: %v", csvFile, err) - } - - result, err := parseFixture(string(csvData), entry.Opt, entry.JsonicOpt) - if err != nil { - if entry.Err != "" { - return // expected error - } - t.Fatalf("Unexpected error: %v", err) - } - - if entry.Err != "" { - t.Fatalf("Expected error %s but got none", entry.Err) - } - - expectedData, err := os.ReadFile(filepath.Join(dir, key+".json")) - if err != nil { - t.Fatalf("Failed to read expected JSON: %v", err) - } - - var expected []any - if err := json.Unmarshal(expectedData, &expected); err != nil { - t.Fatalf("Failed to parse expected JSON: %v", err) - } - - 
resultNorm := normalizeResult(result) - expectedNorm := normalizeJSON(expected) - - if !reflect.DeepEqual(resultNorm, expectedNorm) { - resultJSON, _ := json.MarshalIndent(resultNorm, "", " ") - expectedJSON, _ := json.MarshalIndent(expectedNorm, "", " ") - t.Errorf("Fixture %q mismatch:\nGot: %s\nExpected: %s", - entry.Name, string(resultJSON), string(expectedJSON)) - } - }) - } -} - -func TestPlugin(t *testing.T) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults) - - result, err := j.Parse("a,b\n1,2\n3,4") - if err != nil { - t.Fatalf("Plugin parse error: %v", err) - } - - arr, ok := result.([]any) - if !ok { - t.Fatalf("Expected []any, got %T", result) - } - - if len(arr) != 2 { - t.Fatalf("Expected 2 records, got %d", len(arr)) - } - - r0 := toMap(arr[0]) - if r0["a"] != "1" || r0["b"] != "2" { - t.Errorf("Record 0: expected {a:1,b:2}, got %v", r0) - } -} - -func TestPluginWithOptions(t *testing.T) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults, map[string]any{"object": false}) - - result, err := j.Parse("a,b\n1,2") - if err != nil { - t.Fatalf("Plugin parse error: %v", err) - } - - arr, ok := result.([]any) - if !ok { - t.Fatalf("Expected []any, got %T", result) - } - - if len(arr) != 1 { - t.Fatalf("Expected 1 record, got %d", len(arr)) - } - - inner, ok := arr[0].([]any) - if !ok { - t.Fatalf("Expected inner []any, got %T", arr[0]) - } - - if inner[0] != "1" || inner[1] != "2" { - t.Errorf("Expected [1,2], got %v", inner) - } -} - -func TestPluginEmpty(t *testing.T) { - j := jsonic.Make() - j.UseDefaults(Csv, Defaults) - - result, err := j.Parse("") - if err != nil { - t.Fatalf("Plugin parse error: %v", err) - } - - arr, ok := result.([]any) - if !ok { - t.Fatalf("Expected []any, got %T: %v", result, result) - } - - if len(arr) != 0 { - t.Errorf("Expected empty array, got %v", arr) - } -} - -func TestUsePlugin(t *testing.T) { - j := jsonic.Make() - j.Use(Csv, nil) - - result, err := j.Parse("a,b\n1,2") - if err != nil { - t.Logf("Plugin parse 
returned error (expected with basic plugin): %v", err) - } - _ = result -} - -func TestEmptyRecords(t *testing.T) { - result, _ := csvParse("a\n1\n\n2\n3\n\n\n4\n") - assertRecords(t, "empty-ignored", result, []map[string]any{ - {"a": "1"}, {"a": "2"}, {"a": "3"}, {"a": "4"}, - }) - - result2, _ := csvParse("a\n1\n\n2\n3\n\n\n4\n", - map[string]any{"record": map[string]any{"empty": true}}) - assertRecords(t, "empty-preserved", result2, []map[string]any{ - {"a": "1"}, {"a": ""}, {"a": "2"}, {"a": "3"}, - {"a": ""}, {"a": ""}, {"a": "4"}, - }) -} - -func TestHeader(t *testing.T) { - result, _ := csvParse("\na,b\nA,B") - assertRecords(t, "header-skip-leading", result, []map[string]any{ - {"a": "A", "b": "B"}, - }) - - result2, _ := csvParse("\na,b\nA,B", map[string]any{"header": false}) - assertRecords(t, "no-header", result2, []map[string]any{ - {"field~0": "a", "field~1": "b"}, - {"field~0": "A", "field~1": "B"}, - }) -} - -func TestDoubleQuotes(t *testing.T) { - tests := []struct { - input string - expected string - }{ - {`a` + "\n" + `"b"`, "b"}, - {`a` + "\n" + `"""b"`, `"b`}, - {`a` + "\n" + `"b"""`, `b"`}, - {`a` + "\n" + `"""b"""`, `"b"`}, - {`a` + "\n" + `"b""c"`, `b"c`}, - {`a` + "\n" + `"b""c""d"`, `b"c"d`}, - {`a` + "\n" + `"""""b"`, `""b`}, - {`a` + "\n" + `"b"""""`, `b""`}, - {`a` + "\n" + `"""""b"""""`, `""b""`}, - } - - for _, tt := range tests { - result, err := csvParse(tt.input) - if err != nil { - t.Errorf("Parse(%q): error: %v", tt.input, err) - continue - } - if len(result) != 1 { - t.Errorf("Parse(%q): expected 1 record, got %d", tt.input, len(result)) - continue - } - m := toMap(result[0]) - if m["a"] != tt.expected { - t.Errorf("Parse(%q): expected a=%q, got a=%q", tt.input, tt.expected, m["a"]) - } - } -} - -func TestTrim(t *testing.T) { - r1, _ := csvParse("a\n b") - assertField(t, "no-trim-leading", r1, "a", " b") - - r2, _ := csvParse("a\nb ") - assertField(t, "no-trim-trailing", r2, "a", "b ") - - r3, _ := csvParse("a\n b ") - 
assertField(t, "no-trim-both", r3, "a", " b ") - - r4, _ := csvParse("a\n b", map[string]any{"trim": true}) - assertField(t, "trim-leading", r4, "a", "b") - - r5, _ := csvParse("a\nb ", map[string]any{"trim": true}) - assertField(t, "trim-trailing", r5, "a", "b") - - r6, _ := csvParse("a\n b c ", map[string]any{"trim": true}) - assertField(t, "trim-internal", r6, "a", "b c") -} - -func TestComment(t *testing.T) { - r1, _ := csvParse("a\n# b") - assertField(t, "no-comment", r1, "a", "# b") - - r2, _ := csvParse("a\n# b", map[string]any{"comment": true}) - if len(r2) != 0 { - t.Errorf("comment-line: expected 0 records, got %d", len(r2)) - } - - r3, _ := csvParse("a\n b #c", map[string]any{"comment": true}) - assertField(t, "comment-inline", r3, "a", " b ") -} - -func TestNumber(t *testing.T) { - r1, _ := csvParse("a\n1") - assertField(t, "no-number", r1, "a", "1") - - r2, _ := csvParse("a\n1", map[string]any{"number": true}) - m := toMap(r2[0]) - if m["a"] != float64(1) { - t.Errorf("number: expected 1 (float64), got %v (%T)", m["a"], m["a"]) - } -} - -func TestValue(t *testing.T) { - r1, _ := csvParse("a\ntrue") - assertField(t, "no-value", r1, "a", "true") - - r2, _ := csvParse("a\ntrue", map[string]any{"value": true}) - m := toMap(r2[0]) - if m["a"] != true { - t.Errorf("value-true: expected true, got %v (%T)", m["a"], m["a"]) - } - - r3, _ := csvParse("a\nfalse", map[string]any{"value": true}) - m3 := toMap(r3[0]) - if m3["a"] != false { - t.Errorf("value-false: expected false, got %v (%T)", m3["a"], m3["a"]) - } - - r4, _ := csvParse("a\nnull", map[string]any{"value": true}) - m4 := toMap(r4[0]) - if m4["a"] != nil { - t.Errorf("value-null: expected nil, got %v (%T)", m4["a"], m4["a"]) - } -} - -func TestStream(t *testing.T) { - var events []string - var records []any - - j := jsonic.Make() - j.UseDefaults(Csv, Defaults, map[string]any{ - "stream": func(what string, record any) { - events = append(events, what) - if what == "record" { - records = append(records, 
record) - } - }, - }) - j.Parse("a,b\n1,2\n3,4\n5,6") - - if len(events) < 3 { - t.Fatalf("Expected at least 3 events, got %d", len(events)) - } - if events[0] != "start" { - t.Errorf("First event should be 'start', got %q", events[0]) - } - if events[len(events)-1] != "end" { - t.Errorf("Last event should be 'end', got %q", events[len(events)-1]) - } - if len(records) != 3 { - t.Errorf("Expected 3 records, got %d", len(records)) - } -} - -func TestSeparators(t *testing.T) { - result, _ := csvParse("a|b|c\nA|B|C\nAA|BB|CC", - map[string]any{"field": map[string]any{"separation": "|"}}) - assertRecords(t, "pipe", result, []map[string]any{ - {"a": "A", "b": "B", "c": "C"}, - {"a": "AA", "b": "BB", "c": "CC"}, - }) - - result2, _ := csvParse("a~~b~~c\nA~~B~~C", - map[string]any{"field": map[string]any{"separation": "~~"}}) - assertRecords(t, "multi-char", result2, []map[string]any{ - {"a": "A", "b": "B", "c": "C"}, - }) -} - -func TestRecordSeparators(t *testing.T) { - result, _ := csvParse("a,b,c%A,B,C%AA,BB,CC", - map[string]any{"record": map[string]any{"separators": "%"}}) - assertRecords(t, "record-sep", result, []map[string]any{ - {"a": "A", "b": "B", "c": "C"}, - {"a": "AA", "b": "BB", "c": "CC"}, - }) -} - -// parseFixture parses CSV with optional jsonic-level options for fixtures. 
-func parseFixture(src string, pluginOpts map[string]any, jsonicOpts map[string]any) ([]any, error) { - if len(jsonicOpts) == 0 { - return csvParse(src, pluginOpts) - } - - j := jsonic.Make() - - // Apply jsonicOpt: value.def - if valOpt, ok := jsonicOpts["value"].(map[string]any); ok { - if defMap, ok := valOpt["def"].(map[string]any); ok { - vopts := jsonic.Options{Value: &jsonic.ValueOptions{ - Def: map[string]*jsonic.ValueDef{ - "true": {Val: true}, - "false": {Val: false}, - "null": {Val: nil}, - }, - }} - for k, v := range defMap { - if v == nil { - delete(vopts.Value.Def, k) - } else if vm, ok := v.(map[string]any); ok { - vopts.Value.Def[k] = &jsonic.ValueDef{Val: vm["val"]} - } - } - j.SetOptions(vopts) - } - } - - // Apply jsonicOpt: comment.def - if cmtOpt, ok := jsonicOpts["comment"].(map[string]any); ok { - if defMap, ok := cmtOpt["def"].(map[string]any); ok { - copts := jsonic.Options{Comment: &jsonic.CommentOptions{ - Def: make(map[string]*jsonic.CommentDef), - }} - for name, v := range defMap { - if cm, ok := v.(map[string]any); ok { - def := &jsonic.CommentDef{} - if start, ok := cm["start"].(string); ok { - def.Start = start - } - if end, ok := cm["end"].(string); ok { - def.End = end - } else { - def.Line = true - } - copts.Comment.Def[name] = def - } - } - j.SetOptions(copts) - } - } - - j.UseDefaults(Csv, Defaults, pluginOpts) - - result, err := j.Parse(src) - if err != nil { - return nil, err - } - if result == nil { - return []any{}, nil - } - if arr, ok := result.([]any); ok { - return arr, nil - } - return []any{}, nil -} - -// Helpers - -func assertRecords(t *testing.T, name string, result []any, expected []map[string]any) { - t.Helper() - if len(result) != len(expected) { - t.Errorf("%s: expected %d records, got %d: %v", name, len(expected), len(result), result) - return - } - for i, exp := range expected { - m := toMap(result[i]) - for k, v := range exp { - if fmt.Sprintf("%v", m[k]) != fmt.Sprintf("%v", v) { - t.Errorf("%s: record %d, 
field %q: expected %v, got %v", name, i, k, v, m[k]) - } - } - } -} - -func assertField(t *testing.T, name string, result []any, key string, expected string) { - t.Helper() - if len(result) != 1 { - t.Errorf("%s: expected 1 record, got %d", name, len(result)) - return - } - m := toMap(result[0]) - if m[key] != expected { - t.Errorf("%s: expected %q=%q, got %q=%q", name, key, expected, key, m[key]) - } -} - -func toMap(v any) map[string]any { - switch m := v.(type) { - case map[string]any: - return m - case orderedMap: - return m.m - default: - return nil - } -} - -func normalizeResult(result []any) []any { - out := make([]any, len(result)) - for i, r := range result { - out[i] = normalizeValue(r) - } - return out -} - -func normalizeValue(v any) any { - switch val := v.(type) { - case orderedMap: - m := make(map[string]any) - for k, v := range val.m { - m[k] = normalizeValue(v) - } - return m - case map[string]any: - m := make(map[string]any) - for k, v := range val { - m[k] = normalizeValue(v) - } - return m - case []any: - out := make([]any, len(val)) - for i, v := range val { - out[i] = normalizeValue(v) - } - return out - default: - return v - } -} - -func normalizeJSON(v any) any { - switch val := v.(type) { - case []any: - out := make([]any, len(val)) - for i, item := range val { - out[i] = normalizeJSON(item) - } - return out - case map[string]any: - m := make(map[string]any) - for k, v := range val { - m[k] = normalizeJSON(v) - } - return m - default: - return v - } -} diff --git a/go/go.mod b/go/go.mod index 55ab5a2..c42f565 100644 --- a/go/go.mod +++ b/go/go.mod @@ -1,4 +1,4 @@ -module github.com/jsonicjs/csv/go +module github.com/jsonicjs/xml/go go 1.24.7 diff --git a/go/xml.go b/go/xml.go new file mode 100644 index 0000000..97b30b8 --- /dev/null +++ b/go/xml.go @@ -0,0 +1,630 @@ +// Copyright (c) 2021-2025 Richard Rodger, MIT License + +// Package xml is a Jsonic plugin that parses XML into a tree of +// elements. 
The parser supports: elements with open/close and +// self-closing tags, attributes (single and double quoted with entity +// decoding), mixed element/text content, predefined and numeric +// character entity references, namespace resolution from xmlns/xmlns:* +// declarations, comments, CDATA sections, processing instructions and +// DOCTYPE declarations. +// +// The returned tree uses `map[string]any` nodes with keys `name`, +// `localName`, optional `prefix`, optional `namespace`, `attributes` +// (map of string -> string) and `children` (array of nested elements +// or text strings). +package xml + +import ( + "fmt" + "regexp" + "strconv" + "strings" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +const Version = "0.1.0" + +// Defaults are merged with caller-supplied options when the plugin is +// registered via jsonic.UseDefaults. +var Defaults = map[string]any{ + "namespaces": true, + "entities": true, + "customEntities": map[string]string{}, +} + +// Xml is the Jsonic plugin entry point. Register via: +// +// j := jsonic.Make() +// j.UseDefaults(xml.Xml, xml.Defaults) +// result, err := j.Parse(src) +func Xml(j *jsonic.Jsonic, options map[string]any) error { + // Guard against re-invocation: Use() re-runs plugins on SetOptions calls. + if j.Decoration("xml-init") != nil { + return nil + } + j.Decorate("xml-init", true) + + namespacesOn := toBool(options["namespaces"], true) + entitiesOn := toBool(options["entities"], true) + customEntities := toStringMap(options["customEntities"]) + + decode := buildEntityDecoder(entitiesOn, customEntities) + + // Reserve #XIG (ignored) and #XOP/#XCL/#XSC (tag tokens) so they have + // stable tins before the grammar references them. The tins are then + // passed to the tag matcher by closure. 
+ xigTin := j.Token("#XIG", "") + xopTin := j.Token("#XOP", "") + xclTin := j.Token("#XCL", "") + xscTin := j.Token("#XSC", "") + + // Register a dummy fixed token bound to a character that cannot + // legally appear in XML source (ASCII SOH). This keeps the lexer's + // internal `FixedSorted` list non-empty, which in turn disables an + // otherwise-hardcoded fallback that still ends text tokens on any + // of `{ } [ ] : ,` even when those symbols have been removed from + // the fixed token map. Without this, XML text content containing a + // comma would be truncated at the comma. + soh := "\x01" + _ = j.Token("#XDUM", soh) + + // Custom lexer matcher registered at low priority so it runs before + // the built-in text/fixed matchers and captures every `<...>` + // construct as a single token. + j.SetOptions(jsonic.Options{ + Lex: &jsonic.LexOptions{ + Match: map[string]*jsonic.MatchSpec{ + "xmltag": {Order: 100_000, Make: buildXmlTagMatcher(decode, xigTin, xopTin, xclTin, xscTin)}, + }, + }, + Ender: []string{"<"}, + Rule: &jsonic.RuleOptions{ + Start: "xml", + Exclude: "jsonic,imp", + }, + Fixed: &jsonic.FixedOptions{Token: map[string]*string{ + "#OB": nil, "#CB": nil, "#OS": nil, "#CS": nil, + "#CL": nil, "#CA": nil, + }}, + Number: &jsonic.NumberOptions{Lex: boolPtr(false)}, + Value: &jsonic.ValueOptions{Lex: boolPtr(false)}, + String: &jsonic.StringOptions{Lex: boolPtr(false)}, + Comment: &jsonic.CommentOptions{Lex: boolPtr(false)}, + Space: &jsonic.SpaceOptions{Lex: boolPtr(false)}, + Line: &jsonic.LineOptions{Lex: boolPtr(false)}, + Text: &jsonic.TextOptions{ + Modify: []jsonic.ValModifier{func(v any) any { + if s, ok := v.(string); ok && entitiesOn { + return decode(s) + } + return v + }}, + }, + Error: map[string]string{ + "xml_mismatched_tag": "closing tag does not match opening tag <$openname>", + "xml_invalid_tag": "invalid tag: $fsrc", + "xml_unterminated": "unterminated $kind", + }, + Hint: map[string]string{ + "xml_mismatched_tag": "Each opening tag 
must be paired with a matching closing tag.\nExpected but found .", + "xml_invalid_tag": "The tag syntax is not valid XML.", + "xml_unterminated": "The $kind starting at this position is not terminated.", + }, + }) + + // IGNORE set: drop #XIG (comments, PIs, DOCTYPE) along with the + // default members so any of them is skipped by the parser. + j.SetTokenSet("IGNORE", []jsonic.Tin{ + j.Token("#SP", ""), j.Token("#LN", ""), j.Token("#CM", ""), xigTin, + }) + + // Grammar declarations. Mirror the TypeScript grammar exactly. + refs := map[jsonic.FuncRef]any{ + "@xml-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if r.Child == nil || r.Child == jsonic.NoRule || r.Child.Node == nil { + return + } + // The Go parser follows the Next chain forward from the root + // rule to find the final result holder, so the current rule's + // node is what the caller will see. Set it (and the original + // root's node via the Prev chain as well for safety). + r.Node = r.Child.Node + root := firstRule(r) + root.Node = r.Child.Node + if namespacesOn { + if el, ok := r.Node.(map[string]any); ok { + resolveNamespaces(el, nil) + } + } + }), + + "@element-open": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + v := r.O0.Val.(map[string]any) + name := v["name"].(string) + attrs := v["attributes"].(map[string]any) + r.Node = map[string]any{ + "name": name, + "localName": name, + "attributes": attrs, + "children": []any{}, + } + }), + + "@element-selfclose": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + v := r.O0.Val.(map[string]any) + name := v["name"].(string) + attrs := v["attributes"].(map[string]any) + r.Node = map[string]any{ + "name": name, + "localName": name, + "attributes": attrs, + "children": []any{}, + } + }), + + "@element-close": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + el, _ := r.Node.(map[string]any) + openName, _ := el["name"].(string) + closeName, _ := r.C0.Val.(string) + if openName != closeName { + 
// The Go parser's top-level error handling reports parse + // errors under a single "unexpected" code, so encode our + // specific error code into the token's `Src`: that string + // is substituted into the error detail via $fsrc and will + // appear in err.Error() for consumers (and tests) that + // want to key on the specific cause. + r.C0.Src = "xml_mismatched_tag: does not match <" + openName + ">" + if r.C0.Use == nil { + r.C0.Use = map[string]any{} + } + r.C0.Use["openname"] = openName + r.C0.Err = "xml_mismatched_tag" + ctx.ParseErr = r.C0 + } + }), + + "@child-text": jsonic.AltAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + el, _ := r.Node.(map[string]any) + children, _ := el["children"].([]any) + el["children"] = append(children, r.O0.Val) + r.U["done"] = true + }), + + "@child-bc": jsonic.StateAction(func(r *jsonic.Rule, ctx *jsonic.Context) { + if done, _ := r.U["done"].(bool); done { + return + } + if r.Child == nil || r.Child == jsonic.NoRule || r.Child.Node == nil { + return + } + el, ok := r.Node.(map[string]any) + if !ok { + return + } + children, _ := el["children"].([]any) + el["children"] = append(children, r.Child.Node) + }), + + "@element-is-selfclosed": jsonic.AltCond(func(r *jsonic.Rule, ctx *jsonic.Context) bool { + v, _ := r.U["selfclose"].(int) + return v == 1 + }), + } + + gs := &jsonic.GrammarSpec{ + Ref: refs, + Rule: map[string]*jsonic.GrammarRuleSpec{ + "xml": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#ZZ"}, + {S: "#TX", R: "xml"}, + {P: "element"}, + }, + Close: []*jsonic.GrammarAltSpec{ + {S: "#ZZ"}, + {S: "#TX", R: "xml"}, + }, + }, + "element": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#XSC", A: "@element-selfclose", U: map[string]any{"selfclose": 1}}, + {S: "#XOP", P: "content", A: "@element-open"}, + }, + Close: []*jsonic.GrammarAltSpec{ + {C: "@element-is-selfclosed"}, + {S: "#XCL", A: "@element-close"}, + }, + }, + "content": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#XCL", B: 1}, + {P: "child"}, + }, + Close: 
[]*jsonic.GrammarAltSpec{ + {S: "#XCL", B: 1}, + {R: "content"}, + }, + }, + "child": { + Open: []*jsonic.GrammarAltSpec{ + {S: "#TX", A: "@child-text"}, + {S: "#XOP", B: 1, P: "element"}, + {S: "#XSC", B: 1, P: "element"}, + }, + }, + }, + } + if err := j.Grammar(gs); err != nil { + return fmt.Errorf("xml: apply grammar: %w", err) + } + + return nil +} + +// firstRule walks back through Prev links to find the originating rule +// instance (matches the root rule used by the parser as the result +// holder). +func firstRule(r *jsonic.Rule) *jsonic.Rule { + cur := r + for cur.Prev != nil && cur.Prev != jsonic.NoRule { + cur = cur.Prev + } + return cur +} + +// predefinedEntities is the five XML-predefined entities. +var predefinedEntities = map[string]string{ + "amp": "&", + "lt": "<", + "gt": ">", + "quot": "\"", + "apos": "'", +} + +// entityRE matches a single entity reference: named, decimal numeric, or +// hexadecimal numeric. (?:...) would be ideal but the Go stdlib regexp +// supports named groups; this uses plain groups for portability. +var entityRE = regexp.MustCompile(`&(#x[0-9a-fA-F]+|#[0-9]+|[A-Za-z_][A-Za-z0-9_]*);`) + +// buildEntityDecoder returns a function that decodes the five +// predefined entities, numeric character references, and any +// caller-supplied custom entities. When `enabled` is false the +// function is an identity. 
+func buildEntityDecoder(enabled bool, custom map[string]string) func(string) string { + if !enabled { + return func(s string) string { return s } + } + merged := make(map[string]string, len(predefinedEntities)+len(custom)) + for k, v := range predefinedEntities { + merged[k] = v + } + for k, v := range custom { + merged[k] = v + } + return func(s string) string { + if !strings.Contains(s, "&") { + return s + } + return entityRE.ReplaceAllStringFunc(s, func(match string) string { + ref := match[1 : len(match)-1] + if ref[0] == '#' { + var code int64 + var err error + if len(ref) > 1 && (ref[1] == 'x' || ref[1] == 'X') { + code, err = strconv.ParseInt(ref[2:], 16, 32) + } else { + code, err = strconv.ParseInt(ref[1:], 10, 32) + } + if err != nil { + return match + } + return string(rune(code)) + } + if v, ok := merged[ref]; ok { + return v + } + return match + }) + } +} + +// buildXmlTagMatcher returns a MakeLexMatcher that recognises every +// top-level XML `<...>` construct at the current lex position. 
On a +// successful match it consumes the full construct and emits exactly +// one of: +// +// #XOP val = {"name":..., "attributes":...} +// #XSC val = {"name":..., "attributes":...} +// #XCL val = name (string) +// #XIG | | (ignored) +// #TX val = cdata body (verbatim, no entity decoding) +func buildXmlTagMatcher( + decode func(string) string, + xigTin, xopTin, xclTin, xscTin jsonic.Tin, +) jsonic.MakeLexMatcher { + return func(_ *jsonic.LexConfig, _ *jsonic.Options) jsonic.LexMatcher { + return func(lex *jsonic.Lex, _ *jsonic.Rule) *jsonic.Token { + pnt := lex.Cursor() + src := lex.Src + srclen := len(src) + sI := pnt.SI + if sI >= srclen || src[sI] != '<' { + return nil + } + + // Comment: + if strings.HasPrefix(src[sI:], "") + if end < 0 { + return lex.Bad("unterminated_comment") + } + finish := sI + 4 + end + 3 + tsrc := src[sI:finish] + tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // CDATA: + if strings.HasPrefix(src[sI:], "") + if end < 0 { + return lex.Bad("unterminated_cdata") + } + finish := body + end + 3 + text := src[body : body+end] + tsrc := src[sI:finish] + tkn := lex.Token("#TX", jsonic.TinTX, text, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // DOCTYPE: (allows a single level of [] subset) + if strings.HasPrefix(src[sI:], "' && depth <= 0 { + break + } + i++ + } + if i >= srclen { + return lex.Bad("unterminated_doctype") + } + finish := i + 1 + tsrc := src[sI:finish] + tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // Processing instruction: + if sI+1 < srclen && src[sI+1] == '?' 
{ + end := strings.Index(src[sI+2:], "?>") + if end < 0 { + return lex.Bad("unterminated_pi") + } + finish := sI + 2 + end + 2 + tsrc := src[sI:finish] + tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // Closing tag: + if sI+1 < srclen && src[sI+1] == '/' { + i := sI + 2 + if i >= srclen || !isNameStart(src[i]) { + return nil + } + nameStart := i + i++ + for i < srclen && isNameChar(src[i]) { + i++ + } + name := src[nameStart:i] + for i < srclen && isSpace(src[i]) { + i++ + } + if i >= srclen || src[i] != '>' { + return lex.Bad("xml_invalid_tag") + } + finish := i + 1 + tsrc := src[sI:finish] + tkn := lex.Token("#XCL", xclTin, name, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // Opening or self-close tag: + i := sI + 1 + if i >= srclen || !isNameStart(src[i]) { + return nil + } + nameStart := i + i++ + for i < srclen && isNameChar(src[i]) { + i++ + } + name := src[nameStart:i] + attrs := map[string]any{} + + for { + wsStart := i + for i < srclen && isSpace(src[i]) { + i++ + } + if i >= srclen { + return lex.Bad("xml_invalid_tag") + } + + // End of tag. + if src[i] == '>' { + finish := i + 1 + tsrc := src[sI:finish] + val := map[string]any{"name": name, "attributes": attrs} + tkn := lex.Token("#XOP", xopTin, val, tsrc) + advance(pnt, sI, finish) + return tkn + } + if src[i] == '/' && i+1 < srclen && src[i+1] == '>' { + finish := i + 2 + tsrc := src[sI:finish] + val := map[string]any{"name": name, "attributes": attrs} + tkn := lex.Token("#XSC", xscTin, val, tsrc) + advance(pnt, sI, finish) + return tkn + } + + // Attributes must be separated by whitespace. + if wsStart == i { + return lex.Bad("xml_invalid_tag") + } + + // Attribute name. 
+ if !isNameStart(src[i]) { + return lex.Bad("xml_invalid_tag") + } + attrStart := i + i++ + for i < srclen && isNameChar(src[i]) { + i++ + } + attrName := src[attrStart:i] + + for i < srclen && isSpace(src[i]) { + i++ + } + if i >= srclen || src[i] != '=' { + return lex.Bad("xml_invalid_tag") + } + i++ + for i < srclen && isSpace(src[i]) { + i++ + } + + if i >= srclen { + return lex.Bad("xml_invalid_tag") + } + quote := src[i] + if quote != '"' && quote != '\'' { + return lex.Bad("xml_invalid_tag") + } + i++ + valStart := i + for i < srclen && src[i] != quote { + i++ + } + if i >= srclen { + return lex.Bad("xml_invalid_tag") + } + raw := src[valStart:i] + i++ // consume closing quote + attrs[attrName] = decode(raw) + } + } + } +} + +// resolveNamespaces annotates `element` (and its descendants) with +// `prefix`, `localName` and `namespace` fields resolved from xmlns / +// xmlns:* attributes in scope. +func resolveNamespaces(element map[string]any, scope map[string]string) { + local := make(map[string]string, len(scope)+4) + for k, v := range scope { + local[k] = v + } + if attrs, ok := element["attributes"].(map[string]any); ok { + for k, v := range attrs { + s, _ := v.(string) + if k == "xmlns" { + local[""] = s + } else if strings.HasPrefix(k, "xmlns:") { + local[k[6:]] = s + } + } + } + + name, _ := element["name"].(string) + if idx := strings.Index(name, ":"); idx >= 0 { + prefix := name[:idx] + element["prefix"] = prefix + element["localName"] = name[idx+1:] + if uri, ok := local[prefix]; ok { + element["namespace"] = uri + } + } else { + element["localName"] = name + if uri, ok := local[""]; ok { + element["namespace"] = uri + } + } + + children, _ := element["children"].([]any) + for _, c := range children { + if ce, ok := c.(map[string]any); ok { + resolveNamespaces(ce, local) + } + } +} + +// --- helpers --- + +func advance(pnt *jsonic.Point, from, to int) { + pnt.SI = to + pnt.CI += to - from +} + +func isNameStart(ch byte) bool { + return (ch >= 'A' && 
ch <= 'Z') || + (ch >= 'a' && ch <= 'z') || + ch == '_' || ch == ':' +} + +func isNameChar(ch byte) bool { + return isNameStart(ch) || + (ch >= '0' && ch <= '9') || + ch == '-' || ch == '.' +} + +func isSpace(ch byte) bool { + return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' +} + +func boolPtr(b bool) *bool { return &b } + +func toBool(v any, def bool) bool { + if v == nil { + return def + } + b, ok := v.(bool) + if !ok { + return def + } + return b +} + +func toStringMap(v any) map[string]string { + out := map[string]string{} + switch m := v.(type) { + case map[string]string: + for k, vv := range m { + out[k] = vv + } + case map[string]any: + for k, vv := range m { + if s, ok := vv.(string); ok { + out[k] = s + } + } + } + return out +} diff --git a/go/xml_test.go b/go/xml_test.go new file mode 100644 index 0000000..a6186f7 --- /dev/null +++ b/go/xml_test.go @@ -0,0 +1,289 @@ +package xml + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// specEntry represents one row of a TSV spec file. +type specEntry struct { + File string + Line int + Name string + Input string // Escape-decoded XML source. + Expected string // Raw cell: JSON text, or "ERROR" / "ERROR:code". + Opts string // Raw JSON (may be empty). +} + +// specDir returns the absolute path to the shared TSV spec directory. +func specDir() string { + return filepath.Join("..", "test", "spec") +} + +// loadSpec reads a TSV spec file into a slice of specEntry. Comment and +// blank lines are skipped. Escapes in the `input` column are decoded +// via unescapeInput; the `expected` and `opts` columns are left raw so +// JSON's own escape rules are honoured by the downstream JSON parser. 
+func loadSpec(t *testing.T, path string) []specEntry { + t.Helper() + f, err := os.Open(path) + if err != nil { + t.Fatalf("open %s: %v", path, err) + } + defer f.Close() + + var out []specEntry + scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 1<<20), 1<<20) + lineNo := 0 + for scanner.Scan() { + lineNo++ + line := scanner.Text() + if line == "" || strings.HasPrefix(line, "#") { + continue + } + cols := strings.Split(line, "\t") + if len(cols) < 3 { + t.Fatalf("%s:%d: expected at least 3 tab-separated columns, got %d", path, lineNo, len(cols)) + } + entry := specEntry{ + File: filepath.Base(path), + Line: lineNo, + Name: cols[0], + Input: unescapeInput(cols[1]), + Expected: cols[2], + } + if len(cols) >= 4 { + entry.Opts = cols[3] + } + out = append(out, entry) + } + if err := scanner.Err(); err != nil { + t.Fatalf("read %s: %v", path, err) + } + return out +} + +// unescapeInput decodes the escape sequences used in the `input` +// column of the TSV spec: \n (LF), \r (CR), \t (TAB), \\ (backslash). +// Any other `\x` sequence is left intact so XML escapes like `\d` are +// not accidentally rewritten. +func unescapeInput(s string) string { + if !strings.Contains(s, `\`) { + return s + } + var b strings.Builder + b.Grow(len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\\' && i+1 < len(s) { + switch s[i+1] { + case 'n': + b.WriteByte('\n') + i++ + continue + case 'r': + b.WriteByte('\r') + i++ + continue + case 't': + b.WriteByte('\t') + i++ + continue + case '\\': + b.WriteByte('\\') + i++ + continue + } + } + b.WriteByte(c) + } + return b.String() +} + +// parseOpts decodes the optional options JSON into a map suitable for +// jsonic.UseDefaults. Empty strings produce an empty map. 
+func parseOpts(t *testing.T, entry specEntry) map[string]any { + t.Helper() + if strings.TrimSpace(entry.Opts) == "" { + return map[string]any{} + } + var out map[string]any + if err := json.Unmarshal([]byte(entry.Opts), &out); err != nil { + t.Fatalf("%s:%d: parse opts %q: %v", entry.File, entry.Line, entry.Opts, err) + } + return out +} + +// parseExpected decodes the expected cell: either a JSON document or +// an `ERROR` / `ERROR:code` marker. +func parseExpected(t *testing.T, entry specEntry) (wantErr bool, errCode string, wantJSON any) { + t.Helper() + raw := entry.Expected + if strings.HasPrefix(raw, "ERROR") { + rest := strings.TrimPrefix(raw, "ERROR") + rest = strings.TrimPrefix(rest, ":") + return true, rest, nil + } + if err := json.Unmarshal([]byte(raw), &wantJSON); err != nil { + t.Fatalf("%s:%d: parse expected JSON %q: %v", entry.File, entry.Line, raw, err) + } + return false, "", wantJSON +} + +// runSpecFile is the workhorse: it loads one spec file and runs each +// row as its own sub-test. 
+func runSpecFile(t *testing.T, path string) { + entries := loadSpec(t, path) + if len(entries) == 0 { + t.Fatalf("%s: no spec entries loaded", path) + } + for _, entry := range entries { + entry := entry + t.Run(entry.Name, func(t *testing.T) { + opts := parseOpts(t, entry) + wantErr, errCode, wantVal := parseExpected(t, entry) + + j := jsonic.Make() + if err := j.UseDefaults(Xml, Defaults, opts); err != nil { + t.Fatalf("plugin init: %v", err) + } + got, err := j.Parse(entry.Input) + + if wantErr { + if err == nil { + t.Fatalf("expected parse error, got result %v", got) + } + if errCode != "" && !strings.Contains(err.Error(), errCode) { + t.Fatalf("expected error code %q, got %q", errCode, err.Error()) + } + return + } + if err != nil { + t.Fatalf("unexpected parse error: %v", err) + } + + // Round-trip the got value through JSON for type normalisation + // so `[]any` vs concrete slice types compare cleanly against + // values decoded from the spec via json.Unmarshal. + gotJSON, err := json.Marshal(got) + if err != nil { + t.Fatalf("marshal got: %v", err) + } + var gotVal any + if err := json.Unmarshal(gotJSON, &gotVal); err != nil { + t.Fatalf("unmarshal got: %v", err) + } + if !reflect.DeepEqual(gotVal, wantVal) { + wantPretty, _ := json.Marshal(wantVal) + t.Fatalf("\nwant: %s\ngot : %s", wantPretty, gotJSON) + } + }) + } +} + +func TestBasicSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "basic.tsv")) } +func TestAttributesSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "attributes.tsv")) } +func TestEntitiesSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "entities.tsv")) } +func TestNamespacesSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "namespaces.tsv")) } +func TestStructureSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "structure.tsv")) } +func TestErrorsSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "errors.tsv")) } +func TestW3CSpec(t *testing.T) { runSpecFile(t, 
filepath.Join(specDir(), "w3c.tsv")) } + +// --- XML embedded in Jsonic source ----------------------------------------- +// +// Real-world use case: a Jsonic document holds an XML payload as a string. +// Parse the outer document with stock Jsonic, then feed the embedded XML +// string into a second Jsonic instance configured with the Xml plugin. + +func TestXmlEmbeddedInJsonic(t *testing.T) { + // An ordinary Jsonic document. Uses backtick-delimited multiline + // strings so the XML can embed newlines and double quotes verbatim. + jsonicSrc := "{\n" + + " title: 'order-42',\n" + + " payload: `" + + `` + "\n" + + `` + "\n" + + ` Widget` + "\n" + + ` Gadget` + "\n" + + `` + "`,\n" + + "}\n" + + outer, err := jsonic.Parse(jsonicSrc) + if err != nil { + t.Fatalf("parse outer Jsonic: %v", err) + } + m, ok := outer.(map[string]any) + if !ok { + t.Fatalf("outer should be map, got %T", outer) + } + if m["title"] != "order-42" { + t.Fatalf("title mismatch: %v", m["title"]) + } + payload, ok := m["payload"].(string) + if !ok { + t.Fatalf("payload should be string, got %T", m["payload"]) + } + + // Parse the XML payload with the Xml plugin. + xmlParser := jsonic.Make() + if err := xmlParser.UseDefaults(Xml, Defaults); err != nil { + t.Fatalf("xml plugin init: %v", err) + } + parsed, err := xmlParser.Parse(payload) + if err != nil { + t.Fatalf("parse XML payload: %v", err) + } + el, ok := parsed.(map[string]any) + if !ok { + t.Fatalf("xml result should be map, got %T", parsed) + } + if el["name"] != "order" { + t.Fatalf("root name: got %v, want order", el["name"]) + } + attrs, _ := el["attributes"].(map[string]any) + if attrs["id"] != "42" { + t.Fatalf("root attr id: got %v, want 42", attrs["id"]) + } + // Count children and check attrs. 
+ children, _ := el["children"].([]any) + var items []map[string]any + for _, c := range children { + if cm, ok := c.(map[string]any); ok && cm["name"] == "item" { + items = append(items, cm) + } + } + if len(items) != 2 { + t.Fatalf("expected 2 item elements, got %d", len(items)) + } + if a, _ := items[0]["attributes"].(map[string]any); a["qty"] != "2" { + t.Fatalf("item[0].qty: got %v, want 2", a["qty"]) + } + if a, _ := items[1]["attributes"].(map[string]any); a["qty"] != "1" { + t.Fatalf("item[1].qty: got %v, want 1", a["qty"]) + } +} + +// TestSpecDirExists is a sanity check that the shared test/spec folder is +// reachable from the Go test working directory. +func TestSpecDirExists(t *testing.T) { + info, err := os.Stat(specDir()) + if err != nil { + t.Fatalf("spec dir: %v", err) + } + if !info.IsDir() { + t.Fatalf("%s is not a directory", specDir()) + } +} + +// Compile-time assertion that specEntry stringifies meaningfully in +// error messages (keeps `fmt` import stable if trimmed elsewhere). 
+var _ = fmt.Sprintf diff --git a/test/fixtures/basic-array.json b/test/fixtures/basic-array.json deleted file mode 100644 index e8a1b12..0000000 --- a/test/fixtures/basic-array.json +++ /dev/null @@ -1 +0,0 @@ -[["1","2"],["3","4"]] diff --git a/test/fixtures/basic-noheader-names.json b/test/fixtures/basic-noheader-names.json deleted file mode 100644 index 7df9a6f..0000000 --- a/test/fixtures/basic-noheader-names.json +++ /dev/null @@ -1 +0,0 @@ -[{"x":"a","y":"b"},{"x":"1","y":"2"},{"x":"3","y":"4"}] diff --git a/test/fixtures/basic-noheader.json b/test/fixtures/basic-noheader.json deleted file mode 100644 index 86c86e2..0000000 --- a/test/fixtures/basic-noheader.json +++ /dev/null @@ -1 +0,0 @@ -[{"field~0":"a","field~1":"b"},{"field~0":"1","field~1":"2"},{"field~0":"3","field~1":"4"}] diff --git a/test/fixtures/basic.csv b/test/fixtures/basic.csv deleted file mode 100644 index 0099ae9..0000000 --- a/test/fixtures/basic.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -1,2 -3,4 diff --git a/test/fixtures/basic.json b/test/fixtures/basic.json deleted file mode 100644 index 8db32a8..0000000 --- a/test/fixtures/basic.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1","b":"2"},{"a":"3","b":"4"}] diff --git a/test/fixtures/comment-empty.csv b/test/fixtures/comment-empty.csv deleted file mode 100644 index 28a3f77..0000000 --- a/test/fixtures/comment-empty.csv +++ /dev/null @@ -1,8 +0,0 @@ -a -1 -#comment -2 -3 - -#another comment -4 diff --git a/test/fixtures/comment-empty.json b/test/fixtures/comment-empty.json deleted file mode 100644 index ebc5b1c..0000000 --- a/test/fixtures/comment-empty.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":""},{"a":"2"},{"a":"3"},{"a":""},{"a":""},{"a":"4"}] diff --git a/test/fixtures/comment-inline.csv b/test/fixtures/comment-inline.csv deleted file mode 100644 index f927089..0000000 --- a/test/fixtures/comment-inline.csv +++ /dev/null @@ -1,3 +0,0 @@ -a#X -1 - b #c diff --git a/test/fixtures/comment-inline.json b/test/fixtures/comment-inline.json 
deleted file mode 100644 index 8a46826..0000000 --- a/test/fixtures/comment-inline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":" b "}] diff --git a/test/fixtures/comment-line.csv b/test/fixtures/comment-line.csv deleted file mode 100644 index 82875ca..0000000 --- a/test/fixtures/comment-line.csv +++ /dev/null @@ -1,5 +0,0 @@ -a -1 -#this is a comment -2 -3 diff --git a/test/fixtures/comment-line.json b/test/fixtures/comment-line.json deleted file mode 100644 index 071af2a..0000000 --- a/test/fixtures/comment-line.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":"2"},{"a":"3"}] diff --git a/test/fixtures/crlf.csv b/test/fixtures/crlf.csv deleted file mode 100644 index 4ba71dc..0000000 --- a/test/fixtures/crlf.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -A,B -C,D diff --git a/test/fixtures/crlf.json b/test/fixtures/crlf.json deleted file mode 100644 index c2872a6..0000000 --- a/test/fixtures/crlf.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B"},{"a":"C","b":"D"}] diff --git a/test/fixtures/empty-fields.csv b/test/fixtures/empty-fields.csv deleted file mode 100644 index 0970345..0000000 --- a/test/fixtures/empty-fields.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -1, -,1 -1,2, -,1,2 diff --git a/test/fixtures/empty-fields.json b/test/fixtures/empty-fields.json deleted file mode 100644 index 1da5613..0000000 --- a/test/fixtures/empty-fields.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1","b":""},{"a":"","b":"1"},{"a":"1","b":"2","field~2":""},{"a":"","b":"1","field~2":"2"}] diff --git a/test/fixtures/empty-records-default.json b/test/fixtures/empty-records-default.json deleted file mode 100644 index 7561320..0000000 --- a/test/fixtures/empty-records-default.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":"2"},{"a":"3"},{"a":"4"}] diff --git a/test/fixtures/empty-records.csv b/test/fixtures/empty-records.csv deleted file mode 100644 index bbeb6f4..0000000 --- a/test/fixtures/empty-records.csv +++ /dev/null @@ -1,8 +0,0 @@ -a -1 - -2 -3 - - -4 diff --git 
a/test/fixtures/empty-records.json b/test/fixtures/empty-records.json deleted file mode 100644 index ebc5b1c..0000000 --- a/test/fixtures/empty-records.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":""},{"a":"2"},{"a":"3"},{"a":""},{"a":""},{"a":"4"}] diff --git a/test/fixtures/happy.csv b/test/fixtures/happy.csv deleted file mode 100644 index 89a52c0..0000000 --- a/test/fixtures/happy.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -1,B,true -2,BB,false diff --git a/test/fixtures/happy.json b/test/fixtures/happy.json deleted file mode 100644 index c0ec104..0000000 --- a/test/fixtures/happy.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "a": "1", - "b": "B", - "c": "true" - }, - { - "a": "2", - "b": "BB", - "c": "false" - } -] diff --git a/test/fixtures/leading-newline.csv b/test/fixtures/leading-newline.csv deleted file mode 100644 index d555e5e..0000000 --- a/test/fixtures/leading-newline.csv +++ /dev/null @@ -1,3 +0,0 @@ - -a,b -A,B diff --git a/test/fixtures/leading-newline.json b/test/fixtures/leading-newline.json deleted file mode 100644 index e265c5e..0000000 --- a/test/fixtures/leading-newline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B"}] diff --git a/test/fixtures/manifest.json b/test/fixtures/manifest.json deleted file mode 100644 index b7f0b22..0000000 --- a/test/fixtures/manifest.json +++ /dev/null @@ -1,872 +0,0 @@ -{ - "happy": { - "name": "happy" - }, - "quote": { - "name": "quote" - }, - "notrim": { - "name": "notrim" - }, - "trim": { - "name": "trim", - "csvFile": "notrim", - "opt": { - "trim": true - } - }, - "papa-one-row": { - "name": "papa-One row", - "opt": { - "header": false, - "object": false - } - }, - "papa-two-rows": { - "name": "papa-Two rows", - "opt": { - "header": false, - "object": false - } - }, - "papa-three-rows": { - "name": "papa-Three rows", - "opt": { - "header": false, - "object": false - } - }, - "papa-whitespace-at-edges-of-unquoted-field": { - "name": "papa-Whitespace at edges of unquoted field", - "opt": { - "header": 
false, - "object": false - } - }, - "papa-quoted-field": { - "name": "papa-Quoted field", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-extra-whitespace-on-edges": { - "name": "papa-Quoted field with extra whitespace on edges", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-delimiter": { - "name": "papa-Quoted field with delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-line-break": { - "name": "papa-Quoted field with line break", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-fields-with-line-breaks": { - "name": "papa-Quoted fields with line breaks", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break": { - "name": "papa-Quoted fields at end of row with delimiter and line break", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-escaped-quotes": { - "name": "papa-Quoted field with escaped quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-escaped-quotes-at-boundaries": { - "name": "papa-Quoted field with escaped quotes at boundaries", - "opt": { - "header": false, - "object": false - } - }, - "papa-unquoted-field-with-quotes-at-end-of-field": { - "name": "papa-Unquoted field with quotes at end of field", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-quotes-around-delimiter": { - "name": "papa-Quoted field with quotes around delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-quotes-on-right-side-of-delimiter": { - "name": "papa-Quoted field with quotes on right side of delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-quotes-on-left-side-of-delimiter": { - "name": "papa-Quoted field with quotes on left side of delimiter", - "opt": { - "header": false, - 
"object": false - } - }, - "papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too": { - "name": "papa-Quoted field with 5 quotes in a row and a delimiter in there: too", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-whitespace-around-quotes": { - "name": "papa-Quoted field with whitespace around quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-misplaced-quotes-in-data-not-as-opening-quotes": { - "name": "papa-Misplaced quotes in data: not as opening quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-has-no-closing-quote": { - "name": "papa-Quoted field has no closing quote", - "opt": { - "header": false, - "object": false - }, - "err": "unterminated_string" - }, - "papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer": { - "name": "papa-Quoted field has invalid trailing quote after delimiter with a valid closer", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-invalid-trailing-quote-after-delimiter": { - "name": "papa-Quoted field has invalid trailing quote after delimiter", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-invalid-trailing-quote-before-delimiter": { - "name": "papa-Quoted field has invalid trailing quote before delimiter", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-invalid-trailing-quote-after-new-line": { - "name": "papa-Quoted field has invalid trailing quote after new line", - "opt": { - "header": false, - "object": false - }, - "err": "unexpected" - }, - "papa-quoted-field-has-valid-trailing-quote-via-delimiter": { - "name": "papa-Quoted field has valid trailing quote via delimiter", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-has-valid-trailing-quote-via-n": { - "name": "papa-Quoted field has valid 
trailing quote via \\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-has-valid-trailing-quote-via-eof": { - "name": "papa-Quoted field has valid trailing quote via EOF", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote": { - "name": "papa-Quoted field contains delimiters and \\n with valid trailing quote", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-starts-with-quoted-field": { - "name": "papa-Line starts with quoted field", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-starts-with-unquoted-empty-field": { - "name": "papa-Line starts with unquoted empty field", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-ends-with-quoted-field": { - "name": "papa-Line ends with quoted field", - "opt": { - "header": false, - "object": false - } - }, - "papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n": { - "name": "papa-Line ends with quoted field: first field of next line is empty, \\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes": { - "name": "papa-Quoted field at end of row (but not at EOF) has quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-quoted-field-at-eof-is-empty": { - "name": "papa-Empty quoted field at EOF is empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-multiple-consecutive-empty-fields": { - "name": "papa-Multiple consecutive empty fields", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-input-string": { - "name": "papa-Empty input string", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-the-delimiter-2-empty-fields": { - "name": "papa-Input is just the delimiter (2 empty fields)", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-empty-fields": { - 
"name": "papa-Input is just empty fields", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-a-string-a-single-field": { - "name": "papa-Input is just a string (a single field)", - "opt": { - "header": false, - "object": false - } - }, - "papa-commented-line-at-beginning": { - "name": "papa-Commented line at beginning", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-commented-line-in-middle": { - "name": "papa-Commented line in middle", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-commented-line-at-end": { - "name": "papa-Commented line at end", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-two-comment-lines-consecutively": { - "name": "papa-Two comment lines consecutively", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-two-comment-lines-consecutively-at-end-of-file": { - "name": "papa-Two comment lines consecutively at end of file", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-three-comment-lines-consecutively-at-beginning-of-file": { - "name": "papa-Three comment lines consecutively at beginning of file", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-entire-file-is-comment-lines": { - "name": "papa-Entire file is comment lines", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-comment-with-non-default-character": { - "name": "papa-Comment with non-default character", - "opt": { - "header": false, - "object": false, - "comment": true - }, - "jsonicOpt": { - "comment": { - "def": { - "hash": { - "start": "!" 
- } - } - } - } - }, - "papa-bad-comments-value-specified": { - "name": "papa-Bad comments value specified", - "opt": { - "header": false, - "object": false - } - }, - "papa-multi-character-comment-string": { - "name": "papa-Multi-character comment string", - "opt": { - "header": false, - "object": false, - "comment": true - }, - "jsonicOpt": { - "comment": { - "def": { - "hash": { - "start": "=N(" - } - } - } - } - }, - "papa-input-with-only-a-commented-line": { - "name": "papa-Input with only a commented line", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-jsonic-input-with-only-a-commented-line-and-blank-line-after": { - "name": "papa-jsonic-Input with only a commented line and blank line after", - "opt": { - "header": false, - "object": false, - "comment": true - } - }, - "papa-input-with-only-a-commented-line-without-comments-enabled": { - "name": "papa-Input with only a commented line: without comments enabled", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-without-comments-with-line-starting-with-whitespace": { - "name": "papa-Input without comments with line starting with whitespace", - "opt": { - "header": false, - "object": false - } - }, - "papa-multiple-rows-one-column-no-delimiter-found": { - "name": "papa-Multiple rows: one column (no delimiter found)", - "opt": { - "header": false, - "object": false - } - }, - "papa-jsonic-one-column-input-with-empty-fields": { - "name": "papa-jsonic-One column input with empty fields", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-two-rows-just-r": { - "name": "papa-Two rows: just \\r", - "opt": { - "header": false, - "object": false - } - }, - "papa-two-rows-r-n": { - "name": "papa-Two rows: \\r\\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-r-n": { - "name": "papa-Quoted field with \\r\\n", - "opt": { - "header": false, - "object": false - } - }, - 
"papa-quoted-field-with-r": { - "name": "papa-Quoted field with \\r", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-field-with-n": { - "name": "papa-Quoted field with \\n", - "opt": { - "header": false, - "object": false - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter": { - "name": "papa-Quoted fields with spaces between closing quote and next delimiter", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line": { - "name": "papa-Quoted fields with spaces between closing quote and next new line", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-quoted-fields-with-spaces-after-closing-quote": { - "name": "papa-Quoted fields with spaces after closing quote", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-misplaced-quotes-in-data-twice-not-as-opening-quotes": { - "name": "papa-Misplaced quotes in data twice: not as opening quotes", - "opt": { - "header": false, - "object": false - } - }, - "papa-header-row-with-one-row-of-data": { - "name": "papa-Header row with one row of data", - "opt": { - "header": true - } - }, - "papa-header-row-only": { - "name": "papa-Header row only" - }, - "papa-row-with-too-few-fields": { - "name": "papa-Row with too few fields", - "opt": { - "field": { - "exact": true - } - }, - "err": "csv_missing_field" - }, - "papa-row-with-too-many-fields": { - "name": "papa-Row with too many fields", - "opt": { - "field": { - "exact": true - } - }, - "err": "csv_extra_field" - }, - "papa-row-with-enough-fields-but-blank-field-in-the-begining": { - "name": "papa-Row with enough fields but blank field in the begining", - "opt": { - "header": false, - "object": false - } - }, - "papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers": { - "name": "papa-Row with enough fields but blank field in the begining using headers" 
- }, - "papa-row-with-enough-fields-but-blank-field-at-end": { - "name": "papa-Row with enough fields but blank field at end" - }, - "papa-tab-delimiter": { - "name": "papa-Tab delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "\t" - } - } - }, - "papa-pipe-delimiter": { - "name": "papa-Pipe delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "|" - } - } - }, - "papa-ascii-30-delimiter": { - "name": "papa-ASCII 30 delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "\u001e" - } - } - }, - "papa-ascii-31-delimiter": { - "name": "papa-ASCII 31 delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": "\u001f" - } - } - }, - "papa-multi-character-delimiter": { - "name": "papa-Multi-character delimiter", - "opt": { - "header": false, - "object": false, - "field": { - "separation": ", " - } - } - }, - "papa-multi-character-delimiter-length-2-with-quoted-field": { - "name": "papa-Multi-character delimiter (length 2) with quoted field", - "opt": { - "header": false, - "object": false, - "field": { - "separation": ", " - } - } - }, - "papa-dynamic-typing-converts-boolean-literals": { - "name": "papa-Dynamic typing converts boolean literals", - "opt": { - "header": false, - "object": false, - "value": true - }, - "jsonicOpt": { - "value": { - "def": { - "TRUE": { - "val": true - }, - "FALSE": { - "val": false - } - } - } - } - }, - "papa-dynamic-typing-doesn-t-convert-other-types": { - "name": "papa-Dynamic typing doesn't convert other types", - "opt": { - "header": false, - "object": false, - "value": true - }, - "jsonicOpt": { - "value": { - "def": { - "null": null - } - } - } - }, - "papa-jsonic-blank-line-at-beginning": { - "name": "papa-jsonic-Blank line at beginning", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-jsonic-blank-line-in-middle": { - "name": 
"papa-jsonic-Blank line in middle", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-jsonic-blank-lines-at-end": { - "name": "papa-jsonic-Blank lines at end", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-jsonic-blank-line-in-middle-with-whitespace": { - "name": "papa-jsonic-Blank line in middle with whitespace", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-first-field-of-a-line-is-empty": { - "name": "papa-First field of a line is empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-last-field-of-a-line-is-empty": { - "name": "papa-Last field of a line is empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-other-fields-are-empty": { - "name": "papa-Other fields are empty", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-input-string-2": { - "name": "papa-Empty input string 2", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-the-delimiter-2-empty-fields-2": { - "name": "papa-Input is just the delimiter (2 empty fields) 2", - "opt": { - "header": false, - "object": false - } - }, - "papa-input-is-just-a-string-a-single-field-2": { - "name": "papa-Input is just a string (a single field) 2", - "opt": { - "header": false, - "object": false - } - }, - "papa-empty-lines": { - "name": "papa-Empty lines", - "opt": { - "header": false, - "object": false, - "record": { - "empty": true - } - } - }, - "papa-skip-empty-lines": { - "name": "papa-Skip empty lines", - "opt": { - "header": false, - "object": false - } - }, - "papa-skip-empty-lines-with-newline-at-end-of-input": { - "name": "papa-Skip empty lines: with newline at end of input", - "opt": { - "header": false, - "object": false - } - }, - "papa-skip-empty-lines-with-empty-input": { - "name": "papa-Skip empty lines: with empty input", - "opt": { - "header": false, - 
"object": false - } - }, - "papa-skip-empty-lines-with-first-line-only-whitespace": { - "name": "papa-Skip empty lines: with first line only whitespace", - "opt": { - "header": false, - "object": false - } - }, - "papa-single-quote-as-quote-character": { - "name": "papa-Single quote as quote character", - "opt": { - "header": false, - "object": false, - "string": { - "quote": "'" - } - } - }, - "papa-custom-escape-character-in-the-middle": { - "name": "papa-Custom escape character in the middle", - "opt": { - "header": false, - "object": false, - "string": { - "csv": false - } - } - }, - "papa-custom-escape-character-at-the-end": { - "name": "papa-Custom escape character at the end", - "opt": { - "header": false, - "object": false, - "string": { - "csv": false - } - } - }, - "papa-header-row-with-preceding-comment": { - "name": "papa-Header row with preceding comment", - "opt": { - "comment": true - } - }, - "papa-carriage-return-in-header-inside-quotes-with-line-feed-endings": { - "name": "papa-Carriage return in header inside quotes: with line feed endings", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-r-n-endings-uses-r-n-linebreak": { - "name": "papa-Using \\r\\n endings uses \\r\\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-n-endings-uses-n-linebreak": { - "name": "papa-Using \\n endings uses \\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak": { - "name": "papa-Using \\r\\n endings with \\r\\n in header field uses \\r\\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak": { - "name": "papa-Using \\r\\n endings with \\n in header field uses \\r\\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak": { - "name": 
"papa-Using \\r\\n endings with \\n in header field with skip empty lines uses \\r\\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak": { - "name": "papa-Using \\n endings with \\r\\n in header field uses \\n linebreak", - "opt": { - "header": false, - "object": false - } - }, - "papa-using-reserved-regex-character-as-quote-character": { - "name": "papa-Using reserved regex character | as quote character", - "opt": { - "header": false, - "object": false, - "string": { - "quote": "|" - } - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter": { - "name": "papa-Quoted fields with spaces between closing quote and next delimiter and contains delimiter", - "opt": { - "header": false, - "object": false, - "trim": true - } - }, - "papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline": { - "name": "papa-Quoted fields with spaces between closing quote and newline and contains newline", - "opt": { - "header": false, - "object": false, - "trim": true - } - } -} diff --git a/test/fixtures/multi-char-separator.csv b/test/fixtures/multi-char-separator.csv deleted file mode 100644 index a2f41fb..0000000 --- a/test/fixtures/multi-char-separator.csv +++ /dev/null @@ -1,3 +0,0 @@ -a~~b~~c -A~~B~~C -AA~~BB~~CC diff --git a/test/fixtures/multi-char-separator.json b/test/fixtures/multi-char-separator.json deleted file mode 100644 index f4668e4..0000000 --- a/test/fixtures/multi-char-separator.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/multirow.csv b/test/fixtures/multirow.csv deleted file mode 100644 index b532031..0000000 --- a/test/fixtures/multirow.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -A,B,C -AA,BB,CC -AAA,BBB,CCC diff --git a/test/fixtures/multirow.json b/test/fixtures/multirow.json deleted file mode 100644 index 2128c0b..0000000 --- 
a/test/fixtures/multirow.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"},{"a":"AAA","b":"BBB","c":"CCC"}] diff --git a/test/fixtures/notrim.csv b/test/fixtures/notrim.csv deleted file mode 100644 index 155dee7..0000000 --- a/test/fixtures/notrim.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b,c -1 , 2 , 3 - 11 , 22 , 33 -4 , 5 , 6 - 44 , 55 , 66 diff --git a/test/fixtures/notrim.json b/test/fixtures/notrim.json deleted file mode 100644 index 50f6466..0000000 --- a/test/fixtures/notrim.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "a": "1 ", - "b": " 2 ", - "c": " 3" - }, - { - "a": " 11 ", - "b": " 22 ", - "c": " 33 " - }, - { - "a": "4\t", - "b": "\t5\t", - "c": "\t6" - }, - { - "a": "\t44\t", - "b": "\t\t55\t\t\t", - "c": "\t66\t" - } -] diff --git a/test/fixtures/number.csv b/test/fixtures/number.csv deleted file mode 100644 index f4f3001..0000000 --- a/test/fixtures/number.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -1,2.5 -1e2,abc diff --git a/test/fixtures/number.json b/test/fixtures/number.json deleted file mode 100644 index dcd2454..0000000 --- a/test/fixtures/number.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":1,"b":2.5},{"a":100,"b":"abc"}] diff --git a/test/fixtures/papa-ascii-30-delimiter.csv b/test/fixtures/papa-ascii-30-delimiter.csv deleted file mode 100644 index 0024b0a..0000000 --- a/test/fixtures/papa-ascii-30-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -abc -def \ No newline at end of file diff --git a/test/fixtures/papa-ascii-30-delimiter.json b/test/fixtures/papa-ascii-30-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-ascii-30-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-ascii-31-delimiter.csv b/test/fixtures/papa-ascii-31-delimiter.csv deleted file mode 100644 index ee8afcf..0000000 --- a/test/fixtures/papa-ascii-31-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -abc -def \ No newline at 
end of file diff --git a/test/fixtures/papa-ascii-31-delimiter.json b/test/fixtures/papa-ascii-31-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-ascii-31-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-bad-comments-value-specified.csv b/test/fixtures/papa-bad-comments-value-specified.csv deleted file mode 100644 index 164bb60..0000000 --- a/test/fixtures/papa-bad-comments-value-specified.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -5comment -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-bad-comments-value-specified.json b/test/fixtures/papa-bad-comments-value-specified.json deleted file mode 100644 index c8c02a3..0000000 --- a/test/fixtures/papa-bad-comments-value-specified.json +++ /dev/null @@ -1,15 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "5comment" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.csv b/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.csv deleted file mode 100644 index cbcc8a5..0000000 --- a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a","b" -"c","d" -"e","f" -"g","h" -"i","j" \ No newline at end of file diff --git a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.json b/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.json deleted file mode 100644 index 8e09d82..0000000 --- a/test/fixtures/papa-carriage-return-in-header-inside-quotes-with-line-feed-endings.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\r\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-comment-with-non-default-character.csv b/test/fixtures/papa-comment-with-non-default-character.csv 
deleted file mode 100644 index 6bacc65..0000000 --- a/test/fixtures/papa-comment-with-non-default-character.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -!Comment goes here -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-comment-with-non-default-character.json b/test/fixtures/papa-comment-with-non-default-character.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-comment-with-non-default-character.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-commented-line-at-beginning.csv b/test/fixtures/papa-commented-line-at-beginning.csv deleted file mode 100644 index 9d5dd4e..0000000 --- a/test/fixtures/papa-commented-line-at-beginning.csv +++ /dev/null @@ -1,2 +0,0 @@ -# Comment! -a,b,c \ No newline at end of file diff --git a/test/fixtures/papa-commented-line-at-beginning.json b/test/fixtures/papa-commented-line-at-beginning.json deleted file mode 100644 index f47beff..0000000 --- a/test/fixtures/papa-commented-line-at-beginning.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-commented-line-at-end.csv b/test/fixtures/papa-commented-line-at-end.csv deleted file mode 100644 index 42497fd..0000000 --- a/test/fixtures/papa-commented-line-at-end.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,true,false -# Comment \ No newline at end of file diff --git a/test/fixtures/papa-commented-line-at-end.json b/test/fixtures/papa-commented-line-at-end.json deleted file mode 100644 index 2cb707c..0000000 --- a/test/fixtures/papa-commented-line-at-end.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "true", - "false" - ] -] diff --git a/test/fixtures/papa-commented-line-in-middle.csv b/test/fixtures/papa-commented-line-in-middle.csv deleted file mode 100644 index 53df74c..0000000 --- a/test/fixtures/papa-commented-line-in-middle.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -# Comment -d,e,f \ No newline at end of file diff 
--git a/test/fixtures/papa-commented-line-in-middle.json b/test/fixtures/papa-commented-line-in-middle.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-commented-line-in-middle.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-custom-escape-character-at-the-end.csv b/test/fixtures/papa-custom-escape-character-at-the-end.csv deleted file mode 100644 index 69ea0dd..0000000 --- a/test/fixtures/papa-custom-escape-character-at-the-end.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,"c\"d\"" \ No newline at end of file diff --git a/test/fixtures/papa-custom-escape-character-at-the-end.json b/test/fixtures/papa-custom-escape-character-at-the-end.json deleted file mode 100644 index e4033d0..0000000 --- a/test/fixtures/papa-custom-escape-character-at-the-end.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c\"d\"" - ] -] diff --git a/test/fixtures/papa-custom-escape-character-in-the-middle.csv b/test/fixtures/papa-custom-escape-character-in-the-middle.csv deleted file mode 100644 index b37a6fd..0000000 --- a/test/fixtures/papa-custom-escape-character-in-the-middle.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,"c\"d\"f" \ No newline at end of file diff --git a/test/fixtures/papa-custom-escape-character-in-the-middle.json b/test/fixtures/papa-custom-escape-character-in-the-middle.json deleted file mode 100644 index 85cd7d5..0000000 --- a/test/fixtures/papa-custom-escape-character-in-the-middle.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c\"d\"f" - ] -] diff --git a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.csv b/test/fixtures/papa-dynamic-typing-converts-boolean-literals.csv deleted file mode 100644 index d36c135..0000000 --- a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.csv +++ /dev/null @@ -1 +0,0 @@ -true,false,T,F,TRUE,FALSE,True,False \ No newline at end of file diff --git 
a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.json b/test/fixtures/papa-dynamic-typing-converts-boolean-literals.json deleted file mode 100644 index 0c34701..0000000 --- a/test/fixtures/papa-dynamic-typing-converts-boolean-literals.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - true, - false, - "T", - "F", - true, - false, - "True", - "False" - ] -] diff --git a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.csv b/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.csv deleted file mode 100644 index 7e39851..0000000 --- a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -undefined,null,[ -var,float,if \ No newline at end of file diff --git a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.json b/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.json deleted file mode 100644 index 160d79c..0000000 --- a/test/fixtures/papa-dynamic-typing-doesn-t-convert-other-types.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "A", - "B", - "C" - ], - [ - "undefined", - "null", - "[" - ], - [ - "var", - "float", - "if" - ] -] diff --git a/test/fixtures/papa-empty-input-string-2.csv b/test/fixtures/papa-empty-input-string-2.csv deleted file mode 100644 index e69de29..0000000 diff --git a/test/fixtures/papa-empty-input-string-2.json b/test/fixtures/papa-empty-input-string-2.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-empty-input-string-2.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-empty-input-string.csv b/test/fixtures/papa-empty-input-string.csv deleted file mode 100644 index e69de29..0000000 diff --git a/test/fixtures/papa-empty-input-string.json b/test/fixtures/papa-empty-input-string.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-empty-input-string.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-empty-lines.csv b/test/fixtures/papa-empty-lines.csv deleted 
file mode 100644 index 4247d50..0000000 --- a/test/fixtures/papa-empty-lines.csv +++ /dev/null @@ -1,5 +0,0 @@ - -a,b,c - -d,e,f - diff --git a/test/fixtures/papa-empty-lines.json b/test/fixtures/papa-empty-lines.json deleted file mode 100644 index 963e246..0000000 --- a/test/fixtures/papa-empty-lines.json +++ /dev/null @@ -1,15 +0,0 @@ -[ - [], - [ - "a", - "b", - "c" - ], - [], - [ - "d", - "e", - "f" - ], - [] -] diff --git a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.csv b/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.csv deleted file mode 100644 index 2d02206..0000000 --- a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,"" -a,b,"" \ No newline at end of file diff --git a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.json b/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.json deleted file mode 100644 index 98b5299..0000000 --- a/test/fixtures/papa-empty-quoted-field-at-eof-is-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "" - ], - [ - "a", - "b", - "" - ] -] diff --git a/test/fixtures/papa-entire-file-is-comment-lines.csv b/test/fixtures/papa-entire-file-is-comment-lines.csv deleted file mode 100644 index f77b825..0000000 --- a/test/fixtures/papa-entire-file-is-comment-lines.csv +++ /dev/null @@ -1,3 +0,0 @@ -#comment1 -#comment2 -#comment3 \ No newline at end of file diff --git a/test/fixtures/papa-entire-file-is-comment-lines.json b/test/fixtures/papa-entire-file-is-comment-lines.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-entire-file-is-comment-lines.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-first-field-of-a-line-is-empty.csv b/test/fixtures/papa-first-field-of-a-line-is-empty.csv deleted file mode 100644 index df89dba..0000000 --- a/test/fixtures/papa-first-field-of-a-line-is-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,c -,e,f \ No newline at end of file diff --git 
a/test/fixtures/papa-first-field-of-a-line-is-empty.json b/test/fixtures/papa-first-field-of-a-line-is-empty.json deleted file mode 100644 index 7ab352a..0000000 --- a/test/fixtures/papa-first-field-of-a-line-is-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-header-row-only.csv b/test/fixtures/papa-header-row-only.csv deleted file mode 100644 index 8ae723e..0000000 --- a/test/fixtures/papa-header-row-only.csv +++ /dev/null @@ -1 +0,0 @@ -A,B,C \ No newline at end of file diff --git a/test/fixtures/papa-header-row-only.json b/test/fixtures/papa-header-row-only.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-header-row-only.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-header-row-with-one-row-of-data.csv b/test/fixtures/papa-header-row-with-one-row-of-data.csv deleted file mode 100644 index fea02ce..0000000 --- a/test/fixtures/papa-header-row-with-one-row-of-data.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C -a,b,c \ No newline at end of file diff --git a/test/fixtures/papa-header-row-with-one-row-of-data.json b/test/fixtures/papa-header-row-with-one-row-of-data.json deleted file mode 100644 index 79cd368..0000000 --- a/test/fixtures/papa-header-row-with-one-row-of-data.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - { - "A": "a", - "B": "b", - "C": "c" - } -] diff --git a/test/fixtures/papa-header-row-with-preceding-comment.csv b/test/fixtures/papa-header-row-with-preceding-comment.csv deleted file mode 100644 index 37801e2..0000000 --- a/test/fixtures/papa-header-row-with-preceding-comment.csv +++ /dev/null @@ -1,3 +0,0 @@ -#Comment -a,b -c,d diff --git a/test/fixtures/papa-header-row-with-preceding-comment.json b/test/fixtures/papa-header-row-with-preceding-comment.json deleted file mode 100644 index 8812637..0000000 --- a/test/fixtures/papa-header-row-with-preceding-comment.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - { - "a": "c", - "b": 
"d" - } -] diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.csv b/test/fixtures/papa-input-is-just-a-string-a-single-field-2.csv deleted file mode 100644 index 7203e92..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.csv +++ /dev/null @@ -1 +0,0 @@ -Abc def \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.json b/test/fixtures/papa-input-is-just-a-string-a-single-field-2.json deleted file mode 100644 index a5f44c8..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field-2.json +++ /dev/null @@ -1,5 +0,0 @@ -[ - [ - "Abc def" - ] -] diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field.csv b/test/fixtures/papa-input-is-just-a-string-a-single-field.csv deleted file mode 100644 index 7203e92..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field.csv +++ /dev/null @@ -1 +0,0 @@ -Abc def \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-a-string-a-single-field.json b/test/fixtures/papa-input-is-just-a-string-a-single-field.json deleted file mode 100644 index a5f44c8..0000000 --- a/test/fixtures/papa-input-is-just-a-string-a-single-field.json +++ /dev/null @@ -1,5 +0,0 @@ -[ - [ - "Abc def" - ] -] diff --git a/test/fixtures/papa-input-is-just-empty-fields.csv b/test/fixtures/papa-input-is-just-empty-fields.csv deleted file mode 100644 index f6f13f5..0000000 --- a/test/fixtures/papa-input-is-just-empty-fields.csv +++ /dev/null @@ -1,2 +0,0 @@ -,, -,,, \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-empty-fields.json b/test/fixtures/papa-input-is-just-empty-fields.json deleted file mode 100644 index 46e6f81..0000000 --- a/test/fixtures/papa-input-is-just-empty-fields.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [ - "", - "", - "" - ], - [ - "", - "", - "", - "" - ] -] diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.csv 
b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.csv deleted file mode 100644 index 41622b4..0000000 --- a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.csv +++ /dev/null @@ -1 +0,0 @@ -, \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.json b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.json deleted file mode 100644 index 3a9e22f..0000000 --- a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields-2.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - [ - "", - "" - ] -] diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.csv b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.csv deleted file mode 100644 index 41622b4..0000000 --- a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.csv +++ /dev/null @@ -1 +0,0 @@ -, \ No newline at end of file diff --git a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.json b/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.json deleted file mode 100644 index 3a9e22f..0000000 --- a/test/fixtures/papa-input-is-just-the-delimiter-2-empty-fields.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - [ - "", - "" - ] -] diff --git a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.csv b/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.csv deleted file mode 100644 index 65b570c..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.csv +++ /dev/null @@ -1 +0,0 @@ -#commented line \ No newline at end of file diff --git a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.json b/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.json deleted file mode 100644 index 66808d4..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line-without-comments-enabled.json +++ /dev/null @@ -1,5 +0,0 @@ -[ - [ - "#commented line" - ] -] 
diff --git a/test/fixtures/papa-input-with-only-a-commented-line.csv b/test/fixtures/papa-input-with-only-a-commented-line.csv deleted file mode 100644 index 65b570c..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line.csv +++ /dev/null @@ -1 +0,0 @@ -#commented line \ No newline at end of file diff --git a/test/fixtures/papa-input-with-only-a-commented-line.json b/test/fixtures/papa-input-with-only-a-commented-line.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-input-with-only-a-commented-line.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.csv b/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.csv deleted file mode 100644 index 2395318..0000000 --- a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.csv +++ /dev/null @@ -1,3 +0,0 @@ -a - b -c \ No newline at end of file diff --git a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.json b/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.json deleted file mode 100644 index d4c6aea..0000000 --- a/test/fixtures/papa-input-without-comments-with-line-starting-with-whitespace.json +++ /dev/null @@ -1,11 +0,0 @@ -[ - [ - "a" - ], - [ - " b" - ], - [ - "c" - ] -] diff --git a/test/fixtures/papa-jsonic-blank-line-at-beginning.csv b/test/fixtures/papa-jsonic-blank-line-at-beginning.csv deleted file mode 100644 index 155c206..0000000 --- a/test/fixtures/papa-jsonic-blank-line-at-beginning.csv +++ /dev/null @@ -1,3 +0,0 @@ - -a,b,c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-jsonic-blank-line-at-beginning.json b/test/fixtures/papa-jsonic-blank-line-at-beginning.json deleted file mode 100644 index 3c9bfa4..0000000 --- a/test/fixtures/papa-jsonic-blank-line-at-beginning.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [], - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] 
diff --git a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.csv b/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.csv deleted file mode 100644 index 4b98566..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.json b/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.json deleted file mode 100644 index d7f6c55..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle-with-whitespace.json +++ /dev/null @@ -1,15 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - " " - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-jsonic-blank-line-in-middle.csv b/test/fixtures/papa-jsonic-blank-line-in-middle.csv deleted file mode 100644 index c02e652..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-jsonic-blank-line-in-middle.json b/test/fixtures/papa-jsonic-blank-line-in-middle.json deleted file mode 100644 index 281e7bc..0000000 --- a/test/fixtures/papa-jsonic-blank-line-in-middle.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-jsonic-blank-lines-at-end.csv b/test/fixtures/papa-jsonic-blank-lines-at-end.csv deleted file mode 100644 index e076fcb..0000000 --- a/test/fixtures/papa-jsonic-blank-lines-at-end.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -d,e,f - diff --git a/test/fixtures/papa-jsonic-blank-lines-at-end.json b/test/fixtures/papa-jsonic-blank-lines-at-end.json deleted file mode 100644 index dfbca40..0000000 --- a/test/fixtures/papa-jsonic-blank-lines-at-end.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ], - [] -] diff --git 
a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.csv b/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.csv deleted file mode 100644 index 9355ec2..0000000 --- a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.csv +++ /dev/null @@ -1 +0,0 @@ -#commented line diff --git a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.json b/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.json deleted file mode 100644 index fe51488..0000000 --- a/test/fixtures/papa-jsonic-input-with-only-a-commented-line-and-blank-line-after.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.csv b/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.csv deleted file mode 100644 index 30b315b..0000000 --- a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.csv +++ /dev/null @@ -1,7 +0,0 @@ -a -b - - -c -d -e diff --git a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.json b/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.json deleted file mode 100644 index 42ccc45..0000000 --- a/test/fixtures/papa-jsonic-one-column-input-with-empty-fields.json +++ /dev/null @@ -1,19 +0,0 @@ -[ - [ - "a" - ], - [ - "b" - ], - [], - [], - [ - "c" - ], - [ - "d" - ], - [ - "e" - ] -] diff --git a/test/fixtures/papa-last-field-of-a-line-is-empty.csv b/test/fixtures/papa-last-field-of-a-line-is-empty.csv deleted file mode 100644 index 81e726d..0000000 --- a/test/fixtures/papa-last-field-of-a-line-is-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b, -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-last-field-of-a-line-is-empty.json b/test/fixtures/papa-last-field-of-a-line-is-empty.json deleted file mode 100644 index bf859ad..0000000 --- a/test/fixtures/papa-last-field-of-a-line-is-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - 
"" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.csv b/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.csv deleted file mode 100644 index 59e9ea6..0000000 --- a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -,e,f -,"h","i" -,"k","l" \ No newline at end of file diff --git a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.json b/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.json deleted file mode 100644 index 6174c3e..0000000 --- a/test/fixtures/papa-line-ends-with-quoted-field-first-field-of-next-line-is-empty-n.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "", - "e", - "f" - ], - [ - "", - "h", - "i" - ], - [ - "", - "k", - "l" - ] -] diff --git a/test/fixtures/papa-line-ends-with-quoted-field.csv b/test/fixtures/papa-line-ends-with-quoted-field.csv deleted file mode 100644 index abc5889..0000000 --- a/test/fixtures/papa-line-ends-with-quoted-field.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -d,e,f -"g","h","i" -"j","k","l" \ No newline at end of file diff --git a/test/fixtures/papa-line-ends-with-quoted-field.json b/test/fixtures/papa-line-ends-with-quoted-field.json deleted file mode 100644 index 815c73e..0000000 --- a/test/fixtures/papa-line-ends-with-quoted-field.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ], - [ - "g", - "h", - "i" - ], - [ - "j", - "k", - "l" - ] -] diff --git a/test/fixtures/papa-line-starts-with-quoted-field.csv b/test/fixtures/papa-line-starts-with-quoted-field.csv deleted file mode 100644 index 35700a8..0000000 --- a/test/fixtures/papa-line-starts-with-quoted-field.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,c -"d",e,f \ No newline at end of file diff --git 
a/test/fixtures/papa-line-starts-with-quoted-field.json b/test/fixtures/papa-line-starts-with-quoted-field.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-line-starts-with-quoted-field.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-line-starts-with-unquoted-empty-field.csv b/test/fixtures/papa-line-starts-with-unquoted-empty-field.csv deleted file mode 100644 index ac81806..0000000 --- a/test/fixtures/papa-line-starts-with-unquoted-empty-field.csv +++ /dev/null @@ -1,2 +0,0 @@ -,b,c -"d",e,f \ No newline at end of file diff --git a/test/fixtures/papa-line-starts-with-unquoted-empty-field.json b/test/fixtures/papa-line-starts-with-unquoted-empty-field.json deleted file mode 100644 index 7079a92..0000000 --- a/test/fixtures/papa-line-starts-with-unquoted-empty-field.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.csv b/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.csv deleted file mode 100644 index 4f93485..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.csv +++ /dev/null @@ -1 +0,0 @@ -A,B "B",C \ No newline at end of file diff --git a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.json b/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.json deleted file mode 100644 index de57929..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-not-as-opening-quotes.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B \"B\"", - "C" - ] -] diff --git a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.csv b/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.csv deleted file mode 100644 index 2fc1844..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.csv +++ /dev/null @@ 
-1,2 +0,0 @@ -A,B",C -D,E",F \ No newline at end of file diff --git a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.json b/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.json deleted file mode 100644 index eeff7d3..0000000 --- a/test/fixtures/papa-misplaced-quotes-in-data-twice-not-as-opening-quotes.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "B\"", - "C" - ], - [ - "D", - "E\"", - "F" - ] -] diff --git a/test/fixtures/papa-multi-character-comment-string.csv b/test/fixtures/papa-multi-character-comment-string.csv deleted file mode 100644 index 149256a..0000000 --- a/test/fixtures/papa-multi-character-comment-string.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -=N(Comment) -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-multi-character-comment-string.json b/test/fixtures/papa-multi-character-comment-string.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-multi-character-comment-string.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.csv b/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.csv deleted file mode 100644 index 037f991..0000000 --- a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.csv +++ /dev/null @@ -1 +0,0 @@ -a, b, "c, e", d \ No newline at end of file diff --git a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.json b/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.json deleted file mode 100644 index 441bdb9..0000000 --- a/test/fixtures/papa-multi-character-delimiter-length-2-with-quoted-field.json +++ /dev/null @@ -1,8 +0,0 @@ -[ - [ - "a", - "b", - "c, e", - "d" - ] -] diff --git a/test/fixtures/papa-multi-character-delimiter.csv b/test/fixtures/papa-multi-character-delimiter.csv deleted file mode 100644 index 
5f2665b..0000000 --- a/test/fixtures/papa-multi-character-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -a, b, c \ No newline at end of file diff --git a/test/fixtures/papa-multi-character-delimiter.json b/test/fixtures/papa-multi-character-delimiter.json deleted file mode 100644 index f47beff..0000000 --- a/test/fixtures/papa-multi-character-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-multiple-consecutive-empty-fields.csv b/test/fixtures/papa-multiple-consecutive-empty-fields.csv deleted file mode 100644 index 6b5f5c6..0000000 --- a/test/fixtures/papa-multiple-consecutive-empty-fields.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,,,c,d -,,e,,,f \ No newline at end of file diff --git a/test/fixtures/papa-multiple-consecutive-empty-fields.json b/test/fixtures/papa-multiple-consecutive-empty-fields.json deleted file mode 100644 index f46e882..0000000 --- a/test/fixtures/papa-multiple-consecutive-empty-fields.json +++ /dev/null @@ -1,18 +0,0 @@ -[ - [ - "a", - "b", - "", - "", - "c", - "d" - ], - [ - "", - "", - "e", - "", - "", - "f" - ] -] diff --git a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.csv b/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.csv deleted file mode 100644 index 0fec236..0000000 --- a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.csv +++ /dev/null @@ -1,5 +0,0 @@ -a -b -c -d -e \ No newline at end of file diff --git a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.json b/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.json deleted file mode 100644 index 40c4651..0000000 --- a/test/fixtures/papa-multiple-rows-one-column-no-delimiter-found.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "a" - ], - [ - "b" - ], - [ - "c" - ], - [ - "d" - ], - [ - "e" - ] -] diff --git a/test/fixtures/papa-one-row.csv b/test/fixtures/papa-one-row.csv deleted file mode 100644 index 341e344..0000000 --- a/test/fixtures/papa-one-row.csv 
+++ /dev/null @@ -1 +0,0 @@ -A,b,c \ No newline at end of file diff --git a/test/fixtures/papa-one-row.json b/test/fixtures/papa-one-row.json deleted file mode 100644 index a462c67..0000000 --- a/test/fixtures/papa-one-row.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-other-fields-are-empty.csv b/test/fixtures/papa-other-fields-are-empty.csv deleted file mode 100644 index 528105e..0000000 --- a/test/fixtures/papa-other-fields-are-empty.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,,c -,, \ No newline at end of file diff --git a/test/fixtures/papa-other-fields-are-empty.json b/test/fixtures/papa-other-fields-are-empty.json deleted file mode 100644 index 0490600..0000000 --- a/test/fixtures/papa-other-fields-are-empty.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "", - "c" - ], - [ - "", - "", - "" - ] -] diff --git a/test/fixtures/papa-pipe-delimiter.csv b/test/fixtures/papa-pipe-delimiter.csv deleted file mode 100644 index 224ccfe..0000000 --- a/test/fixtures/papa-pipe-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a|b|c -d|e|f \ No newline at end of file diff --git a/test/fixtures/papa-pipe-delimiter.json b/test/fixtures/papa-pipe-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-pipe-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.csv b/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.csv deleted file mode 100644 index c5e50ef..0000000 --- a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,"c""c""" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.json b/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.json deleted file mode 100644 index 426ccb4..0000000 --- 
a/test/fixtures/papa-quoted-field-at-end-of-row-but-not-at-eof-has-quotes.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c\"c\"" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.csv b/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.csv deleted file mode 100644 index 2029076..0000000 --- a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,c -d,e,f" \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.json b/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.json deleted file mode 100644 index e0176ec..0000000 --- a/test/fixtures/papa-quoted-field-contains-delimiters-and-n-with-valid-trailing-quote.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - [ - "a", - "b,c\nd,e,f" - ] -] diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer.csv deleted file mode 100644 index 25dc50f..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter-with-a-valid-closer.csv +++ /dev/null @@ -1,2 +0,0 @@ -"a,"b,c" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter.csv deleted file mode 100644 index 3e8fdd3..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,"c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-new-line.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-new-line.csv deleted file mode 
100644 index 42aef64..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-after-new-line.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,c -d"e,f,g \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-before-delimiter.csv b/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-before-delimiter.csv deleted file mode 100644 index 0a9d31d..0000000 --- a/test/fixtures/papa-quoted-field-has-invalid-trailing-quote-before-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b"c,d -e,f,g \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-no-closing-quote.csv b/test/fixtures/papa-quoted-field-has-no-closing-quote.csv deleted file mode 100644 index c9d316a..0000000 --- a/test/fixtures/papa-quoted-field-has-no-closing-quote.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b,c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.csv b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.csv deleted file mode 100644 index b2f34ae..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,"b",c -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.json b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.csv b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.csv deleted file mode 100644 index 7c3866f..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,c -d,e,"f" \ No newline at end of file diff 
--git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.json b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-eof.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.csv b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.csv deleted file mode 100644 index 05aca5b..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.csv +++ /dev/null @@ -1,2 +0,0 @@ -a,b,"c" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.json b/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-quoted-field-has-valid-trailing-quote-via-n.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.csv b/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.csv deleted file mode 100644 index 01d8e62..0000000 --- a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.csv +++ /dev/null @@ -1 +0,0 @@ -"1","cnonce="""",nc=""""","2" \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.json b/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.json deleted file mode 100644 index 9da6a71..0000000 --- a/test/fixtures/papa-quoted-field-with-5-quotes-in-a-row-and-a-delimiter-in-there-too.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "1", - "cnonce=\"\",nc=\"\"", - "2" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-delimiter.csv 
b/test/fixtures/papa-quoted-field-with-delimiter.csv deleted file mode 100644 index 9382cff..0000000 --- a/test/fixtures/papa-quoted-field-with-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,"B,B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-delimiter.json b/test/fixtures/papa-quoted-field-with-delimiter.json deleted file mode 100644 index 2bc5490..0000000 --- a/test/fixtures/papa-quoted-field-with-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B,B", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.csv b/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.csv deleted file mode 100644 index fbbb934..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.csv +++ /dev/null @@ -1 +0,0 @@ -A,"""B""",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.json b/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.json deleted file mode 100644 index 3cdf91b..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes-at-boundaries.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "\"B\"", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes.csv b/test/fixtures/papa-quoted-field-with-escaped-quotes.csv deleted file mode 100644 index 00dec07..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes.csv +++ /dev/null @@ -1 +0,0 @@ -A,"B""B""B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-escaped-quotes.json b/test/fixtures/papa-quoted-field-with-escaped-quotes.json deleted file mode 100644 index 25b324e..0000000 --- a/test/fixtures/papa-quoted-field-with-escaped-quotes.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\"B\"B", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.csv b/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.csv deleted file mode 
100644 index 96c142a..0000000 --- a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.csv +++ /dev/null @@ -1 +0,0 @@ -A," B ",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.json b/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.json deleted file mode 100644 index 4b42a7c..0000000 --- a/test/fixtures/papa-quoted-field-with-extra-whitespace-on-edges.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - " B ", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-line-break.csv b/test/fixtures/papa-quoted-field-with-line-break.csv deleted file mode 100644 index c98c6ec..0000000 --- a/test/fixtures/papa-quoted-field-with-line-break.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,"B -B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-line-break.json b/test/fixtures/papa-quoted-field-with-line-break.json deleted file mode 100644 index cc396ac..0000000 --- a/test/fixtures/papa-quoted-field-with-line-break.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\nB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-n.csv b/test/fixtures/papa-quoted-field-with-n.csv deleted file mode 100644 index c98c6ec..0000000 --- a/test/fixtures/papa-quoted-field-with-n.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,"B -B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-n.json b/test/fixtures/papa-quoted-field-with-n.json deleted file mode 100644 index cc396ac..0000000 --- a/test/fixtures/papa-quoted-field-with-n.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\nB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.csv b/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.csv deleted file mode 100644 index a72e39d..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,""",""",C \ No newline at end of file diff --git 
a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.json b/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.json deleted file mode 100644 index d80f7d4..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-around-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "\",\"", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.csv b/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.csv deleted file mode 100644 index 4bade34..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,""",",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.json b/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.json deleted file mode 100644 index dc50e4a..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-on-left-side-of-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "\",", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.csv b/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.csv deleted file mode 100644 index f0256e9..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.csv +++ /dev/null @@ -1 +0,0 @@ -A,",""",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.json b/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.json deleted file mode 100644 index c174154..0000000 --- a/test/fixtures/papa-quoted-field-with-quotes-on-right-side-of-delimiter.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - ",\"", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-r-n.csv b/test/fixtures/papa-quoted-field-with-r-n.csv deleted file mode 100644 index 1ad0a44..0000000 --- a/test/fixtures/papa-quoted-field-with-r-n.csv +++ /dev/null @@ -1,2 +0,0 @@ 
-A,"B -B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-r-n.json b/test/fixtures/papa-quoted-field-with-r-n.json deleted file mode 100644 index 242a6dc..0000000 --- a/test/fixtures/papa-quoted-field-with-r-n.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\r\nB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-r.csv b/test/fixtures/papa-quoted-field-with-r.csv deleted file mode 100644 index eeb695a..0000000 --- a/test/fixtures/papa-quoted-field-with-r.csv +++ /dev/null @@ -1 +0,0 @@ -A,"B B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-r.json b/test/fixtures/papa-quoted-field-with-r.json deleted file mode 100644 index d70ec72..0000000 --- a/test/fixtures/papa-quoted-field-with-r.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\rB", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.csv b/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.csv deleted file mode 100644 index 1053b10..0000000 --- a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.csv +++ /dev/null @@ -1 +0,0 @@ -A, "B" ,C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.json b/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.json deleted file mode 100644 index 6525263..0000000 --- a/test/fixtures/papa-quoted-field-with-whitespace-around-quotes.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - " \"B\" ", - "C" - ] -] diff --git a/test/fixtures/papa-quoted-field.csv b/test/fixtures/papa-quoted-field.csv deleted file mode 100644 index 6eadde4..0000000 --- a/test/fixtures/papa-quoted-field.csv +++ /dev/null @@ -1 +0,0 @@ -A,"B",C \ No newline at end of file diff --git a/test/fixtures/papa-quoted-field.json b/test/fixtures/papa-quoted-field.json deleted file mode 100644 index f860470..0000000 --- a/test/fixtures/papa-quoted-field.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B", - 
"C" - ] -] diff --git a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.csv b/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.csv deleted file mode 100644 index b96a397..0000000 --- a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,"c,c -c" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.json b/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.json deleted file mode 100644 index e1b467d..0000000 --- a/test/fixtures/papa-quoted-fields-at-end-of-row-with-delimiter-and-line-break.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c,c\nc" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-line-breaks.csv b/test/fixtures/papa-quoted-fields-with-line-breaks.csv deleted file mode 100644 index 88a3644..0000000 --- a/test/fixtures/papa-quoted-fields-with-line-breaks.csv +++ /dev/null @@ -1,4 +0,0 @@ -A,"B -B","C -C -C" \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-line-breaks.json b/test/fixtures/papa-quoted-fields-with-line-breaks.json deleted file mode 100644 index c396f92..0000000 --- a/test/fixtures/papa-quoted-fields-with-line-breaks.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\nB", - "C\nC\nC" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.csv b/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.csv deleted file mode 100644 index 87f53ab..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,"B" ,C,"D" -E,F,"G" ,"H" -Q,W,"E" ,R \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.json b/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.json deleted file mode 100644 index 
dbcf350..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-after-closing-quote.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - [ - "A", - "B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ], - [ - "Q", - "W", - "E", - "R" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.csv deleted file mode 100644 index 3621fb5..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,"c -" -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.json deleted file mode 100644 index a811254..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-newline-and-contains-newline.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c\n" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.csv deleted file mode 100644 index e63c960..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,",B" ,C,D -E,F,G,H \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.json deleted file mode 100644 index e5958d5..0000000 --- 
a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter-and-contains-delimiter.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - [ - "A", - ",B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.csv deleted file mode 100644 index 3d789d1..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,"B" ,C,D -E,F,"G" ,H \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.json deleted file mode 100644 index 420b102..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-delimiter.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - [ - "A", - "B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ] -] diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.csv b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.csv deleted file mode 100644 index 9e3acdf..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C,"D" -E,F,G,"H" -Q,W,E,R \ No newline at end of file diff --git a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.json b/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.json deleted file mode 100644 index dbcf350..0000000 --- a/test/fixtures/papa-quoted-fields-with-spaces-between-closing-quote-and-next-new-line.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - [ - "A", - "B", - "C", - "D" - ], - [ - "E", - "F", - "G", - "H" - ], - [ - "Q", - "W", - "E", - "R" - 
] -] diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.csv b/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.csv deleted file mode 100644 index 7573f21..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C -a,b, \ No newline at end of file diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.json b/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.json deleted file mode 100644 index 3c5cd20..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-at-end.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - { - "A": "a", - "B": "b", - "C": "" - } -] diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.csv b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.csv deleted file mode 100644 index aa16fa0..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -,b1,c1 -,b2,c2 \ No newline at end of file diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.json b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.json deleted file mode 100644 index bceacae..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining-using-headers.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "A": "", - "B": "b1", - "C": "c1" - }, - { - "A": "", - "B": "b2", - "C": "c2" - } -] diff --git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.csv b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.csv deleted file mode 100644 index b50700a..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -,b1,c1 -a2,b2,c2 \ No newline at end of file diff 
--git a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.json b/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.json deleted file mode 100644 index 433465c..0000000 --- a/test/fixtures/papa-row-with-enough-fields-but-blank-field-in-the-begining.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "A", - "B", - "C" - ], - [ - "", - "b1", - "c1" - ], - [ - "a2", - "b2", - "c2" - ] -] diff --git a/test/fixtures/papa-row-with-too-few-fields.csv b/test/fixtures/papa-row-with-too-few-fields.csv deleted file mode 100644 index 8b51576..0000000 --- a/test/fixtures/papa-row-with-too-few-fields.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C -a,b \ No newline at end of file diff --git a/test/fixtures/papa-row-with-too-many-fields.csv b/test/fixtures/papa-row-with-too-many-fields.csv deleted file mode 100644 index 4f38565..0000000 --- a/test/fixtures/papa-row-with-too-many-fields.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,B,C -a,b,c,d,e -f,g,h \ No newline at end of file diff --git a/test/fixtures/papa-single-quote-as-quote-character.csv b/test/fixtures/papa-single-quote-as-quote-character.csv deleted file mode 100644 index 01ec509..0000000 --- a/test/fixtures/papa-single-quote-as-quote-character.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,'c,d' \ No newline at end of file diff --git a/test/fixtures/papa-single-quote-as-quote-character.json b/test/fixtures/papa-single-quote-as-quote-character.json deleted file mode 100644 index 948722a..0000000 --- a/test/fixtures/papa-single-quote-as-quote-character.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c,d" - ] -] diff --git a/test/fixtures/papa-skip-empty-lines-with-empty-input.csv b/test/fixtures/papa-skip-empty-lines-with-empty-input.csv deleted file mode 100644 index e69de29..0000000 diff --git a/test/fixtures/papa-skip-empty-lines-with-empty-input.json b/test/fixtures/papa-skip-empty-lines-with-empty-input.json deleted file mode 100644 index fe51488..0000000 --- 
a/test/fixtures/papa-skip-empty-lines-with-empty-input.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.csv b/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.csv deleted file mode 100644 index 5e73edc..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.csv +++ /dev/null @@ -1,2 +0,0 @@ - -a,b,c \ No newline at end of file diff --git a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.json b/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.json deleted file mode 100644 index 1c206a1..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-first-line-only-whitespace.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - [ - " " - ], - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.csv b/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.csv deleted file mode 100644 index 29a42cc..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f diff --git a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.json b/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-skip-empty-lines-with-newline-at-end-of-input.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-skip-empty-lines.csv b/test/fixtures/papa-skip-empty-lines.csv deleted file mode 100644 index 68eacbe..0000000 --- a/test/fixtures/papa-skip-empty-lines.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c - -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-skip-empty-lines.json b/test/fixtures/papa-skip-empty-lines.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-skip-empty-lines.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ 
- "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-tab-delimiter.csv b/test/fixtures/papa-tab-delimiter.csv deleted file mode 100644 index 34caaac..0000000 --- a/test/fixtures/papa-tab-delimiter.csv +++ /dev/null @@ -1,2 +0,0 @@ -a b c -d e f \ No newline at end of file diff --git a/test/fixtures/papa-tab-delimiter.json b/test/fixtures/papa-tab-delimiter.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-tab-delimiter.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.csv b/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.csv deleted file mode 100644 index 3d5f757..0000000 --- a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.csv +++ /dev/null @@ -1,4 +0,0 @@ -#comment1 -#comment2 -#comment3 -a,b,c \ No newline at end of file diff --git a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.json b/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.json deleted file mode 100644 index f47beff..0000000 --- a/test/fixtures/papa-three-comment-lines-consecutively-at-beginning-of-file.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-three-rows.csv b/test/fixtures/papa-three-rows.csv deleted file mode 100644 index a7c9397..0000000 --- a/test/fixtures/papa-three-rows.csv +++ /dev/null @@ -1,3 +0,0 @@ -A,b,c -d,E,f -G,h,i \ No newline at end of file diff --git a/test/fixtures/papa-three-rows.json b/test/fixtures/papa-three-rows.json deleted file mode 100644 index ce1c97e..0000000 --- a/test/fixtures/papa-three-rows.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ], - [ - "G", - "h", - "i" - ] -] diff --git a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.csv 
b/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.csv deleted file mode 100644 index 9d0af26..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -#comment1 -#comment2 \ No newline at end of file diff --git a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.json b/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.json deleted file mode 100644 index f47beff..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively-at-end-of-file.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "b", - "c" - ] -] diff --git a/test/fixtures/papa-two-comment-lines-consecutively.csv b/test/fixtures/papa-two-comment-lines-consecutively.csv deleted file mode 100644 index 2d63911..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively.csv +++ /dev/null @@ -1,4 +0,0 @@ -a,b,c -#comment1 -#comment2 -d,e,f \ No newline at end of file diff --git a/test/fixtures/papa-two-comment-lines-consecutively.json b/test/fixtures/papa-two-comment-lines-consecutively.json deleted file mode 100644 index d0b6253..0000000 --- a/test/fixtures/papa-two-comment-lines-consecutively.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "a", - "b", - "c" - ], - [ - "d", - "e", - "f" - ] -] diff --git a/test/fixtures/papa-two-rows-just-r.csv b/test/fixtures/papa-two-rows-just-r.csv deleted file mode 100644 index da7eef8..0000000 --- a/test/fixtures/papa-two-rows-just-r.csv +++ /dev/null @@ -1 +0,0 @@ -A,b,c d,E,f \ No newline at end of file diff --git a/test/fixtures/papa-two-rows-just-r.json b/test/fixtures/papa-two-rows-just-r.json deleted file mode 100644 index 142ca69..0000000 --- a/test/fixtures/papa-two-rows-just-r.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ] -] diff --git a/test/fixtures/papa-two-rows-r-n.csv b/test/fixtures/papa-two-rows-r-n.csv deleted file mode 100644 index 5a60fa0..0000000 --- 
a/test/fixtures/papa-two-rows-r-n.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,b,c -d,E,f \ No newline at end of file diff --git a/test/fixtures/papa-two-rows-r-n.json b/test/fixtures/papa-two-rows-r-n.json deleted file mode 100644 index 142ca69..0000000 --- a/test/fixtures/papa-two-rows-r-n.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ] -] diff --git a/test/fixtures/papa-two-rows.csv b/test/fixtures/papa-two-rows.csv deleted file mode 100644 index ea46d36..0000000 --- a/test/fixtures/papa-two-rows.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,b,c -d,E,f \ No newline at end of file diff --git a/test/fixtures/papa-two-rows.json b/test/fixtures/papa-two-rows.json deleted file mode 100644 index 142ca69..0000000 --- a/test/fixtures/papa-two-rows.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - [ - "A", - "b", - "c" - ], - [ - "d", - "E", - "f" - ] -] diff --git a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.csv b/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.csv deleted file mode 100644 index a420a04..0000000 --- a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.csv +++ /dev/null @@ -1 +0,0 @@ -A,B",C \ No newline at end of file diff --git a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.json b/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.json deleted file mode 100644 index b1ca572..0000000 --- a/test/fixtures/papa-unquoted-field-with-quotes-at-end-of-field.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "A", - "B\"", - "C" - ] -] diff --git a/test/fixtures/papa-using-n-endings-uses-n-linebreak.csv b/test/fixtures/papa-using-n-endings-uses-n-linebreak.csv deleted file mode 100644 index 2e11fcd..0000000 --- a/test/fixtures/papa-using-n-endings-uses-n-linebreak.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-n-endings-uses-n-linebreak.json b/test/fixtures/papa-using-n-endings-uses-n-linebreak.json 
deleted file mode 100644 index c670851..0000000 --- a/test/fixtures/papa-using-n-endings-uses-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.csv b/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.csv deleted file mode 100644 index abd33f3..0000000 --- a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.json b/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.json deleted file mode 100644 index 8e09d82..0000000 --- a/test/fixtures/papa-using-n-endings-with-r-n-in-header-field-uses-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\r\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.csv b/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.csv deleted file mode 100644 index 3a6870e..0000000 --- a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.json deleted file mode 100644 index c670851..0000000 --- a/test/fixtures/papa-using-r-n-endings-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.csv 
b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.csv deleted file mode 100644 index 607dafe..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.json deleted file mode 100644 index fed2e68..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.csv b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.csv deleted file mode 100644 index a6be125..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j diff --git a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.json deleted file mode 100644 index fed2e68..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-n-in-header-field-with-skip-empty-lines-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.csv b/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.csv deleted file mode 100644 index 2c0ca0b..0000000 --- 
a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.csv +++ /dev/null @@ -1,6 +0,0 @@ -"a -a",b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.json b/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.json deleted file mode 100644 index 8e09d82..0000000 --- a/test/fixtures/papa-using-r-n-endings-with-r-n-in-header-field-uses-r-n-linebreak.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\r\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.csv b/test/fixtures/papa-using-reserved-regex-character-as-quote-character.csv deleted file mode 100644 index a43df71..0000000 --- a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.csv +++ /dev/null @@ -1,6 +0,0 @@ -|a -a|,b -c,d -e,f -g,h -i,j \ No newline at end of file diff --git a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.json b/test/fixtures/papa-using-reserved-regex-character-as-quote-character.json deleted file mode 100644 index fed2e68..0000000 --- a/test/fixtures/papa-using-reserved-regex-character-as-quote-character.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - [ - "a\na", - "b" - ], - [ - "c", - "d" - ], - [ - "e", - "f" - ], - [ - "g", - "h" - ], - [ - "i", - "j" - ] -] diff --git a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.csv b/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.csv deleted file mode 100644 index 64954bc..0000000 --- a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.csv +++ /dev/null @@ -1 +0,0 @@ -a, b ,c \ No newline at end of file diff --git a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.json b/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.json deleted file mode 100644 index a165586..0000000 --- 
a/test/fixtures/papa-whitespace-at-edges-of-unquoted-field.json +++ /dev/null @@ -1,7 +0,0 @@ -[ - [ - "a", - "\tb ", - "c" - ] -] diff --git a/test/fixtures/pipe-separator.csv b/test/fixtures/pipe-separator.csv deleted file mode 100644 index e8d53ce..0000000 --- a/test/fixtures/pipe-separator.csv +++ /dev/null @@ -1,3 +0,0 @@ -a|b|c -A|B|C -AA|BB|CC diff --git a/test/fixtures/pipe-separator.json b/test/fixtures/pipe-separator.json deleted file mode 100644 index f4668e4..0000000 --- a/test/fixtures/pipe-separator.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/quote.csv b/test/fixtures/quote.csv deleted file mode 100644 index 57a9c05..0000000 --- a/test/fixtures/quote.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -"1","B","true" -"2","B""B","false" diff --git a/test/fixtures/quote.json b/test/fixtures/quote.json deleted file mode 100644 index da71f01..0000000 --- a/test/fixtures/quote.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "a": "1", - "b": "B", - "c": "true" - }, - { - "a": "2", - "b": "B\"B", - "c": "false" - } -] diff --git a/test/fixtures/quoted-escape.csv b/test/fixtures/quoted-escape.csv deleted file mode 100644 index 8c30b29..0000000 --- a/test/fixtures/quoted-escape.csv +++ /dev/null @@ -1,6 +0,0 @@ -a -"""b" -"b""" -"""b""" -"b""c" -"b""c""d" diff --git a/test/fixtures/quoted-escape.json b/test/fixtures/quoted-escape.json deleted file mode 100644 index 0d2bdd2..0000000 --- a/test/fixtures/quoted-escape.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"\"b"},{"a":"b\""},{"a":"\"b\""},{"a":"b\"c"},{"a":"b\"c\"d"}] diff --git a/test/fixtures/quoted-newline.csv b/test/fixtures/quoted-newline.csv deleted file mode 100644 index 734a55c..0000000 --- a/test/fixtures/quoted-newline.csv +++ /dev/null @@ -1,5 +0,0 @@ -a,b -"line1 -line2",simple -"hello","world -!" 
diff --git a/test/fixtures/quoted-newline.json b/test/fixtures/quoted-newline.json deleted file mode 100644 index c3c8894..0000000 --- a/test/fixtures/quoted-newline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"line1\nline2","b":"simple"},{"a":"hello","b":"world\n!"}] diff --git a/test/fixtures/quoted-simple.csv b/test/fixtures/quoted-simple.csv deleted file mode 100644 index bc880e6..0000000 --- a/test/fixtures/quoted-simple.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b -"hello","world" -"foo",bar diff --git a/test/fixtures/quoted-simple.json b/test/fixtures/quoted-simple.json deleted file mode 100644 index 031e677..0000000 --- a/test/fixtures/quoted-simple.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"hello","b":"world"},{"a":"foo","b":"bar"}] diff --git a/test/fixtures/record-separator.csv b/test/fixtures/record-separator.csv deleted file mode 100644 index 830e627..0000000 --- a/test/fixtures/record-separator.csv +++ /dev/null @@ -1 +0,0 @@ -a,b,c%A,B,C%AA,BB,CC \ No newline at end of file diff --git a/test/fixtures/record-separator.json b/test/fixtures/record-separator.json deleted file mode 100644 index f4668e4..0000000 --- a/test/fixtures/record-separator.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"A","b":"B","c":"C"},{"a":"AA","b":"BB","c":"CC"}] diff --git a/test/fixtures/trailing-newline.csv b/test/fixtures/trailing-newline.csv deleted file mode 100644 index 9255cff..0000000 --- a/test/fixtures/trailing-newline.csv +++ /dev/null @@ -1,3 +0,0 @@ -a -1 -2 diff --git a/test/fixtures/trailing-newline.json b/test/fixtures/trailing-newline.json deleted file mode 100644 index 94d1615..0000000 --- a/test/fixtures/trailing-newline.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":"1"},{"a":"2"}] diff --git a/test/fixtures/trim.csv b/test/fixtures/trim.csv deleted file mode 100644 index 7998755..0000000 --- a/test/fixtures/trim.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b - hello , world - foo ,bar diff --git a/test/fixtures/trim.json b/test/fixtures/trim.json deleted file mode 100644 index 
f8f2ee7..0000000 --- a/test/fixtures/trim.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "a": "1", - "b": "2", - "c": "3" - }, - { - "a": "11", - "b": "22", - "c": "33" - }, - { - "a": "4", - "b": "5", - "c": "6" - }, - { - "a": "44", - "b": "55", - "c": "66" - } -] diff --git a/test/fixtures/value.csv b/test/fixtures/value.csv deleted file mode 100644 index 97c34a5..0000000 --- a/test/fixtures/value.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,b,c -true,false,null -hello,true,1 diff --git a/test/fixtures/value.json b/test/fixtures/value.json deleted file mode 100644 index 22ed72b..0000000 --- a/test/fixtures/value.json +++ /dev/null @@ -1 +0,0 @@ -[{"a":true,"b":false,"c":null},{"a":"hello","b":true,"c":"1"}] diff --git a/test/spec/attributes.tsv b/test/spec/attributes.tsv new file mode 100644 index 0000000..6630236 --- /dev/null +++ b/test/spec/attributes.tsv @@ -0,0 +1,15 @@ +# name input expected opts +# Attribute handling: double quotes, single quotes, entities, spacing, +# mixed names. See basic.tsv for the spec header / escape rules. 
+ +attr-one {"name":"a","localName":"a","attributes":{"x":"1"},"children":[]} +attr-two {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} +attr-with-text text {"name":"a","localName":"a","attributes":{"x":"hello world"},"children":["text"]} +attr-single-quote {"name":"a","localName":"a","attributes":{"x":"value"},"children":[]} +attr-single-quote-with-dq {"name":"a","localName":"a","attributes":{"x":"says \"hi\""},"children":[]} +attr-extra-spaces {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} +attr-newlines {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} +attr-name-dash {"name":"a","localName":"a","attributes":{"data-x":"1"},"children":[]} +attr-name-dot {"name":"a","localName":"a","attributes":{"v.2":"ok"},"children":[]} +attr-empty-value {"name":"a","localName":"a","attributes":{"x":""},"children":[]} +attr-mixed-quotes {"name":"a","localName":"a","attributes":{"x":"1","y":"2"},"children":[]} diff --git a/test/spec/basic.tsv b/test/spec/basic.tsv new file mode 100644 index 0000000..7131a8e --- /dev/null +++ b/test/spec/basic.tsv @@ -0,0 +1,27 @@ +# name input expected opts +# ----------------------------------------------------------------------------- +# Each row is one XML parse test. +# name - unique test identifier +# input - XML source. Escapes: \n (LF) \r (CR) \t (TAB) \\ (backslash) +# expected - JSON encoding of the parsed result, OR the literal token +# "ERROR" (optionally followed by ":code") to assert a parse +# error. The JSON is parsed as-is (standard JSON escapes). +# opts - optional JSON object of plugin options; empty for defaults. +# ----------------------------------------------------------------------------- + +empty-element {"name":"a","localName":"a","attributes":{},"children":[]} +self-closing {"name":"a","localName":"a","attributes":{},"children":[]} +self-closing-space
{"name":"br","localName":"br","attributes":{},"children":[]} +text-simple
hello {"name":"a","localName":"a","attributes":{},"children":["hello"]} +text-whitespace hello world {"name":"greet","localName":"greet","attributes":{},"children":["hello world"]} +text-only-spaces

hello world

{"name":"p","localName":"p","attributes":{},"children":[" hello world "]} +nested-empty {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[]}]} +nested-text x {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":["x"]}]} +deeply-nested x {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[{"name":"c","localName":"c","attributes":{},"children":["x"]}]}]} +multiple-children-selfclose {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[]},{"name":"c","localName":"c","attributes":{},"children":[]}]} +multiple-children-text 12 {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":["1"]},{"name":"c","localName":"c","attributes":{},"children":["2"]}]} +mixed-content helloinnerworld {"name":"a","localName":"a","attributes":{},"children":["hello",{"name":"b","localName":"b","attributes":{},"children":["inner"]},"world"]} +multiline \n 1\n 2\n {"name":"root","localName":"root","attributes":{},"children":["\n ",{"name":"a","localName":"a","attributes":{},"children":["1"]},"\n ",{"name":"b","localName":"b","attributes":{},"children":["2"]},"\n"]} +tag-name-dash x {"name":"a-b","localName":"a-b","attributes":{},"children":["x"]} +tag-name-dot x {"name":"a.b","localName":"a.b","attributes":{},"children":["x"]} +tag-name-underscore x {"name":"a_b","localName":"a_b","attributes":{},"children":["x"]} diff --git a/test/spec/entities.tsv b/test/spec/entities.tsv new file mode 100644 index 0000000..9292a4c --- /dev/null +++ b/test/spec/entities.tsv @@ -0,0 +1,21 @@ +# name input expected opts +# Entity references: predefined (amp lt gt quot apos), numeric (decimal and +# hex), unknown (passed through), and user-supplied custom entities. 
+ +pre-amp & {"name":"a","localName":"a","attributes":{},"children":["&"]} +pre-lt < {"name":"a","localName":"a","attributes":{},"children":["<"]} +pre-gt > {"name":"a","localName":"a","attributes":{},"children":[">"]} +pre-quot " {"name":"a","localName":"a","attributes":{},"children":["\""]} +pre-apos ' {"name":"a","localName":"a","attributes":{},"children":["'"]} +pre-all-in-one &<>"' {"name":"a","localName":"a","attributes":{},"children":["&<>\"'"]} +pre-in-text Tom & Jerry {"name":"a","localName":"a","attributes":{},"children":["Tom & Jerry"]} +num-dec-single A {"name":"a","localName":"a","attributes":{},"children":["A"]} +num-dec-multi AB {"name":"a","localName":"a","attributes":{},"children":["AB"]} +num-hex-single A {"name":"a","localName":"a","attributes":{},"children":["A"]} +num-hex-multi AB {"name":"a","localName":"a","attributes":{},"children":["AB"]} +num-hex-astral 😀 {"name":"a","localName":"a","attributes":{},"children":["\uD83D\uDE00"]} +entity-in-attr {"name":"a","localName":"a","attributes":{"title":"Tom & Jerry"},"children":[]} +num-in-attr {"name":"a","localName":"a","attributes":{"v":"AB"},"children":[]} +unknown-passthrough &unknown; {"name":"a","localName":"a","attributes":{},"children":["&unknown;"]} +custom-entity © 2025 all rights {"name":"a","localName":"a","attributes":{},"children":["© 2025\u00a0all rights"]} {"customEntities":{"nbsp":"\u00a0","copy":"©"}} +entities-disabled & {"name":"a","localName":"a","attributes":{},"children":["&"]} {"entities":false} diff --git a/test/spec/errors.tsv b/test/spec/errors.tsv new file mode 100644 index 0000000..0c521ff --- /dev/null +++ b/test/spec/errors.tsv @@ -0,0 +1,10 @@ +# name input expected opts +# Inputs that must raise a parse error. `expected` uses the literal token +# "ERROR" (optionally ":code" for a specific error code) to indicate that +# parsing must fail. 
+ +mismatched-close ERROR:xml_mismatched_tag +unterminated-comment {"name":"a","localName":"a","attributes":{},"children":[]} +comment-in-element hello {"name":"a","localName":"a","attributes":{},"children":["hello"]} +comment-around-child {"name":"a","localName":"a","attributes":{},"children":[{"name":"b","localName":"b","attributes":{},"children":[]}]} +pi-xml-decl {"name":"a","localName":"a","attributes":{},"children":[]} +pi-xml-stylesheet {"name":"root","localName":"root","attributes":{},"children":[]} +doctype-simple {"name":"html","localName":"html","attributes":{},"children":[]} +doctype-system hi {"name":"note","localName":"note","attributes":{},"children":[{"name":"body","localName":"body","attributes":{},"children":["hi"]}]} +doctype-internal-subset ]> {"name":"a","localName":"a","attributes":{},"children":[]} +cdata-basic & raw text]]> {"name":"a","localName":"a","attributes":{},"children":[" & raw text"]} +cdata-with-newlines {"name":"a","localName":"a","attributes":{},"children":["line1\nline2"]} +cdata-no-entity-decode {"name":"a","localName":"a","attributes":{},"children":["&"]} diff --git a/test/spec/w3c.tsv b/test/spec/w3c.tsv new file mode 100644 index 0000000..f167342 --- /dev/null +++ b/test/spec/w3c.tsv @@ -0,0 +1,20 @@ +# name input expected opts +# Standardised / real-world XML parse cases. Inputs include examples taken +# or adapted from W3C conformance documents (XML 1.0 appendix, XHTML, +# Atom, SVG, SOAP, RSS) and canonical "not well-formed" counterexamples. +# Document structure is verified; full specification conformance is not. + +xml-decl-basic {"name":"doc","localName":"doc","attributes":{},"children":[]} +xml-decl-standalone {"name":"doc","localName":"doc","attributes":{},"children":[]} +xmltest-valid-001 \nHello, World! 
{"name":"doc","localName":"doc","attributes":{},"children":["Hello, World!"]} +xmltest-valid-attr {"name":"doc","localName":"doc","attributes":{"attr1":"value1","attr2":"value2"},"children":[]} +xmltest-valid-nested text {"name":"doc","localName":"doc","attributes":{},"children":[{"name":"child1","localName":"child1","attributes":{},"children":[]},{"name":"child2","localName":"child2","attributes":{},"children":[{"name":"nested","localName":"nested","attributes":{},"children":["text"]}]}]} +atom-entry Exampleurn:uuid:1 {"name":"entry","localName":"entry","namespace":"http://www.w3.org/2005/Atom","attributes":{"xmlns":"http://www.w3.org/2005/Atom"},"children":[{"name":"title","localName":"title","namespace":"http://www.w3.org/2005/Atom","attributes":{},"children":["Example"]},{"name":"id","localName":"id","namespace":"http://www.w3.org/2005/Atom","attributes":{},"children":["urn:uuid:1"]}]} +soap-envelope Apples {"name":"soap:Envelope","prefix":"soap","localName":"Envelope","namespace":"http://schemas.xmlsoap.org/soap/envelope/","attributes":{"xmlns:soap":"http://schemas.xmlsoap.org/soap/envelope/"},"children":[{"name":"soap:Body","prefix":"soap","localName":"Body","namespace":"http://schemas.xmlsoap.org/soap/envelope/","attributes":{},"children":[{"name":"m:GetPrice","prefix":"m","localName":"GetPrice","namespace":"https://example.com","attributes":{"xmlns:m":"https://example.com"},"children":[{"name":"m:Item","prefix":"m","localName":"Item","namespace":"https://example.com","attributes":{},"children":["Apples"]}]}]}]} +svg-rect {"name":"svg","localName":"svg","namespace":"http://www.w3.org/2000/svg","attributes":{"xmlns":"http://www.w3.org/2000/svg"},"children":[{"name":"rect","localName":"rect","namespace":"http://www.w3.org/2000/svg","attributes":{"x":"0","y":"0","width":"10","height":"10"},"children":[]}]} +rss-channel Examplehttps://e.example/ 
{"name":"rss","localName":"rss","attributes":{"version":"2.0"},"children":[{"name":"channel","localName":"channel","attributes":{},"children":[{"name":"title","localName":"title","attributes":{},"children":["Example"]},{"name":"link","localName":"link","attributes":{},"children":["https://e.example/"]}]}]} +xhtml-paragraph

Hello bold world.

{"name":"p","localName":"p","attributes":{"class":"greeting"},"children":["Hello ",{"name":"em","localName":"em","attributes":{},"children":["bold"]}," world."]} +notes-document \n\nToveJaniDon't forget me & cheers {"name":"note","localName":"note","attributes":{},"children":[{"name":"to","localName":"to","attributes":{},"children":["Tove"]},{"name":"from","localName":"from","attributes":{},"children":["Jani"]},{"name":"body","localName":"body","attributes":{},"children":["Don't forget me & cheers"]}]} +not-wf-no-close ERROR +not-wf-unclosed-nested ERROR +not-wf-stray-close ERROR diff --git a/test/xml.test.ts b/test/xml.test.ts index 8c4d7f7..90d3824 100644 --- a/test/xml.test.ts +++ b/test/xml.test.ts @@ -2,364 +2,165 @@ import { describe, test } from 'node:test' import assert from 'node:assert' +import { readFileSync, readdirSync } from 'node:fs' +import { join } from 'node:path' import { Jsonic } from 'jsonic' import { Xml } from '../dist/xml' -// Build a plain element literal in the shape the parser emits. Optional -// namespace / prefix fields are only present when actually resolved. -function elem( - name: string, - children: any[] = [], - attributes: Record = {}, - extras: Record = {}, -) { - const out: any = { - name, - localName: extras.localName ?? name, - attributes, - children, - } - if (extras.prefix) out.prefix = extras.prefix - if (extras.namespace) out.namespace = extras.namespace - return out +// --------------------------------------------------------------------------- +// Shared TSV spec runner +// +// Test cases are defined in tab-separated value files under test/spec/*.tsv. +// Each non-comment row is: +// nameinputexpectedopts +// - `input` uses the escape set \n \r \t \\ +// - `expected` is raw JSON (standard JSON escapes apply) or the literal +// token ERROR / ERROR:code for expected parse failures. +// - `opts` is optional JSON for plugin options. +// The same files drive the Go test suite in go/xml_test.go. 
+// --------------------------------------------------------------------------- + +// At runtime this test file is loaded from `dist-test/`, so hop up one +// level to reach the shared spec directory in the project root. +const specDir = join(__dirname, '..', 'test', 'spec') + +type SpecRow = { + file: string + line: number + name: string + input: string + expected: string + opts: string } -describe('xml', () => { - test('empty-element', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), elem('a')) - }) - - test('self-closing-element', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), elem('a')) - assert.deepEqual(jx('
'), elem('br')) - }) - - test('text-content', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('
hello'), elem('a', ['hello'])) - assert.deepEqual( - jx('hello world'), - elem('greet', ['hello world']), - ) - }) - - test('nested-elements', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), elem('a', [elem('b')])) - assert.deepEqual(jx('x'), elem('a', [elem('b', ['x'])])) - }) - - test('deeply-nested', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx('x'), - elem('a', [elem('b', [elem('c', ['x'])])]), - ) - }) - - test('multiple-children', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(''), - elem('a', [elem('b'), elem('c')]), - ) - assert.deepEqual( - jx('12'), - elem('a', [elem('b', ['1']), elem('c', ['2'])]), - ) - }) - - test('mixed-content', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx('helloinnerworld'), - elem('a', ['hello', elem('b', ['inner']), 'world']), - ) - }) - - test('tag-name-variants', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('x'), elem('a-b', ['x'])) - assert.deepEqual(jx('x'), elem('a.b', ['x'])) - assert.deepEqual(jx('x'), elem('a_b', ['x'])) - }) - - test('mismatched-tag', () => { - const jx = Jsonic.make().use(Xml) - assert.throws(() => jx(''), /xml_mismatched_tag|mismatched/i) - }) - - test('multiline-content', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx('\n 1\n 2\n'), - elem('root', [ - '\n ', - elem('a', ['1']), - '\n ', - elem('b', ['2']), - '\n', - ]), - ) - }) - - test('preserves-whitespace-text', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx('

hello world

'), - elem('p', [' hello world ']), - ) - }) - - test('attributes-basic', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(''), - elem('a', [], { x: '1' }), - ) - assert.deepEqual( - jx(''), - elem('a', [], { x: '1', y: '2' }), - ) - assert.deepEqual( - jx('text'), - elem('a', ['text'], { x: 'hello world' }), - ) - }) - - test('attributes-single-quote', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(``), - elem('a', [], { x: 'value' }), - ) - assert.deepEqual( - jx(``), - elem('a', [], { x: 'it says "hi"' }), - ) - }) - - test('attributes-spacing-variants', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(''), - elem('a', [], { x: '1', y: '2' }), - ) - assert.deepEqual( - jx(''), - elem('a', [], { x: '1', y: '2' }), - ) - }) - - test('attributes-with-dashes-and-dots', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(''), - elem('a', [], { 'data-x': '1', 'v.2': 'ok' }), - ) - }) - - test('entities-predefined-in-text', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx('&<>"''), - elem('a', [`&<>"'`]), - ) - assert.deepEqual( - jx('Tom & Jerry'), - elem('a', ['Tom & Jerry']), - ) - }) - - test('entities-numeric-references', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx('AB'), elem('a', ['AB'])) - assert.deepEqual(jx('AB'), elem('a', ['AB'])) - assert.deepEqual(jx('😀'), elem('a', ['\u{1F600}'])) - }) - - test('entities-in-attribute-values', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(''), - elem('a', [], { title: 'Tom & Jerry' }), - ) - assert.deepEqual( - jx(''), - elem('a', [], { v: 'AB' }), - ) - }) - - test('entities-unknown-passthrough', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx('&unknown;'), - elem('a', ['&unknown;']), - ) - }) - - test('entities-custom', () => { - const jx = Jsonic.make().use(Xml, { - customEntities: { nbsp: '\u00a0', copy: '\u00a9' }, +function loadSpec(file: string): 
SpecRow[] { + const path = join(specDir, file) + const body = readFileSync(path, 'utf8') + const rows: SpecRow[] = [] + const lines = body.split('\n') + for (let i = 0; i < lines.length; i++) { + const raw = lines[i] + if (raw === '' || raw.startsWith('#')) continue + const cols = raw.split('\t') + if (cols.length < 3) { + throw new Error(`${file}:${i + 1}: expected >=3 tab-separated columns`) + } + rows.push({ + file, + line: i + 1, + name: cols[0], + input: unescapeInput(cols[1]), + expected: cols[2], + opts: cols[3] ?? '', }) - assert.deepEqual( - jx('© 2025 all rights'), - elem('a', ['\u00a9 2025\u00a0all rights']), - ) - }) - - test('entities-disabled', () => { - const jx = Jsonic.make().use(Xml, { entities: false }) - assert.deepEqual(jx('&'), elem('a', ['&'])) - }) - - test('comments-ignored', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual(jx(''), elem('a')) - assert.deepEqual( - jx('hello'), - elem('a', ['hello']), - ) - assert.deepEqual( - jx(''), - elem('a', [elem('b')]), - ) - }) - - test('processing-instructions-ignored', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(''), - elem('a'), - ) - assert.deepEqual( - jx(''), - elem('root'), - ) - }) - - test('doctype-ignored', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(''), - elem('html'), - ) - assert.deepEqual( - jx( - 'hi', - ), - elem('note', [elem('body', ['hi'])]), - ) - }) - - test('doctype-with-internal-subset', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(']>'), - elem('a'), - ) - }) - - test('cdata-section', () => { - const jx = Jsonic.make().use(Xml) - assert.deepEqual( - jx(' & raw text]]>'), - elem('a', [' & raw text']), - ) - }) - - test('namespaces-default', () => { - const jx = Jsonic.make().use(Xml) - const result = jx('') - assert.deepEqual(result, { - name: 'a', - localName: 'a', - namespace: 'http://example.com', - attributes: { xmlns: 'http://example.com' }, - children: [ - { - name: 'b', - localName: 
'b', - namespace: 'http://example.com', - attributes: {}, - children: [], - }, - ], - }) - }) + } + return rows +} - test('namespaces-prefixed', () => { - const jx = Jsonic.make().use(Xml) - const result = jx( - 'body', - ) - assert.deepEqual(result, { - name: 'root', - localName: 'root', - attributes: { 'xmlns:x': 'http://x.example' }, - children: [ - { - name: 'x:a', - prefix: 'x', - localName: 'a', - namespace: 'http://x.example', - attributes: { 'x:k': 'v' }, - children: ['body'], - }, - ], - }) - }) +// Decode the escape sequences used in the spec `input` column. Keeps +// the behaviour identical to the Go loader so the two language test +// suites exercise the exact same XML text. +function unescapeInput(s: string): string { + if (!s.includes('\\')) return s + let out = '' + for (let i = 0; i < s.length; i++) { + const c = s[i] + if (c === '\\' && i + 1 < s.length) { + const n = s[i + 1] + if (n === 'n') { out += '\n'; i++; continue } + if (n === 'r') { out += '\r'; i++; continue } + if (n === 't') { out += '\t'; i++; continue } + if (n === '\\') { out += '\\'; i++; continue } + } + out += c + } + return out +} - test('namespaces-inherited-scope', () => { - const jx = Jsonic.make().use(Xml) - const result = jx( - '', - ) - assert.equal(result.children[0].namespace, 'http://p.example') - assert.equal(result.children[0].children[0].namespace, 'http://p.example') +function runSpec(file: string) { + const rows = loadSpec(file) + describe(file, () => { + for (const row of rows) { + test(row.name, () => { + const opts = row.opts.trim() === '' ? undefined : JSON.parse(row.opts) + const jx = opts ? Jsonic.make().use(Xml, opts) : Jsonic.make().use(Xml) + + if (row.expected.startsWith('ERROR')) { + const code = row.expected.slice(5).replace(/^:/, '') + assert.throws( + () => jx(row.input), + (err: Error) => + code === '' || err.message.includes(code) || + // Jsonic wraps codes as `jsonic/`; accept that form too. 
+ err.message.includes('/' + code), + `${row.file}:${row.line}: expected error ${row.expected}`, + ) + return + } + + const got = jx(row.input) + const want = JSON.parse(row.expected) + // Round-trip `got` through JSON so ordering of keys does not affect + // structural comparison (deepEqual is already order-insensitive for + // objects, but this also strips undefined fields cleanly). + assert.deepEqual( + JSON.parse(JSON.stringify(got)), + want, + `${row.file}:${row.line}: ${row.name}`, + ) + }) + } }) +} - test('namespaces-override-in-child', () => { - const jx = Jsonic.make().use(Xml) - const result = jx( - '', - ) - assert.equal(result.namespace, 'A') - assert.equal(result.children[0].namespace, 'B') - assert.equal(result.children[0].children[0].namespace, 'B') - assert.equal(result.children[1].namespace, 'A') - }) +// Auto-discover every .tsv under test/spec and run it. Keeping this +// driven by directory contents means adding a new spec file never +// requires editing the TypeScript test code. +for (const file of readdirSync(specDir)) { + if (file.endsWith('.tsv')) runSpec(file) +} - test('namespaces-disabled', () => { - const jx = Jsonic.make().use(Xml, { namespaces: false }) - const result = jx('') - assert.equal(result.namespace, undefined) - assert.equal(result.prefix, undefined) - }) - test('full-document', () => { - const jx = Jsonic.make().use(Xml) - const src = ` - - - - Tove - Jani - Reminder - Don't forget me this weekend! & cheers - ]]> -` - const result = jx(src) - assert.equal(result.name, 'note') - assert.equal(result.attributes.lang, 'en') - const childElems = result.children.filter((c: any) => 'object' === typeof c) - assert.equal(childElems.length, 5) - assert.equal(childElems[0].name, 'to') - assert.equal(childElems[0].children[0], 'Tove') - assert.equal(childElems[3].children[0], "Don't forget me this weekend! 
& cheers") - assert.equal(childElems[4].children[0], '') +// --------------------------------------------------------------------------- +// XML embedded in Jsonic source +// +// A common real-world pattern is to keep XML payloads inside a larger +// Jsonic configuration file as a string value. This test demonstrates +// that the stock Jsonic parser reads the outer document and the Xml +// plugin parses the embedded payload. +// --------------------------------------------------------------------------- + +describe('xml-embedded-in-jsonic', () => { + test('parses XML inside a Jsonic multiline string', () => { + // A plain Jsonic document. The backtick string carries the XML + // payload verbatim, with newlines and double quotes intact. + const jsonicSrc = "{\n" + + " title: 'order-42',\n" + + " payload: `" + + '\n' + + '\n' + + ' Widget\n' + + ' Gadget\n' + + '' + "`,\n" + + "}\n" + + const outer = Jsonic(jsonicSrc) as any + assert.equal(outer.title, 'order-42') + assert.equal(typeof outer.payload, 'string') + + const xmlParser = Jsonic.make().use(Xml) + const parsed = xmlParser(outer.payload) as any + assert.equal(parsed.name, 'order') + assert.equal(parsed.attributes.id, '42') + + const items = parsed.children.filter( + (c: any) => typeof c === 'object' && c.name === 'item', + ) + assert.equal(items.length, 2) + assert.equal(items[0].attributes.qty, '2') + assert.equal(items[0].children[0], 'Widget') + assert.equal(items[1].attributes.qty, '1') + assert.equal(items[1].children[0], 'Gadget') }) }) From c1ca5b99517fa7553ba58bb4a28d587a0e09f31a Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 10:20:36 +0000 Subject: [PATCH 04/15] Add embed mode and W3C XML Test Suite integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit embed mode ---------- Adds an `embed: true` plugin option that extends Jsonic's own grammar so a literal XML element (`` or ``) can appear wherever a Jsonic value is expected — inside maps, 
lists, or at the top level. Default behaviour (`embed: false`) remains pure-XML parsing with the JSON rules stripped. When `embed: true` the plugin: - keeps the full JSON/JSONIC grammar in place, including the structural fixed tokens `{ } [ ] : ,`; - splices two alternates into the `val` rule so `#XOP`/`#XSC` tokens dispatch to the `element` rule; - tracks XML nesting depth in `ctx.u.xmlDepth`: while depth > 0 the custom matcher also claims any run of non-`<` characters as a single `#TX` node (optionally entity-decoded), so Jsonic's lexer can't reinterpret a comma or colon inside XML text as a JSON separator; - resolves namespaces on close of an `element` rule that sits directly under a `val` rule. The embedded-XML tests in both `test/xml.test.ts` and `go/xml_test.go` now use real literal XML in Jsonic source instead of stuffing the document inside a string. W3C XML Conformance Test Suite ------------------------------ Added `scripts/fetch-xml-suite.sh` to download the 2013-09-23 snapshot of the W3C XML Test Suite (xmltest) into `test/xmlconf/` (gitignored — the suite is owned by W3C and contributors and is not redistributed). Both languages pick it up automatically when present: - `go/xmlconf_test.go` iterates `xmltest/valid/sa/*.xml` and `xmltest/not-wf/sa/*.xml`, counting successful parses and expected rejections, and asserts each count stays above a regression floor. - An equivalent `describe(..., { skip: ... })` block in `test/xml.test.ts` does the same for Node. Current numbers (regression guard in parentheses): - valid/sa : 116 / 120 parsed (floor 110) - not-wf/sa : 39 / 186 rejected (floor 30) The handful of `valid/sa` misses are UTF-16 BOM files and tests that use non-Latin tag names — both out of scope for the current parser. Many `not-wf/sa` tests hinge on character-level WF constraints our structural parser doesn't enforce, hence the conservative floor. 
--- .gitignore | 7 +- go/xml.go | 205 ++++++++++++++++++++++++++------ go/xml_test.go | 171 ++++++++++++++++++++------- go/xmlconf_test.go | 139 ++++++++++++++++++++++ scripts/fetch-xml-suite.sh | 46 ++++++++ src/xml.ts | 235 +++++++++++++++++++++++-------------- test/xml.test.ts | 183 ++++++++++++++++++++++++----- 7 files changed, 785 insertions(+), 201 deletions(-) create mode 100644 go/xmlconf_test.go create mode 100755 scripts/fetch-xml-suite.sh diff --git a/.gitignore b/.gitignore index 853c4a4..c2ad347 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,6 @@ lib-cov *.seed *.log -*.csv -!test/fixtures/*.csv *.dat *.out *.pid @@ -19,7 +17,6 @@ node_modules .idea/ - trial test/coverage.html @@ -32,4 +29,6 @@ dist-test package-lock.json yarn.lock - +# W3C XML Conformance Test Suite (downloaded on demand via +# scripts/fetch-xml-suite.sh — not redistributed). +test/xmlconf/ diff --git a/go/xml.go b/go/xml.go index 97b30b8..1c7f8c2 100644 --- a/go/xml.go +++ b/go/xml.go @@ -27,10 +27,28 @@ const Version = "0.1.0" // Defaults are merged with caller-supplied options when the plugin is // registered via jsonic.UseDefaults. +// +// Option keys: +// +// namespaces bool resolve xmlns / xmlns:* into prefix / +// localName / namespace fields on every +// element. Default: true. +// entities bool decode the five predefined entities and +// numeric character references in text and +// attribute values. Default: true. +// customEntities map[string]string extra named entities to recognise. +// embed bool when true, keep Jsonic's JSON/JSONIC +// grammar in place and splice an XML +// literal alternate into the `val` rule +// so `` can appear wherever +// Jsonic expects a value. When false +// (default) the parser is reconfigured +// as a pure-XML parser. var Defaults = map[string]any{ "namespaces": true, "entities": true, "customEntities": map[string]string{}, + "embed": false, } // Xml is the Jsonic plugin entry point. 
Register via: @@ -48,6 +66,7 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { namespacesOn := toBool(options["namespaces"], true) entitiesOn := toBool(options["entities"], true) customEntities := toStringMap(options["customEntities"]) + embed := toBool(options["embed"], false) decode := buildEntityDecoder(entitiesOn, customEntities) @@ -59,48 +78,29 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { xclTin := j.Token("#XCL", "") xscTin := j.Token("#XSC", "") - // Register a dummy fixed token bound to a character that cannot - // legally appear in XML source (ASCII SOH). This keeps the lexer's - // internal `FixedSorted` list non-empty, which in turn disables an - // otherwise-hardcoded fallback that still ends text tokens on any - // of `{ } [ ] : ,` even when those symbols have been removed from - // the fixed token map. Without this, XML text content containing a - // comma would be truncated at the comma. - soh := "\x01" - _ = j.Token("#XDUM", soh) - - // Custom lexer matcher registered at low priority so it runs before - // the built-in text/fixed matchers and captures every `<...>` - // construct as a single token. + if !embed { + // Register a dummy fixed token bound to a character that cannot + // legally appear in XML source (ASCII SOH). This keeps the + // lexer's internal `FixedSorted` list non-empty, which in turn + // disables an otherwise-hardcoded fallback that still ends text + // tokens on `{ } [ ] : ,` even when those symbols have been + // removed from the fixed token map. Without this, XML text + // content containing a comma would be truncated at the comma. + // In embed mode the JSON structural tokens remain in place, so + // the dummy is not needed. + soh := "\x01" + _ = j.Token("#XDUM", soh) + } + + // Shared options installed in both modes: the custom matcher, the + // text-end character `<`, and the XML-specific error templates. 
j.SetOptions(jsonic.Options{ Lex: &jsonic.LexOptions{ Match: map[string]*jsonic.MatchSpec{ - "xmltag": {Order: 100_000, Make: buildXmlTagMatcher(decode, xigTin, xopTin, xclTin, xscTin)}, + "xmltag": {Order: 100_000, Make: buildXmlTagMatcher(decode, entitiesOn, embed, xigTin, xopTin, xclTin, xscTin)}, }, }, Ender: []string{"<"}, - Rule: &jsonic.RuleOptions{ - Start: "xml", - Exclude: "jsonic,imp", - }, - Fixed: &jsonic.FixedOptions{Token: map[string]*string{ - "#OB": nil, "#CB": nil, "#OS": nil, "#CS": nil, - "#CL": nil, "#CA": nil, - }}, - Number: &jsonic.NumberOptions{Lex: boolPtr(false)}, - Value: &jsonic.ValueOptions{Lex: boolPtr(false)}, - String: &jsonic.StringOptions{Lex: boolPtr(false)}, - Comment: &jsonic.CommentOptions{Lex: boolPtr(false)}, - Space: &jsonic.SpaceOptions{Lex: boolPtr(false)}, - Line: &jsonic.LineOptions{Lex: boolPtr(false)}, - Text: &jsonic.TextOptions{ - Modify: []jsonic.ValModifier{func(v any) any { - if s, ok := v.(string); ok && entitiesOn { - return decode(s) - } - return v - }}, - }, Error: map[string]string{ "xml_mismatched_tag": "closing tag does not match opening tag <$openname>", "xml_invalid_tag": "invalid tag: $fsrc", @@ -113,8 +113,41 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { }, }) + if !embed { + // Pure XML mode: reconfigure the parser so Jsonic's own value + // grammar is unreachable and all lexers other than our tag + // matcher are quiescent. 
+ j.SetOptions(jsonic.Options{ + Rule: &jsonic.RuleOptions{ + Start: "xml", + Exclude: "jsonic,imp", + }, + Fixed: &jsonic.FixedOptions{Token: map[string]*string{ + "#OB": nil, "#CB": nil, "#OS": nil, "#CS": nil, + "#CL": nil, "#CA": nil, + }}, + Number: &jsonic.NumberOptions{Lex: boolPtr(false)}, + Value: &jsonic.ValueOptions{Lex: boolPtr(false)}, + String: &jsonic.StringOptions{Lex: boolPtr(false)}, + Comment: &jsonic.CommentOptions{Lex: boolPtr(false)}, + Space: &jsonic.SpaceOptions{Lex: boolPtr(false)}, + Line: &jsonic.LineOptions{Lex: boolPtr(false)}, + Text: &jsonic.TextOptions{ + Modify: []jsonic.ValModifier{func(v any) any { + if s, ok := v.(string); ok && entitiesOn { + return decode(s) + } + return v + }}, + }, + }) + } + // IGNORE set: drop #XIG (comments, PIs, DOCTYPE) along with the - // default members so any of them is skipped by the parser. + // default members so any of them is skipped by the parser. In + // embed mode this preserves all default ignored tokens; in pure + // mode the SP/LN/CM tokens are never produced (we disabled their + // lexers), but keeping them here is harmless. j.SetTokenSet("IGNORE", []jsonic.Tin{ j.Token("#SP", ""), j.Token("#LN", ""), j.Token("#CM", ""), xigTin, }) @@ -259,9 +292,73 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { return fmt.Errorf("xml: apply grammar: %w", err) } + if embed { + // Splice XML literals into the Jsonic `val` rule. When the + // parser is looking for a value and sees `#XOP` or `#XSC`, + // push the `element` rule (backtracking by 1 so element.open + // can read the same token and dispatch). 
+ j.Rule("val", func(rs *jsonic.RuleSpec) { + rs.Open = append(rs.Open, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{xopTin}}, + B: 1, P: "element", G: "xml", + }, + &jsonic.AltSpec{ + S: [][]jsonic.Tin{{xscTin}}, + B: 1, P: "element", G: "xml", + }, + ) + }) + + // In embed mode the top-level wrapper is Jsonic's `val` rule, + // so the @xml-bc hook that copies the root element to + // ctx.root().node is not invoked. Resolve namespaces instead + // when the element rule closes directly under a val rule. + if namespacesOn { + j.Rule("element", func(rs *jsonic.RuleSpec) { + rs.AddBC(func(r *jsonic.Rule, ctx *jsonic.Context) { + if r.Parent != nil && r.Parent != jsonic.NoRule && + r.Parent.Name == "val" { + if el, ok := r.Node.(map[string]any); ok { + resolveNamespaces(el, nil) + } + } + }) + }) + } + } + return nil } +// xmlDepth reads the per-parse XML nesting counter from the lex context. +// Returns 0 if not set. +func xmlDepth(lex *jsonic.Lex) int { + if lex == nil || lex.Ctx == nil { + return 0 + } + if lex.Ctx.U == nil { + lex.Ctx.U = map[string]any{} + return 0 + } + v, _ := lex.Ctx.U["xmlDepth"].(int) + return v +} + +// setXmlDepth writes the XML nesting counter, clamping at zero. +func setXmlDepth(lex *jsonic.Lex, d int) { + if lex == nil || lex.Ctx == nil { + return + } + if lex.Ctx.U == nil { + lex.Ctx.U = map[string]any{} + } + if d < 0 { + d = 0 + } + lex.Ctx.U["xmlDepth"] = d +} + // firstRule walks back through Prev links to find the originating rule // instance (matches the root rule used by the parser as the result // holder). 
@@ -341,6 +438,8 @@ func buildEntityDecoder(enabled bool, custom map[string]string) func(string) str // #TX val = cdata body (verbatim, no entity decoding) func buildXmlTagMatcher( decode func(string) string, + entitiesOn bool, + embed bool, xigTin, xopTin, xclTin, xscTin jsonic.Tin, ) jsonic.MakeLexMatcher { return func(_ *jsonic.LexConfig, _ *jsonic.Options) jsonic.LexMatcher { @@ -349,6 +448,31 @@ func buildXmlTagMatcher( src := lex.Src srclen := len(src) sI := pnt.SI + + // Embed mode: inside an open XML element (depth > 0), + // consume characters up to the next `<` as a single #TX + // text token so that Jsonic's own matchers don't reinterpret + // commas/colons/etc. as JSON separators. + if embed && sI < srclen && src[sI] != '<' { + if depth := xmlDepth(lex); depth > 0 { + i := sI + for i < srclen && src[i] != '<' { + i++ + } + if i == sI { + return nil + } + raw := src[sI:i] + var val any = raw + if entitiesOn { + val = decode(raw) + } + tkn := lex.Token("#TX", jsonic.TinTX, val, raw) + advance(pnt, sI, i) + return tkn + } + } + if sI >= srclen || src[sI] != '<' { return nil } @@ -441,6 +565,9 @@ func buildXmlTagMatcher( tsrc := src[sI:finish] tkn := lex.Token("#XCL", xclTin, name, tsrc) advance(pnt, sI, finish) + if embed { + setXmlDepth(lex, xmlDepth(lex)-1) + } return tkn } @@ -473,6 +600,9 @@ func buildXmlTagMatcher( val := map[string]any{"name": name, "attributes": attrs} tkn := lex.Token("#XOP", xopTin, val, tsrc) advance(pnt, sI, finish) + if embed { + setXmlDepth(lex, xmlDepth(lex)+1) + } return tkn } if src[i] == '/' && i+1 < srclen && src[i+1] == '>' { @@ -481,6 +611,7 @@ func buildXmlTagMatcher( val := map[string]any{"name": name, "attributes": attrs} tkn := lex.Token("#XSC", xscTin, val, tsrc) advance(pnt, sI, finish) + // #XSC is an instantly-closed element; depth unchanged. 
return tkn } diff --git a/go/xml_test.go b/go/xml_test.go index a6186f7..be18e7e 100644 --- a/go/xml_test.go +++ b/go/xml_test.go @@ -198,63 +198,83 @@ func TestStructureSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), func TestErrorsSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "errors.tsv")) } func TestW3CSpec(t *testing.T) { runSpecFile(t, filepath.Join(specDir(), "w3c.tsv")) } -// --- XML embedded in Jsonic source ----------------------------------------- +// --- XML literals embedded in Jsonic source -------------------------------- // -// Real-world use case: a Jsonic document holds an XML payload as a string. -// Parse the outer document with stock Jsonic, then feed the embedded XML -// string into a second Jsonic instance configured with the Xml plugin. +// With `embed: true` the plugin extends Jsonic's own grammar so a literal +// XML element can appear anywhere a Jsonic value is expected. These tests +// exercise that integration: plain Jsonic documents still parse, a lone +// XML literal parses as an element, XML literals inside maps and lists +// parse in place, text with JSON-syntax characters (commas, colons) is +// preserved, and namespaces resolve across the embedded tree. -func TestXmlEmbeddedInJsonic(t *testing.T) { - // An ordinary Jsonic document. Uses backtick-delimited multiline - // strings so the XML can embed newlines and double quotes verbatim. 
- jsonicSrc := "{\n" + - " title: 'order-42',\n" + - " payload: `" + - `` + "\n" + - `` + "\n" + - ` Widget` + "\n" + - ` Gadget` + "\n" + - `` + "`,\n" + - "}\n" +func embedParser(t *testing.T) *jsonic.Jsonic { + t.Helper() + j := jsonic.Make() + if err := j.UseDefaults(Xml, Defaults, map[string]any{"embed": true}); err != nil { + t.Fatalf("UseDefaults: %v", err) + } + return j +} - outer, err := jsonic.Parse(jsonicSrc) +func TestEmbedPlainJsonicStillWorks(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`{a:1, b:"two"}`) if err != nil { - t.Fatalf("parse outer Jsonic: %v", err) + t.Fatalf("parse: %v", err) } - m, ok := outer.(map[string]any) + m, ok := got.(map[string]any) if !ok { - t.Fatalf("outer should be map, got %T", outer) + t.Fatalf("expected map, got %T", got) } - if m["title"] != "order-42" { - t.Fatalf("title mismatch: %v", m["title"]) + if m["a"] != float64(1) || m["b"] != "two" { + t.Fatalf("plain jsonic: got %v", m) } - payload, ok := m["payload"].(string) +} + +func TestEmbedXmlAsTopLevelValue(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`hello`) + if err != nil { + t.Fatalf("parse: %v", err) + } + el, ok := got.(map[string]any) if !ok { - t.Fatalf("payload should be string, got %T", m["payload"]) + t.Fatalf("expected map, got %T", got) } - - // Parse the XML payload with the Xml plugin. 
- xmlParser := jsonic.Make() - if err := xmlParser.UseDefaults(Xml, Defaults); err != nil { - t.Fatalf("xml plugin init: %v", err) + if el["name"] != "a" { + t.Fatalf("name: got %v", el["name"]) + } + children, _ := el["children"].([]any) + if len(children) != 1 || children[0] != "hello" { + t.Fatalf("children: got %v", children) } - parsed, err := xmlParser.Parse(payload) +} + +func TestEmbedXmlInsideJsonicMap(t *testing.T) { + j := embedParser(t) + src := `{ + title: "order-42", + payload: + Widget + Gadget + , +}` + got, err := j.Parse(src) if err != nil { - t.Fatalf("parse XML payload: %v", err) + t.Fatalf("parse: %v", err) } - el, ok := parsed.(map[string]any) - if !ok { - t.Fatalf("xml result should be map, got %T", parsed) + m := got.(map[string]any) + if m["title"] != "order-42" { + t.Fatalf("title: got %v", m["title"]) } - if el["name"] != "order" { - t.Fatalf("root name: got %v, want order", el["name"]) + payload := m["payload"].(map[string]any) + if payload["name"] != "order" { + t.Fatalf("payload.name: got %v", payload["name"]) } - attrs, _ := el["attributes"].(map[string]any) - if attrs["id"] != "42" { - t.Fatalf("root attr id: got %v, want 42", attrs["id"]) + if a, _ := payload["attributes"].(map[string]any); a["id"] != "42" { + t.Fatalf("payload.attributes.id: got %v", a["id"]) } - // Count children and check attrs. 
- children, _ := el["children"].([]any) + children, _ := payload["children"].([]any) var items []map[string]any for _, c := range children { if cm, ok := c.(map[string]any); ok && cm["name"] == "item" { @@ -265,10 +285,75 @@ func TestXmlEmbeddedInJsonic(t *testing.T) { t.Fatalf("expected 2 item elements, got %d", len(items)) } if a, _ := items[0]["attributes"].(map[string]any); a["qty"] != "2" { - t.Fatalf("item[0].qty: got %v, want 2", a["qty"]) + t.Fatalf("item[0].qty: got %v", a["qty"]) + } + if ch, _ := items[0]["children"].([]any); len(ch) != 1 || ch[0] != "Widget" { + t.Fatalf("item[0].children: got %v", ch) } if a, _ := items[1]["attributes"].(map[string]any); a["qty"] != "1" { - t.Fatalf("item[1].qty: got %v, want 1", a["qty"]) + t.Fatalf("item[1].qty: got %v", a["qty"]) + } +} + +func TestEmbedXmlTextPreservesJsonSyntaxChars(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`Hello, World!`) + if err != nil { + t.Fatalf("parse: %v", err) + } + children, _ := got.(map[string]any)["children"].([]any) + if len(children) != 1 || children[0] != "Hello, World!" 
{ + t.Fatalf("children: got %v", children) + } + + got2, err := j.Parse(`key: value`) + if err != nil { + t.Fatalf("parse: %v", err) + } + children2, _ := got2.(map[string]any)["children"].([]any) + if len(children2) != 1 || children2[0] != "key: value" { + t.Fatalf("children2: got %v", children2) + } +} + +func TestEmbedMultipleXmlInsideJsonicList(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`[, x, ]`) + if err != nil { + t.Fatalf("parse: %v", err) + } + arr, ok := got.([]any) + if !ok || len(arr) != 3 { + t.Fatalf("expected 3-element list, got %v", got) + } + if arr[0].(map[string]any)["name"] != "a" { + t.Fatalf("arr[0]: %v", arr[0]) + } + if ch, _ := arr[1].(map[string]any)["children"].([]any); len(ch) != 1 || ch[0] != "x" { + t.Fatalf("arr[1].children: %v", ch) + } + if a, _ := arr[2].(map[string]any)["attributes"].(map[string]any); a["x"] != "1" { + t.Fatalf("arr[2].attributes.x: %v", a) + } +} + +func TestEmbedXmlNamespacesResolve(t *testing.T) { + j := embedParser(t) + got, err := j.Parse(`{doc: }`) + if err != nil { + t.Fatalf("parse: %v", err) + } + doc := got.(map[string]any)["doc"].(map[string]any) + if doc["namespace"] != "http://e.example" { + t.Fatalf("doc.namespace: %v", doc["namespace"]) + } + children, _ := doc["children"].([]any) + if len(children) != 1 { + t.Fatalf("expected 1 child, got %d", len(children)) + } + child := children[0].(map[string]any) + if child["namespace"] != "http://e.example" { + t.Fatalf("child.namespace: %v", child["namespace"]) } } diff --git a/go/xmlconf_test.go b/go/xmlconf_test.go new file mode 100644 index 0000000..d73ff26 --- /dev/null +++ b/go/xmlconf_test.go @@ -0,0 +1,139 @@ +package xml + +import ( + "os" + "path/filepath" + "strings" + "testing" + + jsonic "github.com/jsonicjs/jsonic/go" +) + +// Exercise the parser against the W3C XML Conformance Test Suite +// (xmltest, James Clark's set). 
The suite is not bundled with the +// repository — run `scripts/fetch-xml-suite.sh` to download it into +// `test/xmlconf/`. When the suite is absent these tests are skipped. +// +// Our parser deliberately doesn't implement every XML 1.0 well- +// formedness constraint (we don't validate character legality, resolve +// DTD-declared entities, or check for all forbidden sequences such as +// `--` inside comments), so the goal of these tests is not 100% +// conformance. Instead each test records how many documents parsed as +// expected and fails only if that count regresses below a stable +// floor. The numbers below were measured against the current parser +// and will move upward as conformance improves. + +const ( + // Minimum `valid/sa/*.xml` documents that must parse without error + // (out of 120). The handful of expected failures are UTF-16/UTF-32 + // byte-order-marked files and tests that use non-Latin tag names, + // both of which are out of scope for the current implementation. + validSaPassFloor = 110 + + // Minimum `not-wf/sa/*.xml` documents that must be rejected. The + // parser catches structural well-formedness errors (bad tags, + // unmatched close, unterminated constructs) but does not check + // many character-level WF constraints, so this floor is set well + // below total (186) and serves as a regression guard. 
+ notWfSaRejectFloor = 30 +) + +func xmlconfRoot(t *testing.T) string { + t.Helper() + root := filepath.Join("..", "test", "xmlconf") + info, err := os.Stat(filepath.Join(root, "xmltest")) + if err != nil || !info.IsDir() { + t.Skipf("W3C XML Test Suite not found at %s; run scripts/fetch-xml-suite.sh to enable this test", root) + } + return root +} + +func xmlconfFiles(t *testing.T, dir string) []string { + t.Helper() + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("read %s: %v", dir, err) + } + var out []string + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".xml") { + continue + } + out = append(out, filepath.Join(dir, e.Name())) + } + return out +} + +func xmlconfParse(src string) (any, error) { + j := jsonic.Make() + if err := j.UseDefaults(Xml, Defaults); err != nil { + return nil, err + } + return j.Parse(src) +} + +func TestXmlConfValidStandalone(t *testing.T) { + root := xmlconfRoot(t) + files := xmlconfFiles(t, filepath.Join(root, "xmltest", "valid", "sa")) + if len(files) == 0 { + t.Skipf("no files under xmltest/valid/sa") + } + + pass := 0 + var failures []string + for _, path := range files { + body, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + if _, perr := xmlconfParse(string(body)); perr != nil { + failures = append(failures, filepath.Base(path)+": "+ + strings.SplitN(perr.Error(), "\n", 2)[0]) + continue + } + pass++ + } + + total := len(files) + t.Logf("valid/sa: %d / %d parsed successfully", pass, total) + if pass < validSaPassFloor { + t.Errorf("valid/sa pass count %d dropped below floor %d (total %d). 
Sample failures:\n %s", + pass, validSaPassFloor, total, strings.Join(firstN(failures, 5), "\n ")) + } +} + +func TestXmlConfNotWellFormedStandalone(t *testing.T) { + root := xmlconfRoot(t) + files := xmlconfFiles(t, filepath.Join(root, "xmltest", "not-wf", "sa")) + if len(files) == 0 { + t.Skipf("no files under xmltest/not-wf/sa") + } + + rejected := 0 + var falseAccepts []string + for _, path := range files { + body, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + if _, perr := xmlconfParse(string(body)); perr != nil { + rejected++ + continue + } + falseAccepts = append(falseAccepts, filepath.Base(path)) + } + + total := len(files) + t.Logf("not-wf/sa: %d / %d rejected as expected", rejected, total) + if rejected < notWfSaRejectFloor { + t.Errorf("not-wf/sa reject count %d dropped below floor %d (total %d). Sample false accepts:\n %s", + rejected, notWfSaRejectFloor, total, strings.Join(firstN(falseAccepts, 5), "\n ")) + } +} + +func firstN(list []string, n int) []string { + if len(list) > n { + return list[:n] + } + return list +} diff --git a/scripts/fetch-xml-suite.sh b/scripts/fetch-xml-suite.sh new file mode 100755 index 0000000..ac6629f --- /dev/null +++ b/scripts/fetch-xml-suite.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Download the W3C XML Conformance Test Suite (xmlts, 2013-09-23 +# snapshot) and extract it into test/xmlconf/ so both the Go and +# TypeScript test runners can exercise the parser against thousands +# of real-world XML documents. +# +# The archive is owned by W3C and its contributors (Sun, OASIS, IBM, +# University of Edinburgh, Fuji Xerox, ...) and is not redistributed +# as part of this repository. Running this script is an explicit +# opt-in to download it from the W3C site. +# +# Usage: +# scripts/fetch-xml-suite.sh # default location +# scripts/fetch-xml-suite.sh /some/dir # custom destination +# +# After fetching, the conformance-driven tests are picked up +# automatically: +# go test ./go/... 
+# npm test +set -euo pipefail + +URL="https://www.w3.org/XML/Test/xmlts20130923.tar.gz" + +REPO_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)" +DEST="${1:-$REPO_ROOT/test/xmlconf}" + +if [ -d "$DEST" ] && [ -d "$DEST/xmltest" ]; then + echo "Suite already present at $DEST (delete the directory to re-download)." + exit 0 +fi + +tmp="$(mktemp -d)" +trap 'rm -rf "$tmp"' EXIT + +echo "Fetching $URL ..." +curl -fL -o "$tmp/xmlts.tar.gz" "$URL" + +echo "Extracting to $DEST ..." +mkdir -p "$DEST" +# The archive already contains a top-level `xmlconf/` directory, so +# strip one component to land its contents directly in $DEST. +tar -xzf "$tmp/xmlts.tar.gz" -C "$DEST" --strip-components=1 + +valid=$(find "$DEST/xmltest/valid/sa" -maxdepth 1 -name '*.xml' | wc -l) +notwf=$(find "$DEST/xmltest/not-wf/sa" -maxdepth 1 -name '*.xml' | wc -l) +echo "Done. Extracted $valid standalone-valid and $notwf not-well-formed XML files." diff --git a/src/xml.ts b/src/xml.ts index 4eac2a6..45591ef 100644 --- a/src/xml.ts +++ b/src/xml.ts @@ -4,6 +4,7 @@ import { Jsonic, Rule, + RuleSpec, Plugin, Context, Config, @@ -39,6 +40,16 @@ type XmlOptions = { entities: boolean // Additional named entities to recognise beyond the five predefined ones. customEntities: Record + // Embed mode. When `false` (default), the plugin configures the parser + // for pure-XML input: the start rule becomes `xml`, JSON structural + // tokens are disabled, and all non-XML lexing is turned off. + // + // When `true`, the plugin leaves Jsonic's JSON/JSONIC rules in place + // and adds an alternate to the `val` rule so that a literal XML + // element (`` or ``) appears wherever Jsonic + // expects a value. The XML literal is parsed with the same element + // grammar used in pure mode. 
+ embed: boolean } // --- BEGIN EMBEDDED xml-grammar.jsonic --- @@ -95,65 +106,70 @@ const grammarText = ` const Xml: Plugin = (jsonic: Jsonic, options: XmlOptions) => { + const embed = options.embed === true const decodeEntity = buildEntityDecoder(options) - // Register custom lexer matchers. - // - // The XML tag matcher handles any `<...>` construct: elements (open, - // close, self-closing) with attributes, comments, CDATA, processing - // instructions and DOCTYPE declarations. - // - // A text modifier decodes entity references (`&` etc.) in text - // nodes. Attribute values are decoded inside the tag matcher. + // Register custom lexer matcher. The same matcher is used in both + // modes; in embed mode it additionally consumes text between tags so + // Jsonic's own text/fixed lexers don't split it on `,` `:` etc. jsonic.options({ lex: { match: { - xmltag: { order: 1e5, make: buildXmlTagMatcher(decodeEntity) }, + xmltag: { + order: 1e5, + make: buildXmlTagMatcher(decodeEntity, embed, options), + }, }, emptyResult: undefined, }, - // Terminate text at `<` so tag starts are not absorbed into text runs. + // Terminate Jsonic text at `<` so XML tag starts are not absorbed + // into Jsonic text runs. ender: ['<'], - rule: { - start: 'xml', - // Strip out JSON rules so XML input is not reinterpreted. - exclude: 'jsonic,imp', - }, - // Disable JSON structural fixed tokens. - fixed: { - token: { - '#OB': null, - '#CB': null, - '#OS': null, - '#CS': null, - '#CL': null, - '#CA': null, + }) + + if (!embed) { + // Pure XML mode: reconfigure the parser so Jsonic's own value + // grammar is unreachable and all lexers other than our tag matcher + // are quiescent. + jsonic.options({ + rule: { + start: 'xml', + exclude: 'jsonic,imp', }, - }, - // Comments and processing instructions are emitted as a dedicated - // #XIG token and skipped by the parser via the IGNORE set. 
Keep the - // default IGNORE members so that whichever lexers happen to produce - // #SP/#LN/#CM still get skipped. - tokenSet: { - IGNORE: ['#SP', '#LN', '#CM', '#XIG'], - }, - // Disable number, value, and string lexing so XML text content is - // always a plain string. - number: { lex: false }, - value: { lex: false }, - string: { lex: false }, - comment: { lex: false }, - // Treat whitespace and newlines as part of text content rather than - // as separate tokens so text between tags is preserved verbatim. - space: { lex: false }, - line: { lex: false }, - // Decode entity references in text nodes. - text: { - modify: (val: any) => - 'string' === typeof val && options.entities !== false - ? decodeEntity(val) - : val, - }, + fixed: { + token: { + '#OB': null, '#CB': null, '#OS': null, '#CS': null, + '#CL': null, '#CA': null, + }, + }, + tokenSet: { + IGNORE: ['#SP', '#LN', '#CM', '#XIG'], + }, + number: { lex: false }, + value: { lex: false }, + string: { lex: false }, + comment: { lex: false }, + space: { lex: false }, + line: { lex: false }, + text: { + modify: (val: any) => + 'string' === typeof val && options.entities !== false + ? decodeEntity(val) + : val, + }, + }) + } else { + // Embed mode: keep all of Jsonic's standard grammar. Still register + // #XIG for comments/PIs/DOCTYPE and add it to IGNORE. + jsonic.options({ + tokenSet: { + IGNORE: ['#SP', '#LN', '#CM', '#XIG'], + }, + }) + } + + // Error templates and hints are installed in both modes. + jsonic.options({ error: { xml_mismatched_tag: 'closing tag does not match opening tag <$openname>', @@ -169,11 +185,6 @@ Expected but found .`, }) const refs: Record = { - // Propagate the parsed root element up to the xml rule so it becomes - // the final parse result. The xml rule uses `r: xml` to skip leading - // and trailing whitespace text, which creates a chain of rule - // instances. The root is the first one; walk the rule chain back to - // it so the final result is stored on the root rule's node. 
'@xml-bc': (r: Rule, ctx: Context) => { if (r.child && r.child.node) { const root = ctx.root() @@ -184,9 +195,6 @@ Expected but found .`, } }, - // Initialise the element node when the opening tag `` is - // matched. The tag token's value carries both the name and the - // parsed attribute map. '@element-open': (r: Rule) => { const v = r.o0.val r.node = { @@ -197,7 +205,6 @@ Expected but found .`, } }, - // Self-closing tag `` - no children. '@element-selfclose': (r: Rule) => { const v = r.o0.val r.node = { @@ -208,7 +215,6 @@ Expected but found .`, } }, - // Verify that `` matches the opening ``. '@element-close': (r: Rule, ctx: Context) => { const openName = r.node && r.node.name const closeName = r.c0.val @@ -218,34 +224,57 @@ Expected but found .`, } }, - // Text node - push the text value onto the enclosing element's - // children array. The content/child rules inherit `r.node` from the - // parent element, so `r.node.children` is the enclosing element's - // child list. '@child-text': (r: Rule) => { r.node.children.push(r.o0.val) r.u.done = true }, - // After the child rule returns (either from a text match above or - // from a nested `element` push), copy the nested element node into - // the parent element's children. Text was already pushed in open. '@child-bc': (r: Rule) => { if (true !== r.u.done && r.child && r.child.node) { r.node.children.push(r.child.node) } }, - // Condition: close of element is trivially met when it was a - // self-closing tag (``) with no separate close tag to match. '@element-is-selfclosed': (r: Rule) => true === !!r.u.selfclose, } - // Parse embedded grammar definition using a separate standard Jsonic - // instance, then wire refs and apply. + // Parse embedded grammar definition and wire refs. const grammarDef = Jsonic.make()(grammarText) grammarDef.ref = refs jsonic.grammar(grammarDef) + + if (embed) { + // Splice XML literals into the Jsonic `val` rule. 
When the parser + // is looking for a value and sees an `#XOP` or `#XSC` token, it + // pushes the `element` rule which builds the XML subtree. Backtrack + // by 1 so `element.open` can read the same token and dispatch to + // the correct branch. + const XOP = jsonic.token('#XOP') + const XSC = jsonic.token('#XSC') + jsonic.rule('val', (rs: RuleSpec) => { + return rs.open( + [ + { s: [XOP], b: 1, p: 'element', g: 'xml' }, + { s: [XSC], b: 1, p: 'element', g: 'xml' }, + ], + ) + }) + + // In embed mode the top-level wrapper is Jsonic's `val` rule, so + // the `@xml-bc` hook that copies the root element to `ctx.root().node` + // is not invoked. Resolve namespaces after the full tree lands on + // the element rule by hooking its close-state action. + if (options.namespaces !== false) { + jsonic.rule('element', (rs: RuleSpec) => { + rs.bc((r: Rule) => { + if (r.node && 'object' === typeof r.node && r.parent && + r.parent.name === 'val') { + resolveNamespaces(r.node, {}) + } + }) + }) + } + } } @@ -258,9 +287,6 @@ const predefinedEntities: Record = { apos: "'", } -// Build an entity-decoding function. Decodes the five predefined -// entities, numeric character references (`&#NN;` decimal and `&#xNN;` -// hex), plus any user-supplied custom entities. function buildEntityDecoder(options: XmlOptions) { const entities = { ...predefinedEntities, @@ -290,7 +316,12 @@ function buildEntityDecoder(options: XmlOptions) { // Build a lexer matcher that recognises all top-level XML constructs -// starting with `<`: +// starting with `<`. In embed mode the matcher also claims any text +// between an open tag and its matching close tag so that Jsonic's own +// text/fixed matchers don't split XML character data on JSON-syntax +// characters (`,`, `:`, etc.). 
+// +// Emits one of: // -> #XOP val = { name, attributes } // -> #XSC val = { name, attributes } // -> #XCL val = name @@ -300,18 +331,41 @@ function buildEntityDecoder(options: XmlOptions) { // -> #TX (verbatim text, no entity decoding) function buildXmlTagMatcher( decodeEntity: (src: string) => string, + embed: boolean, + options: XmlOptions, ) { const isNameStart = (ch: string) => /[A-Za-z_:]/.test(ch) const isNameChar = (ch: string) => /[A-Za-z0-9_\-\.:]/.test(ch) - const isSpace = (ch: string) => ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' + const isSpace = (ch: string) => + ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' return function makeXmlTagMatcher(_cfg: Config, _opts: Options) { return function xmlTagMatcher(lex: Lex) { const { pnt, src } = lex const sI = pnt.sI - if (src[sI] !== '<') return undefined + + // Embed mode: inside an open XML element (depth > 0), consume + // characters up to the next `<` as a single #TX text token so + // that Jsonic's own matchers don't reinterpret commas/colons/etc + // as JSON separators. + if (embed && sI < src.length && src[sI] !== '<') { + const depth = (lex.ctx?.u?.xmlDepth | 0) || 0 + if (depth > 0) { + let i = sI + while (i < src.length && src[i] !== '<') i++ + if (i === sI) return undefined + const raw = src.substring(sI, i) + const val = options.entities !== false ? 
decodeEntity(raw) : raw + const tkn = lex.token('#TX', val, raw, pnt) + pnt.sI = i + pnt.cI += i - sI + return tkn + } + } + + if (sI >= src.length || src[sI] !== '<') return undefined // Comment: if (src.startsWith(' | | (ignored) // #TX val = cdata body (verbatim, no entity decoding) func buildXmlTagMatcher( - decode func(string) string, + decode EntityDecoder, entitiesOn bool, embed bool, xigTin, xopTin, xclTin, xscTin jsonic.Tin, @@ -569,7 +666,7 @@ func buildXmlTagMatcher( normalised := normaliseLineEndings(raw) var val any = normalised if entitiesOn { - val = decode(normalised) + val = decode(normalised, dtdEntities(lex)) } tkn := lex.Token("#TX", jsonic.TinTX, val, raw) advance(pnt, sI, i) @@ -627,12 +724,19 @@ func buildXmlTagMatcher( if strings.HasPrefix(src[sI:], "' && depth <= 0 { break } @@ -642,6 +746,26 @@ func buildXmlTagMatcher( return lex.Bad("unterminated_doctype") } finish := i + 1 + // Extract any general internal entity + // declarations from the internal subset and stash them + // on the per-parse context. The matcher's text and + // attribute paths read this map back via lex.Ctx.U. + if subsetStart >= 0 && subsetEnd > subsetStart && lex.Ctx != nil { + found := parseDoctypeEntities(src[subsetStart:subsetEnd]) + if len(found) > 0 { + if lex.Ctx.U == nil { + lex.Ctx.U = map[string]any{} + } + existing, _ := lex.Ctx.U["dtdEntities"].(map[string]string) + if existing == nil { + existing = map[string]string{} + } + for k, v := range found { + existing[k] = v + } + lex.Ctx.U["dtdEntities"] = existing + } + } tsrc := src[sI:finish] tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) advance(pnt, sI, finish) @@ -793,7 +917,7 @@ func buildXmlTagMatcher( // types, all attributes are treated as CDATA-typed // (no further whitespace collapsing or trimming). 
normalised := normaliseAttrWhitespace(raw) - attrs[attrName] = decode(normalised) + attrs[attrName] = decode(normalised, dtdEntities(lex)) } } } diff --git a/src/xml.ts b/src/xml.ts index 8638f0b..ae33712 100644 --- a/src/xml.ts +++ b/src/xml.ts @@ -446,14 +446,27 @@ const predefinedEntities: Record = { apos: "'", } +// Build an entity decoder. The plugin-time entity map (predefined + +// customEntities) is closed over; per-parse entities declared in the +// DOCTYPE internal subset are passed in via the optional `dtd` +// argument and recursively expanded with cycle detection. +// +// Returned function signature: +// decode(src, dtd?) -> string +// where `dtd` is a per-parse map { name -> raw value } that the +// matcher pulls from `lex.ctx.u.dtdEntities`. function buildEntityDecoder(options: XmlOptions) { - const entities = { + const baseEntities = { ...predefinedEntities, ...(options?.customEntities || {}), } - const entityRE = /&(#x[0-9a-fA-F]+|#[0-9]+|[A-Za-z_][A-Za-z0-9_]*);/g + const entityRE = /&(#x[0-9a-fA-F]+|#[0-9]+|[A-Za-z_:][A-Za-z0-9_\-\.:]*);/g - return function decodeEntities(src: string): string { + function expand( + src: string, + dtd: Record, + seen: Set, + ): string { if (src.indexOf('&') < 0) return src return src.replace(entityRE, (match, ref) => { if (ref[0] === '#') { @@ -468,9 +481,86 @@ function buildEntityDecoder(options: XmlOptions) { return match } } - return undefined !== entities[ref] ? entities[ref] : match + // Predefined / option-supplied entities take precedence over + // anything declared in the DTD (matches the XML 1.0 rule that + // the five predefined entities are always available). + if (undefined !== baseEntities[ref]) return baseEntities[ref] + if (undefined !== dtd[ref]) { + if (seen.has(ref)) { + // Recursive entity reference is a WF violation. 
Fall through + // and keep the unexpanded text so the upstream WF check can + // catch the resulting bare `&` if the caller wants to treat + // this as an error; here we simply break the cycle. + return match + } + seen.add(ref) + const out = expand(dtd[ref], dtd, seen) + seen.delete(ref) + return out + } + return match }) } + + return function decodeEntities(src: string, dtd?: Record): string { + return expand(src, dtd || {}, new Set()) + } +} + +// Parse the body of a DOCTYPE declaration (the text between the `[` +// and `]` of the internal subset) and extract every internal general +// entity declaration ``. Parameter entity +// declarations (``) and external entity +// declarations (`` etc.) are recognised +// but skipped. Other declarations (` { + const ents: Record = {} + const isSpace = (ch: string) => + ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' + const isNm = (ch: string) => isNameCharCP(ch.charCodeAt(0)) + + let i = 0 + while (i < body.length) { + const idx = body.indexOf('', j) + i = end < 0 ? body.length : end + 1 + continue + } + // Read name. + if (j >= body.length || !isNameStartCP(body.charCodeAt(j))) { + i = j + 1 + continue + } + const nameStart = j + j++ + while (j < body.length && isNm(body[j])) j++ + const name = body.substring(nameStart, j) + while (j < body.length && isSpace(body[j])) j++ + // Quoted entity value -> internal entity. + if (body[j] === '"' || body[j] === "'") { + const quote = body[j] + j++ + const valStart = j + while (j < body.length && body[j] !== quote) j++ + if (j >= body.length) break + ents[name] = body.substring(valStart, j) + j++ + } + // External entity (SYSTEM / PUBLIC) - skip; we don't fetch. + const end = body.indexOf('>', j) + i = end < 0 ? 
body.length : end + 1 + } + return ents } @@ -489,7 +579,7 @@ function buildEntityDecoder(options: XmlOptions) { // -> #XIG (parser ignores) // -> #TX (verbatim text, no entity decoding) function buildXmlTagMatcher( - decodeEntity: (src: string) => string, + decodeEntity: (src: string, dtd?: Record) => string, embed: boolean, options: XmlOptions, ) { @@ -530,18 +620,21 @@ function buildXmlTagMatcher( // Returns either { val: string } on success or { err: string } if a // WF constraint is violated. Pure decoding (without validation) is // also available for CDATA bodies via decodeEntity(). - function processText(raw: string): { val?: string; err?: string } { + function processText( + raw: string, + dtd: Record, + ): { val?: string; err?: string } { const ctrlErr = checkChars(raw) if (ctrlErr) return { err: ctrlErr } if (raw.indexOf(']]>') >= 0) { return { err: 'cdata_terminator_in_text' } } - const ampErr = checkEntityRefs(raw) + const ampErr = checkEntityRefs(raw, dtd) if (ampErr) return { err: ampErr } // §2.11: normalise CR LF and lone CR to LF before downstream processing. const normalised = normaliseLineEndings(raw) return { - val: options.entities !== false ? decodeEntity(normalised) : normalised, + val: options.entities !== false ? 
decodeEntity(normalised, dtd) : normalised, } } @@ -580,7 +673,8 @@ function buildXmlTagMatcher( while (i < src.length && src[i] !== '<') i++ if (i === sI) return undefined const raw = src.substring(sI, i) - const result = processText(raw) + const dtd = (lex.ctx?.u?.dtdEntities) || {} + const result = processText(raw, dtd) if (result.err) { return lex.bad(result.err, sI, i) } @@ -636,17 +730,34 @@ function buildXmlTagMatcher( if (src.startsWith('' && depth <= 0) break + if (ch === '[') { + if (depth === 0) subsetStart = i + 1 + depth++ + } else if (ch === ']') { + depth-- + if (depth === 0) subsetEnd = i + } else if (ch === '>' && depth <= 0) break i++ } if (i >= src.length) { return lex.bad('unterminated_doctype', sI, src.length) } const end = i + 1 + // Extract any general internal entity + // declarations from the internal subset and stash them on + // the per-parse context. The matcher's text and attribute + // paths read this map back via lex.ctx.u.dtdEntities. + if (subsetStart >= 0 && subsetEnd > subsetStart && lex.ctx) { + const u: any = lex.ctx.u || (lex.ctx.u = {}) + const found = parseDoctypeEntities(src.substring(subsetStart, subsetEnd)) + if (Object.keys(found).length > 0) { + u.dtdEntities = { ...(u.dtdEntities || {}), ...found } + } + } const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) pnt.sI = end pnt.cI += end - sI @@ -780,7 +891,8 @@ function buildXmlTagMatcher( if (charErr) { return lex.bad(charErr, valStart, i) } - const ampErr = checkEntityRefs(rawVal) + const dtd = (lex.ctx?.u?.dtdEntities) || {} + const ampErr = checkEntityRefs(rawVal, dtd) if (ampErr) { return lex.bad(ampErr, valStart, i) } @@ -793,7 +905,7 @@ function buildXmlTagMatcher( // attribute types, so all attributes are treated as CDATA- // typed (no further whitespace collapsing or trimming). 
const normalised = normaliseAttrWhitespace(rawVal) - attributes[attrName] = decodeEntity(normalised) + attributes[attrName] = decodeEntity(normalised, dtd) } } } @@ -868,13 +980,15 @@ function checkChars(s: string): string { // Validate entity references in a run of character data. Returns an // error code on the first malformed reference, or '' if every `&` -// in the input is part of a well-formed reference. +// in the input is part of a well-formed reference. The optional +// `dtd` argument lets the validator accept DOCTYPE-declared entity +// names; without it, only the syntactic form is enforced. // // Well-formed forms: // &name; — name must start with a NameStartChar // &#nnnn; — decimal numeric character reference // &#xhhhh; — hexadecimal numeric character reference -function checkEntityRefs(s: string): string { +function checkEntityRefs(s: string, _dtd?: Record): string { for (let i = 0; i < s.length; i++) { if (s[i] !== '&') continue const semi = s.indexOf(';', i + 1) diff --git a/test/spec/dtd-entities.tsv b/test/spec/dtd-entities.tsv new file mode 100644 index 0000000..f7e0c4e --- /dev/null +++ b/test/spec/dtd-entities.tsv @@ -0,0 +1,16 @@ +# name input expected opts +# DOCTYPE-declared general internal entities. The plugin parses +# `` declarations from the internal subset, +# stores them per-parse, and uses them to resolve `&name;` in text +# and attribute values (recursively, with cycle detection). + +dtd-entity-basic-text ]>&foo; {"name":"doc","localName":"doc","attributes":{},"children":["bar"]} +dtd-entity-mixed-text ]>hello &x;! 
{"name":"doc","localName":"doc","attributes":{},"children":["hello world!"]} +dtd-entity-in-attribute ]> {"name":"doc","localName":"doc","attributes":{"a":"hello world"},"children":[]} +dtd-entity-single-quoted ]>&x; {"name":"doc","localName":"doc","attributes":{},"children":["plain"]} +dtd-entity-numeric-ref-in-value ]>&x; {"name":"doc","localName":"doc","attributes":{},"children":["A"]} +dtd-entity-recursive ]>&b; {"name":"d","localName":"d","attributes":{},"children":["BBAAABB"]} +dtd-entity-multi-decl ]>&a;&b;&c; {"name":"d","localName":"d","attributes":{},"children":["123"]} +dtd-entity-parameter-ignored ]>&g; {"name":"d","localName":"d","attributes":{},"children":["G"]} +dtd-entity-external-ignored ]>&g; {"name":"d","localName":"d","attributes":{},"children":["ok"]} +dtd-entity-predefined-overrides ]>& {"name":"d","localName":"d","attributes":{},"children":["&"]} From 0388d75e113a7b4d6f00a78fc85a2a4d0a150c8a Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 16:31:51 +0000 Subject: [PATCH 14/15] xml: add strictEntities option (default on) and fix DOCTYPE parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XML 1.0 §4.1 requires every named entity reference to resolve to a declared entity (predefined, custom, or DOCTYPE-declared). Add a new `strictEntities` option (default `true`) that enforces this in `checkEntityRefs`. When set to `false`, references to unknown names pass through unexpanded (legacy behaviour useful for templating). While testing the new check, the DOCTYPE depth tracker was found to treat `]` and `>` characters inside quoted entity values as if they ended the internal subset, which made declarations like `` cut the subset short and any subsequent `]` references reach the validator as undeclared. The tracker now skips over single- and double-quoted strings while walking the DOCTYPE, restoring the W3C valid/sa pass count to 120/120. 
Conformance changes: - valid/sa : 120/120 (unchanged) - not-wf/sa : 60/186 -> 64/186 (+4 strict-entity catches) The legacy "unknown-passthrough" test was renamed to "unknown-rejected" with a new "unknown-passthrough-lenient" variant that opts in via `{strictEntities: false}`. --- go/xml.go | 65 ++++++++++++++++++++++++++++++++++-------- src/xml.ts | 58 +++++++++++++++++++++++++++++++------ test/spec/entities.tsv | 3 +- 3 files changed, 104 insertions(+), 22 deletions(-) diff --git a/go/xml.go b/go/xml.go index 508fb99..a00d052 100644 --- a/go/xml.go +++ b/go/xml.go @@ -40,6 +40,11 @@ const Version = "0.1.0" // numeric character references in text and // attribute values. Default: true. // customEntities map[string]string extra named entities to recognise. +// strictEntities bool enforce XML 1.0 §4.1: every named entity +// reference must resolve to a declared +// entity. Default: true. When false, +// references to unknown names are left +// as-is in the output. // embed bool when true, keep Jsonic's JSON/JSONIC // grammar in place and splice an XML // literal alternate into the `val` rule @@ -51,6 +56,7 @@ var Defaults = map[string]any{ "namespaces": true, "entities": true, "customEntities": map[string]string{}, + "strictEntities": true, "embed": false, } @@ -69,9 +75,10 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { namespacesOn := toBool(options["namespaces"], true) entitiesOn := toBool(options["entities"], true) customEntities := toStringMap(options["customEntities"]) + strictEntities := toBool(options["strictEntities"], true) embed := toBool(options["embed"], false) - decode := buildEntityDecoder(entitiesOn, customEntities) + decode, declared := buildEntityDecoder(entitiesOn, customEntities) // Reserve #XIG (ignored) and #XOP/#XCL/#XSC (tag tokens) so they have // stable tins before the grammar references them. 
The tins are then @@ -100,7 +107,7 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { j.SetOptions(jsonic.Options{ Lex: &jsonic.LexOptions{ Match: map[string]*jsonic.MatchSpec{ - "xmltag": {Order: 100_000, Make: buildXmlTagMatcher(decode, entitiesOn, embed, xigTin, xopTin, xclTin, xscTin)}, + "xmltag": {Order: 100_000, Make: buildXmlTagMatcher(decode, declared, entitiesOn, strictEntities, embed, xigTin, xopTin, xclTin, xscTin)}, }, }, Ender: []string{"<"}, @@ -117,6 +124,7 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { "invalid_xml_char": "illegal control character in XML data", "reserved_namespace": "invalid use of a reserved namespace prefix or URI", "unbound_prefix": "element or attribute uses an undeclared namespace prefix", + "undeclared_entity": "reference to undeclared entity", }, Hint: map[string]string{ "xml_mismatched_tag": "Each opening tag must be paired with a matching closing tag.\nExpected but found .", @@ -131,6 +139,7 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { "invalid_xml_char": "Only #x9, #xA, #xD and code points >= #x20 are legal XML characters.", "reserved_namespace": "The \"xml\" prefix is fixed to " + xmlNSURI + "; the \"xmlns\" prefix cannot be redeclared, and neither URI may be bound to any other prefix or as the default namespace.", "unbound_prefix": "Declare the prefix with xmlns:prefix=\"...\" on this element or one of its ancestors.", + "undeclared_entity": "Declare the entity in the DOCTYPE internal subset, add it to the customEntities option, or set strictEntities: false to allow unresolved references through.", }, }) @@ -553,11 +562,12 @@ type EntityDecoder func(s string, dtd map[string]string) string // buildEntityDecoder returns a function that decodes the five // predefined entities, numeric character references, any // caller-supplied custom entities, and per-parse DTD entities. -// When `enabled` is false the function is an identity. 
-func buildEntityDecoder(enabled bool, custom map[string]string) EntityDecoder { - if !enabled { - return func(s string, _ map[string]string) string { return s } - } +// When `enabled` is false the function is an identity. The second +// return value is the merged set of always-declared names used for +// strict-entity validation in the matcher. +func buildEntityDecoder( + enabled bool, custom map[string]string, +) (EntityDecoder, map[string]string) { base := make(map[string]string, len(predefinedEntities)+len(custom)) for k, v := range predefinedEntities { base[k] = v @@ -565,6 +575,9 @@ func buildEntityDecoder(enabled bool, custom map[string]string) EntityDecoder { for k, v := range custom { base[k] = v } + if !enabled { + return func(s string, _ map[string]string) string { return s }, base + } var expand func(s string, dtd map[string]string, seen map[string]bool) string expand = func(s string, dtd map[string]string, seen map[string]bool) string { if !strings.Contains(s, "&") { @@ -605,7 +618,7 @@ func buildEntityDecoder(enabled bool, custom map[string]string) EntityDecoder { } return func(s string, dtd map[string]string) string { return expand(s, dtd, map[string]bool{}) - } + }, base } // buildXmlTagMatcher returns a MakeLexMatcher that recognises every @@ -620,7 +633,9 @@ func buildEntityDecoder(enabled bool, custom map[string]string) EntityDecoder { // #TX val = cdata body (verbatim, no entity decoding) func buildXmlTagMatcher( decode EntityDecoder, + declared map[string]string, entitiesOn bool, + strict bool, embed bool, xigTin, xopTin, xclTin, xscTin jsonic.Tin, ) jsonic.MakeLexMatcher { @@ -659,7 +674,7 @@ func buildXmlTagMatcher( if strings.Contains(raw, "]]>") { return lex.Bad("cdata_terminator_in_text") } - if code := checkEntityRefs(raw); code != "" { + if code := checkEntityRefs(raw, dtdEntities(lex), declared, strict); code != "" { return lex.Bad(code) } // §2.11 end-of-line normalisation. 
@@ -727,6 +742,19 @@ func buildXmlTagMatcher( subsetStart, subsetEnd := -1, -1 for i < srclen { ch := src[i] + // Skip over quoted strings so `]` and `>` inside an + // entity value or attribute default cannot terminate + // the subset prematurely. + if ch == '"' || ch == '\'' { + i++ + for i < srclen && src[i] != ch { + i++ + } + if i < srclen { + i++ + } + continue + } if ch == '[' { if depth == 0 { subsetStart = i + 1 @@ -906,7 +934,7 @@ func buildXmlTagMatcher( if code := checkChars(raw); code != "" { return lex.Bad(code) } - if code := checkEntityRefs(raw); code != "" { + if code := checkEntityRefs(raw, dtdEntities(lex), declared, strict); code != "" { return lex.Bad(code) } if _, ok := attrs[attrName]; ok { @@ -988,14 +1016,19 @@ func checkChars(s string) string { // checkEntityRefs validates that every `&` in `s` begins a well-formed // entity reference. Returns "" on success, otherwise an error code -// suitable for lex.Bad(). +// suitable for lex.Bad(). The `dtd` map supplies DOCTYPE-declared +// entity names; `declared` adds names that are always declared +// (typically the predefined and caller-supplied entities). When +// `strict` is true, references to unknown names trigger +// "undeclared_entity"; otherwise the syntactic check still runs but +// unknown names pass through. // // Well-formed forms: // // &name; - name must start with a NameStartChar // &#nnnn; - decimal numeric character reference // &#xhhhh; - hexadecimal numeric character reference -func checkEntityRefs(s string) string { +func checkEntityRefs(s string, dtd, declared map[string]string, strict bool) string { for i := 0; i < len(s); i++ { if s[i] != '&' { continue @@ -1057,6 +1090,14 @@ func checkEntityRefs(s string) string { } j += sz } + // §4.1: in strict mode the named entity must resolve. 
+ if strict { + if _, ok := declared[ref]; !ok { + if _, ok := dtd[ref]; !ok { + return "undeclared_entity" + } + } + } } i = semi } diff --git a/src/xml.ts b/src/xml.ts index ae33712..22e8d53 100644 --- a/src/xml.ts +++ b/src/xml.ts @@ -47,6 +47,12 @@ type XmlOptions = { entities: boolean // Additional named entities to recognise beyond the five predefined ones. customEntities: Record + // Whether to enforce XML 1.0 §4.1 — every named entity reference must + // resolve to a declared entity (predefined, customEntities, or a DOCTYPE + // declaration). Default: true. When set to false, references + // to unknown names are left as-is in the output (legacy behaviour + // useful for templating). + strictEntities: boolean // Embed mode. When `false` (default), the plugin configures the parser // for pure-XML input: the start rule becomes `xml`, JSON structural // tokens are disabled, and all non-XML lexing is turned off. @@ -191,6 +197,7 @@ const Xml: Plugin = (jsonic: Jsonic, options: XmlOptions) => { invalid_xml_char: 'illegal control character in XML data', reserved_namespace: 'invalid use of a reserved namespace prefix or URI', unbound_prefix: 'element or attribute uses an undeclared namespace prefix', + undeclared_entity: 'reference to undeclared entity', }, hint: { xml_mismatched_tag: `Each opening tag must be paired with a matching closing tag. @@ -206,6 +213,7 @@ Expected but found .`, invalid_xml_char: `Only #x9, #xA, #xD and code points >= #x20 are legal XML characters.`, reserved_namespace: `The "xml" prefix is fixed to ${XML_NS_URI}; the "xmlns" prefix cannot be redeclared, and neither URI may be bound to any other prefix or as the default namespace.`, unbound_prefix: `Declare the prefix with xmlns:prefix="..." 
on this element or one of its ancestors.`, + undeclared_entity: `Declare the entity in the DOCTYPE internal subset, add it to the customEntities option, or set strictEntities: false to allow unresolved references through.`, }, }) @@ -502,9 +510,15 @@ function buildEntityDecoder(options: XmlOptions) { }) } - return function decodeEntities(src: string, dtd?: Record): string { + const decoder = function decodeEntities(src: string, dtd?: Record): string { return expand(src, dtd || {}, new Set()) - } + } as DecodeEntitiesFn + decoder.declared = baseEntities + return decoder +} + +type DecodeEntitiesFn = ((src: string, dtd?: Record) => string) & { + declared: Record } // Parse the body of a DOCTYPE declaration (the text between the `[` @@ -579,10 +593,12 @@ function parseDoctypeEntities(body: string): Record { // -> #XIG (parser ignores) // -> #TX (verbatim text, no entity decoding) function buildXmlTagMatcher( - decodeEntity: (src: string, dtd?: Record) => string, + decodeEntity: DecodeEntitiesFn, embed: boolean, options: XmlOptions, ) { + const strict = options.strictEntities !== false + const declared = decodeEntity.declared // Backwards-compatible single-char predicates retained for sites that // only need a simple character class check (e.g. peek before reading // a name). Multi-byte / surrogate pair handling is in `readName` / @@ -629,7 +645,7 @@ function buildXmlTagMatcher( if (raw.indexOf(']]>') >= 0) { return { err: 'cdata_terminator_in_text' } } - const ampErr = checkEntityRefs(raw, dtd) + const ampErr = checkEntityRefs(raw, dtd, declared, strict) if (ampErr) return { err: ampErr } // §2.11: normalise CR LF and lone CR to LF before downstream processing. const normalised = normaliseLineEndings(raw) @@ -734,6 +750,15 @@ function buildXmlTagMatcher( let subsetEnd = -1 while (i < src.length) { const ch = src[i] + // Skip over quoted strings so `]` and `>` inside an + // entity value or attribute default cannot terminate the + // subset prematurely. 
+ if (ch === '"' || ch === "'") { + i++ + while (i < src.length && src[i] !== ch) i++ + if (i < src.length) i++ + continue + } if (ch === '[') { if (depth === 0) subsetStart = i + 1 depth++ @@ -892,7 +917,7 @@ function buildXmlTagMatcher( return lex.bad(charErr, valStart, i) } const dtd = (lex.ctx?.u?.dtdEntities) || {} - const ampErr = checkEntityRefs(rawVal, dtd) + const ampErr = checkEntityRefs(rawVal, dtd, declared, strict) if (ampErr) { return lex.bad(ampErr, valStart, i) } @@ -980,15 +1005,23 @@ function checkChars(s: string): string { // Validate entity references in a run of character data. Returns an // error code on the first malformed reference, or '' if every `&` -// in the input is part of a well-formed reference. The optional -// `dtd` argument lets the validator accept DOCTYPE-declared entity -// names; without it, only the syntactic form is enforced. +// in the input is part of a well-formed reference. The `dtd` map +// supplies DOCTYPE-declared entity names; `extra` adds named +// entities to consider declared (typically the predefined and +// caller-supplied entities). When `strict` is true, references to +// unknown names trigger `undeclared_entity`; when false (legacy mode), +// the syntactic check still runs but unknown names pass through. // // Well-formed forms: // &name; — name must start with a NameStartChar // &#nnnn; — decimal numeric character reference // &#xhhhh; — hexadecimal numeric character reference -function checkEntityRefs(s: string, _dtd?: Record): string { +function checkEntityRefs( + s: string, + dtd?: Record, + extra?: Record, + strict?: boolean, +): string { for (let i = 0; i < s.length; i++) { if (s[i] !== '&') continue const semi = s.indexOf(';', i + 1) @@ -1018,6 +1051,12 @@ function checkEntityRefs(s: string, _dtd?: Record): string { if (!isNameCharCP(cp)) return 'bad_entity_ref' j += cp > 0xffff ? 2 : 1 } + // §4.1: in strict mode the named entity must resolve. 
+ if (strict && + !(extra && Object.prototype.hasOwnProperty.call(extra, ref)) && + !(dtd && Object.prototype.hasOwnProperty.call(dtd, ref))) { + return 'undeclared_entity' + } } i = semi } @@ -1138,6 +1177,7 @@ Xml.defaults = { namespaces: true, entities: true, customEntities: {}, + strictEntities: true, embed: false, } as XmlOptions diff --git a/test/spec/entities.tsv b/test/spec/entities.tsv index 9292a4c..7e6aa97 100644 --- a/test/spec/entities.tsv +++ b/test/spec/entities.tsv @@ -16,6 +16,7 @@ num-hex-multi AB {"name":"a","localName":"a","attributes":{},"c num-hex-astral 😀 {"name":"a","localName":"a","attributes":{},"children":["\uD83D\uDE00"]} entity-in-attr {"name":"a","localName":"a","attributes":{"title":"Tom & Jerry"},"children":[]} num-in-attr {"name":"a","localName":"a","attributes":{"v":"AB"},"children":[]} -unknown-passthrough &unknown; {"name":"a","localName":"a","attributes":{},"children":["&unknown;"]} +unknown-rejected &unknown; ERROR:undeclared_entity +unknown-passthrough-lenient &unknown; {"name":"a","localName":"a","attributes":{},"children":["&unknown;"]} {"strictEntities":false} custom-entity © 2025 all rights {"name":"a","localName":"a","attributes":{},"children":["© 2025\u00a0all rights"]} {"customEntities":{"nbsp":"\u00a0","copy":"©"}} entities-disabled & {"name":"a","localName":"a","attributes":{},"children":["&"]} {"entities":false} From 994cb418bb7276e0c9b8988afb5d25cf3be759a1 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 16:35:25 +0000 Subject: [PATCH 15/15] xml: apply DOCTYPE-supplied attribute defaults from Parse `` declarations from the DOCTYPE internal subset and use them to fill in attributes that are missing from element instances. Both literal defaults and the `#FIXED "value"` form are honoured; `#REQUIRED` and `#IMPLIED` declarations contribute nothing because they have no default value. 
Implementation: - `parseDoctypeAttlists` scans for each `` declaration, skips the AttType (a bare uppercase identifier, an enumeration `( ... )`, or `NOTATION ( ... )`), and collects the default value. The result is keyed by element name then attribute name and stored on the per-parse context as `dtdAttrDefaults`. - The `@element-open` and `@element-selfclose` actions consult that map via `applyAttrDefaults` and merge in any defaults that the parsed element does not already provide. A new test/spec/dtd-attlist.tsv exercises basic defaults, override by an instance attribute, multiple declarations on one element, `#FIXED`, enumeration types, the no-default `#REQUIRED`/`#IMPLIED` forms, and per-element scoping. All 126 TS tests and the Go suite pass; W3C conformance numbers are unchanged. --- go/xml.go | 179 +++++++++++++++++++++++++++++++++++--- src/xml.ts | 157 ++++++++++++++++++++++++++++++--- test/spec/dtd-attlist.tsv | 16 ++++ 3 files changed, 330 insertions(+), 22 deletions(-) create mode 100644 test/spec/dtd-attlist.tsv diff --git a/go/xml.go b/go/xml.go index a00d052..e743b17 100644 --- a/go/xml.go +++ b/go/xml.go @@ -222,7 +222,7 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { r.Node = map[string]any{ "name": name, "localName": name, - "attributes": attrs, + "attributes": applyAttrDefaults(attrs, name, ctx), "children": []any{}, } }), @@ -234,7 +234,7 @@ func Xml(j *jsonic.Jsonic, options map[string]any) error { r.Node = map[string]any{ "name": name, "localName": name, - "attributes": attrs, + "attributes": applyAttrDefaults(attrs, name, ctx), "children": []any{}, } }), @@ -385,6 +385,148 @@ func dtdEntities(lex *jsonic.Lex) map[string]string { return m } +// dtdAttrDefaults reads the per-parse DOCTYPE-supplied attribute +// default map keyed by element name (set by the DOCTYPE matcher +// path). Returns nil if none have been registered yet. 
+func dtdAttrDefaults(ctx *jsonic.Context) map[string]map[string]string { + if ctx == nil || ctx.U == nil { + return nil + } + m, _ := ctx.U["dtdAttrDefaults"].(map[string]map[string]string) + return m +} + +// applyAttrDefaults merges in DOCTYPE-supplied default attribute +// values for any attribute missing from the parsed element instance. +// Returns the original map if no defaults apply. +func applyAttrDefaults( + attrs map[string]any, elemName string, ctx *jsonic.Context, +) map[string]any { + all := dtdAttrDefaults(ctx) + if all == nil { + return attrs + } + defaults, ok := all[elemName] + if !ok { + return attrs + } + for k, v := range defaults { + if _, present := attrs[k]; !present { + attrs[k] = v + } + } + return attrs +} + +// parseDoctypeAttlists scans a DOCTYPE internal-subset body and +// extracts every `` default +// attribute value, keyed by element name and attribute name. Both +// literal defaults and `#FIXED "value"` defaults are returned; +// `#REQUIRED` and `#IMPLIED` declarations contribute nothing because +// they have no default value. +func parseDoctypeAttlists(body string) map[string]map[string]string { + skipSpace := func(s int) int { + for s < len(body) && isSpace(body[s]) { + s++ + } + return s + } + out := map[string]map[string]string{} + + i := 0 + for i < len(body) { + idx := strings.Index(body[i:], "= len(body) { + break + } + if body[j] == '>' { + j++ + break + } + attrName, attrEnd, ok := readName(body, j) + if !ok { + j++ + continue + } + j = attrEnd + j = skipSpace(j) + + // Skip AttType. 
+ if j < len(body) && body[j] == '(' { + close := strings.Index(body[j:], ")") + if close < 0 { + j = len(body) + break + } + j = j + close + 1 + } else if strings.HasPrefix(body[j:], "NOTATION") { + j += len("NOTATION") + j = skipSpace(j) + if j < len(body) && body[j] == '(' { + close := strings.Index(body[j:], ")") + if close < 0 { + j = len(body) + break + } + j = j + close + 1 + } + } else { + for j < len(body) && body[j] >= 'A' && body[j] <= 'Z' { + j++ + } + } + j = skipSpace(j) + + // DefaultDecl. + if strings.HasPrefix(body[j:], "#REQUIRED") { + j += len("#REQUIRED") + continue + } + if strings.HasPrefix(body[j:], "#IMPLIED") { + j += len("#IMPLIED") + continue + } + if strings.HasPrefix(body[j:], "#FIXED") { + j += len("#FIXED") + j = skipSpace(j) + } + if j < len(body) && (body[j] == '"' || body[j] == '\'') { + quote := body[j] + j++ + valStart := j + for j < len(body) && body[j] != quote { + j++ + } + if j >= len(body) { + break + } + value := body[valStart:j] + if out[elemName] == nil { + out[elemName] = map[string]string{} + } + out[elemName][attrName] = value + j++ + } + } + i = j + } + return out +} + // parseDoctypeEntities scans a DOCTYPE internal-subset body and // extracts every internal general entity declaration of the form // `` (or single-quoted). Parameter entity @@ -774,16 +916,16 @@ func buildXmlTagMatcher( return lex.Bad("unterminated_doctype") } finish := i + 1 - // Extract any general internal entity - // declarations from the internal subset and stash them - // on the per-parse context. The matcher's text and - // attribute paths read this map back via lex.Ctx.U. + // Extract internal-subset declarations and stash them + // on the per-parse context. The matcher's text / + // attribute paths and the element actions read these + // back via lex.Ctx.U. 
if subsetStart >= 0 && subsetEnd > subsetStart && lex.Ctx != nil { - found := parseDoctypeEntities(src[subsetStart:subsetEnd]) - if len(found) > 0 { - if lex.Ctx.U == nil { - lex.Ctx.U = map[string]any{} - } + subset := src[subsetStart:subsetEnd] + if lex.Ctx.U == nil { + lex.Ctx.U = map[string]any{} + } + if found := parseDoctypeEntities(subset); len(found) > 0 { existing, _ := lex.Ctx.U["dtdEntities"].(map[string]string) if existing == nil { existing = map[string]string{} @@ -793,6 +935,21 @@ func buildXmlTagMatcher( } lex.Ctx.U["dtdEntities"] = existing } + if found := parseDoctypeAttlists(subset); len(found) > 0 { + existing, _ := lex.Ctx.U["dtdAttrDefaults"].(map[string]map[string]string) + if existing == nil { + existing = map[string]map[string]string{} + } + for elem, defs := range found { + if existing[elem] == nil { + existing[elem] = map[string]string{} + } + for k, v := range defs { + existing[elem][k] = v + } + } + lex.Ctx.U["dtdAttrDefaults"] = existing + } } tsrc := src[sI:finish] tkn := lex.Token("#XIG", xigTin, tsrc, tsrc) diff --git a/src/xml.ts b/src/xml.ts index 22e8d53..7c70ebf 100644 --- a/src/xml.ts +++ b/src/xml.ts @@ -239,22 +239,22 @@ Expected but found .`, // document hasn't already produced a root (XML 1.0 §2.1). 
'@no-root-yet': (_r: Rule, ctx: Context) => true !== ctx.u.rootSeen, - '@element-open': (r: Rule) => { + '@element-open': (r: Rule, ctx: Context) => { const v = r.o0.val r.node = { name: v.name, localName: v.name, - attributes: v.attributes, + attributes: applyAttrDefaults(v.attributes, v.name, ctx), children: [], } }, - '@element-selfclose': (r: Rule) => { + '@element-selfclose': (r: Rule, ctx: Context) => { const v = r.o0.val r.node = { name: v.name, localName: v.name, - attributes: v.attributes, + attributes: applyAttrDefaults(v.attributes, v.name, ctx), children: [], } }, @@ -521,6 +521,133 @@ type DecodeEntitiesFn = ((src: string, dtd?: Record) => string) declared: Record } +// Parse the body of a DOCTYPE declaration (the text between the `[` +// and `]` of the internal subset) and extract every `` +// declaration's default attribute values, keyed by element name and +// attribute name. Both literal defaults and `#FIXED "value"` defaults +// are returned; `#REQUIRED` and `#IMPLIED` declarations contribute +// nothing because they have no default value. +// +// Used by the matcher's element actions to fill in attributes that +// were not present on the element instance. +function parseDoctypeAttlists(body: string): Record> { + const isSpace = (ch: string) => + ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' + const isUpperAscii = (ch: string) => + ch >= 'A' && ch <= 'Z' + const skipSpace = (s: number): number => { + while (s < body.length && isSpace(body[s])) s++ + return s + } + const out: Record> = {} + + let i = 0 + while (i < body.length) { + const idx = body.indexOf('' or EOF. + while (j < body.length) { + j = skipSpace(j) + if (j >= body.length) break + if (body[j] === '>') { j++; break } + + const attrName = readNameInBody(body, j) + if (!attrName) { j++; continue } + j = attrName.end + j = skipSpace(j) + + // Skip AttType: enumeration `( ... )`, `NOTATION ( ... 
)`, or + // a bare type identifier (CDATA, ID, IDREF, IDREFS, NMTOKEN, + // NMTOKENS, ENTITY, ENTITIES). + if (body[j] === '(') { + const close = body.indexOf(')', j) + if (close < 0) { j = body.length; break } + j = close + 1 + } else if (body.startsWith('NOTATION', j)) { + j += 'NOTATION'.length + j = skipSpace(j) + if (body[j] === '(') { + const close = body.indexOf(')', j) + if (close < 0) { j = body.length; break } + j = close + 1 + } + } else { + while (j < body.length && isUpperAscii(body[j])) j++ + } + j = skipSpace(j) + + // DefaultDecl. + if (body.startsWith('#REQUIRED', j)) { + j += '#REQUIRED'.length + continue + } + if (body.startsWith('#IMPLIED', j)) { + j += '#IMPLIED'.length + continue + } + if (body.startsWith('#FIXED', j)) { + j += '#FIXED'.length + j = skipSpace(j) + } + if (body[j] === '"' || body[j] === "'") { + const quote = body[j] + j++ + const valStart = j + while (j < body.length && body[j] !== quote) j++ + if (j >= body.length) break + const value = body.substring(valStart, j) + if (!out[elemName.name]) out[elemName.name] = {} + out[elemName.name][attrName.name] = value + j++ + } + } + i = j + } + return out +} + +// applyAttrDefaults merges in DOCTYPE-supplied default attribute +// values (``) for any attribute +// missing from the parsed element instance. Returns the original +// attributes object if no defaults apply. +function applyAttrDefaults( + attrs: Record, + elemName: string, + ctx: Context, +): Record { + const defaults = ctx?.u?.dtdAttrDefaults?.[elemName] + if (!defaults) return attrs + const out = { ...attrs } + for (const k of Object.keys(defaults)) { + if (!Object.prototype.hasOwnProperty.call(out, k)) { + out[k] = defaults[k] + } + } + return out +} + +// readNameInBody is a free-function counterpart to the matcher's +// `readName` closure used by the DTD parsers, which run before the +// matcher closure has been instantiated. 
+function readNameInBody(s: string, start: number): { name: string; end: number } | null { + if (start >= s.length) return null + const cp0 = s.codePointAt(start)! + if (!isNameStartCP(cp0)) return null + let i = start + (cp0 > 0xffff ? 2 : 1) + while (i < s.length) { + const cp = s.codePointAt(i)! + if (!isNameCharCP(cp)) break + i += cp > 0xffff ? 2 : 1 + } + return { name: s.substring(start, i), end: i } +} + // Parse the body of a DOCTYPE declaration (the text between the `[` // and `]` of the internal subset) and extract every internal general // entity declaration ``. Parameter entity @@ -772,15 +899,23 @@ function buildXmlTagMatcher( return lex.bad('unterminated_doctype', sI, src.length) } const end = i + 1 - // Extract any general internal entity - // declarations from the internal subset and stash them on - // the per-parse context. The matcher's text and attribute - // paths read this map back via lex.ctx.u.dtdEntities. + // Extract internal-subset declarations and stash them on + // the per-parse context. The matcher's text/attribute paths + // and the element actions read these back via lex.ctx.u. 
if (subsetStart >= 0 && subsetEnd > subsetStart && lex.ctx) { const u: any = lex.ctx.u || (lex.ctx.u = {}) - const found = parseDoctypeEntities(src.substring(subsetStart, subsetEnd)) - if (Object.keys(found).length > 0) { - u.dtdEntities = { ...(u.dtdEntities || {}), ...found } + const subset = src.substring(subsetStart, subsetEnd) + const ents = parseDoctypeEntities(subset) + if (Object.keys(ents).length > 0) { + u.dtdEntities = { ...(u.dtdEntities || {}), ...ents } + } + const atts = parseDoctypeAttlists(subset) + if (Object.keys(atts).length > 0) { + const merged = { ...(u.dtdAttrDefaults || {}) } + for (const elem of Object.keys(atts)) { + merged[elem] = { ...(merged[elem] || {}), ...atts[elem] } + } + u.dtdAttrDefaults = merged } } const tkn = lex.token('#XIG', src.substring(sI, end), src.substring(sI, end), pnt) diff --git a/test/spec/dtd-attlist.tsv b/test/spec/dtd-attlist.tsv new file mode 100644 index 0000000..f1ef44e --- /dev/null +++ b/test/spec/dtd-attlist.tsv @@ -0,0 +1,16 @@ +# name input expected opts +# DOCTYPE-supplied default attribute values via . The +# plugin parses every declaration in the internal subset +# and, when an element instance does not carry an attribute, fills +# in the declared default value. +# +# #REQUIRED and #IMPLIED contribute nothing because they have no +# default value; #FIXED "value" and bare quoted defaults are honoured. 
+ +attlist-basic-default ]> {"name":"doc","localName":"doc","attributes":{"x":"default"},"children":[]} +attlist-default-overridden ]> {"name":"doc","localName":"doc","attributes":{"x":"custom"},"children":[]} +attlist-multiple-defaults ]> {"name":"doc","localName":"doc","attributes":{"a":"A","b":"B"},"children":[]} +attlist-fixed ]> {"name":"doc","localName":"doc","attributes":{"lang":"en"},"children":[]} +attlist-enumeration-default ]> {"name":"doc","localName":"doc","attributes":{"x":"b"},"children":[]} +attlist-required-and-implied-no-default ]> {"name":"doc","localName":"doc","attributes":{"y":"Y"},"children":[]} +attlist-applies-per-element ]> {"name":"root","localName":"root","attributes":{},"children":[{"name":"a","localName":"a","attributes":{"x":"AX"},"children":[]},{"name":"b","localName":"b","attributes":{"x":"BX"},"children":[]}]}