From 4e15d61936fb0920d044d530653d38ea47fd7ce2 Mon Sep 17 00:00:00 2001 From: atellier2 Date: Mon, 4 May 2026 08:23:55 +0200 Subject: [PATCH 1/4] =?UTF-8?q?docs:=20refonte=20du=20README=20et=20cr?= =?UTF-8?q?=C3=A9ation=20du=20r=C3=A9pertoire=20docs/=20(#1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 379 ++++++++++++++++--------------------- docs/api.md | 436 +++++++++++++++++++++++++++++++++++++++++++ docs/architecture.md | 207 ++++++++++++++++++++ docs/examples.md | 274 +++++++++++++++++++++++++++ 4 files changed, 1077 insertions(+), 219 deletions(-) create mode 100644 docs/api.md create mode 100644 docs/architecture.md create mode 100644 docs/examples.md diff --git a/README.md b/README.md index b510aa1..03c250a 100644 --- a/README.md +++ b/README.md @@ -1,247 +1,188 @@ -This repository has plain dependency-free JavaScript that can process GEDCOM in various ways and forms. -It is designed to have various operating modes -which are handled by separate module files to facilitate uses which do not need them all. +# js-gedcom -This repository does not currently handle different character sets. -It assumes you have correctly parsed bytes into a JavaScript string before processing. +A dependency-free JavaScript library for parsing, validating, and creating [GEDCOM](https://gedcom.io/) genealogy files. Supports both GEDCOM 5.x and [FamilySearch GEDCOM 7](https://gedcom.io/specifications/FamilySearchGEDCOMv7.html). -To use this project as a FamilySearch GEDCOM 7 validator, visit . +> **Online validator**: To validate a GEDCOM 7 file directly in your browser, visit . 
-# Status +--- -- [x] Tag-oriented layer - - [x] Tag-oriented parser - - [x] With CONT and CONC handling - - [x] With multiple dialects - - [x] Manual creation of structures - - [x] Tag-oriented JSON serializer/deserializer - - [x] `querySelector` and `querySelectorAll` accepting `"HEAD.GEDC"`-type tag paths -- [x] Type-aware layer - - [x] Parse spec from - - [x] Parse tag-oriented into type-aware - - [x] Context-aware structure type - - [x] Error for out-of-place standard tags - - [x] Error for cardinality violations - - [x] Structure-type-aware payload parsing - - [x] Error for malformed payloads - - [x] Error for enumeration set membership violations - - [x] Error for pointed-to type violations - - [x] Support extensions, schema - - [x] Warn about undocumented, unregistered, aliased, and relocated - - [x] Warn about deprecations - - [x] EXID.TYPE - - [ ] g7:enumset-ord-STAT members COMPLETED, EXCLUDED, INFANT, PRE_1970, SUBMITTED, UNCLEARED - - [ ] Warn about not-recommended patterns - - [x] Manual creation of structures - - [x] Creation, pointer handling, etc - - [x] Error checking - - [x] on request via `.validate()` - - [ ] automatic partial checking on creation: payload types, superstructure not having too many of non-plural substructures - - [x] Serialize to tag-oriented - - [x] Schema deduction - - [x] Serialization - - [x] Type-oriented JSON serializer/deserializer - - [x] Datatype serialization/deserialization - - [x] Structure serialization/deserialization - - [x] `find` and `findOrCreate` accepting arbitrarily-nested structure types and payload values (e.g. for finding a record with a given `EXID` and `EXID-TYPE`). - -So far, the testing has been limited to starting with maximal70.ged augmented with various extensions and verifying the following properties, mostly by hand, also checking that all warnings and errors issued are correct: +## What is GEDCOM? 
+ +GEDCOM (Genealogical Data Communication) is the standard format for exchanging family tree data between genealogy applications. A GEDCOM file is a plain-text tree of structures. Each line contains a **level**, a **tag**, and an optional **payload**: + +``` +0 HEAD +1 GEDC +2 VERS 7.0 +0 @I1@ INDI +1 NAME John /Doe/ +1 BIRT +2 DATE 1 JAN 1900 +``` + +Structures nest by level: a level-`2` line is a child of the last level-`1` line. + +--- + +## Architecture + +The library is organized into **three layers**, each building on the previous: + +| Layer | Module | Role | +|-------|--------|------| +| Tag-oriented | `gedcstruct.js` | Parse/serialize raw GEDCOM syntax | +| Type-aware | `g7structure.js` | Validate and work with GEDCOM 7 semantics | +| Specification | `g7lookups.js` | FamilySearch GEDCOM 7 registry | + +The tag-oriented layer alone is sufficient to read and manipulate GEDCOM files without strict validation. The type-aware layer requires the FamilySearch specification and enforces cardinality rules, payload types, and extension handling. + +→ See [docs/architecture.md](docs/architecture.md) for a detailed explanation. 
+ +--- + +## Quick Start + +### Read a GEDCOM file (tag-oriented layer) ```js -gedc = GEDCStruct.fromString(maximal, g7ConfGEDC) -maximal2 = gedc.toString() -// assert(maximal2 == maximal) - -json_gedc = gedc.map(e=>e.toJSON()) -gedc2 = GEDCStruct.fromJSON(json) -maximal3 = gedc2.map(e => e.toString('\n',-1,false)).join('') -// assert(maximal3 == maximal) - -ged7 = G7Dataset.fromGEDC(gedc, g7validation) -gedc3 = ged7.toGEDC() -maximal4 = gedc3.toString() -// assert(maximal4 == maximal modulo some reordering and normalization) - -json_ged7 = ged7.toJSON() -ged72 = G7Dataset.fromJSON(json, g7validation) -gedc4 = ged72.toGEDC() -maximal5 = gedc4.toString() -// assert(maximal5 == maximal4) +import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' + +const gedc = GEDCStruct.fromString(gedcomText, g7ConfGEDC, console.error) +// gedc is an array of level-0 GEDCStruct nodes + +const version = gedc.querySelector('HEAD.GEDC.VERS')?.payload // "7.0" +const individuals = [...gedc.querySelectorAll('.INDI')] // all INDI records ``` -I've also done just a little ad-hoc testing to verify that if I create a G7Dataset programmatically it it populates its schema and otherwise serializes as expected. - -# GEDC parser/serializer - -Parses a GEDCOM dataset string -into a sequence of GEDC structures. 
-Each structure contains - -- `tag` -- optionally `payload`, which is one of - - a string - - a (pointer to) another GEDC structure - - `null` for an encoded pointer with no destination -- optionally `sub`, which is a list of other structures - -For internal use, we also track the following: - -- `sup`, the (unique) structure that this structure is in the `sub` list of, or null for top-level structures -- `references`, a (usually empty) list of other structures that this is the `payload` of -- optionally `id`, a recommended xref_id to use in serializing pointers to this structure - -Both GEDCStruct -and the list of GEDCStruct returned by `fromJSON` and `fromString` -have two utility methods, `querySelect` and `querySelectAll`, -modeled after the corresponding methods in DOM Elements -but using GEDCOM dot-notation paths instead. In particular, - -- `XYZ` matches any structure with tag `XYZ` -- `.XYZ` matches any top-level structure with tag `XYZ` -- `ABC.XYZ` matches any structure with tag `XYZ` that is a substructure of a structure with tag `ABC` -- `ABC..XYZ` matches any structure with tag `XYZ` that contained within a structure with tag `ABC` - -GEDC parsing takes care of converting xref_id to pointers -and managing CONT and CONC pseudostructures; -GEDC serializing handles these going the other way. - -GEDC parsing and serializing both accept a configuration object with the following keys. - -Parsing configurations: - -- `len` = `0`{.js} - - positive: limit lines to this many characters - - zero: no length limit - - negative: no length limit and no CONC allowed - - - `tag` = `/.*/`{.js} - - A regex to limit the set of permitted tags. - Tags will always match at least `/^[^@\p{Cc}\p{Z}][^\p{Cc}\p{Z}]*$/u`{.js}: - that is, 1 or more characters, - no whitespace or control characters, - and not beginning with `@`. - -- `xref` = `/.*/`{.js} - - A regex to limit the set of permitted cross-reference identifiers. 
- Cross-reference identifiers will always match at least `/^([^@#\p{Cc}]|\t)([^@\p{Cc}]|\t)*$/u`{.js}: - that is, one or more characters, - no non-tab control characters, - no `@`, - and not beginning with `#`. - -- `linesep` = `/.*/`{.js} - - A regex to limit what is considered a line separation. - Line separations will always match at least /^[\n\r]\p{WSpace}*$/u: - that is, a carriage return or line feed - followed by whitespace. - -- `delim` = `/.*/`{.js} - - A regex to limit what is considered a delimiter. - Delimiters will always match at least /^[ \t\p{Zs}]+$/u: - that is, linear whitespace. - - A single space will always be used during serialization, regardless of the value of `delim`. - -- `payload` = `/.*/`{.js} - - A regex to limit permitted string payloads. - -- `zeros` = `false` - - If `true`, allow leading zeros on levels (e.g. `00` or `01`) - -Serializing configurations: - -- `newline` = `'\n'`{.js} - - A string to insert between lines when serializing. - Should match `linesep`. - -- `escapes` = `false` - - If `true`, serialize payloads beginning `@#` as `@#` instead of `@@#`. - Both always deserialize as the same thing. - -Two special config objects are provided to match the GEDCOM 5.x and FamilySearch GEDCOM 7.x specs: +### Read and validate a GEDCOM 7 file ```js -/** GEDCOM 5.x-compatible configuration */ -const g5ConfGEDC = { - len: 255, - tag: /^[0-9a-z_A-Z]{1,31}$/u, - xref: /^[0-9a-z_A-Z][^\p{Cc}@]{0,19}$/u, - linesep: /^[\r\n][\r\n \t]*$/, - delim: /^ $/, - zeros: false, - escapes: true, -} - -/** GEDCOM 7.x-compatible configuration */ -const g7ConfGEDC = { - len: -1, - tag: /^([A-Z]|_[0-9_A-Z])[0-9_A-Z]*$/u, - xref: /^([A-Z]|_[0-9_A-Z])[0-9_A-Z]*$/u, - linesep: /^(\r\n?|\n\r?)$/, - delim: /^ $/, - payload: /^.+$/, - zeros: false, - escapes: false, -} +import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' +import { G7Lookups } from './g7lookups.js' +import { G7Dataset } from './g7structure.js' + +// 1. 
Load the GEDCOM 7 specification +const spec = await fetch('https://raw.githubusercontent.com/FamilySearch/GEDCOM-registries/main/generated_files/g7validation.json') + .then(r => r.json()) +const lookup = new G7Lookups(spec) +lookup.err = msg => console.error('Error:', msg) +lookup.warn = msg => console.warn('Warning:', msg) + +// 2. Parse +const gedc = GEDCStruct.fromString(gedcomText, g7ConfGEDC) +const dataset = G7Dataset.fromGEDC(gedc, lookup) + +// 3. Validate +dataset.validate() ``` -As of commit 34dd91ad90ce5e8301e943b4d559a603028b45c9 (2023-07-18), the implementation round-trips `maximal70.ged` from ; that is maximal70.ged → fromString → toJSON → fromJSON → toString == maximal70.ged. -Note that this does not constitute an exhaustive test -and the code may contain bugs. +### Build a dataset programmatically -# FamilySearch GEDCOM 7 Type Checker +```js +const dataset = new G7Dataset(lookup) -Uses a parsed schema like [g7validation.json](https://github.com/FamilySearch/GEDCOM-registries/blob/main/generated_files/g7validation.json) -to convert a GEDC dataset into a FamilySearch GEDCOM 7 dataset. -Various FamilySearch GEDCOM 7 rules are embedded within the code, -including extension handling, -payload datatypes, -and structure ordering rules. +// Create an individual +const person = dataset.createRecord('https://gedcom.io/terms/v7/record-INDI') -A G7Structure contains +// Add a birth event +person.createSubstructure('https://gedcom.io/terms/v7/BIRT', 'Y') + .createSubstructure('https://gedcom.io/terms/v7/DATE', '1 JAN 1900') -- `type`, a URI or unregistered extension tag -- optionally `payload`, which may have many different types depending on the `type` -- `sub`, which is a map with `type` keys and list-of-G7Structure values +// Serialize to GEDCOM text +const output = dataset.toString() +``` -The `type` is omitted during JSON serialization as it is available in the G7Structure's containing structure. 
+### Find or create (idempotent writes) -For internal use, we also track the following: +```js +// Find an individual by REFN value, or create it if not found +const person = dataset.findOrCreate( + 'https://gedcom.io/terms/v7/record-INDI', -1, + 'https://gedcom.io/terms/v7/REFN', 'ID-42' +) + +// Calling again with the same arguments returns the same object +const same = dataset.findOrCreate( + 'https://gedcom.io/terms/v7/record-INDI', -1, + 'https://gedcom.io/terms/v7/REFN', 'ID-42' +) +// person === same → true +``` -- `sup`, the (unique) structure that this structure is in the `sub` list of, or null for top-level structures -- `references`, a (usually empty) list of other structures that this is the `payload` of -- optionally `id`, a recommended xref_id to use in serializing pointers to this structure +--- -Because some operations are handled centrally (such as determining which extension tags are in use), -a G7Dataset is used to enclose the G7Structures; -it contains +## Modules -- `header`, a `G7Structure` with type -- `records`, which is exactly like G7Structure's `sub` +### `gedcstruct.js` — Tag-oriented layer -# License +Turns GEDCOM text into a tree of `GEDCStruct` nodes. Handles `CONT`/`CONC` pseudo-structures, cross-reference pointers, and 5.x or 7.x dialects. -This code is released under both the MIT and UNLICENSE. -The dual licensing is motivated by the following observations: +Exports: `GEDCStruct`, `g5ConfGEDC`, `g7ConfGEDC` -- I, Luther Tychonievich, would like to participate in a small bit of ideological activism by promoting the Unlicense's goal: to disclaim copyright monopoly interest. -- I would also like as many people to use the code as possible. Since the Unlicense is not a proven or well known license, I also offer this code under the MIT license, which is ubiquitous and accepted by almost everyone. +### `g7lookups.js` — GEDCOM 7 specification -More specifically, this code and all its dependencies are compatible with this licensing choice. 
Any dependencies (direct and transitive) will always be limited to permissive licenses. This code will never depend on code that is not permissively licensed. This means rejecting any dependency that uses a copyleft license such as the GPL, LGPL, MPL or any of the Creative Commons ShareAlike licenses. +Wraps the [FamilySearch GEDCOM Registries](https://github.com/FamilySearch/GEDCOM-registries) JSON to provide tag definitions, payload types, enumeration sets, and extension handling. +Exports: `G7Lookups` -# Contributing +### `g7structure.js` — Type-aware layer -Reports of errors or gaps in the code are very welcome, preferably as [issues on github](https://github.com/gedcom7code/js-gedcom/issues). -Pull requests extending functionality or fixing errors are also welcome. +Converts tag-oriented nodes into type-validated `G7Structure` objects. Understands GEDCOM 7 semantics, cardinality rules, payload types, and extension handling. + +Exports: `G7Structure`, `G7Dataset` + +### `g7datatypes.js` — Payload data types + +Implements typed payload values: `G7Date`, `G7DateValue`, `G7Age`, `G7Time`, `G7Enum`. + +--- + +## Documentation + +- [Architecture and data flow](docs/architecture.md) +- [API reference](docs/api.md) +- [Practical examples](docs/examples.md) + +--- + +## Character Encoding + +This library operates on JavaScript strings. It does not handle byte-level encoding conversion (UTF-8, ANSEL, etc.) — you must decode the file into a JavaScript string before passing it to the library. + +--- + +## License + +Released under both the [MIT License](LICENSE-MIT) and the [Unlicense](LICENSE-UNLICENSE). Both apply simultaneously; use whichever suits you. + +--- + +## Contributing + +Bug reports and pull requests are welcome via [GitHub Issues](https://github.com/gedcom7code/js-gedcom/issues). + +--- + +## Development Status + +
+<details><summary>Feature checklist</summary> + +- [x] Tag-oriented layer + - [x] Parser with CONT/CONC handling and multiple dialects + - [x] Manual structure creation + - [x] JSON serializer/deserializer + - [x] `querySelector` and `querySelectorAll` +- [x] Type-aware layer + - [x] Load GEDCOM 7 specification from GEDCOM-registries + - [x] Context-aware structure type resolution + - [x] Payload type validation and cardinality rules + - [x] Extension handling (undocumented, unregistered, aliased, relocated) + - [x] Deprecation warnings + - [x] Manual structure creation with error checking (`.validate()`) + - [ ] Automatic partial checking on creation + - [x] Serialize to tag-oriented layer with schema deduction + - [x] JSON serializer/deserializer + - [x] `find` and `findOrCreate` + +</details>
diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..2b8376a --- /dev/null +++ b/docs/api.md @@ -0,0 +1,436 @@ +# API Reference + +## Module `gedcstruct.js` + +### Class `GEDCStruct` + +Represents a node in the GEDCOM syntactic tree (tag-oriented layer). + +#### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `tag` | `string` | The GEDCOM tag of this node | +| `payload` | `string \| GEDCStruct \| null \| undefined` | Node value: text, pointer to another node, `null` if the pointer destination is unknown, or `undefined` if absent | +| `sub` | `GEDCStruct[]` | Child sub-structures | +| `superstruct` | `GEDCStruct \| null` | Parent structure (read-only) | +| `references` | `GEDCStruct[]` | Structures pointing to this node (read-only) | +| `xref_id` | `string \| undefined` | Recommended identifier for serializing pointers to this node (read-only) | + +#### Static methods + +--- + +**`GEDCStruct.fromString(input, config?, logger?)`** + +Parses a GEDCOM text into an array of `GEDCStruct` (level-0 nodes). + +| Parameter | Type | Description | +|-----------|------|-------------| +| `input` | `string` | The complete GEDCOM text | +| `config` | `object` | Dialect configuration. Use `g7ConfGEDC` or `g5ConfGEDC`. Default: permissive generic dialect | +| `logger` | `function(msg)` | Called for each syntax error | + +Returns: `GEDCStruct[]` + +--- + +**`GEDCStruct.fromJSON(obj)`** + +Reconstructs a `GEDCStruct` array from a JSON object produced by `toJSON()`. + +Returns: `GEDCStruct[]` + +--- + +#### Instance methods + +**`toString(newline?, maxlen?, escapes?)`** + +Serializes this node and its descendants to GEDCOM text. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `newline` | `string` | `'\n'` | Line separator | +| `maxlen` | `number` | `0` | Max characters per line (0 = unlimited, negative = unlimited without CONC) | +| `escapes` | `boolean` | `false` | If `true`, do not escape `@#` as `@@#` | + +Returns: `string` + +--- + +**`toJSON()`** + +Serializes this node to a JSON object that can be restored with `fromJSON`. + +Returns: `object` + +--- + +**`querySelector(path)`** + +Returns the **first** node matching the given tag path. + +**`querySelectorAll(path)`** + +Returns an iterator over **all** nodes matching the given tag path. + +Path syntax: + +| Path | Meaning | +|------|---------| +| `XYZ` | Any node with tag `XYZ` | +| `.XYZ` | A root (level-0) node with tag `XYZ` | +| `ABC.XYZ` | An `XYZ` that is a direct child of an `ABC` | +| `ABC..XYZ` | An `XYZ` that is any descendant of an `ABC` | + +Example: +```js +gedc.querySelector('HEAD.GEDC.VERS') // GEDCOM version +gedc.querySelectorAll('.INDI') // all individuals +``` + +--- + +### Configuration objects + +#### `g7ConfGEDC` — FamilySearch GEDCOM 7 + +```js +{ + len: -1, // no line length limit, CONC not allowed + tag: /^([A-Z]|_[0-9_A-Z])[0-9_A-Z]*$/u, + xref: /^([A-Z]|_[0-9_A-Z])[0-9_A-Z]*$/u, + linesep: /^(\r\n?|\n\r?)$/, + delim: /^ $/, + payload: /^.+$/, + zeros: false, + escapes: false, +} +``` + +#### `g5ConfGEDC` — GEDCOM 5.x + +```js +{ + len: 255, // max 255 characters per line + tag: /^[0-9a-z_A-Z]{1,31}$/u, + xref: /^[0-9a-z_A-Z][^\p{Cc}@]{0,19}$/u, + linesep: /^[\r\n][\r\n \t]*$/, + delim: /^ $/, + zeros: false, + escapes: true, +} +``` + +#### Configuration keys + +| Key | Role | +|-----|------| +| `len` | Max line length (0 = unlimited, negative = unlimited without CONC) | +| `tag` | Regex for valid tags | +| `xref` | Regex for valid cross-reference identifiers | +| `linesep` | Regex for allowed line separators | +| `delim` | Regex for allowed delimiters | +| 
`payload` | Regex for allowed string payloads | +| `zeros` | `true` to allow leading zeros on levels (`00`, `01`…) | +| `escapes` | `true` to suppress escaping `@#` as `@@#` during serialization | + +--- + +## Module `g7lookups.js` + +### Class `G7Lookups` + +Wraps the FamilySearch GEDCOM 7 specification and acts as a dynamic registry for tag type resolution and extension handling. + +#### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `err` | `function(msg)` | Callback for specification violations | +| `warn` | `function(msg)` | Callback for discouraged patterns | + +Duplicate messages are automatically suppressed: each unique message is reported only once. + +#### Constructor + +**`new G7Lookups(g7validation)`** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `g7validation` | `object` | Content of FamilySearch's `g7validation.json` | + +The file can be fetched from: +`https://raw.githubusercontent.com/FamilySearch/GEDCOM-registries/main/generated_files/g7validation.json` + +--- + +## Module `g7structure.js` + +### Class `G7Dataset` + +Top-level container for a complete GEDCOM 7 dataset. + +#### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `header` | `G7Structure` | The `HEAD` structure of the dataset | +| `records` | `Map` | Root records indexed by type URI | + +#### Static methods + +--- + +**`G7Dataset.fromString(text, lookup)`** + +Parses a GEDCOM text directly into a `G7Dataset` (combines `GEDCStruct.fromString` and `fromGEDC`). + +| Parameter | Type | Description | +|-----------|------|-------------| +| `text` | `string` | The complete GEDCOM text | +| `lookup` | `G7Lookups` | The GEDCOM 7 specification | + +Returns: `G7Dataset` + +--- + +**`G7Dataset.fromGEDC(gedc, lookup)`** + +Converts a `GEDCStruct` array into a typed, validated `G7Dataset`. 
+ +| Parameter | Type | Description | +|-----------|------|-------------| +| `gedc` | `GEDCStruct[]` | Result of `GEDCStruct.fromString()` | +| `lookup` | `G7Lookups` | The GEDCOM 7 specification | + +Returns: `G7Dataset` + +--- + +**`G7Dataset.fromJSON(obj, lookup)`** + +Reconstructs a `G7Dataset` from a JSON object produced by `toJSON()`. + +Returns: `G7Dataset` + +--- + +#### Instance methods + +**`createRecord(type, payload?, pltype?, id?)`** + +Creates a root record and adds it to the dataset. + +| Parameter | Type | Description | +|-----------|------|-------------| +| `type` | `string` | GEDCOM 7 type URI (e.g. `'https://gedcom.io/terms/v7/record-INDI'`) | +| `payload` | variable | Initial payload value | +| `pltype` | `string` | Explicit payload type if not inferrable | +| `id` | `string` | Suggested xref identifier for serialization | + +Returns: `G7Structure` + +--- + +**`find(type, payload, ...args)`** + +Searches for a record of the given type. Accepts additional criteria as `(typeURI, value)` pairs to match against sub-structures. + +```js +// Find the individual with REFN "ID-42" +const person = dataset.find( + 'https://gedcom.io/terms/v7/record-INDI', -1, + 'https://gedcom.io/terms/v7/REFN', 'ID-42' +) +// Returns null if not found +``` + +The value `-1` as payload means "any value". + +Returns: `G7Structure | null` + +--- + +**`findOrCreate(type, payload, ...args)`** + +Like `find`, but creates the record if it does not exist. Multiple calls with the same arguments always return the same object. + +Returns: `G7Structure` + +--- + +**`validate()`** + +Recursively traverses the entire dataset and checks GEDCOM 7 rules (cardinality, payload types, required fields). Errors and warnings are reported via the `G7Lookups` callbacks. + +Returns: `number` — the number of errors found. + +--- + +**`populateSchema()`** + +Inspects the extensions used in the dataset and automatically adds the required `HEAD.SCHMA.TAG` entries. 
Call this before `toString()` if any extensions are present. + +--- + +**`toString()`** + +Serializes the dataset to GEDCOM 7 text. + +Returns: `string` + +--- + +**`toJSON()`** + +Serializes the dataset to a JSON object that can be restored with `fromJSON`. + +Returns: `object` + +--- + +**`toGEDC()`** + +Converts the dataset to a `GEDCStruct` array (layer 1), enabling fine-grained serialization via `GEDCStruct.toString()`. + +Returns: `GEDCStruct[]` + +--- + +### Class `G7Structure` + +Represents a typed node in the GEDCOM 7 tree. + +#### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `type` | `string` | GEDCOM 7 URI or undocumented extension tag | +| `payload` | variable | Typed value depending on the structure type | +| `sub` | `Map` | Sub-structures indexed by type URI | +| `superstruct` | `G7Structure \| null` | Parent structure (read-only) | +| `references` | object | Structures pointing to this one (read-only) | +| `xref_id` | `string \| undefined` | Suggested xref identifier (read-only) | + +#### Instance methods + +**`createSubstructure(type, payload?, pltype?)`** + +Creates a sub-structure and attaches it to this node. + +| Parameter | Type | Description | +|-----------|------|-------------| +| `type` | `string` | GEDCOM 7 type URI | +| `payload` | variable | Initial payload value | +| `pltype` | `string` | Explicit payload type if not inferrable | + +Returns: `G7Structure` (the new sub-structure) + +--- + +**`find(type, payload, ...args)`** + +Like `G7Dataset.find`, but searches within this node's sub-structures. + +Returns: `G7Structure | null` + +--- + +**`findOrCreate(type, payload, ...args)`** + +Like `G7Dataset.findOrCreate`, but operates on this node's sub-structures. + +Returns: `G7Structure` + +--- + +**`validate()`** + +Runs recursive validation from this node downward. + +Returns: `number` — error count. 
+ +--- + +**`toString()`**, **`toJSON()`**, **`toGEDC()`** + +Same as the `G7Dataset` methods, but scoped to this node and its descendants. + +--- + +## Module `g7datatypes.js` + +These classes represent the structured payload types of GEDCOM 7. They are returned automatically by the type-aware layer; you generally do not construct them manually. + +### `G7Age` + +Represents a GEDCOM 7 age value (e.g. `> 35y 6m`). + +| Property | Type | Description | +|----------|------|-------------| +| `operator` | `string \| undefined` | `'<'` or `'>'` | +| `years` | `number \| undefined` | Number of years | +| `months` | `number \| undefined` | Number of months | +| `weeks` | `number \| undefined` | Number of weeks | +| `days` | `number \| undefined` | Number of days | + +### `G7Date` + +Represents a precise calendar date (e.g. `1 JAN 1900`). + +| Property | Type | Description | +|----------|------|-------------| +| `calendar` | `string` | Calendar URI (Gregorian by default) | +| `month` | `string \| undefined` | Month code (`JAN`, `FEB`…) | +| `day` | `number \| undefined` | Day of month | +| `year` | `number` | Year | +| `epoch` | `string \| undefined` | Epoch (`BCE`…) | + +### `G7DateValue` + +Represents a flexible date: precise date, approximation, range, or period. + +| Property | Type | Description | +|----------|------|-------------| +| `type` | `string` | `'date'`, `'dateRange'`, `'datePeriod'`, or qualifier (`'ABT'`, `'CAL'`, `'EST'`) | +| `date` | `G7Date \| undefined` | Primary date | +| `date2` | `G7Date \| undefined` | Second date (for `BET … AND …` ranges) | + +### `G7Time` + +Represents a time value (e.g. `12:30:45.5Z`). + +| Property | Type | Description | +|----------|------|-------------| +| `hours` | `number` | Hours (0–23) | +| `minutes` | `number` | Minutes | +| `seconds` | `number \| undefined` | Seconds | +| `timezone` | `string \| undefined` | Timezone (`'Z'` or offset `+HH:MM`) | + +### `G7Enum` + +Represents a GEDCOM 7 enumeration value. 
+ +| Property | Type | Description | +|----------|------|-------------| +| `value` | `string` | The tag or URI of the enumeration value | + +--- + +## Payload types by structure + +The nature of a `G7Structure`'s payload depends on its type URI: + +| Payload type | JavaScript class | +|--------------|-----------------| +| Free text | `string` | +| Pointer | `G7Structure` (or `null` if destination unknown) | +| Absent | `undefined` | +| Integer | `number` | +| Age | `G7Age` | +| Date | `G7DateValue` | +| Time | `G7Time` | +| Enumeration | `G7Enum` | +| List | `Array` | diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..0b19ef5 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,207 @@ +# Architecture + +## Overview + +The library is organized into independent layers. Each layer can be used alone, or combined with the next for richer processing. + +``` +Raw GEDCOM text + │ + ▼ +┌─────────────────────────────────────────┐ +│ Layer 1: Tag-oriented (gedcstruct) │ +│ Tree of GEDCStruct nodes │ +│ – tag, payload (string/pointer), sub │ +└─────────────────────────────────────────┘ + │ + ▼ G7Dataset.fromGEDC() +┌─────────────────────────────────────────┐ +│ Layer 2: Type-aware (g7structure) │ +│ Tree of validated G7Structure nodes │ +│ – type (URI), payload (typed), sub: Map│ +└─────────────────────────────────────────┘ + ▲ + │ powered by +┌─────────────────────────────────────────┐ +│ Layer 3: Specification (g7lookups) │ +│ G7Lookups: FamilySearch registry │ +│ – tag definitions, types, enumerations │ +└─────────────────────────────────────────┘ +``` + +--- + +## Layer 1: Tag-oriented — `gedcstruct.js` + +### Role + +This layer reads raw GEDCOM syntax without knowing the meaning of any tag.
It produces a tree of `GEDCStruct` nodes by handling: + +- level numbering (0, 1, 2…) +- cross-references (`@I1@`) resolved to direct pointers +- `CONT` (line continuation) and `CONC` (concatenation) pseudo-structures, which are transparent after parsing +- GEDCOM dialects (5.x or 7.x) + +### Structure of a `GEDCStruct` node + +Each node represents one GEDCOM line: + +``` +0 @I1@ INDI +1 NAME John /Doe/ +1 BIRT +2 DATE 1 JAN 1900 +``` + +| Property | Type | Description | +|----------|------|-------------| +| `tag` | `string` | The GEDCOM tag (`INDI`, `NAME`, `BIRT`…) | +| `payload` | `string \| GEDCStruct \| null \| undefined` | The line value, or a pointer to another node | +| `sub` | `GEDCStruct[]` | Child sub-structures | + +`fromString` returns the level-0 nodes (root records). Each node holds its children in `sub`. + +### Dialects + +Two pre-built configuration objects match the official specifications: + +- **`g7ConfGEDC`**: FamilySearch GEDCOM 7 — no line length limit, tags restricted to uppercase letters, digits, and underscores, no `CONC`. +- **`g5ConfGEDC`**: GEDCOM 5.x — 255-character line limit, more permissive tag format. + +The configuration controls: line length, tag and cross-reference format, allowed delimiters, and allowed payloads. + +### Data flow (layer 1 only) + +``` +GEDCOM text + │ GEDCStruct.fromString(text, config, logger) + ▼ +GEDCStruct[] (array of level-0 nodes) + │ + ├── querySelector('HEAD.GEDC.VERS') → first match + ├── querySelectorAll('.INDI') → all individuals + │ + │ toString(newline, maxlen, escapes) + ▼ +GEDCOM text (round-trip) +``` + +--- + +## Layer 2: Type-aware — `g7structure.js` + +### Role + +This layer applies GEDCOM 7 semantics to the tree produced by layer 1. For each `GEDCStruct` node it: + +1. determines the **type** (FamilySearch URI) based on context (tag + position in tree) +2. parses the **payload** into the correct type (date, age, enumeration, pointer…) +3. enforces **cardinality rules** (required fields, singular fields, etc.) +4.
handles **extensions** (tags starting with `_`, unregistered tags, relocated tags) + +### The two main classes + +#### `G7Structure` — A typed node + +| Property | Type | Description | +|----------|------|-------------| +| `type` | `string` | GEDCOM 7 URI or undocumented extension tag | +| `payload` | variable | Typed value (string, G7Date, G7Enum, G7Structure, null…) | +| `sub` | `Map` | Sub-structures indexed by type URI | + +The `sub` map key is the type URI, not the tag. Multiple sub-structures of the same type are grouped in an array. + +#### `G7Dataset` — The complete dataset + +Top-level container holding: + +| Property | Description | +|----------|-------------| +| `header` | The `G7Structure` of type `HEAD` | +| `records` | `Map` — all root records | + +### Data flow (layer 2) + +``` +GEDCStruct[] (layer 1) + │ G7Dataset.fromGEDC(gedc, lookup) + ▼ +G7Dataset + ├── header: G7Structure (HEAD) + └── records: Map + ├── 'https://gedcom.io/terms/v7/record-INDI' → [G7Structure, ...] + ├── 'https://gedcom.io/terms/v7/record-FAM' → [G7Structure, ...] + └── ... + + │ dataset.validate() → error count + │ dataset.populateSchema() → add HEAD.SCHMA for extensions + │ dataset.toString() → GEDCOM text + │ dataset.toJSON() → JSON object + ▼ +Output +``` + +### Programmatic creation + +Layer 2 can be used without parsing. A dataset is built from scratch: + +``` +new G7Dataset(lookup) + │ createRecord(typeURI) + ▼ +G7Structure (record) + │ createSubstructure(typeURI, payload) + ▼ +G7Structure (sub-structure) + ... +``` + +The `findOrCreate` pattern enables declarative writes: the call describes the desired structure and the library either returns the existing one or creates it. + +--- + +## Layer 3: Specification — `g7lookups.js` + +### Role + +`G7Lookups` wraps the `g7validation.json` file published by FamilySearch. This file describes all standard tags, their payload types, cardinality rules, and valid enumeration sets. 
+ +Layer 2 consults `G7Lookups` for every node during conversion from layer 1. The `err` and `warn` callbacks capture errors and warnings without interrupting processing. + +### Extension handling + +GEDCOM extensions are tags or structures defined outside the official specification. `G7Lookups` classifies them into four categories: + +| Category | Description | +|----------|-------------| +| **Undocumented** | `_`-prefixed tag with no definition in `HEAD.SCHMA` | +| **Unregistered** | URI present in `SCHMA` but absent from the FamilySearch registry | +| **Aliased** | Tag that matches a standard type in a different context | +| **Relocated** | Standard structure used under an unexpected superstructure | + +--- + +## Payload types — `g7datatypes.js` + +GEDCOM 7 defines several structured value types. The library represents them as distinct objects: + +| Class | GEDCOM example | Description | +|-------|----------------|-------------| +| `G7Age` | `> 35y 6m` | Age with operator, years, months, weeks, days | +| `G7Date` | `1 JAN 1900` | Precise date with calendar, month, day, year, epoch | +| `G7DateValue` | `ABT 1900`, `BET 1900 AND 1910` | Flexible date (approximation, range, period…) | +| `G7Time` | `12:30:45Z` | Time with timezone | +| `G7Enum` | `HUSB` | Enumeration value (URI or tag depending on context) | + +`string` payloads (free text, name, language…) remain plain JavaScript strings. + +--- + +## Separation of concerns + +This architecture supports several independent use cases: + +- **Simple parsing**: use only `gedcstruct.js` to read any GEDCOM file without type validation. +- **Validation**: go through all three layers to detect every violation of the GEDCOM 7 specification. +- **Transformation**: read at layer 1, modify, write back to GEDCOM without involving layer 2. +- **Typed creation**: build a valid GEDCOM 7 dataset from scratch using layer 2. 
diff --git a/docs/examples.md b/docs/examples.md new file mode 100644 index 0000000..624aef5 --- /dev/null +++ b/docs/examples.md @@ -0,0 +1,274 @@ +# Practical Examples + +## 1. Read a GEDCOM file and extract data + +### Simple extraction with the tag-oriented layer + +For quickly scanning a GEDCOM file without type validation, `GEDCStruct` alone is sufficient. + +```js +import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' + +const gedc = GEDCStruct.fromString(gedcomText, g7ConfGEDC, console.error) + +// Read the GEDCOM version +const version = gedc.querySelector('HEAD.GEDC.VERS')?.payload +console.log('Version:', version) // "7.0" + +// List all individuals +for (const indi of gedc.querySelectorAll('.INDI')) { + const name = indi.querySelector('NAME')?.payload + const birth = indi.querySelector('BIRT.DATE')?.payload + console.log(name, '— born:', birth) +} +``` + +### Extraction with the type-aware layer (GEDCOM 7) + +To access typed payloads and benefit from validation: + +```js +import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' +import { G7Lookups } from './g7lookups.js' +import { G7Dataset } from './g7structure.js' + +const spec = await fetch('https://raw.githubusercontent.com/FamilySearch/GEDCOM-registries/main/generated_files/g7validation.json').then(r => r.json()) +const lookup = new G7Lookups(spec) +lookup.err = msg => console.error(msg) +lookup.warn = msg => console.warn(msg) + +const gedc = GEDCStruct.fromString(gedcomText, g7ConfGEDC) +const dataset = G7Dataset.fromGEDC(gedc, lookup) + +const INDI = 'https://gedcom.io/terms/v7/record-INDI' +const NAME = 'https://gedcom.io/terms/v7/INDI-NAME' +const BIRT = 'https://gedcom.io/terms/v7/BIRT' +const DATE = 'https://gedcom.io/terms/v7/DATE' + +for (const person of dataset.records.get(INDI) ?? []) { + const name = person.sub.get(NAME)?.[0]?.payload // string + const date = person.sub.get(BIRT)?.[0]?.sub.get(DATE)?.[0]?.payload // G7DateValue + console.log(name, date?.toString()) +} +``` + +--- + +## 2. 
Validate a GEDCOM 7 file + +```js +import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' +import { G7Lookups } from './g7lookups.js' +import { G7Dataset } from './g7structure.js' + +const errors = [] +const warnings = [] + +const spec = await fetch('...g7validation.json').then(r => r.json()) +const lookup = new G7Lookups(spec) +lookup.err = msg => errors.push(msg) +lookup.warn = msg => warnings.push(msg) + +const gedc = GEDCStruct.fromString(gedcomText, g7ConfGEDC) +const dataset = G7Dataset.fromGEDC(gedc, lookup) +dataset.validate() + +console.log(`${errors.length} error(s), ${warnings.length} warning(s)`) +errors.forEach(e => console.error('ERROR:', e)) +warnings.forEach(w => console.warn('WARN: ', w)) +``` + +--- + +## 3. Build a GEDCOM 7 dataset from scratch + +```js +import { G7Lookups } from './g7lookups.js' +import { G7Dataset } from './g7structure.js' + +const T = 'https://gedcom.io/terms/v7/' // URI prefix shorthand + +const spec = await fetch('...g7validation.json').then(r => r.json()) +const lookup = new G7Lookups(spec) +const dataset = new G7Dataset(lookup) + +// Create an individual +const person = dataset.createRecord(T + 'record-INDI') + +// Add a name (an individual's NAME has the type URI INDI-NAME) +person.createSubstructure(T + 'INDI-NAME', 'Marie /Dupont/') + +// Add a birth event +const birth = person.createSubstructure(T + 'BIRT', 'Y') +birth.createSubstructure(T + 'DATE', '15 MAR 1985') +birth.createSubstructure(T + 'PLAC', 'Paris, France') + +// Add a note +person.createSubstructure(T + 'NOTE', 'Maternal ancestor.') + +// Serialize +dataset.populateSchema() // required if any extensions are used +const output = dataset.toString() +console.log(output) +``` + +--- + +## 4. Find or create structures (incremental import) + +The `findOrCreate` pattern is designed for iterative imports: the same operation can be called multiple times without creating duplicates. 
+ +```js +const T = 'https://gedcom.io/terms/v7/' + +// Create or retrieve an individual identified by their REFN value +const person = dataset.findOrCreate(T + 'record-INDI', -1, T + 'REFN', 'PERSON-001') + +// Create or retrieve their birth event identified by a UUID +const birth = person.findOrCreate(T + 'BIRT', -1, T + 'UID', 'a3f4-...') +birth.payload = 'Y' + +// Create or retrieve a submitter identified by name +const submitter = dataset.findOrCreate(T + 'record-SUBM', -1, T + 'NAME', 'Alice Martin') + +// Link the submitter to the individual (idempotent) +person.findOrCreate(T + 'SUBM', submitter) +``` + +--- + +## 5. Create a family with parent–child links + +```js +const T = 'https://gedcom.io/terms/v7/' + +const father = dataset.findOrCreate(T + 'record-INDI', -1, T + 'REFN', 'FATHER') +father.createSubstructure(T + 'INDI-NAME', 'Jean /Dupont/') + +const mother = dataset.findOrCreate(T + 'record-INDI', -1, T + 'REFN', 'MOTHER') +mother.createSubstructure(T + 'INDI-NAME', 'Isabelle /Martin/') + +const child = dataset.findOrCreate(T + 'record-INDI', -1, T + 'REFN', 'CHILD') +child.createSubstructure(T + 'INDI-NAME', 'Marie /Dupont/') + +// Create the family record +const family = dataset.createRecord(T + 'record-FAM') +family.createSubstructure(T + 'FAM-HUSB', father) // payload is a pointer +family.createSubstructure(T + 'FAM-WIFE', mother) +family.createSubstructure(T + 'CHIL', child) + +// Required symmetric back-links +father.createSubstructure(T + 'FAMS', family) +mother.createSubstructure(T + 'FAMS', family) +child.createSubstructure(T + 'INDI-FAMC', family) +``` + +--- + +## 6. Use an extension + +Extensions are structures defined outside the standard specification. They must start with `_` or be registered in `HEAD.SCHMA`. 
+ +```js +const person = dataset.createRecord('https://gedcom.io/terms/v7/record-INDI') + +// Documented extension (URI registered in HEAD.SCHMA) +person.createSubstructure('https://example.com/myapp/EXT-FIELD', 'value') + +// populateSchema() automatically adds the reference to HEAD.SCHMA +dataset.populateSchema() +const output = dataset.toString() +``` + +--- + +## 7. Round-trip: read, modify, rewrite + +```js +import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' +import { G7Lookups } from './g7lookups.js' +import { G7Dataset } from './g7structure.js' + +const spec = await fetch('...g7validation.json').then(r => r.json()) +const lookup = new G7Lookups(spec) +const gedc = GEDCStruct.fromString(originalText, g7ConfGEDC) +const dataset = G7Dataset.fromGEDC(gedc, lookup) + +const T = 'https://gedcom.io/terms/v7/' + +// Modify: add a note to every individual that does not already have one +for (const person of dataset.records.get(T + 'record-INDI') ?? []) { + if (!person.sub.has(T + 'NOTE')) { + person.createSubstructure(T + 'NOTE', 'Imported automatically.') + } +} + +// Validate before rewriting +dataset.validate() +dataset.populateSchema() + +const modifiedText = dataset.toString() +``` + +--- + +## 8. Serialize to JSON and restore + +The JSON format is useful for storing or transferring a dataset without going through GEDCOM text. + +```js +// Serialize +const json = dataset.toJSON() +const jsonString = JSON.stringify(json) + +// Restore +const restored = G7Dataset.fromJSON(JSON.parse(jsonString), lookup) +const text = restored.toString() +``` + +--- + +## 9. 
Use only the tag-oriented layer (GEDCOM 5.x or non-standard) + +For GEDCOM 5.x files or non-standard dialects, use `gedcstruct.js` alone with `g5ConfGEDC`: + +```js +import { GEDCStruct, g5ConfGEDC } from './gedcstruct.js' + +const gedc = GEDCStruct.fromString(gedcom5Text, g5ConfGEDC, console.error) + +// Read fields without semantic validation +const source = gedc.querySelector('.SOUR') +const title = source?.querySelector('TITL')?.payload + +// Rewrite (preserves 5.x dialect: 255-char limit, CONC if needed) +const output = gedc.map(s => s.toString('\r\n', 255, true)).join('') +``` + +--- + +## 10. Separate syntax errors from validation errors + +There are two distinct levels of errors: + +1. **Syntax errors**: detected by `GEDCStruct.fromString()` via the `logger` callback +2. **Validation errors**: detected by `G7Dataset.fromGEDC()` and `validate()` via `lookup.err` / `lookup.warn` + +```js +const syntaxErrors = [] +const typeErrors = [] +const typeWarnings = [] + +const gedc = GEDCStruct.fromString(text, g7ConfGEDC, msg => syntaxErrors.push(msg)) + +const lookup = new G7Lookups(spec) +lookup.err = msg => typeErrors.push(msg) +lookup.warn = msg => typeWarnings.push(msg) + +const dataset = G7Dataset.fromGEDC(gedc, lookup) +dataset.validate() + +console.log('Syntax: ', syntaxErrors) +console.log('Type: ', typeErrors) +console.log('Suggestions:', typeWarnings) +``` From a59a04026990bed6b2795dbc81245680b67f5ecf Mon Sep 17 00:00:00 2001 From: atellier2 Date: Mon, 4 May 2026 08:44:51 +0200 Subject: [PATCH 2/4] =?UTF-8?q?refactor:=20r=C3=A9organise=20la=20structur?= =?UTF-8?q?e=20des=20fichiers=20selon=20les=20bonnes=20pratiques=20(#2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Déplace les modules cœur dans src/ (gedcstruct.js, g7datatypes.js, g7lookups.js, g7structure.js) - Renomme demo/ en examples/ pour plus de clarté - Met à jour les chemins d'import dans simpleValidator.js et examples/test_findOrCreate.js - Ajoute 
package.json avec les exports ES module et les métadonnées du projet - Ajoute .gitignore https://claude.ai/code/session_01HoKHaY5SWQ9aCmxPvqWTKP Co-authored-by: Claude --- .gitignore | 3 +++ {demo => examples}/README.md | 0 {demo => examples}/test_findOrCreate.html | 0 {demo => examples}/test_findOrCreate.js | 6 +++--- package.json | 17 +++++++++++++++++ simpleValidator.js | 6 +++--- g7datatypes.js => src/g7datatypes.js | 0 g7lookups.js => src/g7lookups.js | 0 g7structure.js => src/g7structure.js | 0 gedcstruct.js => src/gedcstruct.js | 0 10 files changed, 26 insertions(+), 6 deletions(-) create mode 100644 .gitignore rename {demo => examples}/README.md (100%) rename {demo => examples}/test_findOrCreate.html (100%) rename {demo => examples}/test_findOrCreate.js (95%) create mode 100644 package.json rename g7datatypes.js => src/g7datatypes.js (100%) rename g7lookups.js => src/g7lookups.js (100%) rename g7structure.js => src/g7structure.js (100%) rename gedcstruct.js => src/gedcstruct.js (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3bdd52e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +dist/ +.DS_Store diff --git a/demo/README.md b/examples/README.md similarity index 100% rename from demo/README.md rename to examples/README.md diff --git a/demo/test_findOrCreate.html b/examples/test_findOrCreate.html similarity index 100% rename from demo/test_findOrCreate.html rename to examples/test_findOrCreate.html diff --git a/demo/test_findOrCreate.js b/examples/test_findOrCreate.js similarity index 95% rename from demo/test_findOrCreate.js rename to examples/test_findOrCreate.js index 42cd00f..32ce545 100644 --- a/demo/test_findOrCreate.js +++ b/examples/test_findOrCreate.js @@ -1,6 +1,6 @@ -import { GEDCStruct, g7ConfGEDC } from "../gedcstruct.js" -import { G7Lookups } from '../g7lookups.js' -import { G7Dataset } from '../g7structure.js' +import { GEDCStruct, g7ConfGEDC } from "../src/gedcstruct.js" +import { G7Lookups } from 
'../src/g7lookups.js' +import { G7Dataset } from '../src/g7structure.js' const addError = (msg) => { const li = document.createElement('li') diff --git a/package.json b/package.json new file mode 100644 index 0000000..c88eb05 --- /dev/null +++ b/package.json @@ -0,0 +1,17 @@ +{ + "name": "js-gedcom", + "version": "1.0.0", + "description": "JavaScript library for parsing and validating GEDCOM 7 genealogy files", + "type": "module", + "main": "src/g7structure.js", + "exports": { + "./gedcstruct": "./src/gedcstruct.js", + "./g7datatypes": "./src/g7datatypes.js", + "./g7lookups": "./src/g7lookups.js", + "./g7structure": "./src/g7structure.js" + }, + "license": "MIT OR Unlicense", + "files": [ + "src/" + ] +} diff --git a/simpleValidator.js b/simpleValidator.js index c9a1967..56d83eb 100644 --- a/simpleValidator.js +++ b/simpleValidator.js @@ -1,6 +1,6 @@ -import { GEDCStruct, g7ConfGEDC } from "./gedcstruct.js" -import { G7Lookups } from './g7lookups.js' -import { G7Dataset } from './g7structure.js' +import { GEDCStruct, g7ConfGEDC } from "./src/gedcstruct.js" +import { G7Lookups } from './src/g7lookups.js' +import { G7Dataset } from './src/g7structure.js' const addError = (msg) => { const li = document.createElement('li') diff --git a/g7datatypes.js b/src/g7datatypes.js similarity index 100% rename from g7datatypes.js rename to src/g7datatypes.js diff --git a/g7lookups.js b/src/g7lookups.js similarity index 100% rename from g7lookups.js rename to src/g7lookups.js diff --git a/g7structure.js b/src/g7structure.js similarity index 100% rename from g7structure.js rename to src/g7structure.js diff --git a/gedcstruct.js b/src/gedcstruct.js similarity index 100% rename from gedcstruct.js rename to src/gedcstruct.js From 8f015d72b938fd00ec38ba0db0b4534a94bdcec8 Mon Sep 17 00:00:00 2001 From: atellier2 Date: Mon, 4 May 2026 16:18:48 +0200 Subject: [PATCH 3/4] =?UTF-8?q?refactor:=20r=C3=A9organise=20la=20structur?= 
=?UTF-8?q?e=20des=20fichiers=20selon=20les=20bonnes=20pratiques=20(#4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Déplace les modules cœur dans src/ (gedcstruct.js, g7datatypes.js, g7lookups.js, g7structure.js) - Renomme demo/ en examples/ pour plus de clarté - Met à jour les chemins d'import dans simpleValidator.js et examples/test_findOrCreate.js - Ajoute package.json avec les exports ES module et les métadonnées du projet - Ajoute .gitignore https://claude.ai/code/session_01HoKHaY5SWQ9aCmxPvqWTKP Co-authored-by: Claude From 3951d7bf3cfb2191b29c8d50984cf008e6d7ca45 Mon Sep 17 00:00:00 2001 From: atellier2 Date: Mon, 4 May 2026 16:26:34 +0200 Subject: [PATCH 4/4] Claude/improve documentation qfx in (#3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: refonte du README et création du répertoire docs/ - README.md réécrit pour être orienté utilisateur : introduction GEDCOM, architecture en 3 couches, quick start, description des modules, liens vers la doc détaillée - docs/architecture.md : explication du flux de données entre les couches, rôle de chaque classe, types de payloads - docs/api.md : référence complète de GEDCStruct, G7Dataset, G7Structure, G7Lookups, G7Datatypes et des objets de configuration - docs/examples.md : 10 exemples concrets (parsing, validation, création, find-or-create, familles, extensions, round-trip, JSON, GEDCOM 5.x) * docs: translate all documentation to English README.md and all docs/ files (architecture, api, examples) were written in French by mistake; translated to English throughout. 
https://claude.ai/code/session_019gWQ4DPdsTsLPoNNK9z4j3 * refactor: reorganize project structure following best practices - src/ library source files (gedcstruct, g7datatypes, g7lookups, g7structure) - examples/ runnable examples, replacing the root-level validator and demo/ - examples/validator/ GEDCOM 7 validator web app (index.html + validator.js) - examples/find-or-create/ programmatic dataset demo (index.html + find-or-create.js) - docs/ documentation (unchanged) - Root kept clean: README.md and LICENSE files only Update all import paths and script src references accordingly. https://claude.ai/code/session_019gWQ4DPdsTsLPoNNK9z4j3 * test: add complete test suite and fix G7Dataset.fromString bug - Add unit tests for GEDCStruct (parsing, CONT/CONC, pointers, escaping, querySelector, toString, JSON round-trip) - Add unit tests for all G7 datatypes (G7Age, G7Date, G7DateValue, G7Time, G7Datatype dispatch, checkDatatype) with a minimal mock lookup - Add integration tests for G7Dataset (construction, record creation, substructure creation, find/findOrCreate, pointers, validation, serialisation, JSON round-trip, fromString) - Add integration end-to-end round-trip tests (GEDCStruct and G7Dataset) - Add test infrastructure: spec downloader (tests/setup.js), spec loader helper (tests/helpers/spec.js), GEDCOM fixture (tests/fixtures/minimal.ged) - Add package.json with test scripts (test, test:unit, test:integration, test:setup) - Add .gitignore excluding the downloaded spec fixture - Fix bug in G7Dataset.fromString: variable was named `src` before being assigned, causing a ReferenceError — corrected to use parameter `str` - Update README with testing instructions All 137 tests pass (29 suites). 
https://claude.ai/code/session_019gWQ4DPdsTsLPoNNK9z4j3 --------- Co-authored-by: Claude Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: atellier2 <10942114+atellier2@users.noreply.github.com> --- .gitignore | 2 + README.md | 43 +- docs/examples.md | 26 +- .../find-or-create.js} | 6 +- .../index.html} | 2 +- index.html => examples/validator/index.html | 2 +- .../validator/validator.js | 6 +- package.json | 17 +- src/g7structure.js | 2 +- tests/fixtures/minimal.ged | 18 + tests/helpers/spec.js | 39 ++ tests/integration/g7dataset.test.js | 288 ++++++++++++ tests/integration/round-trip.test.js | 159 +++++++ tests/setup.js | 23 + tests/unit/g7datatypes.test.js | 418 ++++++++++++++++++ tests/unit/gedcstruct.test.js | 291 ++++++++++++ 16 files changed, 1310 insertions(+), 32 deletions(-) rename examples/{test_findOrCreate.js => find-or-create/find-or-create.js} (95%) rename examples/{test_findOrCreate.html => find-or-create/index.html} (92%) rename index.html => examples/validator/index.html (95%) rename simpleValidator.js => examples/validator/validator.js (92%) create mode 100644 tests/fixtures/minimal.ged create mode 100644 tests/helpers/spec.js create mode 100644 tests/integration/g7dataset.test.js create mode 100644 tests/integration/round-trip.test.js create mode 100644 tests/setup.js create mode 100644 tests/unit/g7datatypes.test.js create mode 100644 tests/unit/gedcstruct.test.js diff --git a/.gitignore b/.gitignore index 3bdd52e..0b7752e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +# Downloaded at test setup — not committed (generated file from another repo) +tests/fixtures/g7validation.json node_modules/ dist/ .DS_Store diff --git a/README.md b/README.md index 03c250a..b3a1d29 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ The tag-oriented layer alone is sufficient to read and manipulate GEDCOM files w ### Read a GEDCOM file (tag-oriented layer) ```js -import { GEDCStruct, g7ConfGEDC } 
from './gedcstruct.js' +import { GEDCStruct, g7ConfGEDC } from './src/gedcstruct.js' const gedc = GEDCStruct.fromString(gedcomText, g7ConfGEDC, console.error) // gedc is an array of level-0 GEDCStruct nodes @@ -57,9 +57,9 @@ const individuals = [...gedc.querySelectorAll('.INDI')] // all INDI r ### Read and validate a GEDCOM 7 file ```js -import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' -import { G7Lookups } from './g7lookups.js' -import { G7Dataset } from './g7structure.js' +import { GEDCStruct, g7ConfGEDC } from './src/gedcstruct.js' +import { G7Lookups } from './src/g7lookups.js' +import { G7Dataset } from './src/g7structure.js' // 1. Load the GEDCOM 7 specification const spec = await fetch('https://raw.githubusercontent.com/FamilySearch/GEDCOM-registries/main/generated_files/g7validation.json') @@ -113,25 +113,25 @@ const same = dataset.findOrCreate( ## Modules -### `gedcstruct.js` — Tag-oriented layer +### `src/gedcstruct.js` — Tag-oriented layer Turns GEDCOM text into a tree of `GEDCStruct` nodes. Handles `CONT`/`CONC` pseudo-structures, cross-reference pointers, and 5.x or 7.x dialects. Exports: `GEDCStruct`, `g5ConfGEDC`, `g7ConfGEDC` -### `g7lookups.js` — GEDCOM 7 specification +### `src/g7lookups.js` — GEDCOM 7 specification Wraps the [FamilySearch GEDCOM Registries](https://github.com/FamilySearch/GEDCOM-registries) JSON to provide tag definitions, payload types, enumeration sets, and extension handling. Exports: `G7Lookups` -### `g7structure.js` — Type-aware layer +### `src/g7structure.js` — Type-aware layer Converts tag-oriented nodes into type-validated `G7Structure` objects. Understands GEDCOM 7 semantics, cardinality rules, payload types, and extension handling. Exports: `G7Structure`, `G7Dataset` -### `g7datatypes.js` — Payload data types +### `src/g7datatypes.js` — Payload data types Implements typed payload values: `G7Date`, `G7DateValue`, `G7Age`, `G7Time`, `G7Enum`. 
@@ -145,6 +145,33 @@ Implements typed payload values: `G7Date`, `G7DateValue`, `G7Age`, `G7Time`, `G7 --- +## Testing + +Requires Node.js 20+. The integration tests depend on the FamilySearch GEDCOM 7 specification, which is downloaded once and cached locally. + +```bash +# First-time setup: download the spec fixture +npm run test:setup + +# Run all tests +npm test + +# Run only unit tests (no network required) +npm run test:unit + +# Run only integration tests +npm run test:integration +``` + +| Suite | Location | Requires spec | +|-------|----------|:-------------:| +| Unit — `GEDCStruct` | `tests/unit/gedcstruct.test.js` | No | +| Unit — datatypes | `tests/unit/g7datatypes.test.js` | No | +| Integration — `G7Dataset` | `tests/integration/g7dataset.test.js` | Yes | +| Integration — round-trip | `tests/integration/round-trip.test.js` | Yes | + +--- + ## Character Encoding This library operates on JavaScript strings. It does not handle byte-level encoding conversion (UTF-8, ANSEL, etc.) — you must decode the file into a JavaScript string before passing it to the library. diff --git a/docs/examples.md b/docs/examples.md index 624aef5..4668318 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -7,7 +7,7 @@ For quickly scanning a GEDCOM file without type validation, `GEDCStruct` alone is sufficient. 
```js -import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' +import { GEDCStruct, g7ConfGEDC } from './src/gedcstruct.js' const gedc = GEDCStruct.fromString(gedcomText, g7ConfGEDC, console.error) @@ -28,9 +28,9 @@ for (const indi of gedc.querySelectorAll('.INDI')) { To access typed payloads and benefit from validation: ```js -import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' -import { G7Lookups } from './g7lookups.js' -import { G7Dataset } from './g7structure.js' +import { GEDCStruct, g7ConfGEDC } from './src/gedcstruct.js' +import { G7Lookups } from './src/g7lookups.js' +import { G7Dataset } from './src/g7structure.js' const spec = await fetch('https://raw.githubusercontent.com/FamilySearch/GEDCOM-registries/main/generated_files/g7validation.json').then(r => r.json()) const lookup = new G7Lookups(spec) @@ -57,9 +57,9 @@ for (const person of dataset.records.get(INDI) ?? []) { ## 2. Validate a GEDCOM 7 file ```js -import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' -import { G7Lookups } from './g7lookups.js' -import { G7Dataset } from './g7structure.js' +import { GEDCStruct, g7ConfGEDC } from './src/gedcstruct.js' +import { G7Lookups } from './src/g7lookups.js' +import { G7Dataset } from './src/g7structure.js' const errors = [] const warnings = [] @@ -83,8 +83,8 @@ warnings.forEach(w => console.warn('WARN: ', w)) ## 3. Build a GEDCOM 7 dataset from scratch ```js -import { G7Lookups } from './g7lookups.js' -import { G7Dataset } from './g7structure.js' +import { G7Lookups } from './src/g7lookups.js' +import { G7Dataset } from './src/g7structure.js' const T = 'https://gedcom.io/terms/v7/' // URI prefix shorthand @@ -185,9 +185,9 @@ const output = dataset.toString() ## 7. 
Round-trip: read, modify, rewrite ```js -import { GEDCStruct, g7ConfGEDC } from './gedcstruct.js' -import { G7Lookups } from './g7lookups.js' -import { G7Dataset } from './g7structure.js' +import { GEDCStruct, g7ConfGEDC } from './src/gedcstruct.js' +import { G7Lookups } from './src/g7lookups.js' +import { G7Dataset } from './src/g7structure.js' const spec = await fetch('...g7validation.json').then(r => r.json()) const lookup = new G7Lookups(spec) @@ -233,7 +233,7 @@ const text = restored.toString() For GEDCOM 5.x files or non-standard dialects, use `gedcstruct.js` alone with `g5ConfGEDC`: ```js -import { GEDCStruct, g5ConfGEDC } from './gedcstruct.js' +import { GEDCStruct, g5ConfGEDC } from './src/gedcstruct.js' const gedc = GEDCStruct.fromString(gedcom5Text, g5ConfGEDC, console.error) diff --git a/examples/test_findOrCreate.js b/examples/find-or-create/find-or-create.js similarity index 95% rename from examples/test_findOrCreate.js rename to examples/find-or-create/find-or-create.js index 32ce545..6544f2c 100644 --- a/examples/test_findOrCreate.js +++ b/examples/find-or-create/find-or-create.js @@ -1,6 +1,6 @@ -import { GEDCStruct, g7ConfGEDC } from "../src/gedcstruct.js" -import { G7Lookups } from '../src/g7lookups.js' -import { G7Dataset } from '../src/g7structure.js' +import { GEDCStruct, g7ConfGEDC } from "../../src/gedcstruct.js" +import { G7Lookups } from '../../src/g7lookups.js' +import { G7Dataset } from '../../src/g7structure.js' const addError = (msg) => { const li = document.createElement('li') diff --git a/examples/test_findOrCreate.html b/examples/find-or-create/index.html similarity index 92% rename from examples/test_findOrCreate.html rename to examples/find-or-create/index.html index 21259b7..6f956fe 100644 --- a/examples/test_findOrCreate.html +++ b/examples/find-or-create/index.html @@ -3,7 +3,7 @@ Test: generate GEDCOM 7 data - +