From 24a777db355b9e0f3e2b8a81edb47c28ddd5b57b Mon Sep 17 00:00:00 2001 From: Somasundaram Ayyappan <1802828+somus@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:31:40 +0530 Subject: [PATCH 1/4] feat(spec): extract v0.1.0 spec artifacts --- README.md | 9 + fixtures/validation/README.md | 221 + .../content-hash-invalid-hex.trail.jsonl | 2 + .../content-hash-mismatch.trail.jsonl | 3 + ...envelope-content-hash-mismatch.trail.jsonl | 3 + .../absent-content-hash.trail.jsonl | 2 + .../envelope-two-tier.trail.jsonl | 4 + .../hash-vectors/jcs-stress.trail.jsonl | 3 + .../minimal-pending-roundtrip.trail.jsonl | 3 + .../multi-session-slice.trail.jsonl | 6 + .../hash-vectors/replacement-char.trail.jsonl | 3 + .../segment-chain-seq1.trail.jsonl | 3 + .../segment-chain-seq2.trail.jsonl | 3 + ...ntial-pairing-with-session-end.trail.jsonl | 5 + .../ambiguous-sequential-pairing.trail.jsonl | 4 + .../branch-point-unknown-from-id.trail.jsonl | 3 + ...ry-unknown-abandoned-branch-id.trail.jsonl | 3 + .../invalid-graph/duplicate-id.trail.jsonl | 3 + ...licate-option-labels-mixed-ids.trail.jsonl | 2 + .../duplicate-option-labels.trail.jsonl | 2 + .../duplicate-segment-seq.trail.jsonl | 2 + .../duplicate-tool-result-for-id.trail.jsonl | 5 + .../envelope-not-at-line-1.trail.jsonl | 2 + ...velope-sessions-manifest-empty.trail.jsonl | 3 + ...ope-sessions-manifest-multiple.trail.jsonl | 3 + ...nvelope-without-session-header.trail.jsonl | 2 + .../header-has-parent-id.trail.jsonl | 2 + ...lti-session-cross-group-parent.trail.jsonl | 4 + .../multi-session-orphan-prelude.trail.jsonl | 3 + .../multiple-envelopes.trail.jsonl | 3 + .../non-interoperable-number.trail.jsonl | 2 + .../non-monotonic-event-ts.trail.jsonl | 3 + .../out-of-order-segment-seq.trail.jsonl | 2 + .../invalid-graph/parent-cycle.trail.jsonl | 3 + .../parse-fidelity-drift.trail.jsonl | 2 + ...ential-pairing-stays-in-branch.trail.jsonl | 4 + ...airing-stays-in-sibling-branch.trail.jsonl | 5 + 
...ays-in-subagent-sibling-branch.trail.jsonl | 5 + ...n-end-forward-final-message-id.trail.jsonl | 3 + ...n-end-unknown-final-message-id.trail.jsonl | 3 + .../stream-open-with-content-hash.trail.jsonl | 3 + .../tool-args-unredacted-secret.trail.jsonl | 3 + ...turn-scope-does-not-close-call.trail.jsonl | 3 + ...id-wins-over-semantic-conflict.trail.jsonl | 4 + .../unknown-parent-id.trail.jsonl | 2 + .../unmatched-tool-call-at-eof.trail.jsonl | 2 + ...-tool-call-partial-suppression.trail.jsonl | 4 + ...rminated-without-open-call-ids.trail.jsonl | 3 + ...-query-response-unknown-for-id.trail.jsonl | 2 + ...ent-message-attachment-bad-uri.trail.jsonl | 3 + ...gent-message-usage-extra-field.trail.jsonl | 3 + ...t-message-usage-missing-output.trail.jsonl | 3 + ...message-usage-missing-required.trail.jsonl | 3 + ...sage-usage-zero-context-window.trail.jsonl | 3 + ...-thinking-usage-missing-output.trail.jsonl | 2 + .../capability-change-bad-reason.trail.jsonl | 2 + .../capability-change-bad-scope.trail.jsonl | 2 + .../capability-change-empty.trail.jsonl | 2 + .../command-invoke-bad-kind.trail.jsonl | 2 + ...mmand-invoke-bad-result-action.trail.jsonl | 2 + .../command-invoke-missing-kind.trail.jsonl | 2 + .../command-invoke-missing-name.trail.jsonl | 2 + .../envelope-missing-producer.trail.jsonl | 2 + .../header-wrong-schema-version.trail.jsonl | 1 + .../redaction-count-non-integer.trail.jsonl | 2 + .../segment-seq-1-with-prev-hash.trail.jsonl | 1 + ...egment-seq-2-without-prev-hash.trail.jsonl | 1 + ...ment-seq-2-without-session-uid.trail.jsonl | 1 + .../segment-seq-zero.trail.jsonl | 1 + ...sion-end-final-message-id-null.trail.jsonl | 3 + ...-metadata-update-bad-field-cwd.trail.jsonl | 2 + ...ion-metadata-update-bad-reason.trail.jsonl | 2 + ...metadata-update-bad-tags-value.trail.jsonl | 2 + ...n-metadata-update-bad-worktree.trail.jsonl | 2 + .../session-uid-not-ulid-or-uuid.trail.jsonl | 1 + .../tool-call-aborted-bad-reason.trail.jsonl | 2 + 
...rted-tool-scope-missing-for-id.trail.jsonl | 2 + ...aborted-turn-scope-with-for-id.trail.jsonl | 2 + ...ol-call-file-list-missing-path.trail.jsonl | 2 + ...ol-call-file-patch-empty-files.trail.jsonl | 2 + ...l-file-patch-file-missing-diff.trail.jsonl | 2 + .../tool-call-missing-args-path.trail.jsonl | 2 + ...tool-call-usage-missing-output.trail.jsonl | 3 + ...-result-attachment-extra-field.trail.jsonl | 3 + ...a-file-read-range-wrong-length.trail.jsonl | 3 + ...ta-mcp-call-block-missing-type.trail.jsonl | 3 + ...meta-shell-command-extra-field.trail.jsonl | 3 + ...-truncated-missing-output-size.trail.jsonl | 3 + .../user-message-missing-text.trail.jsonl | 2 + .../user-message-non-string-text.trail.jsonl | 2 + ...ull-revision-with-empty-branch.trail.jsonl | 1 + ...null-revision-with-head-commit.trail.jsonl | 1 + ...s-null-revision-without-branch.trail.jsonl | 1 + fixtures/validation/manifest.json | 3889 +++++++++++++++++ fixtures/validation/manifest.schema.json | 123 + ...y-change-unknown-payload-field.trail.jsonl | 2 + .../ill-formed-string.trail.jsonl | 2 + .../nested-unknown-payload-field.trail.jsonl | 2 + ...atch-compatible-schema-version.trail.jsonl | 2 + .../reserved-future-event-type.trail.jsonl | 2 + ...lt-meta-registered-extra-field.trail.jsonl | 3 + .../unknown-event-type.trail.jsonl | 2 + .../unknown-payload-field.trail.jsonl | 2 + ...t-message-attachments-multiple.trail.jsonl | 3 + .../agent-message-attachments.trail.jsonl | 3 + .../valid/agent-message-usage.trail.jsonl | 3 + .../valid/agent-thinking-usage.trail.jsonl | 2 + ...bility-change-initial-snapshot.trail.jsonl | 2 + .../valid/capability-change.trail.jsonl | 5 + .../command-invoke-extension-kind.trail.jsonl | 2 + .../valid/command-invoke-full.trail.jsonl | 2 + .../valid/command-invoke-minimal.trail.jsonl | 2 + .../valid/command-invoke-plugin.trail.jsonl | 2 + ...mmand-invoke-result-action-ext.trail.jsonl | 2 + .../valid/command-invoke-slash.trail.jsonl | 2 + 
...xt-compact-provenance-only-ids.trail.jsonl | 2 + ...t-compact-replaced-message-ids.trail.jsonl | 4 + .../valid/linear-with-parent-ids.trail.jsonl | 3 + .../valid/minimal-linear.trail.jsonl | 3 + .../minimal-with-content-hash.trail.jsonl | 3 + .../valid/multi-segment-seg1.trail.jsonl | 2 + .../valid/multi-segment-seg2.trail.jsonl | 2 + .../multi-session-fork-from-chain.trail.jsonl | 4 + .../multi-session-two-no-envelope.trail.jsonl | 4 + .../multi-session-with-envelope.trail.jsonl | 5 + .../multiple-session-end-events.trail.jsonl | 4 + .../valid/redaction-count-meta.trail.jsonl | 2 + ...l-message-id-references-header.trail.jsonl | 2 + ...sion-end-with-final-message-id.trail.jsonl | 4 + .../session-header-metadata-base.trail.jsonl | 2 + ...ata-update-agent-model-default.trail.jsonl | 2 + .../session-metadata-update-name.trail.jsonl | 2 + .../session-metadata-update-tags.trail.jsonl | 2 + ...ion-metadata-update-vcs-branch.trail.jsonl | 2 + ...session-metadata-update-vendor.trail.jsonl | 2 + ...pec-example-incomplete-session.trail.jsonl | 4 + .../valid/spec-example-mcp-call.trail.jsonl | 4 + ...spec-example-synthesized-event.trail.jsonl | 5 + ...ple-tool-call-semantic-pairing.trail.jsonl | 5 + ...e-tool-result-fallback-pairing.trail.jsonl | 4 + ...-example-tree-abandoned-branch.trail.jsonl | 6 + .../streaming-finalized-clean.trail.jsonl | 4 + .../valid/streaming-open.trail.jsonl | 3 + .../valid/system-event-vcs-commit.trail.jsonl | 4 + .../tool-call-aborted-closes-call.trail.jsonl | 3 + ...aborted-extension-scope-reason.trail.jsonl | 2 + .../tool-call-aborted-turn-scope.trail.jsonl | 2 + .../valid/tool-call-file-list.trail.jsonl | 3 + .../valid/tool-call-file-patch.trail.jsonl | 3 + .../tool-call-matched-by-for-id.trail.jsonl | 3 + ...ll-matched-by-semantic-call-id.trail.jsonl | 3 + ...l-matched-same-parent-siblings.trail.jsonl | 4 + ...tool-call-matched-sequentially.trail.jsonl | 3 + .../valid/tool-call-usage.trail.jsonl | 3 + 
...sult-attachments-with-mcp-meta.trail.jsonl | 3 + .../valid/tool-result-attachments.trail.jsonl | 3 + ...d-targets-header-falls-through.trail.jsonl | 3 + .../tool-result-meta-file-read.trail.jsonl | 3 + .../tool-result-meta-mcp-call.trail.jsonl | 3 + ...tool-result-meta-shell-command.trail.jsonl | 3 + ...sult-meta-toplevel-vendor-kind.trail.jsonl | 3 + ...-result-meta-unregistered-kind.trail.jsonl | 3 + ...l-result-meta-vendor-extension.trail.jsonl | 3 + ...l-result-output-size-truncated.trail.jsonl | 3 + ...call-suppressed-by-session-end.trail.jsonl | 3 + ...ppressed-by-session-terminated.trail.jsonl | 3 + .../user-message-origin-injected.trail.jsonl | 2 + ...uery-duplicate-labels-with-ids.trail.jsonl | 2 + .../valid/vcs-unborn-head.trail.jsonl | 1 + ...with-trail-envelope-all-fields.trail.jsonl | 3 + .../with-trail-envelope-and-hash.trail.jsonl | 3 + .../valid/with-trail-envelope.trail.jsonl | 4 + schema/draft.json | 1915 ++++++++ schema/v0.1.0.json | 1915 ++++++++ spec.md | 132 + spec/draft/01-motivation.md | 8 + spec/draft/02-goals-and-non-goals.md | 44 + spec/draft/03-at-a-glance.md | 14 + spec/draft/04-terminology.md | 29 + spec/draft/05-file-format.md | 34 + spec/draft/06-versioning.md | 32 + ...entity-artifacts-and-content-addressing.md | 65 + spec/draft/08-the-trail-envelope.md | 78 + spec/draft/09-the-session-header.md | 248 ++ spec/draft/10-events.md | 916 ++++ spec/draft/11-canonical-tool-taxonomy.md | 90 + spec/draft/12-vendor-extensions.md | 39 + spec/draft/13-tree-and-branching.md | 18 + spec/draft/14-canonical-agent-registry.md | 12 + ...truncation-overflow-and-raw-source-size.md | 50 + spec/draft/16-redaction.md | 31 + spec/draft/17-security-considerations.md | 22 + spec/draft/18-validation.md | 172 + spec/draft/19-formal-schema.md | 8 + spec/draft/20-examples.md | 14 + spec/draft/README.md | 37 + spec/draft/appendix-a-minimal-valid-record.md | 17 + .../appendix-b-content-hash-worked-example.md | 38 + spec/draft/changelog.md | 23 + 
spec/draft/license.md | 7 + spec/v0.1.0/01-motivation.md | 8 + spec/v0.1.0/02-goals-and-non-goals.md | 44 + spec/v0.1.0/03-at-a-glance.md | 14 + spec/v0.1.0/04-terminology.md | 29 + spec/v0.1.0/05-file-format.md | 34 + spec/v0.1.0/06-versioning.md | 32 + ...entity-artifacts-and-content-addressing.md | 65 + spec/v0.1.0/08-the-trail-envelope.md | 78 + spec/v0.1.0/09-the-session-header.md | 248 ++ spec/v0.1.0/10-events.md | 916 ++++ spec/v0.1.0/11-canonical-tool-taxonomy.md | 90 + spec/v0.1.0/12-vendor-extensions.md | 39 + spec/v0.1.0/13-tree-and-branching.md | 18 + spec/v0.1.0/14-canonical-agent-registry.md | 12 + ...truncation-overflow-and-raw-source-size.md | 50 + spec/v0.1.0/16-redaction.md | 31 + spec/v0.1.0/17-security-considerations.md | 22 + spec/v0.1.0/18-validation.md | 172 + spec/v0.1.0/19-formal-schema.md | 8 + spec/v0.1.0/20-examples.md | 14 + spec/v0.1.0/README.md | 37 + .../v0.1.0/appendix-a-minimal-valid-record.md | 17 + .../appendix-b-content-hash-worked-example.md | 38 + spec/v0.1.0/changelog.md | 23 + spec/v0.1.0/license.md | 7 + 225 files changed, 12753 insertions(+) create mode 100644 fixtures/validation/README.md create mode 100644 fixtures/validation/hash-mismatch/content-hash-invalid-hex.trail.jsonl create mode 100644 fixtures/validation/hash-mismatch/content-hash-mismatch.trail.jsonl create mode 100644 fixtures/validation/hash-mismatch/trail-envelope-content-hash-mismatch.trail.jsonl create mode 100644 fixtures/validation/hash-vectors/absent-content-hash.trail.jsonl create mode 100644 fixtures/validation/hash-vectors/envelope-two-tier.trail.jsonl create mode 100644 fixtures/validation/hash-vectors/jcs-stress.trail.jsonl create mode 100644 fixtures/validation/hash-vectors/minimal-pending-roundtrip.trail.jsonl create mode 100644 fixtures/validation/hash-vectors/multi-session-slice.trail.jsonl create mode 100644 fixtures/validation/hash-vectors/replacement-char.trail.jsonl create mode 100644 
fixtures/validation/hash-vectors/segment-chain-seq1.trail.jsonl create mode 100644 fixtures/validation/hash-vectors/segment-chain-seq2.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/ambiguous-sequential-pairing-with-session-end.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/ambiguous-sequential-pairing.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/branch-point-unknown-from-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/branch-summary-unknown-abandoned-branch-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/duplicate-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/duplicate-option-labels-mixed-ids.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/duplicate-option-labels.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/duplicate-segment-seq.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/duplicate-tool-result-for-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/envelope-not-at-line-1.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/envelope-sessions-manifest-empty.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/envelope-sessions-manifest-multiple.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/envelope-without-session-header.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/header-has-parent-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/multi-session-cross-group-parent.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/multi-session-orphan-prelude.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/multiple-envelopes.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/non-interoperable-number.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/non-monotonic-event-ts.trail.jsonl create mode 100644 
fixtures/validation/invalid-graph/out-of-order-segment-seq.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/parent-cycle.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/parse-fidelity-drift.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/sequential-pairing-stays-in-branch.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/sequential-pairing-stays-in-sibling-branch.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/sequential-pairing-stays-in-subagent-sibling-branch.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/session-end-forward-final-message-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/session-end-unknown-final-message-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/stream-open-with-content-hash.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/tool-args-unredacted-secret.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/tool-call-aborted-turn-scope-does-not-close-call.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/tool-result-for-id-wins-over-semantic-conflict.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/unknown-parent-id.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/unmatched-tool-call-at-eof.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/unmatched-tool-call-partial-suppression.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/unmatched-tool-call-session-terminated-without-open-call-ids.trail.jsonl create mode 100644 fixtures/validation/invalid-graph/user-query-response-unknown-for-id.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/agent-message-attachment-bad-uri.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/agent-message-usage-extra-field.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/agent-message-usage-missing-output.trail.jsonl create mode 
100644 fixtures/validation/invalid-schema/agent-message-usage-missing-required.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/agent-message-usage-zero-context-window.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/agent-thinking-usage-missing-output.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/capability-change-bad-reason.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/capability-change-bad-scope.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/capability-change-empty.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/command-invoke-bad-kind.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/command-invoke-bad-result-action.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/command-invoke-missing-kind.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/command-invoke-missing-name.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/envelope-missing-producer.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/header-wrong-schema-version.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/redaction-count-non-integer.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/segment-seq-1-with-prev-hash.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/segment-seq-2-without-prev-hash.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/segment-seq-2-without-session-uid.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/segment-seq-zero.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/session-end-final-message-id-null.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/session-metadata-update-bad-field-cwd.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/session-metadata-update-bad-reason.trail.jsonl create mode 100644 
fixtures/validation/invalid-schema/session-metadata-update-bad-tags-value.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/session-metadata-update-bad-worktree.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/session-uid-not-ulid-or-uuid.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-aborted-bad-reason.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-aborted-tool-scope-missing-for-id.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-aborted-turn-scope-with-for-id.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-file-list-missing-path.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-file-patch-empty-files.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-file-patch-file-missing-diff.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-missing-args-path.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-call-usage-missing-output.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-result-attachment-extra-field.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-result-meta-file-read-range-wrong-length.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-result-meta-mcp-call-block-missing-type.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-result-meta-shell-command-extra-field.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/tool-result-truncated-missing-output-size.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/user-message-missing-text.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/user-message-non-string-text.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/vcs-null-revision-with-empty-branch.trail.jsonl create mode 100644 
fixtures/validation/invalid-schema/vcs-null-revision-with-head-commit.trail.jsonl create mode 100644 fixtures/validation/invalid-schema/vcs-null-revision-without-branch.trail.jsonl create mode 100644 fixtures/validation/manifest.json create mode 100644 fixtures/validation/manifest.schema.json create mode 100644 fixtures/validation/reader-tolerant/capability-change-unknown-payload-field.trail.jsonl create mode 100644 fixtures/validation/reader-tolerant/ill-formed-string.trail.jsonl create mode 100644 fixtures/validation/reader-tolerant/nested-unknown-payload-field.trail.jsonl create mode 100644 fixtures/validation/reader-tolerant/patch-compatible-schema-version.trail.jsonl create mode 100644 fixtures/validation/reader-tolerant/reserved-future-event-type.trail.jsonl create mode 100644 fixtures/validation/reader-tolerant/tool-result-meta-registered-extra-field.trail.jsonl create mode 100644 fixtures/validation/reader-tolerant/unknown-event-type.trail.jsonl create mode 100644 fixtures/validation/reader-tolerant/unknown-payload-field.trail.jsonl create mode 100644 fixtures/validation/valid/agent-message-attachments-multiple.trail.jsonl create mode 100644 fixtures/validation/valid/agent-message-attachments.trail.jsonl create mode 100644 fixtures/validation/valid/agent-message-usage.trail.jsonl create mode 100644 fixtures/validation/valid/agent-thinking-usage.trail.jsonl create mode 100644 fixtures/validation/valid/capability-change-initial-snapshot.trail.jsonl create mode 100644 fixtures/validation/valid/capability-change.trail.jsonl create mode 100644 fixtures/validation/valid/command-invoke-extension-kind.trail.jsonl create mode 100644 fixtures/validation/valid/command-invoke-full.trail.jsonl create mode 100644 fixtures/validation/valid/command-invoke-minimal.trail.jsonl create mode 100644 fixtures/validation/valid/command-invoke-plugin.trail.jsonl create mode 100644 fixtures/validation/valid/command-invoke-result-action-ext.trail.jsonl create mode 100644 
fixtures/validation/valid/command-invoke-slash.trail.jsonl create mode 100644 fixtures/validation/valid/context-compact-provenance-only-ids.trail.jsonl create mode 100644 fixtures/validation/valid/context-compact-replaced-message-ids.trail.jsonl create mode 100644 fixtures/validation/valid/linear-with-parent-ids.trail.jsonl create mode 100644 fixtures/validation/valid/minimal-linear.trail.jsonl create mode 100644 fixtures/validation/valid/minimal-with-content-hash.trail.jsonl create mode 100644 fixtures/validation/valid/multi-segment-seg1.trail.jsonl create mode 100644 fixtures/validation/valid/multi-segment-seg2.trail.jsonl create mode 100644 fixtures/validation/valid/multi-session-fork-from-chain.trail.jsonl create mode 100644 fixtures/validation/valid/multi-session-two-no-envelope.trail.jsonl create mode 100644 fixtures/validation/valid/multi-session-with-envelope.trail.jsonl create mode 100644 fixtures/validation/valid/multiple-session-end-events.trail.jsonl create mode 100644 fixtures/validation/valid/redaction-count-meta.trail.jsonl create mode 100644 fixtures/validation/valid/session-end-final-message-id-references-header.trail.jsonl create mode 100644 fixtures/validation/valid/session-end-with-final-message-id.trail.jsonl create mode 100644 fixtures/validation/valid/session-header-metadata-base.trail.jsonl create mode 100644 fixtures/validation/valid/session-metadata-update-agent-model-default.trail.jsonl create mode 100644 fixtures/validation/valid/session-metadata-update-name.trail.jsonl create mode 100644 fixtures/validation/valid/session-metadata-update-tags.trail.jsonl create mode 100644 fixtures/validation/valid/session-metadata-update-vcs-branch.trail.jsonl create mode 100644 fixtures/validation/valid/session-metadata-update-vendor.trail.jsonl create mode 100644 fixtures/validation/valid/spec-example-incomplete-session.trail.jsonl create mode 100644 fixtures/validation/valid/spec-example-mcp-call.trail.jsonl create mode 100644 
fixtures/validation/valid/spec-example-synthesized-event.trail.jsonl create mode 100644 fixtures/validation/valid/spec-example-tool-call-semantic-pairing.trail.jsonl create mode 100644 fixtures/validation/valid/spec-example-tool-result-fallback-pairing.trail.jsonl create mode 100644 fixtures/validation/valid/spec-example-tree-abandoned-branch.trail.jsonl create mode 100644 fixtures/validation/valid/streaming-finalized-clean.trail.jsonl create mode 100644 fixtures/validation/valid/streaming-open.trail.jsonl create mode 100644 fixtures/validation/valid/system-event-vcs-commit.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-aborted-closes-call.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-aborted-extension-scope-reason.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-aborted-turn-scope.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-file-list.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-file-patch.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-matched-by-for-id.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-matched-by-semantic-call-id.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-matched-same-parent-siblings.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-matched-sequentially.trail.jsonl create mode 100644 fixtures/validation/valid/tool-call-usage.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-attachments-with-mcp-meta.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-attachments.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-for-id-targets-header-falls-through.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-meta-file-read.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-meta-mcp-call.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-meta-shell-command.trail.jsonl create 
mode 100644 fixtures/validation/valid/tool-result-meta-toplevel-vendor-kind.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-meta-unregistered-kind.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-meta-vendor-extension.trail.jsonl create mode 100644 fixtures/validation/valid/tool-result-output-size-truncated.trail.jsonl create mode 100644 fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-end.trail.jsonl create mode 100644 fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-terminated.trail.jsonl create mode 100644 fixtures/validation/valid/user-message-origin-injected.trail.jsonl create mode 100644 fixtures/validation/valid/user-query-duplicate-labels-with-ids.trail.jsonl create mode 100644 fixtures/validation/valid/vcs-unborn-head.trail.jsonl create mode 100644 fixtures/validation/valid/with-trail-envelope-all-fields.trail.jsonl create mode 100644 fixtures/validation/valid/with-trail-envelope-and-hash.trail.jsonl create mode 100644 fixtures/validation/valid/with-trail-envelope.trail.jsonl create mode 100644 schema/draft.json create mode 100644 schema/v0.1.0.json create mode 100644 spec.md create mode 100644 spec/draft/01-motivation.md create mode 100644 spec/draft/02-goals-and-non-goals.md create mode 100644 spec/draft/03-at-a-glance.md create mode 100644 spec/draft/04-terminology.md create mode 100644 spec/draft/05-file-format.md create mode 100644 spec/draft/06-versioning.md create mode 100644 spec/draft/07-identity-artifacts-and-content-addressing.md create mode 100644 spec/draft/08-the-trail-envelope.md create mode 100644 spec/draft/09-the-session-header.md create mode 100644 spec/draft/10-events.md create mode 100644 spec/draft/11-canonical-tool-taxonomy.md create mode 100644 spec/draft/12-vendor-extensions.md create mode 100644 spec/draft/13-tree-and-branching.md create mode 100644 spec/draft/14-canonical-agent-registry.md create mode 100644 
spec/draft/15-truncation-overflow-and-raw-source-size.md create mode 100644 spec/draft/16-redaction.md create mode 100644 spec/draft/17-security-considerations.md create mode 100644 spec/draft/18-validation.md create mode 100644 spec/draft/19-formal-schema.md create mode 100644 spec/draft/20-examples.md create mode 100644 spec/draft/README.md create mode 100644 spec/draft/appendix-a-minimal-valid-record.md create mode 100644 spec/draft/appendix-b-content-hash-worked-example.md create mode 100644 spec/draft/changelog.md create mode 100644 spec/draft/license.md create mode 100644 spec/v0.1.0/01-motivation.md create mode 100644 spec/v0.1.0/02-goals-and-non-goals.md create mode 100644 spec/v0.1.0/03-at-a-glance.md create mode 100644 spec/v0.1.0/04-terminology.md create mode 100644 spec/v0.1.0/05-file-format.md create mode 100644 spec/v0.1.0/06-versioning.md create mode 100644 spec/v0.1.0/07-identity-artifacts-and-content-addressing.md create mode 100644 spec/v0.1.0/08-the-trail-envelope.md create mode 100644 spec/v0.1.0/09-the-session-header.md create mode 100644 spec/v0.1.0/10-events.md create mode 100644 spec/v0.1.0/11-canonical-tool-taxonomy.md create mode 100644 spec/v0.1.0/12-vendor-extensions.md create mode 100644 spec/v0.1.0/13-tree-and-branching.md create mode 100644 spec/v0.1.0/14-canonical-agent-registry.md create mode 100644 spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md create mode 100644 spec/v0.1.0/16-redaction.md create mode 100644 spec/v0.1.0/17-security-considerations.md create mode 100644 spec/v0.1.0/18-validation.md create mode 100644 spec/v0.1.0/19-formal-schema.md create mode 100644 spec/v0.1.0/20-examples.md create mode 100644 spec/v0.1.0/README.md create mode 100644 spec/v0.1.0/appendix-a-minimal-valid-record.md create mode 100644 spec/v0.1.0/appendix-b-content-hash-worked-example.md create mode 100644 spec/v0.1.0/changelog.md create mode 100644 spec/v0.1.0/license.md diff --git a/README.md b/README.md index c4bbf6e..94819a5 100644 --- 
a/README.md +++ b/README.md @@ -4,6 +4,15 @@ Open interchange format contract for coding-agent session trails. This repository contains the Agent Trail specification, JSON Schema artifacts, fixtures, and format ADRs. +## Repository Map + +- [`spec/v0.1.0/`](./spec/v0.1.0/) - frozen v0.1.0 specification content split by major section. +- [`spec/draft/`](./spec/draft/) - current draft specification content. +- [`spec.md`](./spec.md) - compatibility index for legacy monorepo `spec.md` anchors. +- [`schema/v0.1.0.json`](./schema/v0.1.0.json) - frozen v0.1.0 JSON Schema artifact. +- [`schema/draft.json`](./schema/draft.json) - current draft JSON Schema artifact. +- [`fixtures/validation/`](./fixtures/validation/) - validation conformance fixture corpus and manifest. + ## Related Repositories Agent Trail is split across focused repositories: diff --git a/fixtures/validation/README.md b/fixtures/validation/README.md new file mode 100644 index 0000000..fb225dd --- /dev/null +++ b/fixtures/validation/README.md @@ -0,0 +1,221 @@ +# Validation fixtures + +Committed synthetic trail files exercising the Agent Trail validation paths. All fixtures are reusable across `@agent-trail/core`, `@agent-trail/cli`, and future adapter tests. + +Fixture policy for the workspace lives in [`docs/parser-source-matrix.md`](../../../docs/parser-source-matrix.md#fixture-policy): committed fixtures are synthetic or redacted; real local sessions stay out of git and are loaded only by opt-in ignored tests. + +## Conventions + +- File extension: `.trail.jsonl` (spec.md §5.1). +- Synthetic data only. No real session content, no PII, no secrets. +- Synthetic ids are deterministic spec-shaped values such as `01HSESS...` and `01HEVTA...`; synthetic agent: `codex-cli`; synthetic timestamps anchored at `2026-05-17T14:00:00.000Z`. +- One scenario per file. Filename is the scenario in kebab-case. 
+- Scenarios are grouped by validation layer (`valid/`, `invalid-schema/`, `invalid-graph/`, `hash-mismatch/`, `hash-vectors/`, `reader-tolerant/`). +- Expected diagnostics are documented below. Tests in `packages/core/src/fixtures.test.ts` and `packages/cli/src/validate.test.ts` assert them. + +## Loading + +```ts +const FIXTURES = new URL("../../../tests/fixtures/validation/", import.meta.url); +const loadFixture = (rel: string) => Bun.file(new URL(rel, FIXTURES)).text(); +``` + +For CLI tests that need a real on-disk path: + +```ts +import { fileURLToPath } from "node:url"; +const path = fileURLToPath(new URL("valid/minimal-linear.trail.jsonl", FIXTURES)); +``` + + +## Scenarios + +This section is generated from `manifest.json`; run `bun run sync:conformance` after fixture or expectation changes. + +### hash-mismatch/ + +- `hash-mismatch/content-hash-invalid-hex.trail.jsonl` — classes: W, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `hash-mismatch/content-hash-mismatch.trail.jsonl` — classes: W, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `hash-mismatch/trail-envelope-content-hash-mismatch.trail.jsonl` — classes: W, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) + +### hash-vectors/ + +- `hash-vectors/absent-content-hash.trail.jsonl` — classes: W, R2, strict: valid, tolerant: clean +- `hash-vectors/envelope-two-tier.trail.jsonl` — classes: W, R2, strict: valid, tolerant: clean +- `hash-vectors/jcs-stress.trail.jsonl` — classes: W, R2, strict: valid, tolerant: clean +- `hash-vectors/minimal-pending-roundtrip.trail.jsonl` — classes: W, R2, strict: valid, tolerant: clean +- `hash-vectors/multi-session-slice.trail.jsonl` — classes: W, R2, strict: valid, tolerant: clean +- `hash-vectors/replacement-char.trail.jsonl` — classes: W, R2, strict: valid, tolerant: clean +- `hash-vectors/segment-chain-seq1.trail.jsonl` — classes: W, R2, strict: valid, tolerant: clean +- `hash-vectors/segment-chain-seq2.trail.jsonl` — 
classes: W, R2, strict: valid, tolerant: clean + +### invalid-graph/ + +- `invalid-graph/ambiguous-sequential-pairing-with-session-end.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/ambiguous-sequential-pairing.trail.jsonl` — classes: W, R1, R2, strict: valid with 2 diagnostic(s), tolerant: 2 diagnostic(s) +- `invalid-graph/branch-point-unknown-from-id.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/branch-summary-unknown-abandoned-branch-id.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/duplicate-id.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-graph/duplicate-option-labels-mixed-ids.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/duplicate-option-labels.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/duplicate-segment-seq.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/duplicate-tool-result-for-id.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/envelope-not-at-line-1.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-graph/envelope-sessions-manifest-empty.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/envelope-sessions-manifest-multiple.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/envelope-without-session-header.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- 
`invalid-graph/header-has-parent-id.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-graph/multi-session-cross-group-parent.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-graph/multi-session-orphan-prelude.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-graph/multiple-envelopes.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-graph/non-interoperable-number.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/non-monotonic-event-ts.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/out-of-order-segment-seq.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/parent-cycle.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-graph/parse-fidelity-drift.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-graph/sequential-pairing-stays-in-branch.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/sequential-pairing-stays-in-sibling-branch.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/sequential-pairing-stays-in-subagent-sibling-branch.trail.jsonl` — classes: W, R1, R2, strict: invalid with 3 assertion(s), tolerant: 4 diagnostic(s) +- `invalid-graph/session-end-forward-final-message-id.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/session-end-unknown-final-message-id.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 
diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/stream-open-with-content-hash.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-graph/tool-args-unredacted-secret.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/tool-call-aborted-turn-scope-does-not-close-call.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/tool-result-for-id-wins-over-semantic-conflict.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/unknown-parent-id.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-graph/unmatched-tool-call-at-eof.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/unmatched-tool-call-partial-suppression.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/unmatched-tool-call-session-terminated-without-open-call-ids.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) +- `invalid-graph/user-query-response-unknown-for-id.trail.jsonl` — classes: W, R1, R2, strict: valid with 1 diagnostic(s), tolerant: 1 diagnostic(s) + +### invalid-schema/ + +- `invalid-schema/agent-message-attachment-bad-uri.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/agent-message-usage-extra-field.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/agent-message-usage-missing-output.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/agent-message-usage-missing-required.trail.jsonl` — classes: W, R1, R2, strict: invalid 
with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/agent-message-usage-zero-context-window.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/agent-thinking-usage-missing-output.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/capability-change-bad-reason.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/capability-change-bad-scope.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/capability-change-empty.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/command-invoke-bad-kind.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/command-invoke-bad-result-action.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/command-invoke-missing-kind.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/command-invoke-missing-name.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/envelope-missing-producer.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/header-wrong-schema-version.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-schema/redaction-count-non-integer.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/segment-seq-1-with-prev-hash.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- 
`invalid-schema/segment-seq-2-without-prev-hash.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/segment-seq-2-without-session-uid.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/segment-seq-zero.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/session-end-final-message-id-null.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/session-metadata-update-bad-field-cwd.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/session-metadata-update-bad-reason.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/session-metadata-update-bad-tags-value.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/session-metadata-update-bad-worktree.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/session-uid-not-ulid-or-uuid.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-call-aborted-bad-reason.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-call-aborted-tool-scope-missing-for-id.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-call-aborted-turn-scope-with-for-id.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-call-file-list-missing-path.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- 
`invalid-schema/tool-call-file-patch-empty-files.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-schema/tool-call-file-patch-file-missing-diff.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-schema/tool-call-missing-args-path.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-schema/tool-call-usage-missing-output.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-result-attachment-extra-field.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 2 diagnostic(s) +- `invalid-schema/tool-result-meta-file-read-range-wrong-length.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-result-meta-mcp-call-block-missing-type.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-result-meta-shell-command-extra-field.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/tool-result-truncated-missing-output-size.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/user-message-missing-text.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/user-message-non-string-text.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/vcs-null-revision-with-empty-branch.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `invalid-schema/vcs-null-revision-with-head-commit.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- 
`invalid-schema/vcs-null-revision-without-branch.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) + +### reader-tolerant/ + +- `reader-tolerant/capability-change-unknown-payload-field.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `reader-tolerant/ill-formed-string.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `reader-tolerant/nested-unknown-payload-field.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `reader-tolerant/patch-compatible-schema-version.trail.jsonl` — classes: W, R1, R2, strict: invalid with 2 assertion(s), tolerant: 1 diagnostic(s) +- `reader-tolerant/reserved-future-event-type.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `reader-tolerant/tool-result-meta-registered-extra-field.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `reader-tolerant/unknown-event-type.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) +- `reader-tolerant/unknown-payload-field.trail.jsonl` — classes: W, R1, R2, strict: invalid with 1 assertion(s), tolerant: 1 diagnostic(s) + +### valid/ + +- `valid/agent-message-attachments-multiple.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/agent-message-attachments.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/agent-message-usage.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/agent-thinking-usage.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/capability-change-initial-snapshot.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/capability-change.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- 
`valid/command-invoke-extension-kind.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/command-invoke-full.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/command-invoke-minimal.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/command-invoke-plugin.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/command-invoke-result-action-ext.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/command-invoke-slash.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/context-compact-provenance-only-ids.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/context-compact-replaced-message-ids.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/linear-with-parent-ids.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/minimal-linear.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/minimal-with-content-hash.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/multi-segment-seg1.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/multi-segment-seg2.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/multi-session-fork-from-chain.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/multi-session-two-no-envelope.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/multi-session-with-envelope.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/multiple-session-end-events.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/redaction-count-meta.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/session-end-final-message-id-references-header.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- 
`valid/session-end-with-final-message-id.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/session-header-metadata-base.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/session-metadata-update-agent-model-default.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/session-metadata-update-name.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/session-metadata-update-tags.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/session-metadata-update-vcs-branch.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/session-metadata-update-vendor.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/spec-example-incomplete-session.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/spec-example-mcp-call.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/spec-example-synthesized-event.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/spec-example-tool-call-semantic-pairing.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/spec-example-tool-result-fallback-pairing.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/spec-example-tree-abandoned-branch.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/streaming-finalized-clean.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/streaming-open.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/system-event-vcs-commit.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-aborted-closes-call.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-aborted-extension-scope-reason.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-aborted-turn-scope.trail.jsonl` — classes: W, 
R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-file-list.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-file-patch.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-matched-by-for-id.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-matched-by-semantic-call-id.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-matched-same-parent-siblings.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-matched-sequentially.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-call-usage.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-attachments-with-mcp-meta.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-attachments.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-for-id-targets-header-falls-through.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-meta-file-read.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-meta-mcp-call.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-meta-shell-command.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-meta-toplevel-vendor-kind.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-meta-unregistered-kind.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-meta-vendor-extension.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/tool-result-output-size-truncated.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/unmatched-tool-call-suppressed-by-session-end.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- 
`valid/unmatched-tool-call-suppressed-by-session-terminated.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/user-message-origin-injected.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/user-query-duplicate-labels-with-ids.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/vcs-unborn-head.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/with-trail-envelope-all-fields.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/with-trail-envelope-and-hash.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean +- `valid/with-trail-envelope.trail.jsonl` — classes: W, R1, R2, strict: valid, tolerant: clean + + diff --git a/fixtures/validation/hash-mismatch/content-hash-invalid-hex.trail.jsonl b/fixtures/validation/hash-mismatch/content-hash-invalid-hex.trail.jsonl new file mode 100644 index 0000000..bd2aeaa --- /dev/null +++ b/fixtures/validation/hash-mismatch/content-hash-invalid-hex.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"},"content_hash":"not-a-valid-sha256-hex-digest"} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} diff --git a/fixtures/validation/hash-mismatch/content-hash-mismatch.trail.jsonl b/fixtures/validation/hash-mismatch/content-hash-mismatch.trail.jsonl new file mode 100644 index 0000000..e6e3cff --- /dev/null +++ b/fixtures/validation/hash-mismatch/content-hash-mismatch.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"},"content_hash":"0000000000000000000000000000000000000000000000000000000000000000"} 
+{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/hash-mismatch/trail-envelope-content-hash-mismatch.trail.jsonl b/fixtures/validation/hash-mismatch/trail-envelope-content-hash-mismatch.trail.jsonl new file mode 100644 index 0000000..0443fce --- /dev/null +++ b/fixtures/validation/hash-mismatch/trail-envelope-content-hash-mismatch.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0","content_hash":"0000000000000000000000000000000000000000000000000000000000000000"} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} diff --git a/fixtures/validation/hash-vectors/absent-content-hash.trail.jsonl b/fixtures/validation/hash-vectors/absent-content-hash.trail.jsonl new file mode 100644 index 0000000..46b4782 --- /dev/null +++ b/fixtures/validation/hash-vectors/absent-content-hash.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESSABSENTHASH000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"00000000-0000-4000-8000-000000000288","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hashing inserts the pending sentinel when content_hash is absent"}} diff --git a/fixtures/validation/hash-vectors/envelope-two-tier.trail.jsonl b/fixtures/validation/hash-vectors/envelope-two-tier.trail.jsonl new file mode 100644 index 0000000..f7ecce8 --- /dev/null +++ b/fixtures/validation/hash-vectors/envelope-two-tier.trail.jsonl @@ -0,0 +1,4 @@ 
+{"content_hash":"a55a96d3a19b624aa8f2e38337521046facf5a6f2a04c3f3c3f7a14880774443","id":"01HTRACE000000000000000201","meta":{"x-example/scope":"file"},"producer":"trail-cli/0.3.0","schema_version":"0.1.0","sessions":[{"agent":"codex-cli","id":"01HSESS0000000000000000201"}],"ts":"2026-05-17T14:10:00.000Z","type":"trail"} +{"agent":{"name":"codex-cli"},"content_hash":"44b16ddea9fd914442eaf8f21c90b6f0eeb913ab499eb668a9c0c6e06b988522","id":"01HSESS0000000000000000201","meta":{"x-example/scope":"session"},"schema_version":"0.1.0","ts":"2026-05-17T14:10:01.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000201","ts":"2026-05-17T14:10:05.000Z","payload":{"text":"hash the envelope"}} +{"type":"agent_message","id":"01HEVTA0000000000000000202","ts":"2026-05-17T14:10:07.000Z","payload":{"text":"session first, envelope second"}} diff --git a/fixtures/validation/hash-vectors/jcs-stress.trail.jsonl b/fixtures/validation/hash-vectors/jcs-stress.trail.jsonl new file mode 100644 index 0000000..2650f15 --- /dev/null +++ b/fixtures/validation/hash-vectors/jcs-stress.trail.jsonl @@ -0,0 +1,3 @@ +{"agent":{"name":"codex-cli"},"content_hash":"24b3a1d99f3e725092cd0126c7d9fb84246e467ec5d2d1fd416f462ae8a58e14","id":"01HSESS0000000000000000301","meta":{"x-example/ints":{"decimal":1.0,"exponent":1e3,"negative_zero":-0,"safe":9007199254740991,"zero":0},"x-example/text":{"controls":"line\nnext\tcell","escaped":"café","literal":"café"},"x-example/z":{"a":1,"b":2}},"schema_version":"0.1.0","ts":"2026-05-17T14:20:00.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000301","ts":"2026-05-17T14:20:05.000Z","payload":{"text":"tabs\tand\nnewlines"},"meta":{"x-example/order":{"z":"last","a":"first"}}} +{"type":"agent_message","id":"01HEVTA0000000000000000302","ts":"2026-05-17T14:20:07.000Z","payload":{"text":"numbers and strings canonicalized"}} diff --git a/fixtures/validation/hash-vectors/minimal-pending-roundtrip.trail.jsonl 
b/fixtures/validation/hash-vectors/minimal-pending-roundtrip.trail.jsonl new file mode 100644 index 0000000..2b8ab69 --- /dev/null +++ b/fixtures/validation/hash-vectors/minimal-pending-roundtrip.trail.jsonl @@ -0,0 +1,3 @@ +{"agent":{"name":"codex-cli"},"content_hash":"f215ed334d3928e1abde804f2c4a870431b18d4fa7d755ec94d94be2a6ddd06e","id":"01HSESS0000000000000000101","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000101","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000102","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/hash-vectors/multi-session-slice.trail.jsonl b/fixtures/validation/hash-vectors/multi-session-slice.trail.jsonl new file mode 100644 index 0000000..96aca94 --- /dev/null +++ b/fixtures/validation/hash-vectors/multi-session-slice.trail.jsonl @@ -0,0 +1,6 @@ +{"agent":{"name":"codex-cli"},"content_hash":"0805ade157038a1cbb2895c55383b39b5a42aaa7d6ccc164174c0b3346d8fc14","id":"01HSESS0000000000000000401","schema_version":"0.1.0","ts":"2026-05-17T14:30:00.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000401","ts":"2026-05-17T14:30:05.000Z","payload":{"text":"first group"}} +{"type":"agent_message","id":"01HEVTA0000000000000000402","ts":"2026-05-17T14:30:07.000Z","payload":{"text":"first reply"}} +{"agent":{"name":"codex-cli"},"content_hash":"f7fe8ccdeaeddf70b868838614730c0a613d937bafeac48a42bb585e6df3ead7","fork_from":{"session_id":"01HSESS0000000000000000401"},"id":"01HSESS0000000000000000402","schema_version":"0.1.0","ts":"2026-05-17T14:31:00.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000403","ts":"2026-05-17T14:31:05.000Z","payload":{"text":"second group"}} +{"type":"agent_message","id":"01HEVTA0000000000000000404","ts":"2026-05-17T14:31:07.000Z","payload":{"text":"second reply"}} diff --git 
a/fixtures/validation/hash-vectors/replacement-char.trail.jsonl b/fixtures/validation/hash-vectors/replacement-char.trail.jsonl new file mode 100644 index 0000000..3a45d6e --- /dev/null +++ b/fixtures/validation/hash-vectors/replacement-char.trail.jsonl @@ -0,0 +1,3 @@ +{"agent":{"name":"codex-cli"},"content_hash":"594a06fa452fc097bce145be95dd614ddb81b6adf8fe2baf80cbe6e13fcaf4f2","id":"01HSESS0000000000000000501","schema_version":"0.1.0","ts":"2026-05-17T14:40:00.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000501","ts":"2026-05-17T14:40:05.000Z","payload":{"text":"decoded byte: �"}} +{"type":"agent_message","id":"01HEVTA0000000000000000502","ts":"2026-05-17T14:40:07.000Z","payload":{"text":"replacement character preserved"}} diff --git a/fixtures/validation/hash-vectors/segment-chain-seq1.trail.jsonl b/fixtures/validation/hash-vectors/segment-chain-seq1.trail.jsonl new file mode 100644 index 0000000..2fd1f7a --- /dev/null +++ b/fixtures/validation/hash-vectors/segment-chain-seq1.trail.jsonl @@ -0,0 +1,3 @@ +{"agent":{"name":"codex-cli"},"content_hash":"b12c68969624665898a4e3ec4dc5c0d06f7a7f305f8f7b16e347e18ccd2f6e67","id":"01HSESS0000000000000000601","schema_version":"0.1.0","segment":{"seq":1},"session_uid":"01HZZZZZZZZZZZZZZZZZZZZ601","ts":"2026-05-17T14:50:00.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000601","ts":"2026-05-17T14:50:05.000Z","payload":{"text":"segment one"}} +{"type":"agent_message","id":"01HEVTA0000000000000000602","ts":"2026-05-17T14:50:07.000Z","payload":{"text":"continued later"}} diff --git a/fixtures/validation/hash-vectors/segment-chain-seq2.trail.jsonl b/fixtures/validation/hash-vectors/segment-chain-seq2.trail.jsonl new file mode 100644 index 0000000..80f703a --- /dev/null +++ b/fixtures/validation/hash-vectors/segment-chain-seq2.trail.jsonl @@ -0,0 +1,3 @@ 
+{"agent":{"name":"codex-cli"},"content_hash":"3a676114740d09c2620c1c5e106ea8066bfc3926e478faf5cabd0cedfff398ec","id":"01HSESS0000000000000000602","schema_version":"0.1.0","segment":{"prev_content_hash":"b12c68969624665898a4e3ec4dc5c0d06f7a7f305f8f7b16e347e18ccd2f6e67","seq":2},"session_uid":"01HZZZZZZZZZZZZZZZZZZZZ601","ts":"2026-05-17T14:55:00.000Z","type":"session"} +{"type":"user_message","id":"01HEVTA0000000000000000603","ts":"2026-05-17T14:55:05.000Z","payload":{"text":"segment two"}} +{"type":"agent_message","id":"01HEVTA0000000000000000604","ts":"2026-05-17T14:55:07.000Z","payload":{"text":"chain verified"}} diff --git a/fixtures/validation/invalid-graph/ambiguous-sequential-pairing-with-session-end.trail.jsonl b/fixtures/validation/invalid-graph/ambiguous-sequential-pairing-with-session-end.trail.jsonl new file mode 100644 index 0000000..f2e2b04 --- /dev/null +++ b/fixtures/validation/invalid-graph/ambiguous-sequential-pairing-with-session-end.trail.jsonl @@ -0,0 +1,5 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"tool":"file_read","args":{"path":"b.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:07.000Z","payload":{"ok":true,"output":"b"}} +{"type":"session_end","id":"01HEVTA0000000000000000004","ts":"2026-05-17T14:00:08.000Z","payload":{"reason":"complete"}} diff --git a/fixtures/validation/invalid-graph/ambiguous-sequential-pairing.trail.jsonl b/fixtures/validation/invalid-graph/ambiguous-sequential-pairing.trail.jsonl new file mode 100644 index 0000000..b039518 --- /dev/null +++ 
b/fixtures/validation/invalid-graph/ambiguous-sequential-pairing.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"tool":"file_read","args":{"path":"b.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:07.000Z","payload":{"ok":true,"output":"b"}} diff --git a/fixtures/validation/invalid-graph/branch-point-unknown-from-id.trail.jsonl b/fixtures/validation/invalid-graph/branch-point-unknown-from-id.trail.jsonl new file mode 100644 index 0000000..5086344 --- /dev/null +++ b/fixtures/validation/invalid-graph/branch-point-unknown-from-id.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"branch_point","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"from_id":"01HEVTA0000000000000000002"}} +{"type":"user_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:02.000Z","payload":{"text":"later"}} diff --git a/fixtures/validation/invalid-graph/branch-summary-unknown-abandoned-branch-id.trail.jsonl b/fixtures/validation/invalid-graph/branch-summary-unknown-abandoned-branch-id.trail.jsonl new file mode 100644 index 0000000..9b23777 --- /dev/null +++ b/fixtures/validation/invalid-graph/branch-summary-unknown-abandoned-branch-id.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"branch_summary","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"abandoned_branch_id":"01HEVTA0000000000000000002","summary":"abandoned path"}} +{"type":"user_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:02.000Z","payload":{"text":"later"}} diff --git a/fixtures/validation/invalid-graph/duplicate-id.trail.jsonl b/fixtures/validation/invalid-graph/duplicate-id.trail.jsonl new file mode 100644 index 0000000..543e0d0 --- /dev/null +++ b/fixtures/validation/invalid-graph/duplicate-id.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/invalid-graph/duplicate-option-labels-mixed-ids.trail.jsonl b/fixtures/validation/invalid-graph/duplicate-option-labels-mixed-ids.trail.jsonl new file mode 100644 index 0000000..b856dc2 --- /dev/null +++ b/fixtures/validation/invalid-graph/duplicate-option-labels-mixed-ids.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_query","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"questions":[{"id":"ship","question":"Ship it?","options":[{"id":"yes-safe","label":"yes"},{"label":"yes","description":"Label-only yes"}]}]}} diff --git a/fixtures/validation/invalid-graph/duplicate-option-labels.trail.jsonl b/fixtures/validation/invalid-graph/duplicate-option-labels.trail.jsonl new file mode 100644 index 0000000..de152aa --- /dev/null +++ 
b/fixtures/validation/invalid-graph/duplicate-option-labels.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_query","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"questions":[{"id":"ship","question":"Ship it?","options":[{"label":"yes","description":"Ship now"},{"label":"yes","description":"Ship later"},{"id":"stable-no","label":"no"},{"id":"stable-no-again","label":"no"}]}]}} diff --git a/fixtures/validation/invalid-graph/duplicate-segment-seq.trail.jsonl b/fixtures/validation/invalid-graph/duplicate-segment-seq.trail.jsonl new file mode 100644 index 0000000..8584faf --- /dev/null +++ b/fixtures/validation/invalid-graph/duplicate-segment-seq.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000290","session_uid":"00000000-0000-4000-8000-000000000291","segment":{"seq":1},"ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000292","session_uid":"00000000-0000-4000-8000-000000000291","segment":{"seq":1},"ts":"2026-05-17T14:05:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-graph/duplicate-tool-result-for-id.trail.jsonl b/fixtures/validation/invalid-graph/duplicate-tool-result-for-id.trail.jsonl new file mode 100644 index 0000000..fb27492 --- /dev/null +++ b/fixtures/validation/invalid-graph/duplicate-tool-result-for-id.trail.jsonl @@ -0,0 +1,5 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} 
+{"type":"tool_call","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"tool":"file_read","args":{"path":"b.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:07.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"hi"}} +{"type":"tool_result","id":"01HEVTA0000000000000000004","ts":"2026-05-17T14:00:08.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"again"}} diff --git a/fixtures/validation/invalid-graph/envelope-not-at-line-1.trail.jsonl b/fixtures/validation/invalid-graph/envelope-not-at-line-1.trail.jsonl new file mode 100644 index 0000000..222e827 --- /dev/null +++ b/fixtures/validation/invalid-graph/envelope-not-at-line-1.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0"} diff --git a/fixtures/validation/invalid-graph/envelope-sessions-manifest-empty.trail.jsonl b/fixtures/validation/invalid-graph/envelope-sessions-manifest-empty.trail.jsonl new file mode 100644 index 0000000..ceaeed7 --- /dev/null +++ b/fixtures/validation/invalid-graph/envelope-sessions-manifest-empty.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0","sessions":[]} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} diff --git a/fixtures/validation/invalid-graph/envelope-sessions-manifest-multiple.trail.jsonl 
b/fixtures/validation/invalid-graph/envelope-sessions-manifest-multiple.trail.jsonl new file mode 100644 index 0000000..c9ebbaa --- /dev/null +++ b/fixtures/validation/invalid-graph/envelope-sessions-manifest-multiple.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0","sessions":[{"id":"01HSESS0000000000000000001","agent":"codex-cli"},{"id":"01HSESS0000000000000000002","agent":"codex-cli"}]} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} diff --git a/fixtures/validation/invalid-graph/envelope-without-session-header.trail.jsonl b/fixtures/validation/invalid-graph/envelope-without-session-header.trail.jsonl new file mode 100644 index 0000000..8ac959c --- /dev/null +++ b/fixtures/validation/invalid-graph/envelope-without-session-header.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0"} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} diff --git a/fixtures/validation/invalid-graph/header-has-parent-id.trail.jsonl b/fixtures/validation/invalid-graph/header-has-parent-id.trail.jsonl new file mode 100644 index 0000000..e5d08ed --- /dev/null +++ b/fixtures/validation/invalid-graph/header-has-parent-id.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","parent_id":"01H0THER000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} diff --git a/fixtures/validation/invalid-graph/multi-session-cross-group-parent.trail.jsonl b/fixtures/validation/invalid-graph/multi-session-cross-group-parent.trail.jsonl new file mode 100644 index 0000000..11a5163 --- /dev/null +++ b/fixtures/validation/invalid-graph/multi-session-cross-group-parent.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000E01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000E01","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"g1 msg"}} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000E02","ts":"2026-05-17T14:05:00.000Z","agent":{"name":"claude-code"}} +{"type":"agent_message","id":"01HEVTA0000000000000000E02","parent_id":"01HEVTA0000000000000000E01","ts":"2026-05-17T14:05:05.000Z","payload":{"text":"ref g1"}} diff --git a/fixtures/validation/invalid-graph/multi-session-orphan-prelude.trail.jsonl b/fixtures/validation/invalid-graph/multi-session-orphan-prelude.trail.jsonl new file mode 100644 index 0000000..8ccdb6c --- /dev/null +++ b/fixtures/validation/invalid-graph/multi-session-orphan-prelude.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"user_message","id":"01HEVTA0000000000000000D00","ts":"2026-05-17T13:59:00.000Z","payload":{"text":"orphan"}} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000D01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000D01","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"msg"}} diff --git a/fixtures/validation/invalid-graph/multiple-envelopes.trail.jsonl b/fixtures/validation/invalid-graph/multiple-envelopes.trail.jsonl new file mode 100644 index 0000000..e393a09 --- /dev/null +++ b/fixtures/validation/invalid-graph/multiple-envelopes.trail.jsonl @@ -0,0 +1,3 @@ 
+{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0"} +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000002","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0"} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-graph/non-interoperable-number.trail.jsonl b/fixtures/validation/invalid-graph/non-interoperable-number.trail.jsonl new file mode 100644 index 0000000..9f6ca59 --- /dev/null +++ b/fixtures/validation/invalid-graph/non-interoperable-number.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000288","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"agent_message","id":"00000000-0000-4000-8000-000000000289","ts":"2026-05-17T14:00:01.000Z","payload":{"text":"captured source id"},"source":{"agent":"codex-cli","raw":{"snowflake":9007199254740993}}} diff --git a/fixtures/validation/invalid-graph/non-monotonic-event-ts.trail.jsonl b/fixtures/validation/invalid-graph/non-monotonic-event-ts.trail.jsonl new file mode 100644 index 0000000..146025f --- /dev/null +++ b/fixtures/validation/invalid-graph/non-monotonic-event-ts.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","parent_id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:04.999Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/invalid-graph/out-of-order-segment-seq.trail.jsonl 
b/fixtures/validation/invalid-graph/out-of-order-segment-seq.trail.jsonl new file mode 100644 index 0000000..66dec7b --- /dev/null +++ b/fixtures/validation/invalid-graph/out-of-order-segment-seq.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000293","session_uid":"00000000-0000-4000-8000-000000000294","segment":{"seq":2,"prev_content_hash":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"},"ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000295","session_uid":"00000000-0000-4000-8000-000000000294","segment":{"seq":1},"ts":"2026-05-17T14:05:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-graph/parent-cycle.trail.jsonl b/fixtures/validation/invalid-graph/parent-cycle.trail.jsonl new file mode 100644 index 0000000..841ccba --- /dev/null +++ b/fixtures/validation/invalid-graph/parent-cycle.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"agent_message","id":"01HN0DE000000000000000000A","parent_id":"01HN0DE000000000000000000B","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"a"}} +{"type":"agent_message","id":"01HN0DE000000000000000000B","parent_id":"01HN0DE000000000000000000A","ts":"2026-05-17T14:00:06.000Z","payload":{"text":"b"}} diff --git a/fixtures/validation/invalid-graph/parse-fidelity-drift.trail.jsonl b/fixtures/validation/invalid-graph/parse-fidelity-drift.trail.jsonl new file mode 100644 index 0000000..111e58c --- /dev/null +++ b/fixtures/validation/invalid-graph/parse-fidelity-drift.trail.jsonl @@ -0,0 +1,2 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"},"parse_fidelity":{"quarantined_count":0}} +{"type":"system_event","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"kind":"x-codex/unknown_record","data":{"raw":{"type":"future_event"}}}} diff --git a/fixtures/validation/invalid-graph/sequential-pairing-stays-in-branch.trail.jsonl b/fixtures/validation/invalid-graph/sequential-pairing-stays-in-branch.trail.jsonl new file mode 100644 index 0000000..d31b698 --- /dev/null +++ b/fixtures/validation/invalid-graph/sequential-pairing-stays-in-branch.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"subagent_invoke","args":{"task":"inspect"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","parent_id":"01HEVTA0000000000000000001","payload":{"tool":"file_read","args":{"path":"child.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:07.000Z","payload":{"ok":true,"output":"parent result"}} diff --git a/fixtures/validation/invalid-graph/sequential-pairing-stays-in-sibling-branch.trail.jsonl b/fixtures/validation/invalid-graph/sequential-pairing-stays-in-sibling-branch.trail.jsonl new file mode 100644 index 0000000..82f7bd7 --- /dev/null +++ b/fixtures/validation/invalid-graph/sequential-pairing-stays-in-sibling-branch.trail.jsonl @@ -0,0 +1,5 @@ +{"type":"session","schema_version":"0.1.0","id":"01HEVTA0000000000000000000","ts":"2025-01-01T00:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"agent_message","id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:01.000Z","payload":{"text":"branch point"}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","parent_id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:02.000Z","payload":{"tool":"shell_command","args":{"command":"left"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000003","parent_id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:03.000Z","payload":{"tool":"shell_command","args":{"command":"right"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000004","parent_id":"01HEVTA0000000000000000003","ts":"2025-01-01T00:00:04.000Z","payload":{"ok":true,"output":"right"}} diff --git a/fixtures/validation/invalid-graph/sequential-pairing-stays-in-subagent-sibling-branch.trail.jsonl b/fixtures/validation/invalid-graph/sequential-pairing-stays-in-subagent-sibling-branch.trail.jsonl new file mode 100644 index 0000000..3a05d99 --- /dev/null +++ b/fixtures/validation/invalid-graph/sequential-pairing-stays-in-subagent-sibling-branch.trail.jsonl @@ -0,0 +1,5 @@ +{"type":"session","schema_version":"0.1.0","id":"01HEVTA0000000000000000000","ts":"2025-01-01T00:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:01.000Z","payload":{"tool":"subagent_invoke","args":{"prompt":"work"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","parent_id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:02.000Z","payload":{"tool":"shell_command","args":{"command":"left"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000003","parent_id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:03.000Z","payload":{"tool":"shell_command","args":{"command":"right"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000004","parent_id":"01HEVTA0000000000000000003","ts":"2025-01-01T00:00:04.000Z","payload":{"ok":true,"output":"right"}} diff --git 
a/fixtures/validation/invalid-graph/session-end-forward-final-message-id.trail.jsonl b/fixtures/validation/invalid-graph/session-end-forward-final-message-id.trail.jsonl new file mode 100644 index 0000000..a8f0dc9 --- /dev/null +++ b/fixtures/validation/invalid-graph/session-end-forward-final-message-id.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"session_end","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"reason":"complete","final_message_id":"01HEVTA0000000000000000002"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"text":"after"}} diff --git a/fixtures/validation/invalid-graph/session-end-unknown-final-message-id.trail.jsonl b/fixtures/validation/invalid-graph/session-end-unknown-final-message-id.trail.jsonl new file mode 100644 index 0000000..1413c1c --- /dev/null +++ b/fixtures/validation/invalid-graph/session-end-unknown-final-message-id.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"agent_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +{"type":"session_end","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:08.000Z","payload":{"reason":"complete","final_message_id":"01HGH0ST000000000000000001"}} diff --git a/fixtures/validation/invalid-graph/stream-open-with-content-hash.trail.jsonl b/fixtures/validation/invalid-graph/stream-open-with-content-hash.trail.jsonl new file mode 100644 index 0000000..4c38d1c --- /dev/null +++ b/fixtures/validation/invalid-graph/stream-open-with-content-hash.trail.jsonl @@ -0,0 +1,3 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","content_hash":"3936b470a29cb8e6814158eefb2d03871f4f96df480488b761b373b85ef594d2","stream":{"state":"open"},"agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/invalid-graph/tool-args-unredacted-secret.trail.jsonl b/fixtures/validation/invalid-graph/tool-args-unredacted-secret.trail.jsonl new file mode 100644 index 0000000..5c81fc1 --- /dev/null +++ b/fixtures/validation/invalid-graph/tool-args-unredacted-secret.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"mcp_call","args":{"server":"github","tool":"get_issue","args":{"owner":"agent-trail","repo":"agent-trail"},"headers":{"Authorization":"Bearer abcdefABCDEF0123456789xyzXYZ"}}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:02.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"ok"}} diff --git a/fixtures/validation/invalid-graph/tool-call-aborted-turn-scope-does-not-close-call.trail.jsonl b/fixtures/validation/invalid-graph/tool-call-aborted-turn-scope-does-not-close-call.trail.jsonl new file mode 100644 index 0000000..76b6fde --- /dev/null +++ b/fixtures/validation/invalid-graph/tool-call-aborted-turn-scope-does-not-close-call.trail.jsonl @@ -0,0 +1,3 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"shell_command","args":{"command":"sleep 60"}}} +{"type":"tool_call_aborted","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"scope":"turn","reason":"user_interrupt"}} diff --git a/fixtures/validation/invalid-graph/tool-result-for-id-wins-over-semantic-conflict.trail.jsonl b/fixtures/validation/invalid-graph/tool-result-for-id-wins-over-semantic-conflict.trail.jsonl new file mode 100644 index 0000000..a6515ed --- /dev/null +++ b/fixtures/validation/invalid-graph/tool-result-for-id-wins-over-semantic-conflict.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","semantic":{"call_id":"call_b"},"payload":{"tool":"file_read","args":{"path":"b.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:07.000Z","semantic":{"call_id":"call_b"},"payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"hi"}} diff --git a/fixtures/validation/invalid-graph/unknown-parent-id.trail.jsonl b/fixtures/validation/invalid-graph/unknown-parent-id.trail.jsonl new file mode 100644 index 0000000..1e55097 --- /dev/null +++ b/fixtures/validation/invalid-graph/unknown-parent-id.trail.jsonl @@ -0,0 +1,2 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"agent_message","id":"01HEVTA0000000000000000001","parent_id":"01HGH0ST000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/invalid-graph/unmatched-tool-call-at-eof.trail.jsonl b/fixtures/validation/invalid-graph/unmatched-tool-call-at-eof.trail.jsonl new file mode 100644 index 0000000..47ac700 --- /dev/null +++ b/fixtures/validation/invalid-graph/unmatched-tool-call-at-eof.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} diff --git a/fixtures/validation/invalid-graph/unmatched-tool-call-partial-suppression.trail.jsonl b/fixtures/validation/invalid-graph/unmatched-tool-call-partial-suppression.trail.jsonl new file mode 100644 index 0000000..de67d62 --- /dev/null +++ b/fixtures/validation/invalid-graph/unmatched-tool-call-partial-suppression.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"tool":"file_read","args":{"path":"b.txt"}}} +{"type":"session_terminated","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:07.000Z","payload":{"reason":"eof_with_open_tool_calls","open_call_ids":["01HEVTA0000000000000000001"]}} diff 
--git a/fixtures/validation/invalid-graph/unmatched-tool-call-session-terminated-without-open-call-ids.trail.jsonl b/fixtures/validation/invalid-graph/unmatched-tool-call-session-terminated-without-open-call-ids.trail.jsonl new file mode 100644 index 0000000..5530d4b --- /dev/null +++ b/fixtures/validation/invalid-graph/unmatched-tool-call-session-terminated-without-open-call-ids.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"session_terminated","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"reason":"process_terminated"}} diff --git a/fixtures/validation/invalid-graph/user-query-response-unknown-for-id.trail.jsonl b/fixtures/validation/invalid-graph/user-query-response-unknown-for-id.trail.jsonl new file mode 100644 index 0000000..cc4d787 --- /dev/null +++ b/fixtures/validation/invalid-graph/user-query-response-unknown-for-id.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_query_response","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"for_id":"01HEVTA0000000000000000002","answers":{}}} diff --git a/fixtures/validation/invalid-schema/agent-message-attachment-bad-uri.trail.jsonl b/fixtures/validation/invalid-schema/agent-message-attachment-bad-uri.trail.jsonl new file mode 100644 index 0000000..41a0e87 --- /dev/null +++ b/fixtures/validation/invalid-schema/agent-message-attachment-bad-uri.trail.jsonl @@ -0,0 +1,3 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"plot this"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"here is the chart","attachments":[{"kind":"image","media_type":"image/png","uri":"data:image/png;base64,iVBORw0KGgo="}]}} diff --git a/fixtures/validation/invalid-schema/agent-message-usage-extra-field.trail.jsonl b/fixtures/validation/invalid-schema/agent-message-usage-extra-field.trail.jsonl new file mode 100644 index 0000000..abb46d2 --- /dev/null +++ b/fixtures/validation/invalid-schema/agent-message-usage-extra-field.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi","usage":{"input_tokens":10,"output_tokens":5,"cost_usd":0.01}}} diff --git a/fixtures/validation/invalid-schema/agent-message-usage-missing-output.trail.jsonl b/fixtures/validation/invalid-schema/agent-message-usage-missing-output.trail.jsonl new file mode 100644 index 0000000..c78b3cc --- /dev/null +++ b/fixtures/validation/invalid-schema/agent-message-usage-missing-output.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} 
+{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi","usage":{"input_tokens":1}}} diff --git a/fixtures/validation/invalid-schema/agent-message-usage-missing-required.trail.jsonl b/fixtures/validation/invalid-schema/agent-message-usage-missing-required.trail.jsonl new file mode 100644 index 0000000..372231f --- /dev/null +++ b/fixtures/validation/invalid-schema/agent-message-usage-missing-required.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi","usage":{"output_tokens":5}}} diff --git a/fixtures/validation/invalid-schema/agent-message-usage-zero-context-window.trail.jsonl b/fixtures/validation/invalid-schema/agent-message-usage-zero-context-window.trail.jsonl new file mode 100644 index 0000000..7dead17 --- /dev/null +++ b/fixtures/validation/invalid-schema/agent-message-usage-zero-context-window.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi","usage":{"input_tokens":1,"output_tokens":1,"context_window_tokens":0}}} diff --git a/fixtures/validation/invalid-schema/agent-thinking-usage-missing-output.trail.jsonl b/fixtures/validation/invalid-schema/agent-thinking-usage-missing-output.trail.jsonl new file mode 100644 index 
0000000..8758e14 --- /dev/null +++ b/fixtures/validation/invalid-schema/agent-thinking-usage-missing-output.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"agent_thinking","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"text":"Need inspect before answer.","usage":{"input_tokens":10}}} diff --git a/fixtures/validation/invalid-schema/capability-change-bad-reason.trail.jsonl b/fixtures/validation/invalid-schema/capability-change-bad-reason.trail.jsonl new file mode 100644 index 0000000..7fb0fc0 --- /dev/null +++ b/fixtures/validation/invalid-schema/capability-change-bad-reason.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000128","session_uid":"00000000-0000-0000-0000-000000000128","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001285","ts":"2026-06-01T00:00:01.000Z","payload":{"scope":"tool","reason":"made_up","added":[{"name":"ToolSearch"}]}} diff --git a/fixtures/validation/invalid-schema/capability-change-bad-scope.trail.jsonl b/fixtures/validation/invalid-schema/capability-change-bad-scope.trail.jsonl new file mode 100644 index 0000000..c58b971 --- /dev/null +++ b/fixtures/validation/invalid-schema/capability-change-bad-scope.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000128","session_uid":"00000000-0000-0000-0000-000000000128","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001286","ts":"2026-06-01T00:00:01.000Z","payload":{"scope":"database","reason":"registered","added":[{"name":"ToolSearch"}]}} diff --git 
a/fixtures/validation/invalid-schema/capability-change-empty.trail.jsonl b/fixtures/validation/invalid-schema/capability-change-empty.trail.jsonl new file mode 100644 index 0000000..c0482f7 --- /dev/null +++ b/fixtures/validation/invalid-schema/capability-change-empty.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000128","session_uid":"00000000-0000-0000-0000-000000000128","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001287","ts":"2026-06-01T00:00:01.000Z","payload":{"scope":"tool","reason":"registered"}} diff --git a/fixtures/validation/invalid-schema/command-invoke-bad-kind.trail.jsonl b/fixtures/validation/invalid-schema/command-invoke-bad-kind.trail.jsonl new file mode 100644 index 0000000..ed13a0c --- /dev/null +++ b/fixtures/validation/invalid-schema/command-invoke-bad-kind.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"/wizard","kind":"wizard","via":"user_typed"}} diff --git a/fixtures/validation/invalid-schema/command-invoke-bad-result-action.trail.jsonl b/fixtures/validation/invalid-schema/command-invoke-bad-result-action.trail.jsonl new file mode 100644 index 0000000..27c39dd --- /dev/null +++ b/fixtures/validation/invalid-schema/command-invoke-bad-result-action.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"/clear","kind":"builtin","via":"user_typed","result_action":"frobnicate"}} diff --git a/fixtures/validation/invalid-schema/command-invoke-missing-kind.trail.jsonl b/fixtures/validation/invalid-schema/command-invoke-missing-kind.trail.jsonl new file mode 100644 index 0000000..817458e --- /dev/null +++ b/fixtures/validation/invalid-schema/command-invoke-missing-kind.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"/clear","via":"user_typed"}} diff --git a/fixtures/validation/invalid-schema/command-invoke-missing-name.trail.jsonl b/fixtures/validation/invalid-schema/command-invoke-missing-name.trail.jsonl new file mode 100644 index 0000000..a27cb54 --- /dev/null +++ b/fixtures/validation/invalid-schema/command-invoke-missing-name.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"kind":"builtin","via":"user_typed"}} diff --git a/fixtures/validation/invalid-schema/envelope-missing-producer.trail.jsonl b/fixtures/validation/invalid-schema/envelope-missing-producer.trail.jsonl new file mode 100644 index 0000000..656a568 --- /dev/null +++ b/fixtures/validation/invalid-schema/envelope-missing-producer.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z"} 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-schema/header-wrong-schema-version.trail.jsonl b/fixtures/validation/invalid-schema/header-wrong-schema-version.trail.jsonl new file mode 100644 index 0000000..50363dd --- /dev/null +++ b/fixtures/validation/invalid-schema/header-wrong-schema-version.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.2.0","id":"01HSESS0000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-schema/redaction-count-non-integer.trail.jsonl b/fixtures/validation/invalid-schema/redaction-count-non-integer.trail.jsonl new file mode 100644 index 0000000..f50a9a6 --- /dev/null +++ b/fixtures/validation/invalid-schema/redaction-count-non-integer.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"redacted"},"meta":{"redaction_count":"two"}} diff --git a/fixtures/validation/invalid-schema/segment-seq-1-with-prev-hash.trail.jsonl b/fixtures/validation/invalid-schema/segment-seq-1-with-prev-hash.trail.jsonl new file mode 100644 index 0000000..fc1d6eb --- /dev/null +++ b/fixtures/validation/invalid-schema/segment-seq-1-with-prev-hash.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000006","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","segment":{"seq":1,"prev_content_hash":"deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"},"ts":"2026-05-26T10:25:00.000Z","agent":{"name":"codex-cli"}} diff --git 
a/fixtures/validation/invalid-schema/segment-seq-2-without-prev-hash.trail.jsonl b/fixtures/validation/invalid-schema/segment-seq-2-without-prev-hash.trail.jsonl new file mode 100644 index 0000000..3294f14 --- /dev/null +++ b/fixtures/validation/invalid-schema/segment-seq-2-without-prev-hash.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000003","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","segment":{"seq":2},"ts":"2026-05-26T10:10:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-schema/segment-seq-2-without-session-uid.trail.jsonl b/fixtures/validation/invalid-schema/segment-seq-2-without-session-uid.trail.jsonl new file mode 100644 index 0000000..19dadbd --- /dev/null +++ b/fixtures/validation/invalid-schema/segment-seq-2-without-session-uid.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000007","segment":{"seq":2,"prev_content_hash":"deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"},"ts":"2026-05-26T10:30:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-schema/segment-seq-zero.trail.jsonl b/fixtures/validation/invalid-schema/segment-seq-zero.trail.jsonl new file mode 100644 index 0000000..9594acf --- /dev/null +++ b/fixtures/validation/invalid-schema/segment-seq-zero.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000005","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","segment":{"seq":0},"ts":"2026-05-26T10:20:00.000Z","agent":{"name":"codex-cli"}} diff --git a/fixtures/validation/invalid-schema/session-end-final-message-id-null.trail.jsonl b/fixtures/validation/invalid-schema/session-end-final-message-id-null.trail.jsonl new file mode 100644 index 0000000..a9ee2ce --- /dev/null +++ b/fixtures/validation/invalid-schema/session-end-final-message-id-null.trail.jsonl @@ -0,0 +1,3 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"agent_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hi"}} +{"type":"session_end","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"reason":"complete","final_message_id":null}} diff --git a/fixtures/validation/invalid-schema/session-metadata-update-bad-field-cwd.trail.jsonl b/fixtures/validation/invalid-schema/session-metadata-update-bad-field-cwd.trail.jsonl new file mode 100644 index 0000000..58b043b --- /dev/null +++ b/fixtures/validation/invalid-schema/session-metadata-update-bad-field-cwd.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001323","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"cwd","value":"/tmp/project","reason":"runtime_inferred"}} diff --git a/fixtures/validation/invalid-schema/session-metadata-update-bad-reason.trail.jsonl b/fixtures/validation/invalid-schema/session-metadata-update-bad-reason.trail.jsonl new file mode 100644 index 0000000..5775a8c --- /dev/null +++ b/fixtures/validation/invalid-schema/session-metadata-update-bad-reason.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001325","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"name","value":"Release notes","reason":"manual"}} diff --git 
a/fixtures/validation/invalid-schema/session-metadata-update-bad-tags-value.trail.jsonl b/fixtures/validation/invalid-schema/session-metadata-update-bad-tags-value.trail.jsonl new file mode 100644 index 0000000..5faaa38 --- /dev/null +++ b/fixtures/validation/invalid-schema/session-metadata-update-bad-tags-value.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001324","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"tags","value":"release","reason":"user_set"}} diff --git a/fixtures/validation/invalid-schema/session-metadata-update-bad-worktree.trail.jsonl b/fixtures/validation/invalid-schema/session-metadata-update-bad-worktree.trail.jsonl new file mode 100644 index 0000000..8b3f5f2 --- /dev/null +++ b/fixtures/validation/invalid-schema/session-metadata-update-bad-worktree.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001326","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"vcs.worktree","value":{"name":"topic"},"reason":"runtime_inferred"}} diff --git a/fixtures/validation/invalid-schema/session-uid-not-ulid-or-uuid.trail.jsonl b/fixtures/validation/invalid-schema/session-uid-not-ulid-or-uuid.trail.jsonl new file mode 100644 index 0000000..650c6a8 --- /dev/null +++ b/fixtures/validation/invalid-schema/session-uid-not-ulid-or-uuid.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000004","session_uid":"not-a-valid-uid","ts":"2026-05-26T10:15:00.000Z","agent":{"name":"codex-cli"}} diff --git 
a/fixtures/validation/invalid-schema/tool-call-aborted-bad-reason.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-aborted-bad-reason.trail.jsonl new file mode 100644 index 0000000..7b30456 --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-aborted-bad-reason.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call_aborted","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"scope":"turn","reason":"interrupted"}} diff --git a/fixtures/validation/invalid-schema/tool-call-aborted-tool-scope-missing-for-id.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-aborted-tool-scope-missing-for-id.trail.jsonl new file mode 100644 index 0000000..813a83d --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-aborted-tool-scope-missing-for-id.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call_aborted","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"scope":"tool_call","reason":"timeout"}} diff --git a/fixtures/validation/invalid-schema/tool-call-aborted-turn-scope-with-for-id.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-aborted-turn-scope-with-for-id.trail.jsonl new file mode 100644 index 0000000..8a1fe6f --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-aborted-turn-scope-with-for-id.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"tool_call_aborted","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"scope":"turn","reason":"user_interrupt","for_id":"01HEVTA0000000000000000002"}} diff --git a/fixtures/validation/invalid-schema/tool-call-file-list-missing-path.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-file-list-missing-path.trail.jsonl new file mode 100644 index 0000000..fef6ef8 --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-file-list-missing-path.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000501","session_uid":"00000000-0000-4000-8000-0000000005aa","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"00000000-0000-4000-8000-000000000502","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_list","args":{"recursive":true}}} diff --git a/fixtures/validation/invalid-schema/tool-call-file-patch-empty-files.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-file-patch-empty-files.trail.jsonl new file mode 100644 index 0000000..ee50f58 --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-file-patch-empty-files.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000301","session_uid":"00000000-0000-4000-8000-0000000003aa","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"00000000-0000-4000-8000-000000000302","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_patch","args":{"files":[]}}} diff --git a/fixtures/validation/invalid-schema/tool-call-file-patch-file-missing-diff.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-file-patch-file-missing-diff.trail.jsonl new file mode 100644 index 0000000..f98c16a --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-file-patch-file-missing-diff.trail.jsonl @@ -0,0 +1,2 @@ 
+{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000401","session_uid":"00000000-0000-4000-8000-0000000004aa","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"00000000-0000-4000-8000-000000000402","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_patch","args":{"files":[{"path":"src/a.ts"}]}}} diff --git a/fixtures/validation/invalid-schema/tool-call-missing-args-path.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-missing-args-path.trail.jsonl new file mode 100644 index 0000000..4aa3bbb --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-missing-args-path.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{}}} diff --git a/fixtures/validation/invalid-schema/tool-call-usage-missing-output.trail.jsonl b/fixtures/validation/invalid-schema/tool-call-usage-missing-output.trail.jsonl new file mode 100644 index 0000000..5e6aae5 --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-call-usage-missing-output.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_read","args":{"path":"spec.md"},"usage":{"input_tokens":10}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:02.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"# Agent Trail\n"}} diff --git a/fixtures/validation/invalid-schema/tool-result-attachment-extra-field.trail.jsonl 
b/fixtures/validation/invalid-schema/tool-result-attachment-extra-field.trail.jsonl new file mode 100644 index 0000000..8e15c4e --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-result-attachment-extra-field.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"other","args":{"name":"screenshot"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"captured screenshot","attachments":[{"kind":"image","width":1024}]}} diff --git a/fixtures/validation/invalid-schema/tool-result-meta-file-read-range-wrong-length.trail.jsonl b/fixtures/validation/invalid-schema/tool-result-meta-file-read-range-wrong-length.trail.jsonl new file mode 100644 index 0000000..a183078 --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-result-meta-file-read-range-wrong-length.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"...","meta":{"file_read":{"range":[10,50,90]}}}} diff --git a/fixtures/validation/invalid-schema/tool-result-meta-mcp-call-block-missing-type.trail.jsonl b/fixtures/validation/invalid-schema/tool-result-meta-mcp-call-block-missing-type.trail.jsonl new file mode 100644 index 0000000..7685e9c --- /dev/null +++ 
b/fixtures/validation/invalid-schema/tool-result-meta-mcp-call-block-missing-type.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"mcp_call","args":{"server":"docs","tool":"search"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"ok","meta":{"mcp_call":{"content_blocks":[{"text":"no type here"}]}}}} diff --git a/fixtures/validation/invalid-schema/tool-result-meta-shell-command-extra-field.trail.jsonl b/fixtures/validation/invalid-schema/tool-result-meta-shell-command-extra-field.trail.jsonl new file mode 100644 index 0000000..d6e3b7c --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-result-meta-shell-command-extra-field.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"shell_command","args":{"command":"ls"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"a.txt","meta":{"shell_command":{"stdout":"a.txt","exitcode":0}}}} diff --git a/fixtures/validation/invalid-schema/tool-result-truncated-missing-output-size.trail.jsonl b/fixtures/validation/invalid-schema/tool-result-truncated-missing-output-size.trail.jsonl new file mode 100644 index 0000000..f29a583 --- /dev/null +++ b/fixtures/validation/invalid-schema/tool-result-truncated-missing-output-size.trail.jsonl @@ -0,0 +1,3 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"shell_command","args":{"command":"printf '%s' large-output"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"large-output\n…[truncated]","truncated":true,"overflow_ref":"sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}} diff --git a/fixtures/validation/invalid-schema/user-message-missing-text.trail.jsonl b/fixtures/validation/invalid-schema/user-message-missing-text.trail.jsonl new file mode 100644 index 0000000..774ee5b --- /dev/null +++ b/fixtures/validation/invalid-schema/user-message-missing-text.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{}} diff --git a/fixtures/validation/invalid-schema/user-message-non-string-text.trail.jsonl b/fixtures/validation/invalid-schema/user-message-non-string-text.trail.jsonl new file mode 100644 index 0000000..f457ec8 --- /dev/null +++ b/fixtures/validation/invalid-schema/user-message-non-string-text.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":42}} diff --git a/fixtures/validation/invalid-schema/vcs-null-revision-with-empty-branch.trail.jsonl 
b/fixtures/validation/invalid-schema/vcs-null-revision-with-empty-branch.trail.jsonl new file mode 100644 index 0000000..16056ec --- /dev/null +++ b/fixtures/validation/invalid-schema/vcs-null-revision-with-empty-branch.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-00000000028b","session_uid":"00000000-0000-0000-0000-00000000028b","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"},"vcs":{"type":"git","revision":null,"branch":""}} diff --git a/fixtures/validation/invalid-schema/vcs-null-revision-with-head-commit.trail.jsonl b/fixtures/validation/invalid-schema/vcs-null-revision-with-head-commit.trail.jsonl new file mode 100644 index 0000000..3668417 --- /dev/null +++ b/fixtures/validation/invalid-schema/vcs-null-revision-with-head-commit.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-00000000028a","session_uid":"00000000-0000-0000-0000-00000000028a","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"},"vcs":{"type":"git","revision":null,"branch":"main","head_commit":"abcdef1"}} diff --git a/fixtures/validation/invalid-schema/vcs-null-revision-without-branch.trail.jsonl b/fixtures/validation/invalid-schema/vcs-null-revision-without-branch.trail.jsonl new file mode 100644 index 0000000..eac90e0 --- /dev/null +++ b/fixtures/validation/invalid-schema/vcs-null-revision-without-branch.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000290","session_uid":"00000000-0000-0000-0000-000000000290","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"},"vcs":{"type":"git","revision":null}} diff --git a/fixtures/validation/manifest.json b/fixtures/validation/manifest.json new file mode 100644 index 0000000..5e76798 --- /dev/null +++ b/fixtures/validation/manifest.json @@ -0,0 +1,3889 @@ +{ + "schema_version": "0.1.0", + "fixtures": [ + { + "path": 
"hash-mismatch/content-hash-invalid-hex.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "/content_hash", + "severity": "error", + "code": "content_hash_invalid" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "/content_hash", + "severity": "error", + "code": "content_hash_invalid" + } + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-mismatch/content-hash-mismatch.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1, + "path": "/content_hash", + "severity": "error", + "code": "content_hash_mismatch" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1, + "path": "/content_hash", + "severity": "warning", + "code": "content_hash_mismatch" + } + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-mismatch/trail-envelope-content-hash-mismatch.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1, + "path": "/content_hash", + "severity": "error", + "code": "content_hash_mismatch" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1, + "path": "/content_hash", + "severity": "warning", + "code": "content_hash_mismatch" + } + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/absent-content-hash.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "b69b9cafe47e606415d4f23868297bbe8a3e3efca32363b676a9eff8cdc5819f" + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/envelope-two-tier.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": 
{ + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "44b16ddea9fd914442eaf8f21c90b6f0eeb913ab499eb668a9c0c6e06b988522" + ], + "file_hash": "a55a96d3a19b624aa8f2e38337521046facf5a6f2a04c3f3c3f7a14880774443" + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/jcs-stress.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "24b3a1d99f3e725092cd0126c7d9fb84246e467ec5d2d1fd416f462ae8a58e14" + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/minimal-pending-roundtrip.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "f215ed334d3928e1abde804f2c4a870431b18d4fa7d755ec94d94be2a6ddd06e" + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/multi-session-slice.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "0805ade157038a1cbb2895c55383b39b5a42aaa7d6ccc164174c0b3346d8fc14", + "f7fe8ccdeaeddf70b868838614730c0a613d937bafeac48a42bb585e6df3ead7" + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/replacement-char.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": { + "valid": true, + "diagnostics": [] + }, 
+ "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "594a06fa452fc097bce145be95dd614ddb81b6adf8fe2baf80cbe6e13fcaf4f2" + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/segment-chain-seq1.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "b12c68969624665898a4e3ec4dc5c0d06f7a7f305f8f7b16e347e18ccd2f6e67" + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "hash-vectors/segment-chain-seq2.trail.jsonl", + "comment": "Oracle: Go github.com/cyberphone/json-canonicalization v0.0.0-20241213102144-19d51d7fe467 + crypto/sha256", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "expected": { + "session_hashes": [ + "3a676114740d09c2620c1c5e106ea8066bfc3926e478faf5cabd0cedfff398ec" + ] + }, + "classes": [ + "W", + "R2" + ] + }, + { + "path": "invalid-graph/ambiguous-sequential-pairing-with-session-end.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 4, + "path": "/payload", + "severity": "warning", + "code": "ambiguous_sequential_pairing" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 4, + "path": "/payload", + "severity": "warning", + "code": "ambiguous_sequential_pairing" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/ambiguous-sequential-pairing.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + }, + { + "line": 4, + "path": "/payload", + "severity": "warning", + "code": "ambiguous_sequential_pairing" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 
2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + }, + { + "line": 4, + "path": "/payload", + "severity": "warning", + "code": "ambiguous_sequential_pairing" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/branch-point-unknown-from-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/payload/from_id", + "severity": "warning", + "code": "unknown_branch_point_from_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/from_id", + "severity": "warning", + "code": "unknown_branch_point_from_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/branch-summary-unknown-abandoned-branch-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/payload/abandoned_branch_id", + "severity": "warning", + "code": "unknown_abandoned_branch_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/abandoned_branch_id", + "severity": "warning", + "code": "unknown_abandoned_branch_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/duplicate-id.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "error", + "code": "duplicate_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "error", + "code": "duplicate_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/duplicate-option-labels-mixed-ids.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/payload/questions/0/options/1/label", + "severity": "warning", + "code": "duplicate_option_labels" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": 
"/payload/questions/0/options/1/label", + "severity": "warning", + "code": "duplicate_option_labels" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/duplicate-option-labels.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/payload/questions/0/options/1/label", + "severity": "warning", + "code": "duplicate_option_labels" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/questions/0/options/1/label", + "severity": "warning", + "code": "duplicate_option_labels" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/duplicate-segment-seq.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/segment/seq", + "severity": "warning", + "code": "duplicate_segment_seq" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/segment/seq", + "severity": "warning", + "code": "duplicate_segment_seq" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/duplicate-tool-result-for-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/envelope-not-at-line-1.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2, + "path": "/type", + "severity": "error", + "code": "envelope_not_at_line_1" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/type", + "severity": "error", + "code": "envelope_not_at_line_1" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": 
"invalid-graph/envelope-sessions-manifest-empty.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 1, + "path": "/sessions", + "severity": "warning", + "code": "envelope_sessions_manifest_drift" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1, + "path": "/sessions", + "severity": "warning", + "code": "envelope_sessions_manifest_drift" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/envelope-sessions-manifest-multiple.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 1, + "path": "/sessions", + "severity": "warning", + "code": "envelope_sessions_manifest_drift" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1, + "path": "/sessions", + "severity": "warning", + "code": "envelope_sessions_manifest_drift" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/envelope-without-session-header.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2, + "path": "", + "severity": "error", + "code": "missing_header_after_envelope" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "", + "severity": "error", + "code": "missing_header_after_envelope" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/header-has-parent-id.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "/parent_id", + "severity": "error", + "code": "header_has_parent_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "/parent_id", + "severity": "error", + "code": "header_has_parent_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/multi-session-cross-group-parent.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 4, + 
"path": "/parent_id", + "severity": "error", + "code": "unknown_parent_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 4, + "path": "/parent_id", + "severity": "error", + "code": "unknown_parent_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/multi-session-orphan-prelude.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "/type", + "severity": "error", + "code": "events_before_first_session_header" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "/type", + "severity": "error", + "code": "events_before_first_session_header" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/multiple-envelopes.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2, + "path": "/type", + "severity": "error", + "code": "events_before_first_session_header" + }, + { + "line": 2, + "path": "/type", + "severity": "error", + "code": "multiple_envelopes" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/type", + "severity": "error", + "code": "events_before_first_session_header" + }, + { + "line": 2, + "path": "/type", + "severity": "error", + "code": "multiple_envelopes" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/non-interoperable-number.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/source/raw/snowflake", + "severity": "warning", + "code": "non_interoperable_number" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/source/raw/snowflake", + "severity": "warning", + "code": "non_interoperable_number" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/non-monotonic-event-ts.trail.jsonl", + "strict": { + "valid": 
true, + "diagnostics": [ + { + "line": 3, + "path": "/ts", + "severity": "warning", + "code": "non_monotonic_event_ts" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/ts", + "severity": "warning", + "code": "non_monotonic_event_ts" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/out-of-order-segment-seq.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/segment/seq", + "severity": "warning", + "code": "out_of_order_segment_seq" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/segment/seq", + "severity": "warning", + "code": "out_of_order_segment_seq" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/parent-cycle.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2, + "path": "/parent_id", + "severity": "error", + "code": "parent_cycle" + }, + { + "line": 3, + "path": "/parent_id", + "severity": "error", + "code": "parent_cycle" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/parent_id", + "severity": "error", + "code": "parent_cycle" + }, + { + "line": 3, + "path": "/parent_id", + "severity": "error", + "code": "parent_cycle" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/parse-fidelity-drift.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1, + "path": "/parse_fidelity/quarantined_count", + "severity": "error", + "code": "parse_fidelity_drift" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1, + "path": "/parse_fidelity/quarantined_count", + "severity": "error", + "code": "parse_fidelity_drift" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/sequential-pairing-stays-in-branch.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + 
{ + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/sequential-pairing-stays-in-sibling-branch.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/sequential-pairing-stays-in-subagent-sibling-branch.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + }, + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + }, + { + "line": 2, + "path": "/payload/args/prompt", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + }, + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/session-end-forward-final-message-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/payload/final_message_id", + "severity": "warning", + "code": "unknown_final_message_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/final_message_id", + 
"severity": "warning", + "code": "unknown_final_message_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/session-end-unknown-final-message-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 3, + "path": "/payload/final_message_id", + "severity": "warning", + "code": "unknown_final_message_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/payload/final_message_id", + "severity": "warning", + "code": "unknown_final_message_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/stream-open-with-content-hash.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1, + "path": "/content_hash", + "severity": "error", + "code": "content_hash_mismatch" + }, + { + "line": 1, + "path": "/content_hash", + "severity": "warning", + "code": "stream_open_with_content_hash" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1, + "path": "/content_hash", + "severity": "warning", + "code": "content_hash_mismatch" + }, + { + "line": 1, + "path": "/content_hash", + "severity": "warning", + "code": "stream_open_with_content_hash" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/tool-args-unredacted-secret.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/payload/args/headers/Authorization", + "severity": "warning", + "code": "tool_args_unredacted_secret" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/args/headers/Authorization", + "severity": "warning", + "code": "tool_args_unredacted_secret" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/tool-call-aborted-turn-scope-does-not-close-call.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/id", + "severity": 
"warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/tool-result-for-id-wins-over-semantic-conflict.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/unknown-parent-id.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2, + "path": "/parent_id", + "severity": "error", + "code": "unknown_parent_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/parent_id", + "severity": "error", + "code": "unknown_parent_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/unmatched-tool-call-at-eof.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/unmatched-tool-call-partial-suppression.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } 
+ ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/unmatched-tool-call-session-terminated-without-open-call-ids.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-graph/user-query-response-unknown-for-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [ + { + "line": 2, + "path": "/payload/for_id", + "severity": "warning", + "code": "unknown_user_query_for_id" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/for_id", + "severity": "warning", + "code": "unknown_user_query_for_id" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/agent-message-attachment-bad-uri.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/agent-message-usage-extra-field.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/payload/usage/cost_usd", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/agent-message-usage-missing-output.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": 
"invalid-schema/agent-message-usage-missing-required.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/agent-message-usage-zero-context-window.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/agent-thinking-usage-missing-output.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/capability-change-bad-reason.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/capability-change-bad-scope.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/capability-change-empty.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/command-invoke-bad-kind.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": 
"invalid-schema/command-invoke-bad-result-action.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/command-invoke-missing-kind.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/command-invoke-missing-name.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/envelope-missing-producer.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/header-wrong-schema-version.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "", + "severity": "error", + "code": "missing_header" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "", + "severity": "error", + "code": "missing_header" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/redaction-count-non-integer.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/segment-seq-1-with-prev-hash.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + 
"tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/segment-seq-2-without-prev-hash.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/segment-seq-2-without-session-uid.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/segment-seq-zero.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/session-end-final-message-id-null.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/session-metadata-update-bad-field-cwd.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/session-metadata-update-bad-reason.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/session-metadata-update-bad-tags-value.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + 
"tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/session-metadata-update-bad-worktree.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/session-uid-not-ulid-or-uuid.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-aborted-bad-reason.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-aborted-tool-scope-missing-for-id.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-aborted-turn-scope-with-for-id.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-file-list-missing-path.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + 
} + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-file-patch-empty-files.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-file-patch-file-missing-diff.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-missing-args-path.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + }, + { + "line": 2, + "path": "/id", + "severity": "warning", + "code": "unmatched_tool_call_at_eof" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-call-usage-missing-output.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-result-attachment-extra-field.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": 
false, + "diagnostics": [ + { + "line": 3 + }, + { + "line": 3, + "path": "/payload/attachments/0/width", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-result-meta-file-read-range-wrong-length.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-result-meta-mcp-call-block-missing-type.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-result-meta-shell-command-extra-field.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/payload/meta/shell_command/exitcode", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/tool-result-truncated-missing-output-size.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/user-message-missing-text.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/user-message-non-string-text.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + 
"clean": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/vcs-null-revision-with-empty-branch.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/vcs-null-revision-with-head-commit.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "invalid-schema/vcs-null-revision-without-branch.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1 + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/capability-change-unknown-payload-field.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/future_field", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/ill-formed-string.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/text", + "severity": "error", + "code": "ill_formed_string" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/text", + "severity": "warning", + "code": "ill_formed_string" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/nested-unknown-payload-field.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + 
"diagnostics": [ + { + "line": 2, + "path": "/payload/attachments/0/future_field", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/patch-compatible-schema-version.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 1 + }, + { + "line": 1, + "path": "", + "severity": "error", + "code": "missing_header" + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 1, + "path": "/schema_version", + "severity": "warning", + "code": "reader_tolerant_schema_version" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/reserved-future-event-type.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/type", + "severity": "warning", + "code": "reader_tolerant_unknown_record" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/tool-result-meta-registered-extra-field.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 3 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 3, + "path": "/payload/meta/shell_command/exitcode", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/unknown-event-type.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + "clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/type", + "severity": "warning", + "code": "reader_tolerant_unknown_record" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "reader-tolerant/unknown-payload-field.trail.jsonl", + "strict": { + "valid": false, + "diagnostics": [ + { + "line": 2 + } + ] + }, + "tolerant": { + 
"clean": false, + "diagnostics": [ + { + "line": 2, + "path": "/payload/future_field", + "severity": "warning", + "code": "reader_tolerant_unknown_payload_field" + } + ] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/agent-message-attachments-multiple.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/agent-message-attachments.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/agent-message-usage.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/agent-thinking-usage.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/capability-change-initial-snapshot.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/capability-change.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/command-invoke-extension-kind.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/command-invoke-full.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": 
"valid/command-invoke-minimal.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/command-invoke-plugin.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/command-invoke-result-action-ext.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/command-invoke-slash.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/context-compact-provenance-only-ids.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/context-compact-replaced-message-ids.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/linear-with-parent-ids.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/minimal-linear.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/minimal-with-content-hash.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": 
"valid/multi-segment-seg1.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/multi-segment-seg2.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/multi-session-fork-from-chain.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/multi-session-two-no-envelope.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/multi-session-with-envelope.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/multiple-session-end-events.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/redaction-count-meta.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/session-end-final-message-id-references-header.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/session-end-with-final-message-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + 
{ + "path": "valid/session-header-metadata-base.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/session-metadata-update-agent-model-default.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/session-metadata-update-name.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/session-metadata-update-tags.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/session-metadata-update-vcs-branch.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/session-metadata-update-vendor.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/spec-example-incomplete-session.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/spec-example-mcp-call.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/spec-example-synthesized-event.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + 
"classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/spec-example-tool-call-semantic-pairing.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/spec-example-tool-result-fallback-pairing.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/spec-example-tree-abandoned-branch.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/streaming-finalized-clean.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/streaming-open.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/system-event-vcs-commit.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-aborted-closes-call.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-aborted-extension-scope-reason.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-aborted-turn-scope.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + 
"clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-file-list.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-file-patch.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-matched-by-for-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-matched-by-semantic-call-id.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-matched-same-parent-siblings.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-matched-sequentially.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-call-usage.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-attachments-with-mcp-meta.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-attachments.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + 
"tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-for-id-targets-header-falls-through.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-meta-file-read.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-meta-mcp-call.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-meta-shell-command.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-meta-toplevel-vendor-kind.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-meta-unregistered-kind.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-meta-vendor-extension.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/tool-result-output-size-truncated.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": 
"valid/unmatched-tool-call-suppressed-by-session-end.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/unmatched-tool-call-suppressed-by-session-terminated.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/user-message-origin-injected.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/user-query-duplicate-labels-with-ids.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/vcs-unborn-head.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/with-trail-envelope-all-fields.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/with-trail-envelope-and-hash.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + }, + { + "path": "valid/with-trail-envelope.trail.jsonl", + "strict": { + "valid": true, + "diagnostics": [] + }, + "tolerant": { + "clean": true, + "diagnostics": [] + }, + "classes": [ + "W", + "R1", + "R2" + ] + } + ] +} diff --git a/fixtures/validation/manifest.schema.json b/fixtures/validation/manifest.schema.json new file mode 100644 index 0000000..fd88b04 --- /dev/null +++ 
b/fixtures/validation/manifest.schema.json @@ -0,0 +1,123 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://agent-trail.dev/schema/v0.1.0/conformance-manifest.schema.json", + "title": "Agent Trail validation conformance manifest", + "type": "object", + "additionalProperties": false, + "required": ["schema_version", "fixtures"], + "properties": { + "schema_version": { + "const": "0.1.0" + }, + "fixtures": { + "type": "array", + "items": { + "$ref": "#/$defs/fixture" + } + } + }, + "$defs": { + "fixture": { + "type": "object", + "additionalProperties": false, + "required": ["path", "classes", "strict", "tolerant"], + "properties": { + "path": { + "type": "string", + "pattern": "^[a-z0-9-]+/[a-z0-9.-]+\\.trail\\.jsonl$" + }, + "classes": { + "type": "array", + "items": { + "enum": ["W", "R1", "R2"] + }, + "minItems": 1, + "uniqueItems": true + }, + "comment": { + "type": "string" + }, + "strict": { + "$ref": "#/$defs/strictExpectation" + }, + "tolerant": { + "$ref": "#/$defs/tolerantExpectation" + }, + "expected": { + "$ref": "#/$defs/hashExpectation" + } + } + }, + "strictExpectation": { + "type": "object", + "additionalProperties": false, + "required": ["valid", "diagnostics"], + "properties": { + "valid": { + "type": "boolean" + }, + "diagnostics": { + "$ref": "#/$defs/diagnostics" + } + } + }, + "tolerantExpectation": { + "type": "object", + "additionalProperties": false, + "required": ["clean", "diagnostics"], + "properties": { + "clean": { + "type": "boolean" + }, + "diagnostics": { + "$ref": "#/$defs/diagnostics" + } + } + }, + "diagnostics": { + "type": "array", + "items": { + "$ref": "#/$defs/diagnosticAssertion" + } + }, + "diagnosticAssertion": { + "type": "object", + "additionalProperties": false, + "required": ["line"], + "properties": { + "line": { + "type": "integer", + "minimum": 1 + }, + "path": { + "type": "string" + }, + "severity": { + "enum": ["error", "warning"] + }, + "code": { + "type": "string" + } + } + }, + 
"hashExpectation": { + "type": "object", + "additionalProperties": false, + "properties": { + "session_hashes": { + "type": "array", + "items": { + "$ref": "#/$defs/sha256" + } + }, + "file_hash": { + "$ref": "#/$defs/sha256" + } + } + }, + "sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + } + } +} diff --git a/fixtures/validation/reader-tolerant/capability-change-unknown-payload-field.trail.jsonl b/fixtures/validation/reader-tolerant/capability-change-unknown-payload-field.trail.jsonl new file mode 100644 index 0000000..f22791f --- /dev/null +++ b/fixtures/validation/reader-tolerant/capability-change-unknown-payload-field.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000128","session_uid":"00000000-0000-0000-0000-000000000128","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001288","ts":"2026-06-01T00:00:01.000Z","payload":{"scope":"tool","reason":"registered","added":[{"name":"ToolSearch"}],"future_field":true}} diff --git a/fixtures/validation/reader-tolerant/ill-formed-string.trail.jsonl b/fixtures/validation/reader-tolerant/ill-formed-string.trail.jsonl new file mode 100644 index 0000000..40f1e93 --- /dev/null +++ b/fixtures/validation/reader-tolerant/ill-formed-string.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"bad \udc00"}} diff --git a/fixtures/validation/reader-tolerant/nested-unknown-payload-field.trail.jsonl b/fixtures/validation/reader-tolerant/nested-unknown-payload-field.trail.jsonl new file mode 100644 index 0000000..59531ec --- /dev/null +++ b/fixtures/validation/reader-tolerant/nested-unknown-payload-field.trail.jsonl @@ 
-0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello","attachments":[{"kind":"file","name":"notes.txt","future_field":true}]}} diff --git a/fixtures/validation/reader-tolerant/patch-compatible-schema-version.trail.jsonl b/fixtures/validation/reader-tolerant/patch-compatible-schema-version.trail.jsonl new file mode 100644 index 0000000..7f65324 --- /dev/null +++ b/fixtures/validation/reader-tolerant/patch-compatible-schema-version.trail.jsonl @@ -0,0 +1,2 @@ +{"agent":{"name":"codex-cli"},"content_hash":"ce5fa0171af29a5f05837c438cc31e62631e1125083ff468ca8b3932cb4f3e4d","id":"01HSESS0000000000000000001","schema_version":"0.1.1","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000001","payload":{"text":"hello"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} diff --git a/fixtures/validation/reader-tolerant/reserved-future-event-type.trail.jsonl b/fixtures/validation/reader-tolerant/reserved-future-event-type.trail.jsonl new file mode 100644 index 0000000..1ad80f4 --- /dev/null +++ b/fixtures/validation/reader-tolerant/reserved-future-event-type.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"error","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"future":true}} diff --git a/fixtures/validation/reader-tolerant/tool-result-meta-registered-extra-field.trail.jsonl b/fixtures/validation/reader-tolerant/tool-result-meta-registered-extra-field.trail.jsonl new file mode 100644 index 0000000..d6e3b7c --- /dev/null +++ 
b/fixtures/validation/reader-tolerant/tool-result-meta-registered-extra-field.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"shell_command","args":{"command":"ls"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"a.txt","meta":{"shell_command":{"stdout":"a.txt","exitcode":0}}}} diff --git a/fixtures/validation/reader-tolerant/unknown-event-type.trail.jsonl b/fixtures/validation/reader-tolerant/unknown-event-type.trail.jsonl new file mode 100644 index 0000000..7cef381 --- /dev/null +++ b/fixtures/validation/reader-tolerant/unknown-event-type.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"future_event","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"future":true}} diff --git a/fixtures/validation/reader-tolerant/unknown-payload-field.trail.jsonl b/fixtures/validation/reader-tolerant/unknown-payload-field.trail.jsonl new file mode 100644 index 0000000..4e7627a --- /dev/null +++ b/fixtures/validation/reader-tolerant/unknown-payload-field.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello","future_field":true}} diff --git a/fixtures/validation/valid/agent-message-attachments-multiple.trail.jsonl 
b/fixtures/validation/valid/agent-message-attachments-multiple.trail.jsonl new file mode 100644 index 0000000..f5e9b73 --- /dev/null +++ b/fixtures/validation/valid/agent-message-attachments-multiple.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"plot two charts"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"here are the charts","attachments":[{"kind":"image","media_type":"image/png","uri":"sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa","name":"chart1.png"},{"kind":"image","media_type":"image/svg+xml","uri":"sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb","name":"chart2.svg"}]}} diff --git a/fixtures/validation/valid/agent-message-attachments.trail.jsonl b/fixtures/validation/valid/agent-message-attachments.trail.jsonl new file mode 100644 index 0000000..651ad51 --- /dev/null +++ b/fixtures/validation/valid/agent-message-attachments.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"plot this"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"here is the chart","attachments":[{"kind":"image","media_type":"image/png","uri":"sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa","name":"chart.png"}]}} diff --git a/fixtures/validation/valid/agent-message-usage.trail.jsonl b/fixtures/validation/valid/agent-message-usage.trail.jsonl new file 
mode 100644 index 0000000..28a2951 --- /dev/null +++ b/fixtures/validation/valid/agent-message-usage.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi","model":"claude-sonnet-4-5","stop_reason":"end_turn","usage":{"input_tokens":1234,"output_tokens":567,"input_tokens_cumulative":12340,"output_tokens_cumulative":5670,"total_tokens":2151,"total_tokens_cumulative":18010,"cache_read_tokens":100,"cache_creation_tokens":50,"reasoning_tokens":200,"context_input_tokens":1384,"context_window_tokens":200000}}} diff --git a/fixtures/validation/valid/agent-thinking-usage.trail.jsonl b/fixtures/validation/valid/agent-thinking-usage.trail.jsonl new file mode 100644 index 0000000..d083ace --- /dev/null +++ b/fixtures/validation/valid/agent-thinking-usage.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"agent_thinking","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"text":"Need inspect before answer.","model":"claude-sonnet-4-5","usage":{"input_tokens":10,"output_tokens":4,"reasoning_tokens":4}}} diff --git a/fixtures/validation/valid/capability-change-initial-snapshot.trail.jsonl b/fixtures/validation/valid/capability-change-initial-snapshot.trail.jsonl new file mode 100644 index 0000000..89596c1 --- /dev/null +++ b/fixtures/validation/valid/capability-change-initial-snapshot.trail.jsonl @@ -0,0 +1,2 @@ 
+{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000292","session_uid":"00000000-0000-0000-0000-000000000292","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000002921","ts":"2026-06-01T00:00:01.000Z","payload":{"scope":"tool","reason":"initial","snapshot":[{"name":"shell","metadata":{"namespace":"codex"}}]}} diff --git a/fixtures/validation/valid/capability-change.trail.jsonl b/fixtures/validation/valid/capability-change.trail.jsonl new file mode 100644 index 0000000..63c350d --- /dev/null +++ b/fixtures/validation/valid/capability-change.trail.jsonl @@ -0,0 +1,5 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000128","session_uid":"00000000-0000-0000-0000-000000000128","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001281","ts":"2026-06-01T00:00:01.000Z","payload":{"scope":"tool","reason":"registered","added":[{"name":"ToolSearch","metadata":{"namespace":"claude-code"}}]}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001282","ts":"2026-06-01T00:00:02.000Z","payload":{"scope":"skill","reason":"loaded","snapshot":[{"name":"tdd","metadata":{"source":"skill_listing"}}]}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001283","ts":"2026-06-01T00:00:03.000Z","payload":{"scope":"mcp_server","reason":"instructions_updated","changed":[{"name":"linear","field":"instructions","from":"old","to":"new"}]}} +{"type":"capability_change","id":"00000000-0000-0000-0000-000000001284","ts":"2026-06-01T00:00:04.000Z","payload":{"scope":"mcp_tool","reason":"error","removed":[{"name":"linear.list_issues"}]}} diff --git a/fixtures/validation/valid/command-invoke-extension-kind.trail.jsonl b/fixtures/validation/valid/command-invoke-extension-kind.trail.jsonl new file mode 100644 index 0000000..9f886fc --- /dev/null +++ 
b/fixtures/validation/valid/command-invoke-extension-kind.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000291","session_uid":"00000000-0000-0000-0000-000000000291","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"00000000-0000-0000-0000-000000002911","ts":"2026-06-01T00:00:01.000Z","payload":{"name":"workflow","kind":"x-codex/workflow","via":"user_typed"}} diff --git a/fixtures/validation/valid/command-invoke-full.trail.jsonl b/fixtures/validation/valid/command-invoke-full.trail.jsonl new file mode 100644 index 0000000..c6e46b9 --- /dev/null +++ b/fixtures/validation/valid/command-invoke-full.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"/code-review","kind":"custom_prompt","via":"user_typed","args":{"target":"HEAD"},"expansion_text":"Review the diff against main.","result_action":"expand"}} diff --git a/fixtures/validation/valid/command-invoke-minimal.trail.jsonl b/fixtures/validation/valid/command-invoke-minimal.trail.jsonl new file mode 100644 index 0000000..3a5f849 --- /dev/null +++ b/fixtures/validation/valid/command-invoke-minimal.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"/clear","kind":"builtin","via":"user_typed"}} diff --git a/fixtures/validation/valid/command-invoke-plugin.trail.jsonl b/fixtures/validation/valid/command-invoke-plugin.trail.jsonl new file mode 100644 index 0000000..9e57064 --- /dev/null 
+++ b/fixtures/validation/valid/command-invoke-plugin.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"pi"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"format","kind":"plugin","via":"agent_invoked","result_action":null}} diff --git a/fixtures/validation/valid/command-invoke-result-action-ext.trail.jsonl b/fixtures/validation/valid/command-invoke-result-action-ext.trail.jsonl new file mode 100644 index 0000000..b88b030 --- /dev/null +++ b/fixtures/validation/valid/command-invoke-result-action-ext.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"webapp-testing","kind":"skill","via":"auto_trigger","result_action":"x-claude-code/plan-mode"}} diff --git a/fixtures/validation/valid/command-invoke-slash.trail.jsonl b/fixtures/validation/valid/command-invoke-slash.trail.jsonl new file mode 100644 index 0000000..8f6c171 --- /dev/null +++ b/fixtures/validation/valid/command-invoke-slash.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"command_invoke","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"name":"/diff","kind":"slash","via":"user_typed","result_action":"noop"}} diff --git a/fixtures/validation/valid/context-compact-provenance-only-ids.trail.jsonl b/fixtures/validation/valid/context-compact-provenance-only-ids.trail.jsonl new file mode 100644 index 0000000..112292d --- /dev/null +++ 
b/fixtures/validation/valid/context-compact-provenance-only-ids.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS000000000000000176A","session_uid":"01HZZZZZZZZZZZZZZZZZZZ176A","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"context_compact","id":"01HEVTA000000000000000176A","ts":"2026-05-17T14:00:10.000Z","payload":{"summary":"Earlier messages were compacted before this segment was emitted.","trigger":"auto","replaced_message_ids":["01HEVTA000000000000000176B"]}} diff --git a/fixtures/validation/valid/context-compact-replaced-message-ids.trail.jsonl b/fixtures/validation/valid/context-compact-replaced-message-ids.trail.jsonl new file mode 100644 index 0000000..4185074 --- /dev/null +++ b/fixtures/validation/valid/context-compact-replaced-message-ids.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000176","session_uid":"01HZZZZZZZZZZZZZZZZZZZZ176","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000176","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"long prompt"}} +{"type":"agent_message","id":"01HEVTA0000000000000000177","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"long answer"}} +{"type":"context_compact","id":"01HEVTA0000000000000000178","ts":"2026-05-17T14:00:10.000Z","payload":{"summary":"Long prompt and answer were compacted.","trigger":"auto","replaced_message_ids":["01HEVTA0000000000000000176","01HEVTA0000000000000000177"]}} diff --git a/fixtures/validation/valid/linear-with-parent-ids.trail.jsonl b/fixtures/validation/valid/linear-with-parent-ids.trail.jsonl new file mode 100644 index 0000000..f3d58f0 --- /dev/null +++ b/fixtures/validation/valid/linear-with-parent-ids.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","parent_id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/valid/minimal-linear.trail.jsonl b/fixtures/validation/valid/minimal-linear.trail.jsonl new file mode 100644 index 0000000..0ae9e8f --- /dev/null +++ b/fixtures/validation/valid/minimal-linear.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/valid/minimal-with-content-hash.trail.jsonl b/fixtures/validation/valid/minimal-with-content-hash.trail.jsonl new file mode 100644 index 0000000..d0f060a --- /dev/null +++ b/fixtures/validation/valid/minimal-with-content-hash.trail.jsonl @@ -0,0 +1,3 @@ +{"agent":{"name":"codex-cli"},"content_hash":"8dbf946e5d4ccd2a4ff2681d2c2fe2614f0769bdfeafe5e4f242db14872db5f7","id":"01HSESS0000000000000000001","schema_version":"0.1.0","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000001","payload":{"text":"hello"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} +{"id":"01HEVTA0000000000000000002","payload":{"text":"hi"},"ts":"2026-05-17T14:00:07.000Z","type":"agent_message"} diff --git a/fixtures/validation/valid/multi-segment-seg1.trail.jsonl b/fixtures/validation/valid/multi-segment-seg1.trail.jsonl new file mode 100644 index 0000000..11f1a09 --- /dev/null +++ b/fixtures/validation/valid/multi-segment-seg1.trail.jsonl @@ -0,0 +1,2 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","segment":{"seq":1},"ts":"2026-05-26T10:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-26T10:00:05.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/valid/multi-segment-seg2.trail.jsonl b/fixtures/validation/valid/multi-segment-seg2.trail.jsonl new file mode 100644 index 0000000..fee6e0f --- /dev/null +++ b/fixtures/validation/valid/multi-segment-seg2.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000002","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","segment":{"seq":2,"prev_content_hash":"deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"},"ts":"2026-05-26T10:05:00.000Z","agent":{"name":"codex-cli"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-26T10:05:05.000Z","payload":{"text":"continuing"}} diff --git a/fixtures/validation/valid/multi-session-fork-from-chain.trail.jsonl b/fixtures/validation/valid/multi-session-fork-from-chain.trail.jsonl new file mode 100644 index 0000000..84afd13 --- /dev/null +++ b/fixtures/validation/valid/multi-session-fork-from-chain.trail.jsonl @@ -0,0 +1,4 @@ +{"agent":{"name":"codex-cli"},"content_hash":"cb67fb75b08a379e0ef919b7f8436082cb67908edea9d82171a3e5a2af532089","id":"01HSESS0000000000000000C01","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000C01","payload":{"text":"parent"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} +{"agent":{"name":"claude-code"},"content_hash":"51223c899e5b544edcfc47634cbee5a025578f0ca32cb3ffcffade0a1155d4a1","fork_from":{"content_hash":"cb67fb75b08a379e0ef919b7f8436082cb67908edea9d82171a3e5a2af532089","session_id":"01HSESS0000000000000000C01"},"id":"01HSESS0000000000000000C02","schema_version":"0.1.0","ts":"2026-05-17T14:05:00.000Z","type":"session"} 
+{"id":"01HEVTA0000000000000000C02","payload":{"text":"fork"},"ts":"2026-05-17T14:05:05.000Z","type":"user_message"} diff --git a/fixtures/validation/valid/multi-session-two-no-envelope.trail.jsonl b/fixtures/validation/valid/multi-session-two-no-envelope.trail.jsonl new file mode 100644 index 0000000..1198c67 --- /dev/null +++ b/fixtures/validation/valid/multi-session-two-no-envelope.trail.jsonl @@ -0,0 +1,4 @@ +{"agent":{"name":"codex-cli"},"content_hash":"0e0b83e6e0a6bee7044824ea6ecafcbec8fdfd12dd5bfae47b173efbdb83ce89","id":"01HSESS0000000000000000A01","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000A01","payload":{"text":"first session msg"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} +{"agent":{"name":"claude-code"},"content_hash":"286b0f8884d56ef90b6e13b43f90a4a0fa156bea780341bd2297e2af3a2f482a","id":"01HSESS0000000000000000A02","schema_version":"0.1.0","ts":"2026-05-17T14:05:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000A02","payload":{"text":"second session msg"},"ts":"2026-05-17T14:05:05.000Z","type":"user_message"} diff --git a/fixtures/validation/valid/multi-session-with-envelope.trail.jsonl b/fixtures/validation/valid/multi-session-with-envelope.trail.jsonl new file mode 100644 index 0000000..5e9f79a --- /dev/null +++ b/fixtures/validation/valid/multi-session-with-envelope.trail.jsonl @@ -0,0 +1,5 @@ +{"content_hash":"1ead799274fc3b9e80a1a9c26539a906303b9a56332d5c2058f61142df4fea30","id":"01HTRA0X00000000000000A001","producer":"trail-cli/0.3.0","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"trail"} +{"agent":{"name":"codex-cli"},"content_hash":"8abae38e97120ce1b6df571e7bb34895b50f9ad8000bdc40f296b292adb25128","id":"01HSESS0000000000000000B01","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000B01","payload":{"text":"msg1"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} 
+{"agent":{"name":"claude-code"},"content_hash":"158803f33d7a1bca6ba219d503df6549deedede039ad2fc890611338d9f794ec","id":"01HSESS0000000000000000B02","schema_version":"0.1.0","ts":"2026-05-17T14:05:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000B02","payload":{"text":"msg2"},"ts":"2026-05-17T14:05:05.000Z","type":"user_message"} diff --git a/fixtures/validation/valid/multiple-session-end-events.trail.jsonl b/fixtures/validation/valid/multiple-session-end-events.trail.jsonl new file mode 100644 index 0000000..33ff750 --- /dev/null +++ b/fixtures/validation/valid/multiple-session-end-events.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"session_end","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"reason":"agent_idle","final_message_id":"01HSESS0000000000000000001"}} +{"type":"session_end","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:07.000Z","payload":{"reason":"complete","final_message_id":"01HEVTA0000000000000000002"}} diff --git a/fixtures/validation/valid/redaction-count-meta.trail.jsonl b/fixtures/validation/valid/redaction-count-meta.trail.jsonl new file mode 100644 index 0000000..7b8f59c --- /dev/null +++ b/fixtures/validation/valid/redaction-count-meta.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"redacted"},"meta":{"redaction_count":2}} diff --git 
a/fixtures/validation/valid/session-end-final-message-id-references-header.trail.jsonl b/fixtures/validation/valid/session-end-final-message-id-references-header.trail.jsonl new file mode 100644 index 0000000..46fc925 --- /dev/null +++ b/fixtures/validation/valid/session-end-final-message-id-references-header.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"session_end","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:08.000Z","payload":{"reason":"agent_idle","final_message_id":"01HSESS0000000000000000001"}} diff --git a/fixtures/validation/valid/session-end-with-final-message-id.trail.jsonl b/fixtures/validation/valid/session-end-with-final-message-id.trail.jsonl new file mode 100644 index 0000000..625f5bb --- /dev/null +++ b/fixtures/validation/valid/session-end-with-final-message-id.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +{"type":"session_end","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:08.000Z","payload":{"reason":"complete","final_message_id":"01HEVTA0000000000000000002"}} diff --git a/fixtures/validation/valid/session-header-metadata-base.trail.jsonl b/fixtures/validation/valid/session-header-metadata-base.trail.jsonl new file mode 100644 index 0000000..492c4f4 --- /dev/null +++ b/fixtures/validation/valid/session-header-metadata-base.trail.jsonl @@ -0,0 +1,2 @@ 
+{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000249","session_uid":"00000000-0000-0000-0000-000000000249","name":"Initial adapter title","description":"Initial adapter description","tags":["adapter","metadata"],"ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000002491","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"name","value":"Updated session title","previous_value":"Initial adapter title","reason":"ai_generated"}} diff --git a/fixtures/validation/valid/session-metadata-update-agent-model-default.trail.jsonl b/fixtures/validation/valid/session-metadata-update-agent-model-default.trail.jsonl new file mode 100644 index 0000000..84168cb --- /dev/null +++ b/fixtures/validation/valid/session-metadata-update-agent-model-default.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001327","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"agent.model_default","value":"gpt-5-codex","previous_value":"gpt-5","reason":"runtime_inferred"}} diff --git a/fixtures/validation/valid/session-metadata-update-name.trail.jsonl b/fixtures/validation/valid/session-metadata-update-name.trail.jsonl new file mode 100644 index 0000000..1a9f248 --- /dev/null +++ b/fixtures/validation/valid/session-metadata-update-name.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001321","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"name","value":"Refactor adapter 
metadata","reason":"ai_generated"}} diff --git a/fixtures/validation/valid/session-metadata-update-tags.trail.jsonl b/fixtures/validation/valid/session-metadata-update-tags.trail.jsonl new file mode 100644 index 0000000..cc2859e --- /dev/null +++ b/fixtures/validation/valid/session-metadata-update-tags.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001322","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"tags","value":["release","docs"],"previous_value":["docs"],"reason":"user_set"}} diff --git a/fixtures/validation/valid/session-metadata-update-vcs-branch.trail.jsonl b/fixtures/validation/valid/session-metadata-update-vcs-branch.trail.jsonl new file mode 100644 index 0000000..9151de2 --- /dev/null +++ b/fixtures/validation/valid/session-metadata-update-vcs-branch.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001328","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"vcs.branch","value":"feature/session-metadata","reason":"runtime_inferred"}} diff --git a/fixtures/validation/valid/session-metadata-update-vendor.trail.jsonl b/fixtures/validation/valid/session-metadata-update-vendor.trail.jsonl new file mode 100644 index 0000000..afad3fe --- /dev/null +++ b/fixtures/validation/valid/session-metadata-update-vendor.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000132","session_uid":"00000000-0000-0000-0000-000000000132","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"session_metadata_update","id":"00000000-0000-0000-0000-000000001329","ts":"2026-06-01T00:00:01.000Z","payload":{"field":"x-codex/thread_goal","value":{"summary":null,"items":["ship"]},"previous_value":{"items":[]},"reason":"ai_generated"}} diff --git a/fixtures/validation/valid/spec-example-incomplete-session.trail.jsonl b/fixtures/validation/valid/spec-example-incomplete-session.trail.jsonl new file mode 100644 index 0000000..5045fbb --- /dev/null +++ b/fixtures/validation/valid/spec-example-incomplete-session.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000006","session_uid":"00000000-0000-4000-8000-000000000186","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"x-com/example-agent"}} +{"type":"user_message","id":"01HEVTF0000000000000000001","ts":"2026-05-17T14:00:00.000Z","payload":{"text":"Run the test suite"}} +{"type":"tool_call","id":"01HEVTF0000000000000000002","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"shell_command","args":{"command":"npm test"}}} +{"type":"session_terminated","id":"01HEVTF0000000000000000003","ts":"2026-05-17T14:01:30.000Z","payload":{"reason":"eof_with_open_tool_calls","open_call_ids":["01HEVTF0000000000000000002"]},"source":{"synthesized":true}} diff --git a/fixtures/validation/valid/spec-example-mcp-call.trail.jsonl b/fixtures/validation/valid/spec-example-mcp-call.trail.jsonl new file mode 100644 index 0000000..f51b01d --- /dev/null +++ b/fixtures/validation/valid/spec-example-mcp-call.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000005","session_uid":"00000000-0000-4000-8000-000000000187","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"x-com/example-agent"}} +{"type":"user_message","id":"01HEVTE0000000000000000001","ts":"2026-05-17T14:00:00.000Z","payload":{"text":"Find my open Linear issues"}} 
+{"type":"tool_call","id":"01HEVTE0000000000000000002","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"mcp_call","args":{"server":"linear","tool":"list_issues","args":{"status":"open","assignee":"me"},"headers":{"Authorization":"[REDACTED]"}}}} +{"type":"tool_result","id":"01HEVTE0000000000000000003","ts":"2026-05-17T14:00:02.000Z","payload":{"for_id":"01HEVTE0000000000000000002","ok":true,"output":"[{\"id\":\"ABC-123\",\"title\":\"Fix auth\"}]"}} diff --git a/fixtures/validation/valid/spec-example-synthesized-event.trail.jsonl b/fixtures/validation/valid/spec-example-synthesized-event.trail.jsonl new file mode 100644 index 0000000..918a47e --- /dev/null +++ b/fixtures/validation/valid/spec-example-synthesized-event.trail.jsonl @@ -0,0 +1,5 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000004","session_uid":"00000000-0000-4000-8000-000000000185","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"x-com/example-agent"},"vcs":{"type":"git","revision":"a1b2c3d4"}} +{"type":"user_message","id":"01HEVTD0000000000000000001","ts":"2026-05-17T14:00:00.000Z","payload":{"text":"Add a logger"}} +{"type":"agent_message","id":"01HEVTD0000000000000000002","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"Adding logger..."}} +{"type":"tool_call","id":"01HEVTD0000000000000000003","ts":"2026-05-17T14:00:06.000Z","payload":{"tool":"file_edit","args":{"path":"src/main.ts","diff":"--- a/src/main.ts\n+++ b/src/main.ts\n@@ -1,3 +1,5 @@\n+import { logger } from './logger';\n+\n const main = () => {"}},"source":{"agent":"x-com/example-agent","original_type":"git_commit_diff","synthesized":true}} +{"type":"session_end","id":"01HEVTD0000000000000000004","ts":"2026-05-17T14:00:09.000Z","payload":{"reason":"complete"}} diff --git a/fixtures/validation/valid/spec-example-tool-call-semantic-pairing.trail.jsonl b/fixtures/validation/valid/spec-example-tool-call-semantic-pairing.trail.jsonl new file mode 100644 index 0000000..276bf2b --- /dev/null +++ 
b/fixtures/validation/valid/spec-example-tool-call-semantic-pairing.trail.jsonl @@ -0,0 +1,5 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000002","session_uid":"00000000-0000-4000-8000-000000000182","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"x-com/example-agent"}} +{"type":"user_message","id":"01HEVTB0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"Read package.json"}} +{"type":"tool_call","id":"01HEVTB0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"tool":"file_read","args":{"path":"package.json"}},"semantic":{"call_id":"toolu_01abc"}} +{"type":"tool_result","id":"01HEVTB0000000000000000003","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTB0000000000000000002","ok":true,"output":"{\"name\":\"trail\"}"},"semantic":{"call_id":"toolu_01abc","tool_kind":"file_read"}} +{"type":"agent_message","id":"01HEVTB0000000000000000004","ts":"2026-05-17T14:00:08.000Z","payload":{"text":"Your package is called trail."}} diff --git a/fixtures/validation/valid/spec-example-tool-result-fallback-pairing.trail.jsonl b/fixtures/validation/valid/spec-example-tool-result-fallback-pairing.trail.jsonl new file mode 100644 index 0000000..42420b2 --- /dev/null +++ b/fixtures/validation/valid/spec-example-tool-result-fallback-pairing.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS000000000000000002B","session_uid":"00000000-0000-4000-8000-000000000183","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"x-com/example-agent"}} +{"type":"user_message","id":"01HEVTX0000000000000000001","ts":"2026-05-17T14:00:00.000Z","payload":{"text":"Read package.json"}} +{"type":"tool_call","id":"01HEVTX0000000000000000002","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_read","args":{"path":"package.json"}},"semantic":{"call_id":"toolu_xyz"}} 
+{"type":"tool_result","id":"01HEVTX0000000000000000003","ts":"2026-05-17T14:00:02.000Z","payload":{"ok":true,"output":"{\"name\":\"trail\"}"},"semantic":{"call_id":"toolu_xyz"}} diff --git a/fixtures/validation/valid/spec-example-tree-abandoned-branch.trail.jsonl b/fixtures/validation/valid/spec-example-tree-abandoned-branch.trail.jsonl new file mode 100644 index 0000000..636fa9e --- /dev/null +++ b/fixtures/validation/valid/spec-example-tree-abandoned-branch.trail.jsonl @@ -0,0 +1,6 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000003","session_uid":"00000000-0000-4000-8000-000000000184","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"x-com/example-tree"}} +{"type":"user_message","id":"01HEVTC0000000000000000001","ts":"2026-05-17T14:00:00.000Z","payload":{"text":"Try approach A"}} +{"type":"agent_message","id":"01HEVTC0000000000000000002","parent_id":"01HEVTC0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"Approach A: ..."}} +{"type":"user_message","id":"01HEVTC0000000000000000003","parent_id":"01HEVTC0000000000000000001","ts":"2026-05-17T14:01:00.000Z","payload":{"text":"Actually, try approach B"}} +{"type":"branch_summary","id":"01HEVTC0000000000000000004","parent_id":"01HEVTC0000000000000000003","ts":"2026-05-17T14:01:01.000Z","payload":{"abandoned_branch_id":"01HEVTC0000000000000000002","summary":"Approach A explored but did not work because of X"}} +{"type":"agent_message","id":"01HEVTC0000000000000000005","parent_id":"01HEVTC0000000000000000004","ts":"2026-05-17T14:01:05.000Z","payload":{"text":"For approach B: ..."}} diff --git a/fixtures/validation/valid/streaming-finalized-clean.trail.jsonl b/fixtures/validation/valid/streaming-finalized-clean.trail.jsonl new file mode 100644 index 0000000..3ac53c1 --- /dev/null +++ b/fixtures/validation/valid/streaming-finalized-clean.trail.jsonl @@ -0,0 +1,4 @@ 
+{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","stream":{"state":"closed","started_at":"2026-05-17T14:00:00.000Z"},"agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +{"type":"session_end","id":"01HEVTA0000000000000000003","ts":"2026-05-17T14:00:08.000Z","payload":{"reason":"complete","final_message_id":"01HEVTA0000000000000000002"}} diff --git a/fixtures/validation/valid/streaming-open.trail.jsonl b/fixtures/validation/valid/streaming-open.trail.jsonl new file mode 100644 index 0000000..3ba19ba --- /dev/null +++ b/fixtures/validation/valid/streaming-open.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","stream":{"state":"open","started_at":"2026-05-17T14:00:00.000Z"},"agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/fixtures/validation/valid/system-event-vcs-commit.trail.jsonl b/fixtures/validation/valid/system-event-vcs-commit.trail.jsonl new file mode 100644 index 0000000..7a15bcf --- /dev/null +++ b/fixtures/validation/valid/system-event-vcs-commit.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000010","session_uid":"00000000-0000-4000-8000-000000000190","ts":"2026-06-11T10:00:00.000Z","agent":{"name":"x-com/example-agent"}} 
+{"type":"tool_call","id":"01HEVTV0000000000000000001","ts":"2026-06-11T10:00:01.000Z","payload":{"tool":"shell_command","args":{"command":"git add src/main.ts && git commit -m \"feat: add marker\""}},"semantic":{"call_id":"toolu_vcs_commit_001","tool_kind":"shell_command"}} +{"type":"tool_result","id":"01HEVTV0000000000000000002","ts":"2026-06-11T10:00:02.000Z","payload":{"for_id":"01HEVTV0000000000000000001","ok":true,"output":"[main a1b2c3d] feat: add marker\n 1 file changed, 1 insertion(+)\n"},"semantic":{"call_id":"toolu_vcs_commit_001","tool_kind":"shell_command"}} +{"type":"system_event","id":"01HEVTV0000000000000000003","ts":"2026-06-11T10:00:02.000Z","payload":{"kind":"vcs_commit","data":{"sha":"a1b2c3d","tool_call_id":"01HEVTV0000000000000000001","branch":"main","message":"feat: add marker"}},"parent_id":"01HEVTV0000000000000000002","semantic":{"call_id":"toolu_vcs_commit_001"},"source":{"agent":"x-com/example-agent","original_type":"shell_command","synthesized":true}} diff --git a/fixtures/validation/valid/tool-call-aborted-closes-call.trail.jsonl b/fixtures/validation/valid/tool-call-aborted-closes-call.trail.jsonl new file mode 100644 index 0000000..38e2856 --- /dev/null +++ b/fixtures/validation/valid/tool-call-aborted-closes-call.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"shell_command","args":{"command":"sleep 100"}},"semantic":{"call_id":"call-1"}} +{"type":"tool_call_aborted","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"scope":"tool_call","for_id":"01HEVTA0000000000000000001","reason":"user_interrupt"}} diff --git a/fixtures/validation/valid/tool-call-aborted-extension-scope-reason.trail.jsonl 
b/fixtures/validation/valid/tool-call-aborted-extension-scope-reason.trail.jsonl new file mode 100644 index 0000000..5b71ae3 --- /dev/null +++ b/fixtures/validation/valid/tool-call-aborted-extension-scope-reason.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call_aborted","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"scope":"x-codex/turn","reason":"x-codex/interrupted"}} diff --git a/fixtures/validation/valid/tool-call-aborted-turn-scope.trail.jsonl b/fixtures/validation/valid/tool-call-aborted-turn-scope.trail.jsonl new file mode 100644 index 0000000..a794631 --- /dev/null +++ b/fixtures/validation/valid/tool-call-aborted-turn-scope.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call_aborted","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"scope":"turn","reason":"x-codex/interrupted"}} diff --git a/fixtures/validation/valid/tool-call-file-list.trail.jsonl b/fixtures/validation/valid/tool-call-file-list.trail.jsonl new file mode 100644 index 0000000..a5433cb --- /dev/null +++ b/fixtures/validation/valid/tool-call-file-list.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000201","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"00000000-0000-4000-8000-000000000202","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_list","args":{"path":"src","recursive":false,"glob":"*.ts"}}} 
+{"type":"tool_result","id":"00000000-0000-4000-8000-000000000203","ts":"2026-05-17T14:00:02.000Z","payload":{"for_id":"00000000-0000-4000-8000-000000000202","ok":true,"output":"index.ts"}} diff --git a/fixtures/validation/valid/tool-call-file-patch.trail.jsonl b/fixtures/validation/valid/tool-call-file-patch.trail.jsonl new file mode 100644 index 0000000..f8fb522 --- /dev/null +++ b/fixtures/validation/valid/tool-call-file-patch.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-4000-8000-000000000101","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"00000000-0000-4000-8000-000000000102","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_patch","args":{"files":[{"path":"src/a.ts","diff":"--- a/src/a.ts\n+++ b/src/a.ts\n@@\n-old\n+new"},{"path":"src/b.ts","diff":"--- a/src/b.ts\n+++ b/src/b.ts\n@@\n-old\n+new"}],"atomic":true}}} +{"type":"tool_result","id":"00000000-0000-4000-8000-000000000103","ts":"2026-05-17T14:00:02.000Z","payload":{"for_id":"00000000-0000-4000-8000-000000000102","ok":true,"output":"patched"}} diff --git a/fixtures/validation/valid/tool-call-matched-by-for-id.trail.jsonl b/fixtures/validation/valid/tool-call-matched-by-for-id.trail.jsonl new file mode 100644 index 0000000..dacd53b --- /dev/null +++ b/fixtures/validation/valid/tool-call-matched-by-for-id.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"hi"}} diff --git a/fixtures/validation/valid/tool-call-matched-by-semantic-call-id.trail.jsonl 
b/fixtures/validation/valid/tool-call-matched-by-semantic-call-id.trail.jsonl new file mode 100644 index 0000000..028ed9d --- /dev/null +++ b/fixtures/validation/valid/tool-call-matched-by-semantic-call-id.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","semantic":{"call_id":"call_abc"},"payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","semantic":{"call_id":"call_abc"},"payload":{"ok":true,"output":"hi"}} diff --git a/fixtures/validation/valid/tool-call-matched-same-parent-siblings.trail.jsonl b/fixtures/validation/valid/tool-call-matched-same-parent-siblings.trail.jsonl new file mode 100644 index 0000000..9372f3c --- /dev/null +++ b/fixtures/validation/valid/tool-call-matched-same-parent-siblings.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"session","schema_version":"0.1.0","id":"01HEVTA0000000000000000000","ts":"2025-01-01T00:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"agent_message","id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:01.000Z","payload":{"text":"run command"}} +{"type":"tool_call","id":"01HEVTA0000000000000000002","parent_id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:02.000Z","payload":{"tool":"shell_command","args":{"command":"echo hi"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000003","parent_id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:03.000Z","payload":{"ok":true,"output":"hi"}} diff --git a/fixtures/validation/valid/tool-call-matched-sequentially.trail.jsonl b/fixtures/validation/valid/tool-call-matched-sequentially.trail.jsonl new file mode 100644 index 0000000..027006d --- /dev/null +++ b/fixtures/validation/valid/tool-call-matched-sequentially.trail.jsonl @@ -0,0 
+1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"ok":true,"output":"hi"}} diff --git a/fixtures/validation/valid/tool-call-usage.trail.jsonl b/fixtures/validation/valid/tool-call-usage.trail.jsonl new file mode 100644 index 0000000..3550da2 --- /dev/null +++ b/fixtures/validation/valid/tool-call-usage.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:01.000Z","payload":{"tool":"file_read","args":{"path":"spec.md"},"usage":{"input_tokens":10,"output_tokens":4,"cache_read_tokens":2}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:02.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"# Agent Trail\n"}} diff --git a/fixtures/validation/valid/tool-result-attachments-with-mcp-meta.trail.jsonl b/fixtures/validation/valid/tool-result-attachments-with-mcp-meta.trail.jsonl new file mode 100644 index 0000000..a3eb44b --- /dev/null +++ b/fixtures/validation/valid/tool-result-attachments-with-mcp-meta.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"mcp_call","args":{"server":"docs","tool":"render"}}} 
+{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"rendered page","attachments":[{"kind":"image","media_type":"image/png","uri":"sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc","name":"render.png"}],"meta":{"mcp_call":{"content_blocks":[{"type":"text","text":"page rendered"}],"is_error":false}}}} diff --git a/fixtures/validation/valid/tool-result-attachments.trail.jsonl b/fixtures/validation/valid/tool-result-attachments.trail.jsonl new file mode 100644 index 0000000..fa37efc --- /dev/null +++ b/fixtures/validation/valid/tool-result-attachments.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"other","args":{"name":"screenshot"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"captured screenshot","attachments":[{"kind":"image","media_type":"image/png","uri":"sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb","name":"screen.png"}]}} diff --git a/fixtures/validation/valid/tool-result-for-id-targets-header-falls-through.trail.jsonl b/fixtures/validation/valid/tool-result-for-id-targets-header-falls-through.trail.jsonl new file mode 100644 index 0000000..0f10a9e --- /dev/null +++ b/fixtures/validation/valid/tool-result-for-id-targets-header-falls-through.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} 
+{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","semantic":{"call_id":"call_xyz"},"payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","semantic":{"call_id":"call_xyz"},"payload":{"for_id":"01HSESS0000000000000000001","ok":true,"output":"hi"}} diff --git a/fixtures/validation/valid/tool-result-meta-file-read.trail.jsonl b/fixtures/validation/valid/tool-result-meta-file-read.trail.jsonl new file mode 100644 index 0000000..5163021 --- /dev/null +++ b/fixtures/validation/valid/tool-result-meta-file-read.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt","range":[10,50]}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"...","meta":{"file_read":{"range":[10,50],"total_lines":240,"encoding":"utf-8","truncated_at_line":null}}}} diff --git a/fixtures/validation/valid/tool-result-meta-mcp-call.trail.jsonl b/fixtures/validation/valid/tool-result-meta-mcp-call.trail.jsonl new file mode 100644 index 0000000..f68c7e0 --- /dev/null +++ b/fixtures/validation/valid/tool-result-meta-mcp-call.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"mcp_call","args":{"server":"docs","tool":"search"}}} 
+{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"found 1 result","meta":{"mcp_call":{"content_blocks":[{"type":"text","text":"result body"},{"type":"resource","uri":"file:///doc.md","mime_type":"text/markdown"}],"is_error":false}}}} diff --git a/fixtures/validation/valid/tool-result-meta-shell-command.trail.jsonl b/fixtures/validation/valid/tool-result-meta-shell-command.trail.jsonl new file mode 100644 index 0000000..6faf0bc --- /dev/null +++ b/fixtures/validation/valid/tool-result-meta-shell-command.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"shell_command","args":{"command":"ls"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"a.txt\nb.txt","meta":{"shell_command":{"stdout":"a.txt\nb.txt","stderr":"","exit_code":0,"signal":null,"duration_ms":12}}}} diff --git a/fixtures/validation/valid/tool-result-meta-toplevel-vendor-kind.trail.jsonl b/fixtures/validation/valid/tool-result-meta-toplevel-vendor-kind.trail.jsonl new file mode 100644 index 0000000..ef6117b --- /dev/null +++ b/fixtures/validation/valid/tool-result-meta-toplevel-vendor-kind.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"other","args":{"name":"custom"}}} 
+{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"ok","meta":{"x-acme/custom_kind":{"foo":"bar"}}}} diff --git a/fixtures/validation/valid/tool-result-meta-unregistered-kind.trail.jsonl b/fixtures/validation/valid/tool-result-meta-unregistered-kind.trail.jsonl new file mode 100644 index 0000000..1ff88c5 --- /dev/null +++ b/fixtures/validation/valid/tool-result-meta-unregistered-kind.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"web_fetch","args":{"url":"https://example.com"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"","meta":{"web_fetch":{"status":200,"size_bytes":12345}}}} diff --git a/fixtures/validation/valid/tool-result-meta-vendor-extension.trail.jsonl b/fixtures/validation/valid/tool-result-meta-vendor-extension.trail.jsonl new file mode 100644 index 0000000..5a0d8fc --- /dev/null +++ b/fixtures/validation/valid/tool-result-meta-vendor-extension.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"claude-code"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"mcp_call","args":{"server":"docs","tool":"search"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"ok","meta":{"mcp_call":{"is_error":false,"x-acme/cache_hit":true}}}} diff --git 
a/fixtures/validation/valid/tool-result-output-size-truncated.trail.jsonl b/fixtures/validation/valid/tool-result-output-size-truncated.trail.jsonl new file mode 100644 index 0000000..86af750 --- /dev/null +++ b/fixtures/validation/valid/tool-result-output-size-truncated.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"shell_command","args":{"command":"printf '%s' large-output"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"large-output\n…[truncated]","truncated":true,"output_size":20000,"overflow_ref":"sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}} diff --git a/fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-end.trail.jsonl b/fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-end.trail.jsonl new file mode 100644 index 0000000..fa5fa57 --- /dev/null +++ b/fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-end.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"session_end","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"reason":"complete"}} diff --git a/fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-terminated.trail.jsonl b/fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-terminated.trail.jsonl new file mode 100644 index 
0000000..9338dbe --- /dev/null +++ b/fixtures/validation/valid/unmatched-tool-call-suppressed-by-session-terminated.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"session_terminated","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"reason":"eof_with_open_tool_calls","open_call_ids":["01HEVTA0000000000000000001"]}} diff --git a/fixtures/validation/valid/user-message-origin-injected.trail.jsonl b/fixtures/validation/valid/user-message-origin-injected.trail.jsonl new file mode 100644 index 0000000..660ad29 --- /dev/null +++ b/fixtures/validation/valid/user-message-origin-injected.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000129","session_uid":"00000000-0000-0000-0000-000000000129","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"00000000-0000-0000-0000-000000001291","ts":"2026-06-01T00:00:01.000Z","payload":{"text":"Injected runtime context","origin":"injected"}} diff --git a/fixtures/validation/valid/user-query-duplicate-labels-with-ids.trail.jsonl b/fixtures/validation/valid/user-query-duplicate-labels-with-ids.trail.jsonl new file mode 100644 index 0000000..3d78dbe --- /dev/null +++ b/fixtures/validation/valid/user-query-duplicate-labels-with-ids.trail.jsonl @@ -0,0 +1,2 @@ +{"type":"session","schema_version":"0.1.0","id":"01HEVTA0000000000000000000","ts":"2025-01-01T00:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_query","id":"01HEVTA0000000000000000001","ts":"2025-01-01T00:00:01.000Z","payload":{"questions":[{"id":"ship","question":"Ship 
it?","options":[{"id":"yes-safe","label":"yes"},{"id":"yes-force","label":"yes"}]}]}} diff --git a/fixtures/validation/valid/vcs-unborn-head.trail.jsonl b/fixtures/validation/valid/vcs-unborn-head.trail.jsonl new file mode 100644 index 0000000..353f2b6 --- /dev/null +++ b/fixtures/validation/valid/vcs-unborn-head.trail.jsonl @@ -0,0 +1 @@ +{"type":"session","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000289","session_uid":"00000000-0000-0000-0000-000000000289","ts":"2026-06-01T00:00:00.000Z","agent":{"name":"codex-cli"},"vcs":{"type":"git","revision":null,"branch":"main"}} diff --git a/fixtures/validation/valid/with-trail-envelope-all-fields.trail.jsonl b/fixtures/validation/valid/with-trail-envelope-all-fields.trail.jsonl new file mode 100644 index 0000000..5b06528 --- /dev/null +++ b/fixtures/validation/valid/with-trail-envelope-all-fields.trail.jsonl @@ -0,0 +1,3 @@ +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0","name":"OAuth refactor","description":"Trail covering the OAuth middleware rewrite","tags":["compliance","oauth"],"sessions":[{"id":"01HSESS0000000000000000001","agent":"codex-cli"}],"fork_from":{"trail_id":"00000000-0000-0000-0000-000000000111"},"meta":{"x-example/workflow":"nightly"}} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} diff --git a/fixtures/validation/valid/with-trail-envelope-and-hash.trail.jsonl b/fixtures/validation/valid/with-trail-envelope-and-hash.trail.jsonl new file mode 100644 index 0000000..57c22ff --- /dev/null +++ b/fixtures/validation/valid/with-trail-envelope-and-hash.trail.jsonl @@ -0,0 +1,3 @@ 
+{"content_hash":"742252b991c5305d84786420b56e9e98dd4facbc575bed8c71fc003a66d4381d","id":"01HTRACE000000000000000001","producer":"trail-cli/0.3.0","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"trail"} +{"agent":{"name":"codex-cli"},"content_hash":"2be81234cd4d38dd4b40e3b20a30addcebacc06dca1218cb66d97578eecba022","id":"01HSESS0000000000000000001","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000001","payload":{"text":"hello"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} diff --git a/fixtures/validation/valid/with-trail-envelope.trail.jsonl b/fixtures/validation/valid/with-trail-envelope.trail.jsonl new file mode 100644 index 0000000..cf4a887 --- /dev/null +++ b/fixtures/validation/valid/with-trail-envelope.trail.jsonl @@ -0,0 +1,4 @@ +{"type":"trail","schema_version":"0.1.0","id":"01HTRACE000000000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0","name":"OAuth refactor"} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} diff --git a/schema/draft.json b/schema/draft.json new file mode 100644 index 0000000..23da99f --- /dev/null +++ b/schema/draft.json @@ -0,0 +1,1915 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://agent-trail.dev/schema/v0.1.0.json", + "title": "Agent Trail v0.1.0", + "description": "Validates a single Agent Trail JSONL record: trail envelope, session header, or event entry. 
File layout rules such as envelope position and multi-session grouping are enforced by whole-file validation; per-event payload shapes are enforced via the events subschemas.", + + "oneOf": [ + { "$ref": "#/$defs/trailEnvelope" }, + { "$ref": "#/$defs/header" }, + { "$ref": "#/$defs/entry" } + ], + + "$defs": { + + "iso8601": { + "type": "string", + "format": "date-time", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z$", + "description": "Writer timestamp: UTC ISO-8601 with millisecond precision. Format-aware validators may use the date-time annotation; whole-file validation rule 6 remains authoritative for calendar validity." + }, + + "id": { + "$ref": "#/$defs/sessionUid", + "description": "Globally-unique identifier shape: canonical uppercase ULID (26 Crockford base32 chars), lowercase hyphenated UUID (36 chars), or lowercase unhyphenated UUID (32 hex chars). Header ids, event ids, and envelope ids share this shape so cross-segment reconciliation can dedup by exact string equality (spec §9.5)." + }, + + "ulid": { + "type": "string", + "pattern": "^[0-9A-HJKMNP-TV-Z]{26}$", + "description": "Canonical uppercase ULID (Crockford base32, 26 chars, no I/L/O/U). Time-prefixed and lexicographically sortable." + }, + + "sessionUid": { + "type": "string", + "pattern": "^(?:[0-9A-HJKMNP-TV-Z]{26}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|[0-9a-f]{32})$", + "description": "Globally-unique source-session identifier shape: canonical uppercase ULID (26 Crockford base32 chars), lowercase hyphenated UUID (36 chars), or lowercase unhyphenated UUID (32 hex chars). Reconcilers group segments by exact string equality on session_uid (spec §9.5)." + }, + + "segment": { + "type": "object", + "description": "Multi-segment marker. Absent or {seq:1} for a single-segment trail. 
Reconciler primitive for daemon resume and multi-file sessions (spec §9.5).", + "oneOf": [ + { + "type": "object", + "required": ["seq"], + "properties": { + "seq": { "const": 1 } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["seq", "prev_content_hash"], + "properties": { + "seq": { "type": "integer", "minimum": 2 }, + "prev_content_hash": { + "oneOf": [ + { "$ref": "#/$defs/sha256Hex" }, + { "type": "null" } + ] + } + }, + "additionalProperties": false + } + ] + }, + + "sha256Hex": { + "type": "string", + "pattern": "^[a-f0-9]{64}$", + "description": "SHA-256 hash as lowercase hex (64 chars)" + }, + + "agentName": { + "oneOf": [ + { + "type": "string", + "enum": [ + "claude-code", + "pi", + "openclaw", + "codex-cli", + "cursor", + "opencode", + "aider", + "amp", + "cline", + "crush", + "kimi-code", + "qwen-code", + "factory", + "vibe", + "copilot-cli", + "copilot-chat", + "chatgpt", + "clawdbot" + ] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$", + "description": "Custom unregistered agent using the x-{vendor}/{name} extension grammar, e.g.
x-example/myagent" + } + ] + }, + + "toolKind": { + "type": "string", + "enum": [ + "file_read", + "file_write", + "file_edit", + "file_patch", + "file_list", + "file_search", + "shell_command", + "shell_output", + "shell_input", + "mcp_call", + "web_fetch", + "web_search", + "tool_search", + "notebook_edit", + "subagent_invoke", + "other" + ] + }, + + "taskPlanStatus": { + "type": "string", + "enum": ["pending", "in_progress", "completed", "cancelled", "blocked"] + }, + + "taskPlanItem": { + "type": "object", + "required": ["id", "content", "status"], + "properties": { + "id": { "type": "string", "minLength": 1 }, + "content": { "type": "string" }, + "status": { "$ref": "#/$defs/taskPlanStatus" }, + "active_form": { "type": "string" } + }, + "additionalProperties": false + }, + + "taskPlanDelta": { + "oneOf": [ + { + "type": "object", + "required": ["kind", "item_id", "to_content", "to_status"], + "properties": { + "kind": { "const": "added" }, + "item_id": { "type": "string", "minLength": 1 }, + "to_content": { "type": "string" }, + "to_status": { "$ref": "#/$defs/taskPlanStatus" }, + "to_active_form": { "type": "string" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["kind", "item_id", "from_content", "from_status"], + "properties": { + "kind": { "const": "removed" }, + "item_id": { "type": "string", "minLength": 1 }, + "from_content": { "type": "string" }, + "from_status": { "$ref": "#/$defs/taskPlanStatus" }, + "from_active_form": { "type": "string" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["kind", "item_id", "from_status", "to_status"], + "properties": { + "kind": { "const": "status_changed" }, + "item_id": { "type": "string", "minLength": 1 }, + "from_status": { "$ref": "#/$defs/taskPlanStatus" }, + "to_status": { "$ref": "#/$defs/taskPlanStatus" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["kind", "item_id", "from_content", "to_content"], + 
"properties": { + "kind": { "const": "content_changed" }, + "item_id": { "type": "string", "minLength": 1 }, + "from_content": { "type": "string" }, + "to_content": { "type": "string" } + }, + "additionalProperties": false + } + ] + }, + + "vcs": { + "type": "object", + "required": ["type", "revision"], + "anyOf": [ + { + "properties": { "revision": { "type": "string" } } + }, + { + "required": ["branch"], + "properties": { + "revision": { "type": "null" }, + "branch": { "type": "string", "minLength": 1 } + }, + "not": { "properties": { "head_commit": {} }, "required": ["head_commit"] } + } + ], + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "enum": ["git", "jj", "hg", "svn"] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "revision": { "type": ["string", "null"] }, + "remote_url": { + "type": "string", + "description": "Canonical remote URL for the working tree. Adapters MUST normalize before emission: strip embedded credentials, strip trailing .git for git URLs, and normalize SSH/HTTPS variants to a single canonical form (https://host/path)." + }, + "branch": { + "type": "string", + "description": "Active branch / bookmark / topic name the session is running on. For git, the short branch name (e.g., `feature/x`). Detached-HEAD sessions MAY omit this field." + }, + "head_commit": { + "type": "string", + "pattern": "^[a-f0-9]{7,64}$", + "description": "Commit hash at session start (lowercase hex, 7-64 chars). For git this is typically the same value as `revision`; the field exists as an explicit, version-control-neutral alias and survives across VCS migrations." 
+ }, + "worktree": { "$ref": "#/$defs/worktree" } + }, + "additionalProperties": false + }, + + "worktree": { + "type": "object", + "required": ["name", "path"], + "additionalProperties": false, + "description": "Worktree context when the session ran inside a working-tree clone or worktree (git worktree, jj workspace, etc.).", + "properties": { + "name": { "type": "string" }, + "path": { "type": "string" }, + "original_cwd": { + "type": "string", + "description": "Working directory of the parent repository at the time the worktree was created." + }, + "original_branch": { + "type": "string", + "description": "Branch the parent repository was on when the worktree was created." + }, + "original_head_commit": { + "type": "string", + "pattern": "^[a-f0-9]{7,64}$", + "description": "Commit hash the worktree was forked from." + } + } + }, + + "sourceMetadata": { + "type": "object", + "description": "Adapter-provided metadata about the source event.", + "properties": { + "agent": { "$ref": "#/$defs/agentName" }, + "original_type": { "type": "string" }, + "schema_version": { "type": "string" }, + "raw": { + "description": "Opaque source object preserved verbatim. If an object, may use envelope_ref to reference an earlier entry's inlined envelope.", + "if": { "type": "object" }, + "then": { + "type": "object", + "properties": { + "envelope_ref": { "type": "string" } + } + } + }, + "synthesized": { "type": "boolean", "default": false } + }, + "additionalProperties": false + }, + + "semanticMetadata": { + "type": "object", + "description": "Semantic linking for cross-event references when explicit IDs are unreliable.", + "properties": { + "group_id": { "type": "string" }, + "call_id": { "type": "string" }, + "tool_kind": { "$ref": "#/$defs/toolKind" } + }, + "additionalProperties": false + }, + + "agentMessageUsage": { + "type": "object", + "description": "Token usage for this source agent envelope. 
May appear on agent_message, agent_thinking, or tool_call when that entry is the first entry derived from the envelope. input_tokens/output_tokens are deltas for this envelope; *_cumulative variants are running totals through this point. total_tokens/total_tokens_cumulative are source-reported inclusive totals for exact total-token analytics. cache_read_tokens and cache_creation_tokens are independent billing categories. context_input_tokens captures source-reported prompt/context pressure for this request, cache-inclusive when the source exposes enough detail; context_window_tokens captures the model context-window size when exposed. When present, usage must include either input/output coverage or total-token coverage.", + "properties": { + "input_tokens": { "type": "integer", "minimum": 0 }, + "output_tokens": { "type": "integer", "minimum": 0 }, + "input_tokens_cumulative": { "type": "integer", "minimum": 0 }, + "output_tokens_cumulative": { "type": "integer", "minimum": 0 }, + "total_tokens": { "type": "integer", "minimum": 0 }, + "total_tokens_cumulative": { "type": "integer", "minimum": 0 }, + "cache_read_tokens": { "type": "integer", "minimum": 0 }, + "cache_creation_tokens": { "type": "integer", "minimum": 0 }, + "reasoning_tokens": { "type": "integer", "minimum": 0 }, + "context_input_tokens": { "type": "integer", "minimum": 0 }, + "context_window_tokens": { "type": "integer", "minimum": 1 } + }, + "anyOf": [ + { + "allOf": [ + { + "anyOf": [ + { + "properties": { "input_tokens": {} }, + "required": ["input_tokens"] + }, + { + "properties": { "input_tokens_cumulative": {} }, + "required": ["input_tokens_cumulative"] + } + ] + }, + { + "anyOf": [ + { + "properties": { "output_tokens": {} }, + "required": ["output_tokens"] + }, + { + "properties": { "output_tokens_cumulative": {} }, + "required": ["output_tokens_cumulative"] + } + ] + } + ] + }, + { + "properties": { "total_tokens": {} }, + "required": ["total_tokens"] + }, + { + "properties": { 
"total_tokens_cumulative": {} }, + "required": ["total_tokens_cumulative"] + } + ], + "additionalProperties": false + }, + + "attachment": { + "type": "object", + "description": "An image or file carried by a message or tool result, by reference. v0.1.0 uri schemes are references only (https:, local file:, content-addressed sha256:); inline data: payloads are deferred.", + "required": ["kind"], + "properties": { + "kind": { "type": "string", "enum": ["image", "file", "other"] }, + "media_type": { "type": "string" }, + "uri": { + "type": "string", + "pattern": "^(https:|file:|sha256:)" + }, + "name": { "type": "string" } + }, + "anyOf": [ + { + "properties": { "uri": {} }, + "required": ["uri"] + }, + { + "properties": { "name": {} }, + "required": ["name"] + } + ], + "additionalProperties": false + }, + + "sessionTerminationReason": { + "type": "string", + "anyOf": [ + { "enum": ["eof_with_open_tool_calls", "process_terminated", "truncated", "user_abort"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + + "parseFidelity": { + "type": "object", + "description": "At-a-glance session parse fidelity summary. When present, quarantined_count MUST equal the number of x-*/unknown_record system_event entries in the session group; termination_reason MUST match the final session_terminated reason when one exists.", + "required": ["quarantined_count"], + "properties": { + "quarantined_count": { + "type": "integer", + "minimum": 0, + "description": "Number of quarantined source records emitted as x-*/unknown_record system_event entries in this session group." + }, + "termination_reason": { + "$ref": "#/$defs/sessionTerminationReason", + "description": "Final abnormal session termination reason, when a session_terminated event is present." + } + }, + "additionalProperties": false + }, + + "trailEnvelope": { + "type": "object", + "description": "Optional trail envelope record (line 1). File-level metadata; not part of the event graph. 
When present, MUST appear at line 1 and the first session header MUST follow on line 2. At most one per file. Multi-session files (spec §9.6) carry one envelope followed by N session groups in file order.", + "required": ["type", "schema_version", "id", "ts", "producer"], + "properties": { + "type": { "const": "trail" }, + "schema_version": { "const": "0.1.0" }, + "id": { "$ref": "#/$defs/id" }, + "name": { "type": "string" }, + "description": { "type": "string" }, + "ts": { "$ref": "#/$defs/iso8601" }, + "producer": { "type": "string", "minLength": 1 }, + "content_hash": { + "oneOf": [ + { "$ref": "#/$defs/sha256Hex" }, + { "const": "" } + ] + }, + "tags": { + "type": "array", + "items": { "type": "string" } + }, + "vcs": { "$ref": "#/$defs/vcs" }, + "fork_from": { + "type": "object", + "required": ["trail_id"], + "properties": { + "trail_id": { "$ref": "#/$defs/id" }, + "content_hash": { "$ref": "#/$defs/sha256Hex" } + }, + "additionalProperties": false + }, + "redacted_from": { + "type": "object", + "required": ["content_hash"], + "properties": { + "content_hash": { "$ref": "#/$defs/sha256Hex" } + }, + "additionalProperties": false + }, + "sessions": { + "type": "array", + "description": "Optional manifest of sessions contained in the file, one entry per session group in file order (spec §8.4, §9.6). Validator warns on length mismatch or per-entry drift vs actual file content.", + "items": { + "type": "object", + "required": ["id", "agent"], + "properties": { + "id": { "$ref": "#/$defs/id" }, + "agent": { "$ref": "#/$defs/agentName" } + }, + "additionalProperties": false + } + }, + "meta": { + "type": "object", + "description": "Free-form vendor extensions. Recommended keys use the x-{vendor}/{name} extension grammar." + } + }, + "additionalProperties": false + }, + + "header": { + "type": "object", + "description": "Session header. The first session header is required at line 1, or at line 2 when a trail envelope occupies line 1.
Multi-session files (spec §9.6) carry additional session headers later in the file; each opens a new (header, events*) group. Not part of the event graph.", + "required": ["type", "schema_version", "id", "ts", "agent"], + "properties": { + "type": { "const": "session" }, + "schema_version": { + "const": "0.1.0" + }, + "id": { "$ref": "#/$defs/id" }, + "name": { "type": "string" }, + "description": { "type": "string" }, + "tags": { + "type": "array", + "items": { "type": "string" } + }, + "session_uid": { + "$ref": "#/$defs/sessionUid", + "description": "Globally-unique source-session identifier. Stable across all segments of one source session (spec §9.5). Reconcilers group segments by session_uid. Optional in v0.1 single-segment trails; writers SHOULD emit it for forward-compat. Required (and enforced by the header allOf if/then) when segment.seq > 1. ULID is recommended (lexicographic tie-breaker); UUID accepted." + }, + "segment": { "$ref": "#/$defs/segment" }, + "content_hash": { + "oneOf": [ + { "$ref": "#/$defs/sha256Hex" }, + { "const": "" } + ] + }, + "ts": { "$ref": "#/$defs/iso8601" }, + "stream": { + "type": "object", + "description": "Live-capture marker. Present means writer is actively appending or last appended in streaming mode. 
Absent means non-streaming or unaware writer.", + "required": ["state"], + "properties": { + "state": { + "type": "string", + "enum": ["open", "closed"] + }, + "started_at": { "$ref": "#/$defs/iso8601" } + }, + "additionalProperties": false + }, + "agent": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "$ref": "#/$defs/agentName" }, + "version": { "type": "string" }, + "model_default": { "type": "string" } + }, + "additionalProperties": false + }, + "cwd": { "type": "string" }, + "vcs": { "$ref": "#/$defs/vcs" }, + "fork_from": { + "type": "object", + "required": ["session_id"], + "properties": { + "session_id": { "$ref": "#/$defs/id" }, + "content_hash": { "$ref": "#/$defs/sha256Hex" }, + "entry_id": { "$ref": "#/$defs/id" } + }, + "additionalProperties": false + }, + "redacted_from": { + "type": "object", + "required": ["content_hash"], + "properties": { + "content_hash": { "$ref": "#/$defs/sha256Hex" } + }, + "additionalProperties": false + }, + "parse_fidelity": { "$ref": "#/$defs/parseFidelity" }, + "source": { + "type": "object", + "properties": { + "agent": { "$ref": "#/$defs/agentName" }, + "path": { "type": "string" }, + "format_version": { "type": "string" } + }, + "additionalProperties": false + }, + "meta": { + "type": "object", + "description": "Free-form vendor extensions. Recommended keys use the x-{vendor}/{name} extension grammar (spec §8.3)." + } + }, + "additionalProperties": false, + "allOf": [ + { + "description": "Spec §9.5: session_uid is required when segment.seq >= 2 so reconcilers can group continuation segments.
Single-segment trails (seq absent or seq=1) keep session_uid optional in v0.1.", + "if": { + "type": "object", + "required": ["segment"], + "properties": { + "segment": { + "type": "object", + "required": ["seq"], + "properties": { + "seq": { "type": "integer", "minimum": 2 } + } + } + } + }, + "then": { + "type": "object", + "required": ["session_uid"], + "properties": { + "session_uid": { "$ref": "#/$defs/sessionUid" } + } + } + } + ] + }, + + "entryBase": { + "type": "object", + "required": ["type", "id", "ts", "payload"], + "properties": { + "type": { "type": "string" }, + "id": { "$ref": "#/$defs/id" }, + "parent_id": { + "oneOf": [ + { "$ref": "#/$defs/id" }, + { "type": "null" } + ] + }, + "ts": { "$ref": "#/$defs/iso8601" }, + "payload": { "type": "object" }, + "semantic": { "$ref": "#/$defs/semanticMetadata" }, + "source": { "$ref": "#/$defs/sourceMetadata" }, + "meta": { + "type": "object", + "properties": { + "redaction_count": { + "type": "integer", + "minimum": 0, + "description": "Number of redactor mutations applied to this event entry." 
+ } + } + } + }, + "additionalProperties": false + }, + + "entry": { + "allOf": [ + { "$ref": "#/$defs/entryBase" }, + { + "oneOf": [ + { "$ref": "#/$defs/events/user_message" }, + { "$ref": "#/$defs/events/agent_message" }, + { "$ref": "#/$defs/events/task_plan_update" }, + { "$ref": "#/$defs/events/tool_call" }, + { "$ref": "#/$defs/events/tool_result" }, + { "$ref": "#/$defs/events/tool_call_aborted" }, + { "$ref": "#/$defs/events/user_query" }, + { "$ref": "#/$defs/events/user_query_response" }, + { "$ref": "#/$defs/events/session_summary" }, + { "$ref": "#/$defs/events/system_event" }, + { "$ref": "#/$defs/events/agent_thinking" }, + { "$ref": "#/$defs/events/user_interrupt" }, + { "$ref": "#/$defs/events/context_compact" }, + { "$ref": "#/$defs/events/branch_point" }, + { "$ref": "#/$defs/events/branch_summary" }, + { "$ref": "#/$defs/events/model_change" }, + { "$ref": "#/$defs/events/mode_change" }, + { "$ref": "#/$defs/events/thinking_level_change" }, + { "$ref": "#/$defs/events/session_terminated" }, + { "$ref": "#/$defs/events/session_end" }, + { "$ref": "#/$defs/events/command_invoke" }, + { "$ref": "#/$defs/events/capability_change" }, + { "$ref": "#/$defs/events/session_metadata_update" } + ] + } + ] + }, + + "events": { + + "user_message": { + "type": "object", + "properties": { + "type": { "const": "user_message" }, + "payload": { + "type": "object", + "required": ["text"], + "properties": { + "text": { "type": "string" }, + "origin": { + "type": "string", + "description": "Authorship marker for user-role text. 
Absent means user-authored.", + "anyOf": [ + { "enum": ["user", "injected", "mixed"] }, + { + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "attachments": { + "type": "array", + "items": { "$ref": "#/$defs/attachment" } + } + }, + "additionalProperties": false + } + } + }, + + "agent_message": { + "type": "object", + "properties": { + "type": { "const": "agent_message" }, + "payload": { + "type": "object", + "required": ["text"], + "properties": { + "text": { "type": "string" }, + "model": { "type": "string" }, + "stop_reason": { "type": "string" }, + "usage": { "$ref": "#/$defs/agentMessageUsage" }, + "attachments": { + "type": "array", + "items": { "$ref": "#/$defs/attachment" } + } + }, + "additionalProperties": false + } + } + }, + + "task_plan_update": { + "type": "object", + "properties": { + "type": { "const": "task_plan_update" }, + "payload": { + "type": "object", + "required": ["items"], + "properties": { + "explanation": { "type": "string" }, + "items": { + "type": "array", + "items": { "$ref": "#/$defs/taskPlanItem" } + }, + "deltas": { + "type": "array", + "items": { "$ref": "#/$defs/taskPlanDelta" } + } + }, + "additionalProperties": false + } + } + }, + + "tool_call": { + "type": "object", + "properties": { + "type": { "const": "tool_call" }, + "payload": { + "type": "object", + "required": ["tool", "args"], + "properties": { + "tool": { "$ref": "#/$defs/toolKind" }, + "args": { "type": "object" }, + "usage": { "$ref": "#/$defs/agentMessageUsage" }, + "truncated": { "type": "boolean" }, + "args_size": { + "type": "integer", + "minimum": 0, + "description": "UTF-8 byte length of the original args object before truncation. Required when truncated is true." 
+ }, + "overflow_ref": { + "oneOf": [ + { "type": "string", "pattern": "^sha256:[a-f0-9]{64}$" }, + { "type": "null" } + ] + } + }, + "additionalProperties": false, + "dependentSchemas": { + "truncated": { + "if": { + "properties": { + "truncated": { "const": true } + }, + "required": ["truncated"] + }, + "then": { + "properties": { + "args_size": { + "type": "integer", + "minimum": 0 + } + }, + "required": ["args_size"] + } + } + }, + "allOf": [ + { + "if": { "properties": { "tool": { "const": "file_read" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path"], + "properties": { + "path": { "type": "string" }, + "range": { + "type": "array", + "items": { "type": "integer" }, + "minItems": 2, + "maxItems": 2 + } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_write" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path", "content"], + "properties": { + "path": { "type": "string" }, + "content": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_edit" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "oneOf": [ + { + "type": "object", + "required": ["path", "diff"], + "properties": { + "path": { "type": "string" }, + "diff": { "type": "string" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["path", "old", "new"], + "properties": { + "path": { "type": "string" }, + "old": { "type": "string" }, + "new": { "type": "string" }, + "replace_all": { "type": "boolean" } + }, + "additionalProperties": false + } + ] + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_patch" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["files"], + "properties": { + "files": { + "type": "array", + 
"minItems": 1, + "items": { + "type": "object", + "required": ["path", "diff"], + "properties": { + "path": { "type": "string" }, + "diff": { "type": "string" } + }, + "additionalProperties": false + } + }, + "atomic": { "type": "boolean" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_list" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path"], + "properties": { + "path": { "type": "string" }, + "recursive": { "type": "boolean" }, + "glob": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_search" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string" }, + "path": { "type": "string" }, + "glob": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "shell_command" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["command"], + "properties": { + "command": { "type": "string" }, + "cwd": { "type": "string" }, + "timeout": { "type": "integer" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "shell_output" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "properties": { + "command_id": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "shell_input" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["input"], + "properties": { + "input": { "type": "string" }, + "session_id": { "type": "string" }, + "command_id": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { 
"tool": { "const": "mcp_call" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["server", "tool"], + "properties": { + "server": { "type": "string" }, + "tool": { "type": "string" }, + "args": { "type": "object" }, + "headers": { "type": "object" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "web_fetch" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["url"], + "properties": { + "url": { "type": "string" }, + "method": { "type": "string" }, + "headers": { "type": "object" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "web_search" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "tool_search" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string" }, + "limit": { "type": "integer" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "notebook_edit" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path"], + "properties": { + "path": { "type": "string" }, + "cell_id": { "type": "string" }, + "diff": { "type": "string" }, + "content": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "subagent_invoke" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["task"], + "properties": { + "task": { "type": "string" }, + "agent_type": { "type": "string" }, + "session_id": { "$ref": "#/$defs/id" } 
+ }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "other" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "type": "string" }, + "args": { "type": "object" } + }, + "additionalProperties": false + } + } + } + } + ] + } + } + }, + + "tool_result": { + "type": "object", + "properties": { + "type": { "const": "tool_result" }, + "payload": { + "type": "object", + "required": ["ok"], + "properties": { + "for_id": { + "$ref": "#/$defs/id" + }, + "ok": { "type": "boolean" }, + "output": { "type": "string" }, + "truncated": { "type": "boolean" }, + "output_size": { + "type": "integer", + "minimum": 0, + "description": "UTF-8 byte length of the original output before truncation. Required when truncated is true." + }, + "overflow_ref": { + "oneOf": [ + { "type": "string", "pattern": "^sha256:[a-f0-9]{64}$" }, + { "type": "null" } + ] + }, + "error": { + "oneOf": [{ "type": "string" }, { "type": "null" }] + }, + "attachments": { + "type": "array", + "items": { "$ref": "#/$defs/attachment" } + }, + "meta": { + "type": "object", + "description": "Structured per-toolkind outputs, keyed by the originating tool_call.tool. Optional; consumers fall back to payload.output when the relevant key is absent. Registered keys are writer-strict; unregistered/future toolkinds are opaque objects. 
Vendors extend a registered key via x-<vendor>/<name> pattern keys.", + "properties": { + "mcp_call": { + "type": "object", + "properties": { + "content_blocks": { + "type": "array", + "items": { + "type": "object", + "required": ["type"], + "properties": { + "type": { "type": "string", "enum": ["text", "image", "resource"] }, + "text": { "type": "string" }, + "data": { "type": "string" }, + "mime_type": { "type": "string" }, + "uri": { "type": "string" } + }, + "additionalProperties": false + } + }, + "is_error": { "type": "boolean" } + }, + "patternProperties": { + "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$": {} + }, + "additionalProperties": false + }, + "file_read": { + "type": "object", + "properties": { + "range": { + "type": "array", + "items": { "type": "integer" }, + "minItems": 2, + "maxItems": 2 + }, + "total_lines": { "type": "integer", "minimum": 0 }, + "encoding": { "type": "string" }, + "truncated_at_line": { + "oneOf": [{ "type": "integer", "minimum": 0 }, { "type": "null" }] + } + }, + "patternProperties": { + "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$": {} + }, + "additionalProperties": false + }, + "shell_command": { + "type": "object", + "properties": { + "stdout": { "type": "string" }, + "stderr": { "type": "string" }, + "exit_code": { + "oneOf": [{ "type": "integer" }, { "type": "null" }] + }, + "signal": { + "oneOf": [{ "type": "string" }, { "type": "null" }] + }, + "duration_ms": { "type": "integer", "minimum": 0 } + }, + "patternProperties": { + "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$": {} + }, + "additionalProperties": false + } + }, + "propertyNames": { + "pattern": "^(?:[a-z][a-z0-9_]*|x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*)$" + }, + "additionalProperties": { "type": "object" } + } + }, + "additionalProperties": false, + "dependentSchemas": { + "truncated": { + "if": { + "properties": { + "truncated": { "const": true } + }, + "required": ["truncated"] + }, + "then": { + "properties": { + "output_size": { + "type": 
"integer", + "minimum": 0 + } + }, + "required": ["output_size"] + } + } + } + } + } + }, + + "tool_call_aborted": { + "type": "object", + "properties": { + "type": { "const": "tool_call_aborted" }, + "payload": { + "type": "object", + "required": ["scope", "reason"], + "properties": { + "scope": { + "description": "Abort granularity. tool_call aborts reference a specific tool_call by for_id; turn aborts describe a broader turn-level stop when the source cannot identify one call.", + "oneOf": [ + { "type": "string", "enum": ["tool_call", "turn"] }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "for_id": { "$ref": "#/$defs/id" }, + "reason": { + "description": "Why execution stopped before a normal tool_result.", + "oneOf": [ + { + "type": "string", + "enum": [ + "user_interrupt", + "hook_blocked", + "timeout", + "permission_denied", + "runtime_error" + ] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "blocked_by": { "type": "string" } + }, + "additionalProperties": false, + "allOf": [ + { + "if": { + "properties": { + "scope": { "const": "tool_call" } + }, + "required": ["scope"] + }, + "then": { + "required": ["for_id"], + "properties": { + "for_id": { "$ref": "#/$defs/id" } + } + } + }, + { + "if": { + "properties": { + "scope": { "not": { "const": "tool_call" } } + }, + "required": ["scope"] + }, + "then": { + "not": { "required": ["for_id"] } + } + } + ] + } + } + }, + + "user_query": { + "type": "object", + "properties": { + "type": { "const": "user_query" }, + "payload": { + "type": "object", + "required": ["questions"], + "properties": { + "questions": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["id", "question"], + "properties": { + "id": { "type": "string" }, + "question": { "type": "string" }, + "header": { "type": "string" }, + "multi_select": { "type": "boolean" }, + "is_secret": { "type": 
"boolean" }, + "allow_other": { "type": "boolean" }, + "options": { + "type": "array", + "items": { + "type": "object", + "required": ["label"], + "properties": { + "id": { "type": "string", "minLength": 1 }, + "label": { "type": "string" }, + "description": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + } + }, + + "user_query_response": { + "type": "object", + "properties": { + "type": { "const": "user_query_response" }, + "payload": { + "type": "object", + "required": ["for_id", "answers"], + "properties": { + "for_id": { "$ref": "#/$defs/id" }, + "answers": { + "type": "object", + "additionalProperties": { + "type": "object", + "required": ["selected"], + "properties": { + "selected": { + "type": "array", + "items": { "type": "string" } + }, + "other": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + } + }, + + "session_summary": { + "type": "object", + "properties": { + "type": { "const": "session_summary" }, + "payload": { + "type": "object", + "required": ["scope", "text"], + "properties": { + "scope": { "type": "string", "enum": ["session"] }, + "text": { "type": "string" }, + "model": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + + "system_event": { + "type": "object", + "properties": { + "type": { "const": "system_event" }, + "payload": { + "type": "object", + "required": ["kind"], + "properties": { + "kind": { + "type": "string", + "description": "Lifecycle/hook signal category. 
Either one of the reserved cross-agent values, or a vendor-namespaced extension of the form `x-<vendor>/<name>`.", + "anyOf": [ + { + "enum": [ + "session_start", + "turn_start", + "turn_end", + "subagent_start", + "subagent_end", + "pre_tool_use", + "post_tool_use", + "hook_fired", + "permission_request", + "permission_decision", + "cwd_change", + "env_snapshot", + "task_started", + "task_completed", + "plan_completed", + "turn_aborted", + "tool_decision", + "context_injected", + "hook_progress", + "queue_operation", + "heartbeat", + "agent_error", + "agent_warning", + "api_error", + "stream_error", + "deprecation_notice", + "guardian_alert", + "model_rerouted", + "hook_failed", + "vcs_commit" + ] + }, + { + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "text": { "type": "string" }, + "data": { "type": "object" } + }, + "additionalProperties": false + } + } + }, + + "agent_thinking": { + "type": "object", + "properties": { + "type": { "const": "agent_thinking" }, + "payload": { + "type": "object", + "required": ["text"], + "properties": { + "text": { "type": "string" }, + "model": { "type": "string" }, + "level": { "type": "string", "minLength": 1 }, + "usage": { "$ref": "#/$defs/agentMessageUsage" } + }, + "additionalProperties": false + } + } + }, + + "user_interrupt": { + "type": "object", + "properties": { + "type": { "const": "user_interrupt" }, + "payload": { + "type": "object", + "properties": { "reason": { "type": "string" } }, + "additionalProperties": false + } + } + }, + + "context_compact": { + "type": "object", + "properties": { + "type": { "const": "context_compact" }, + "payload": { + "type": "object", + "required": ["summary"], + "properties": { + "summary": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { "enum": ["manual", "auto"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "tokens_before": { "type": "integer", "minimum": 0 }, + "tokens_after": { "type": "integer", 
"minimum": 0 }, + "replaced_message_ids": { + "type": "array", + "items": { "$ref": "#/$defs/id" }, + "description": "Agent Trail entry IDs folded or replaced by this compaction summary. Provenance-only; readers must not require same-file resolution." + } + }, + "additionalProperties": false + } + } + }, + + "branch_point": { + "type": "object", + "properties": { + "type": { "const": "branch_point" }, + "payload": { + "type": "object", + "required": ["from_id"], + "properties": { + "from_id": { "$ref": "#/$defs/id" }, + "reason": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + + "branch_summary": { + "type": "object", + "properties": { + "type": { "const": "branch_summary" }, + "payload": { + "type": "object", + "required": ["abandoned_branch_id", "summary"], + "properties": { + "abandoned_branch_id": { "$ref": "#/$defs/id" }, + "summary": { "type": "string" }, + "model": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + + "model_change": { + "type": "object", + "properties": { + "type": { "const": "model_change" }, + "payload": { + "type": "object", + "required": ["to_model"], + "properties": { + "from_model": { "type": "string" }, + "to_model": { "type": "string" }, + "from_provider": { "type": "string" }, + "to_provider": { "type": "string" }, + "reason": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "user_set", + "agent_set", + "runtime_inferred", + "auto_reroute", + "external" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "turn_id": { "type": "string", "minLength": 1 } + }, + "additionalProperties": false + } + } + }, + + "mode_change": { + "type": "object", + "properties": { + "type": { "const": "mode_change" }, + "payload": { + "type": "object", + "required": ["scope", "to_mode"], + "properties": { + "scope": { + "type": "string", + "anyOf": [ + { "enum": ["collaboration", "permission", "execution", "ui"] }, + { 
"pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "from_mode": { "type": "string", "minLength": 1 }, + "to_mode": { "type": "string", "minLength": 1 }, + "reason": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "user_set", + "agent_set", + "runtime_inferred", + "auto_reroute", + "external" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "turn_id": { "type": "string", "minLength": 1 }, + "data": { "type": "object" } + }, + "additionalProperties": false + } + } + }, + + "thinking_level_change": { + "type": "object", + "properties": { + "type": { "const": "thinking_level_change" }, + "payload": { + "type": "object", + "required": ["to_level"], + "properties": { + "from_level": { "type": "string", "minLength": 1 }, + "to_level": { "type": "string", "minLength": 1 }, + "reason": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "user_set", + "agent_set", + "runtime_inferred", + "auto_reroute", + "external" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "turn_id": { "type": "string", "minLength": 1 }, + "data": { "type": "object" } + }, + "additionalProperties": false + } + } + }, + + "session_terminated": { + "type": "object", + "properties": { + "type": { "const": "session_terminated" }, + "payload": { + "type": "object", + "required": ["reason"], + "properties": { + "reason": { + "$ref": "#/$defs/sessionTerminationReason" + }, + "open_call_ids": { + "type": "array", + "items": { "$ref": "#/$defs/id" } + } + }, + "additionalProperties": false + } + } + }, + + "session_end": { + "type": "object", + "properties": { + "type": { "const": "session_end" }, + "payload": { + "type": "object", + "required": ["reason"], + "properties": { + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["complete", "user_quit", "agent_idle"] }, + { "pattern": 
"^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "final_message_id": { "$ref": "#/$defs/id" } + }, + "additionalProperties": false + } + } + }, + + "command_invoke": { + "type": "object", + "properties": { + "type": { "const": "command_invoke" }, + "payload": { + "type": "object", + "required": ["name", "kind", "via"], + "properties": { + "name": { + "type": "string", + "description": "User-visible identifier of the invoked capability. Leading slash for slash/builtin/custom_prompt commands (`/clear`); bare name for skills (`webapp-testing`)." + }, + "kind": { + "type": "string", + "description": "What kind of capability was invoked.", + "anyOf": [ + { "enum": ["slash", "builtin", "skill", "custom_prompt", "plugin"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "via": { + "type": "string", + "description": "How the invocation reached the agent. `auto_trigger` covers description-matched skill activation with no user action; adapters MAY synthesize it (set source.synthesized=true).", + "anyOf": [ + { "enum": ["user_typed", "auto_trigger", "agent_invoked"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "args": { "type": "object" }, + "expansion_text": { "type": "string" }, + "result_action": { + "description": "What the runtime did with the invocation. 
Either one of the reserved values, a vendor-namespaced extension of the form `x-<vendor>/<name>`, or null.", + "oneOf": [ + { + "type": "string", + "enum": ["compact", "clear", "expand", "load_skill", "noop"] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + }, + { "type": "null" } + ] + } + }, + "additionalProperties": false + } + } + }, + + "capability_change": { + "type": "object", + "properties": { + "type": { "const": "capability_change" }, + "payload": { + "allOf": [ + { + "type": "object", + "required": ["scope", "reason"], + "properties": { + "scope": { + "type": "string", + "anyOf": [ + { "enum": ["tool", "skill", "mcp_server", "mcp_tool", "plugin"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "reason": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "registered", + "deregistered", + "connected", + "disconnected", + "loaded", + "unloaded", + "error", + "instructions_updated" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "added": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityAddedItem" } + }, + "removed": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityRemovedItem" } + }, + "changed": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityChangedItem" } + }, + "snapshot": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityAddedItem" } + } + }, + "additionalProperties": false + }, + { + "anyOf": [ + { "type": "object", "required": ["added"] }, + { "type": "object", "required": ["removed"] }, + { "type": "object", "required": ["changed"] }, + { "type": "object", "required": ["snapshot"] } + ] + } + ] + } + } + }, + + "session_metadata_update": { + "type": "object", + "properties": { + "type": { "const": "session_metadata_update" }, + "payload": { + "oneOf": [ + { + "type": "object", + "required": ["field", "value", 
"reason"], + "properties": { + "field": { + "type": "string", + "enum": ["name", "description", "agent.model_default", "vcs.branch"] + }, + "value": { "type": "string" }, + "previous_value": { "type": "string" }, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["field", "value", "reason"], + "properties": { + "field": { "const": "tags" }, + "value": { + "type": "array", + "items": { "type": "string" } + }, + "previous_value": { + "type": "array", + "items": { "type": "string" } + }, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["field", "value", "reason"], + "properties": { + "field": { "const": "vcs.worktree" }, + "value": { "$ref": "#/$defs/worktree" }, + "previous_value": { "$ref": "#/$defs/worktree" }, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["field", "value", "reason"], + "properties": { + "field": { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + }, + "value": {}, + "previous_value": {}, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + } + ] + } + } + } + }, + + "capabilityAddedItem": { + "type": "object", + "required": ["name"], + "properties": 
{ + "name": { "type": "string" }, + "metadata": { "type": "object" } + }, + "additionalProperties": false + }, + + "capabilityRemovedItem": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "type": "string" } + }, + "additionalProperties": false + }, + + "capabilityChangedItem": { + "type": "object", + "required": ["name", "field"], + "properties": { + "name": { "type": "string" }, + "field": { "type": "string" }, + "from": {}, + "to": {} + }, + "additionalProperties": false + } + } +} diff --git a/schema/v0.1.0.json b/schema/v0.1.0.json new file mode 100644 index 0000000..23da99f --- /dev/null +++ b/schema/v0.1.0.json @@ -0,0 +1,1915 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://agent-trail.dev/schema/v0.1.0.json", + "title": "Agent Trail v0.1.0", + "description": "Validates a single Agent Trail JSONL record: trail envelope, session header, or event entry. File layout rules such as envelope position and multi-session grouping are enforced by whole-file validation; per-event payload shapes are enforced via the events subschemas.", + + "oneOf": [ + { "$ref": "#/$defs/trailEnvelope" }, + { "$ref": "#/$defs/header" }, + { "$ref": "#/$defs/entry" } + ], + + "$defs": { + + "iso8601": { + "type": "string", + "format": "date-time", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z$", + "description": "Writer timestamp: UTC ISO-8601 with millisecond precision. Format-aware validators may use the date-time annotation; whole-file validation rule 6 remains authoritative for calendar validity." + }, + + "id": { + "$ref": "#/$defs/sessionUid", + "description": "Globally-unique identifier shape: canonical uppercase ULID (26 Crockford base32 chars), lowercase hyphenated UUID (36 chars), or lowercase unhyphenated UUID (32 hex chars). Header ids, event ids, and envelope ids share this shape so cross-segment reconciliation can dedup by exact string equality (spec §9.5)." 
+ }, + + "ulid": { + "type": "string", + "pattern": "^[0-9A-HJKMNP-TV-Z]{26}$", + "description": "Canonical uppercase ULID (Crockford base32, 26 chars, no I/L/O/U). Time-prefixed and lexicographically sortable." + }, + + "sessionUid": { + "type": "string", + "pattern": "^(?:[0-9A-HJKMNP-TV-Z]{26}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|[0-9a-f]{32})$", + "description": "Globally-unique source-session identifier shape: canonical uppercase ULID (26 Crockford base32 chars), lowercase hyphenated UUID (36 chars), or lowercase unhyphenated UUID (32 hex chars). Reconcilers group segments by exact string equality on session_uid (spec §9.5)." + }, + + "segment": { + "type": "object", + "description": "Multi-segment marker. Absent or {seq:1} for a single-segment trail. Reconciler primitive for daemon resume and multi-file sessions (spec §9.5).", + "oneOf": [ + { + "type": "object", + "required": ["seq"], + "properties": { + "seq": { "const": 1 } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["seq", "prev_content_hash"], + "properties": { + "seq": { "type": "integer", "minimum": 2 }, + "prev_content_hash": { + "oneOf": [ + { "$ref": "#/$defs/sha256Hex" }, + { "type": "null" } + ] + } + }, + "additionalProperties": false + } + ] + }, + + "sha256Hex": { + "type": "string", + "pattern": "^[a-f0-9]{64}$", + "description": "SHA-256 hash as lowercase hex (64 chars)" + }, + + "agentName": { + "oneOf": [ + { + "type": "string", + "enum": [ + "claude-code", + "pi", + "openclaw", + "codex-cli", + "cursor", + "opencode", + "aider", + "amp", + "cline", + "crush", + "kimi-code", + "qwen-code", + "factory", + "vibe", + "copilot-cli", + "copilot-chat", + "chatgpt", + "clawdbot" + ] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$", + "description": "Custom unregistered agent using the x-<vendor>/<name> extension grammar, e.g. 
x-example/myagent" + } + ] + }, + + "toolKind": { + "type": "string", + "enum": [ + "file_read", + "file_write", + "file_edit", + "file_patch", + "file_list", + "file_search", + "shell_command", + "shell_output", + "shell_input", + "mcp_call", + "web_fetch", + "web_search", + "tool_search", + "notebook_edit", + "subagent_invoke", + "other" + ] + }, + + "taskPlanStatus": { + "type": "string", + "enum": ["pending", "in_progress", "completed", "cancelled", "blocked"] + }, + + "taskPlanItem": { + "type": "object", + "required": ["id", "content", "status"], + "properties": { + "id": { "type": "string", "minLength": 1 }, + "content": { "type": "string" }, + "status": { "$ref": "#/$defs/taskPlanStatus" }, + "active_form": { "type": "string" } + }, + "additionalProperties": false + }, + + "taskPlanDelta": { + "oneOf": [ + { + "type": "object", + "required": ["kind", "item_id", "to_content", "to_status"], + "properties": { + "kind": { "const": "added" }, + "item_id": { "type": "string", "minLength": 1 }, + "to_content": { "type": "string" }, + "to_status": { "$ref": "#/$defs/taskPlanStatus" }, + "to_active_form": { "type": "string" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["kind", "item_id", "from_content", "from_status"], + "properties": { + "kind": { "const": "removed" }, + "item_id": { "type": "string", "minLength": 1 }, + "from_content": { "type": "string" }, + "from_status": { "$ref": "#/$defs/taskPlanStatus" }, + "from_active_form": { "type": "string" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["kind", "item_id", "from_status", "to_status"], + "properties": { + "kind": { "const": "status_changed" }, + "item_id": { "type": "string", "minLength": 1 }, + "from_status": { "$ref": "#/$defs/taskPlanStatus" }, + "to_status": { "$ref": "#/$defs/taskPlanStatus" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["kind", "item_id", "from_content", "to_content"], + 
"properties": { + "kind": { "const": "content_changed" }, + "item_id": { "type": "string", "minLength": 1 }, + "from_content": { "type": "string" }, + "to_content": { "type": "string" } + }, + "additionalProperties": false + } + ] + }, + + "vcs": { + "type": "object", + "required": ["type", "revision"], + "anyOf": [ + { + "properties": { "revision": { "type": "string" } } + }, + { + "required": ["branch"], + "properties": { + "revision": { "type": "null" }, + "branch": { "type": "string", "minLength": 1 } + }, + "not": { "properties": { "head_commit": {} }, "required": ["head_commit"] } + } + ], + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "enum": ["git", "jj", "hg", "svn"] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "revision": { "type": ["string", "null"] }, + "remote_url": { + "type": "string", + "description": "Canonical remote URL for the working tree. Adapters MUST normalize before emission: strip embedded credentials, strip trailing .git for git URLs, and normalize SSH/HTTPS variants to a single canonical form (https://host/path)." + }, + "branch": { + "type": "string", + "description": "Active branch / bookmark / topic name the session is running on. For git, the short branch name (e.g., `feature/x`). Detached-HEAD sessions MAY omit this field." + }, + "head_commit": { + "type": "string", + "pattern": "^[a-f0-9]{7,64}$", + "description": "Commit hash at session start (lowercase hex, 7-64 chars). For git this is typically the same value as `revision`; the field exists as an explicit, version-control-neutral alias and survives across VCS migrations." 
+ }, + "worktree": { "$ref": "#/$defs/worktree" } + }, + "additionalProperties": false + }, + + "worktree": { + "type": "object", + "required": ["name", "path"], + "additionalProperties": false, + "description": "Worktree context when the session ran inside a working-tree clone or worktree (git worktree, jj workspace, etc.).", + "properties": { + "name": { "type": "string" }, + "path": { "type": "string" }, + "original_cwd": { + "type": "string", + "description": "Working directory of the parent repository at the time the worktree was created." + }, + "original_branch": { + "type": "string", + "description": "Branch the parent repository was on when the worktree was created." + }, + "original_head_commit": { + "type": "string", + "pattern": "^[a-f0-9]{7,64}$", + "description": "Commit hash the worktree was forked from." + } + } + }, + + "sourceMetadata": { + "type": "object", + "description": "Adapter-provided metadata about the source event.", + "properties": { + "agent": { "$ref": "#/$defs/agentName" }, + "original_type": { "type": "string" }, + "schema_version": { "type": "string" }, + "raw": { + "description": "Opaque source object preserved verbatim. If an object, may use envelope_ref to reference an earlier entry's inlined envelope.", + "if": { "type": "object" }, + "then": { + "type": "object", + "properties": { + "envelope_ref": { "type": "string" } + } + } + }, + "synthesized": { "type": "boolean", "default": false } + }, + "additionalProperties": false + }, + + "semanticMetadata": { + "type": "object", + "description": "Semantic linking for cross-event references when explicit IDs are unreliable.", + "properties": { + "group_id": { "type": "string" }, + "call_id": { "type": "string" }, + "tool_kind": { "$ref": "#/$defs/toolKind" } + }, + "additionalProperties": false + }, + + "agentMessageUsage": { + "type": "object", + "description": "Token usage for this source agent envelope. 
May appear on agent_message, agent_thinking, or tool_call when that entry is the first entry derived from the envelope. input_tokens/output_tokens are deltas for this envelope; *_cumulative variants are running totals through this point. total_tokens/total_tokens_cumulative are source-reported inclusive totals for exact total-token analytics. cache_read_tokens and cache_creation_tokens are independent billing categories. context_input_tokens captures source-reported prompt/context pressure for this request, cache-inclusive when the source exposes enough detail; context_window_tokens captures the model context-window size when exposed. When present, usage must include either input/output coverage or total-token coverage.", + "properties": { + "input_tokens": { "type": "integer", "minimum": 0 }, + "output_tokens": { "type": "integer", "minimum": 0 }, + "input_tokens_cumulative": { "type": "integer", "minimum": 0 }, + "output_tokens_cumulative": { "type": "integer", "minimum": 0 }, + "total_tokens": { "type": "integer", "minimum": 0 }, + "total_tokens_cumulative": { "type": "integer", "minimum": 0 }, + "cache_read_tokens": { "type": "integer", "minimum": 0 }, + "cache_creation_tokens": { "type": "integer", "minimum": 0 }, + "reasoning_tokens": { "type": "integer", "minimum": 0 }, + "context_input_tokens": { "type": "integer", "minimum": 0 }, + "context_window_tokens": { "type": "integer", "minimum": 1 } + }, + "anyOf": [ + { + "allOf": [ + { + "anyOf": [ + { + "properties": { "input_tokens": {} }, + "required": ["input_tokens"] + }, + { + "properties": { "input_tokens_cumulative": {} }, + "required": ["input_tokens_cumulative"] + } + ] + }, + { + "anyOf": [ + { + "properties": { "output_tokens": {} }, + "required": ["output_tokens"] + }, + { + "properties": { "output_tokens_cumulative": {} }, + "required": ["output_tokens_cumulative"] + } + ] + } + ] + }, + { + "properties": { "total_tokens": {} }, + "required": ["total_tokens"] + }, + { + "properties": { 
"total_tokens_cumulative": {} }, + "required": ["total_tokens_cumulative"] + } + ], + "additionalProperties": false + }, + + "attachment": { + "type": "object", + "description": "An image or file carried by a message or tool result, by reference. v0.1.0 uri schemes are references only (https:, local file:, content-addressed sha256:); inline data: payloads are deferred.", + "required": ["kind"], + "properties": { + "kind": { "type": "string", "enum": ["image", "file", "other"] }, + "media_type": { "type": "string" }, + "uri": { + "type": "string", + "pattern": "^(https:|file:|sha256:)" + }, + "name": { "type": "string" } + }, + "anyOf": [ + { + "properties": { "uri": {} }, + "required": ["uri"] + }, + { + "properties": { "name": {} }, + "required": ["name"] + } + ], + "additionalProperties": false + }, + + "sessionTerminationReason": { + "type": "string", + "anyOf": [ + { "enum": ["eof_with_open_tool_calls", "process_terminated", "truncated", "user_abort"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + + "parseFidelity": { + "type": "object", + "description": "At-a-glance session parse fidelity summary. When present, quarantined_count MUST equal the number of x-*/unknown_record system_event entries in the session group; termination_reason MUST match the final session_terminated reason when one exists.", + "required": ["quarantined_count"], + "properties": { + "quarantined_count": { + "type": "integer", + "minimum": 0, + "description": "Number of quarantined source records emitted as x-*/unknown_record system_event entries in this session group." + }, + "termination_reason": { + "$ref": "#/$defs/sessionTerminationReason", + "description": "Final abnormal session termination reason, when a session_terminated event is present." + } + }, + "additionalProperties": false + }, + + "trailEnvelope": { + "type": "object", + "description": "Optional trail envelope record (line 1). File-level metadata; not part of the event graph. 
When present, MUST appear at line 1 and the first session header MUST follow on line 2. At most one per file. Multi-session files (spec §9.6) carry one envelope followed by N session groups in file order.", + "required": ["type", "schema_version", "id", "ts", "producer"], + "properties": { + "type": { "const": "trail" }, + "schema_version": { "const": "0.1.0" }, + "id": { "$ref": "#/$defs/id" }, + "name": { "type": "string" }, + "description": { "type": "string" }, + "ts": { "$ref": "#/$defs/iso8601" }, + "producer": { "type": "string", "minLength": 1 }, + "content_hash": { + "oneOf": [ + { "$ref": "#/$defs/sha256Hex" }, + { "const": "" } + ] + }, + "tags": { + "type": "array", + "items": { "type": "string" } + }, + "vcs": { "$ref": "#/$defs/vcs" }, + "fork_from": { + "type": "object", + "required": ["trail_id"], + "properties": { + "trail_id": { "$ref": "#/$defs/id" }, + "content_hash": { "$ref": "#/$defs/sha256Hex" } + }, + "additionalProperties": false + }, + "redacted_from": { + "type": "object", + "required": ["content_hash"], + "properties": { + "content_hash": { "$ref": "#/$defs/sha256Hex" } + }, + "additionalProperties": false + }, + "sessions": { + "type": "array", + "description": "Optional manifest of sessions contained in the file, one entry per session group in file order (spec §8.4, §9.6). Validator warns on length mismatch or per-entry drift vs actual file content.", + "items": { + "type": "object", + "required": ["id", "agent"], + "properties": { + "id": { "$ref": "#/$defs/id" }, + "agent": { "$ref": "#/$defs/agentName" } + }, + "additionalProperties": false + } + }, + "meta": { + "type": "object", + "description": "Free-form vendor extensions. Recommended keys use the x-<vendor>/<name> extension grammar." + } + }, + "additionalProperties": false + }, + + "header": { + "type": "object", + "description": "Session header. The first session header is required at line 1, or at line 2 when a trail envelope occupies line 1.
Multi-session files (spec §9.6) carry additional session headers later in the file; each opens a new (header, events*) group. Not part of the event graph.", + "required": ["type", "schema_version", "id", "ts", "agent"], + "properties": { + "type": { "const": "session" }, + "schema_version": { + "const": "0.1.0" + }, + "id": { "$ref": "#/$defs/id" }, + "name": { "type": "string" }, + "description": { "type": "string" }, + "tags": { + "type": "array", + "items": { "type": "string" } + }, + "session_uid": { + "$ref": "#/$defs/sessionUid", + "description": "Globally-unique source-session identifier. Stable across all segments of one source session (spec §9.5). Reconcilers group segments by session_uid. Optional in v0.1 single-segment trails; writers SHOULD emit it for forward-compat. Required (and enforced by the header allOf if/then) when segment.seq > 1. ULID is recommended (lexicographic tie-breaker); UUID accepted." + }, + "segment": { "$ref": "#/$defs/segment" }, + "content_hash": { + "oneOf": [ + { "$ref": "#/$defs/sha256Hex" }, + { "const": "" } + ] + }, + "ts": { "$ref": "#/$defs/iso8601" }, + "stream": { + "type": "object", + "description": "Live-capture marker. Present means writer is actively appending or last appended in streaming mode. 
Absent means non-streaming or unaware writer.", + "required": ["state"], + "properties": { + "state": { + "type": "string", + "enum": ["open", "closed"] + }, + "started_at": { "$ref": "#/$defs/iso8601" } + }, + "additionalProperties": false + }, + "agent": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "$ref": "#/$defs/agentName" }, + "version": { "type": "string" }, + "model_default": { "type": "string" } + }, + "additionalProperties": false + }, + "cwd": { "type": "string" }, + "vcs": { "$ref": "#/$defs/vcs" }, + "fork_from": { + "type": "object", + "required": ["session_id"], + "properties": { + "session_id": { "$ref": "#/$defs/id" }, + "content_hash": { "$ref": "#/$defs/sha256Hex" }, + "entry_id": { "$ref": "#/$defs/id" } + }, + "additionalProperties": false + }, + "redacted_from": { + "type": "object", + "required": ["content_hash"], + "properties": { + "content_hash": { "$ref": "#/$defs/sha256Hex" } + }, + "additionalProperties": false + }, + "parse_fidelity": { "$ref": "#/$defs/parseFidelity" }, + "source": { + "type": "object", + "properties": { + "agent": { "$ref": "#/$defs/agentName" }, + "path": { "type": "string" }, + "format_version": { "type": "string" } + }, + "additionalProperties": false + }, + "meta": { + "type": "object", + "description": "Free-form vendor extensions. Recommended keys use the x-<vendor>/<name> extension grammar (spec §8.3)." + } + }, + "additionalProperties": false, + "allOf": [ + { + "description": "Spec §9.5: session_uid is required when segment.seq >= 2 so reconcilers can group continuation segments.
Single-segment trails (seq absent or seq=1) keep session_uid optional in v0.1.", + "if": { + "type": "object", + "required": ["segment"], + "properties": { + "segment": { + "type": "object", + "required": ["seq"], + "properties": { + "seq": { "type": "integer", "minimum": 2 } + } + } + } + }, + "then": { + "type": "object", + "required": ["session_uid"], + "properties": { + "session_uid": { "$ref": "#/$defs/sessionUid" } + } + } + } + ] + }, + + "entryBase": { + "type": "object", + "required": ["type", "id", "ts", "payload"], + "properties": { + "type": { "type": "string" }, + "id": { "$ref": "#/$defs/id" }, + "parent_id": { + "oneOf": [ + { "$ref": "#/$defs/id" }, + { "type": "null" } + ] + }, + "ts": { "$ref": "#/$defs/iso8601" }, + "payload": { "type": "object" }, + "semantic": { "$ref": "#/$defs/semanticMetadata" }, + "source": { "$ref": "#/$defs/sourceMetadata" }, + "meta": { + "type": "object", + "properties": { + "redaction_count": { + "type": "integer", + "minimum": 0, + "description": "Number of redactor mutations applied to this event entry." 
+ } + } + } + }, + "additionalProperties": false + }, + + "entry": { + "allOf": [ + { "$ref": "#/$defs/entryBase" }, + { + "oneOf": [ + { "$ref": "#/$defs/events/user_message" }, + { "$ref": "#/$defs/events/agent_message" }, + { "$ref": "#/$defs/events/task_plan_update" }, + { "$ref": "#/$defs/events/tool_call" }, + { "$ref": "#/$defs/events/tool_result" }, + { "$ref": "#/$defs/events/tool_call_aborted" }, + { "$ref": "#/$defs/events/user_query" }, + { "$ref": "#/$defs/events/user_query_response" }, + { "$ref": "#/$defs/events/session_summary" }, + { "$ref": "#/$defs/events/system_event" }, + { "$ref": "#/$defs/events/agent_thinking" }, + { "$ref": "#/$defs/events/user_interrupt" }, + { "$ref": "#/$defs/events/context_compact" }, + { "$ref": "#/$defs/events/branch_point" }, + { "$ref": "#/$defs/events/branch_summary" }, + { "$ref": "#/$defs/events/model_change" }, + { "$ref": "#/$defs/events/mode_change" }, + { "$ref": "#/$defs/events/thinking_level_change" }, + { "$ref": "#/$defs/events/session_terminated" }, + { "$ref": "#/$defs/events/session_end" }, + { "$ref": "#/$defs/events/command_invoke" }, + { "$ref": "#/$defs/events/capability_change" }, + { "$ref": "#/$defs/events/session_metadata_update" } + ] + } + ] + }, + + "events": { + + "user_message": { + "type": "object", + "properties": { + "type": { "const": "user_message" }, + "payload": { + "type": "object", + "required": ["text"], + "properties": { + "text": { "type": "string" }, + "origin": { + "type": "string", + "description": "Authorship marker for user-role text. 
Absent means user-authored.", + "anyOf": [ + { "enum": ["user", "injected", "mixed"] }, + { + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "attachments": { + "type": "array", + "items": { "$ref": "#/$defs/attachment" } + } + }, + "additionalProperties": false + } + } + }, + + "agent_message": { + "type": "object", + "properties": { + "type": { "const": "agent_message" }, + "payload": { + "type": "object", + "required": ["text"], + "properties": { + "text": { "type": "string" }, + "model": { "type": "string" }, + "stop_reason": { "type": "string" }, + "usage": { "$ref": "#/$defs/agentMessageUsage" }, + "attachments": { + "type": "array", + "items": { "$ref": "#/$defs/attachment" } + } + }, + "additionalProperties": false + } + } + }, + + "task_plan_update": { + "type": "object", + "properties": { + "type": { "const": "task_plan_update" }, + "payload": { + "type": "object", + "required": ["items"], + "properties": { + "explanation": { "type": "string" }, + "items": { + "type": "array", + "items": { "$ref": "#/$defs/taskPlanItem" } + }, + "deltas": { + "type": "array", + "items": { "$ref": "#/$defs/taskPlanDelta" } + } + }, + "additionalProperties": false + } + } + }, + + "tool_call": { + "type": "object", + "properties": { + "type": { "const": "tool_call" }, + "payload": { + "type": "object", + "required": ["tool", "args"], + "properties": { + "tool": { "$ref": "#/$defs/toolKind" }, + "args": { "type": "object" }, + "usage": { "$ref": "#/$defs/agentMessageUsage" }, + "truncated": { "type": "boolean" }, + "args_size": { + "type": "integer", + "minimum": 0, + "description": "UTF-8 byte length of the original args object before truncation. Required when truncated is true." 
+ }, + "overflow_ref": { + "oneOf": [ + { "type": "string", "pattern": "^sha256:[a-f0-9]{64}$" }, + { "type": "null" } + ] + } + }, + "additionalProperties": false, + "dependentSchemas": { + "truncated": { + "if": { + "properties": { + "truncated": { "const": true } + }, + "required": ["truncated"] + }, + "then": { + "properties": { + "args_size": { + "type": "integer", + "minimum": 0 + } + }, + "required": ["args_size"] + } + } + }, + "allOf": [ + { + "if": { "properties": { "tool": { "const": "file_read" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path"], + "properties": { + "path": { "type": "string" }, + "range": { + "type": "array", + "items": { "type": "integer" }, + "minItems": 2, + "maxItems": 2 + } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_write" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path", "content"], + "properties": { + "path": { "type": "string" }, + "content": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_edit" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "oneOf": [ + { + "type": "object", + "required": ["path", "diff"], + "properties": { + "path": { "type": "string" }, + "diff": { "type": "string" } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["path", "old", "new"], + "properties": { + "path": { "type": "string" }, + "old": { "type": "string" }, + "new": { "type": "string" }, + "replace_all": { "type": "boolean" } + }, + "additionalProperties": false + } + ] + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_patch" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["files"], + "properties": { + "files": { + "type": "array", + 
"minItems": 1, + "items": { + "type": "object", + "required": ["path", "diff"], + "properties": { + "path": { "type": "string" }, + "diff": { "type": "string" } + }, + "additionalProperties": false + } + }, + "atomic": { "type": "boolean" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_list" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path"], + "properties": { + "path": { "type": "string" }, + "recursive": { "type": "boolean" }, + "glob": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "file_search" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string" }, + "path": { "type": "string" }, + "glob": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "shell_command" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["command"], + "properties": { + "command": { "type": "string" }, + "cwd": { "type": "string" }, + "timeout": { "type": "integer" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "shell_output" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "properties": { + "command_id": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "shell_input" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["input"], + "properties": { + "input": { "type": "string" }, + "session_id": { "type": "string" }, + "command_id": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { 
"tool": { "const": "mcp_call" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["server", "tool"], + "properties": { + "server": { "type": "string" }, + "tool": { "type": "string" }, + "args": { "type": "object" }, + "headers": { "type": "object" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "web_fetch" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["url"], + "properties": { + "url": { "type": "string" }, + "method": { "type": "string" }, + "headers": { "type": "object" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "web_search" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "tool_search" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string" }, + "limit": { "type": "integer" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "notebook_edit" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["path"], + "properties": { + "path": { "type": "string" }, + "cell_id": { "type": "string" }, + "diff": { "type": "string" }, + "content": { "type": "string" } + }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "subagent_invoke" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["task"], + "properties": { + "task": { "type": "string" }, + "agent_type": { "type": "string" }, + "session_id": { "$ref": "#/$defs/id" } 
+ }, + "additionalProperties": false + } + } + } + }, + { + "if": { "properties": { "tool": { "const": "other" } }, "required": ["tool"] }, + "then": { + "properties": { + "args": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "type": "string" }, + "args": { "type": "object" } + }, + "additionalProperties": false + } + } + } + } + ] + } + } + }, + + "tool_result": { + "type": "object", + "properties": { + "type": { "const": "tool_result" }, + "payload": { + "type": "object", + "required": ["ok"], + "properties": { + "for_id": { + "$ref": "#/$defs/id" + }, + "ok": { "type": "boolean" }, + "output": { "type": "string" }, + "truncated": { "type": "boolean" }, + "output_size": { + "type": "integer", + "minimum": 0, + "description": "UTF-8 byte length of the original output before truncation. Required when truncated is true." + }, + "overflow_ref": { + "oneOf": [ + { "type": "string", "pattern": "^sha256:[a-f0-9]{64}$" }, + { "type": "null" } + ] + }, + "error": { + "oneOf": [{ "type": "string" }, { "type": "null" }] + }, + "attachments": { + "type": "array", + "items": { "$ref": "#/$defs/attachment" } + }, + "meta": { + "type": "object", + "description": "Structured per-toolkind outputs, keyed by the originating tool_call.tool. Optional; consumers fall back to payload.output when the relevant key is absent. Registered keys are writer-strict; unregistered/future toolkinds are opaque objects. 
Vendors extend a registered key via x-<vendor>/<name> pattern keys.", + "properties": { + "mcp_call": { + "type": "object", + "properties": { + "content_blocks": { + "type": "array", + "items": { + "type": "object", + "required": ["type"], + "properties": { + "type": { "type": "string", "enum": ["text", "image", "resource"] }, + "text": { "type": "string" }, + "data": { "type": "string" }, + "mime_type": { "type": "string" }, + "uri": { "type": "string" } + }, + "additionalProperties": false + } + }, + "is_error": { "type": "boolean" } + }, + "patternProperties": { + "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$": {} + }, + "additionalProperties": false + }, + "file_read": { + "type": "object", + "properties": { + "range": { + "type": "array", + "items": { "type": "integer" }, + "minItems": 2, + "maxItems": 2 + }, + "total_lines": { "type": "integer", "minimum": 0 }, + "encoding": { "type": "string" }, + "truncated_at_line": { + "oneOf": [{ "type": "integer", "minimum": 0 }, { "type": "null" }] + } + }, + "patternProperties": { + "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$": {} + }, + "additionalProperties": false + }, + "shell_command": { + "type": "object", + "properties": { + "stdout": { "type": "string" }, + "stderr": { "type": "string" }, + "exit_code": { + "oneOf": [{ "type": "integer" }, { "type": "null" }] + }, + "signal": { + "oneOf": [{ "type": "string" }, { "type": "null" }] + }, + "duration_ms": { "type": "integer", "minimum": 0 } + }, + "patternProperties": { + "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$": {} + }, + "additionalProperties": false + } + }, + "propertyNames": { + "pattern": "^(?:[a-z][a-z0-9_]*|x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*)$" + }, + "additionalProperties": { "type": "object" } + } + }, + "additionalProperties": false, + "dependentSchemas": { + "truncated": { + "if": { + "properties": { + "truncated": { "const": true } + }, + "required": ["truncated"] + }, + "then": { + "properties": { + "output_size": { + "type":
"integer", + "minimum": 0 + } + }, + "required": ["output_size"] + } + } + } + } + } + }, + + "tool_call_aborted": { + "type": "object", + "properties": { + "type": { "const": "tool_call_aborted" }, + "payload": { + "type": "object", + "required": ["scope", "reason"], + "properties": { + "scope": { + "description": "Abort granularity. tool_call aborts reference a specific tool_call by for_id; turn aborts describe a broader turn-level stop when the source cannot identify one call.", + "oneOf": [ + { "type": "string", "enum": ["tool_call", "turn"] }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "for_id": { "$ref": "#/$defs/id" }, + "reason": { + "description": "Why execution stopped before a normal tool_result.", + "oneOf": [ + { + "type": "string", + "enum": [ + "user_interrupt", + "hook_blocked", + "timeout", + "permission_denied", + "runtime_error" + ] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "blocked_by": { "type": "string" } + }, + "additionalProperties": false, + "allOf": [ + { + "if": { + "properties": { + "scope": { "const": "tool_call" } + }, + "required": ["scope"] + }, + "then": { + "required": ["for_id"], + "properties": { + "for_id": { "$ref": "#/$defs/id" } + } + } + }, + { + "if": { + "properties": { + "scope": { "not": { "const": "tool_call" } } + }, + "required": ["scope"] + }, + "then": { + "not": { "required": ["for_id"] } + } + } + ] + } + } + }, + + "user_query": { + "type": "object", + "properties": { + "type": { "const": "user_query" }, + "payload": { + "type": "object", + "required": ["questions"], + "properties": { + "questions": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["id", "question"], + "properties": { + "id": { "type": "string" }, + "question": { "type": "string" }, + "header": { "type": "string" }, + "multi_select": { "type": "boolean" }, + "is_secret": { "type": 
"boolean" }, + "allow_other": { "type": "boolean" }, + "options": { + "type": "array", + "items": { + "type": "object", + "required": ["label"], + "properties": { + "id": { "type": "string", "minLength": 1 }, + "label": { "type": "string" }, + "description": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + } + }, + + "user_query_response": { + "type": "object", + "properties": { + "type": { "const": "user_query_response" }, + "payload": { + "type": "object", + "required": ["for_id", "answers"], + "properties": { + "for_id": { "$ref": "#/$defs/id" }, + "answers": { + "type": "object", + "additionalProperties": { + "type": "object", + "required": ["selected"], + "properties": { + "selected": { + "type": "array", + "items": { "type": "string" } + }, + "other": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + } + }, + + "session_summary": { + "type": "object", + "properties": { + "type": { "const": "session_summary" }, + "payload": { + "type": "object", + "required": ["scope", "text"], + "properties": { + "scope": { "type": "string", "enum": ["session"] }, + "text": { "type": "string" }, + "model": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + + "system_event": { + "type": "object", + "properties": { + "type": { "const": "system_event" }, + "payload": { + "type": "object", + "required": ["kind"], + "properties": { + "kind": { + "type": "string", + "description": "Lifecycle/hook signal category. 
Either one of the reserved cross-agent values, or a vendor-namespaced extension of the form `x-<vendor>/<name>`.", + "anyOf": [ + { + "enum": [ + "session_start", + "turn_start", + "turn_end", + "subagent_start", + "subagent_end", + "pre_tool_use", + "post_tool_use", + "hook_fired", + "permission_request", + "permission_decision", + "cwd_change", + "env_snapshot", + "task_started", + "task_completed", + "plan_completed", + "turn_aborted", + "tool_decision", + "context_injected", + "hook_progress", + "queue_operation", + "heartbeat", + "agent_error", + "agent_warning", + "api_error", + "stream_error", + "deprecation_notice", + "guardian_alert", + "model_rerouted", + "hook_failed", + "vcs_commit" + ] + }, + { + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + } + ] + }, + "text": { "type": "string" }, + "data": { "type": "object" } + }, + "additionalProperties": false + } + } + }, + + "agent_thinking": { + "type": "object", + "properties": { + "type": { "const": "agent_thinking" }, + "payload": { + "type": "object", + "required": ["text"], + "properties": { + "text": { "type": "string" }, + "model": { "type": "string" }, + "level": { "type": "string", "minLength": 1 }, + "usage": { "$ref": "#/$defs/agentMessageUsage" } + }, + "additionalProperties": false + } + } + }, + + "user_interrupt": { + "type": "object", + "properties": { + "type": { "const": "user_interrupt" }, + "payload": { + "type": "object", + "properties": { "reason": { "type": "string" } }, + "additionalProperties": false + } + } + }, + + "context_compact": { + "type": "object", + "properties": { + "type": { "const": "context_compact" }, + "payload": { + "type": "object", + "required": ["summary"], + "properties": { + "summary": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { "enum": ["manual", "auto"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "tokens_before": { "type": "integer", "minimum": 0 }, + "tokens_after": { "type": "integer",
"minimum": 0 }, + "replaced_message_ids": { + "type": "array", + "items": { "$ref": "#/$defs/id" }, + "description": "Agent Trail entry IDs folded or replaced by this compaction summary. Provenance-only; readers must not require same-file resolution." + } + }, + "additionalProperties": false + } + } + }, + + "branch_point": { + "type": "object", + "properties": { + "type": { "const": "branch_point" }, + "payload": { + "type": "object", + "required": ["from_id"], + "properties": { + "from_id": { "$ref": "#/$defs/id" }, + "reason": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + + "branch_summary": { + "type": "object", + "properties": { + "type": { "const": "branch_summary" }, + "payload": { + "type": "object", + "required": ["abandoned_branch_id", "summary"], + "properties": { + "abandoned_branch_id": { "$ref": "#/$defs/id" }, + "summary": { "type": "string" }, + "model": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + + "model_change": { + "type": "object", + "properties": { + "type": { "const": "model_change" }, + "payload": { + "type": "object", + "required": ["to_model"], + "properties": { + "from_model": { "type": "string" }, + "to_model": { "type": "string" }, + "from_provider": { "type": "string" }, + "to_provider": { "type": "string" }, + "reason": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "user_set", + "agent_set", + "runtime_inferred", + "auto_reroute", + "external" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "turn_id": { "type": "string", "minLength": 1 } + }, + "additionalProperties": false + } + } + }, + + "mode_change": { + "type": "object", + "properties": { + "type": { "const": "mode_change" }, + "payload": { + "type": "object", + "required": ["scope", "to_mode"], + "properties": { + "scope": { + "type": "string", + "anyOf": [ + { "enum": ["collaboration", "permission", "execution", "ui"] }, + { 
"pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "from_mode": { "type": "string", "minLength": 1 }, + "to_mode": { "type": "string", "minLength": 1 }, + "reason": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "user_set", + "agent_set", + "runtime_inferred", + "auto_reroute", + "external" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "turn_id": { "type": "string", "minLength": 1 }, + "data": { "type": "object" } + }, + "additionalProperties": false + } + } + }, + + "thinking_level_change": { + "type": "object", + "properties": { + "type": { "const": "thinking_level_change" }, + "payload": { + "type": "object", + "required": ["to_level"], + "properties": { + "from_level": { "type": "string", "minLength": 1 }, + "to_level": { "type": "string", "minLength": 1 }, + "reason": { "type": "string" }, + "trigger": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "user_set", + "agent_set", + "runtime_inferred", + "auto_reroute", + "external" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "turn_id": { "type": "string", "minLength": 1 }, + "data": { "type": "object" } + }, + "additionalProperties": false + } + } + }, + + "session_terminated": { + "type": "object", + "properties": { + "type": { "const": "session_terminated" }, + "payload": { + "type": "object", + "required": ["reason"], + "properties": { + "reason": { + "$ref": "#/$defs/sessionTerminationReason" + }, + "open_call_ids": { + "type": "array", + "items": { "$ref": "#/$defs/id" } + } + }, + "additionalProperties": false + } + } + }, + + "session_end": { + "type": "object", + "properties": { + "type": { "const": "session_end" }, + "payload": { + "type": "object", + "required": ["reason"], + "properties": { + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["complete", "user_quit", "agent_idle"] }, + { "pattern": 
"^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "final_message_id": { "$ref": "#/$defs/id" } + }, + "additionalProperties": false + } + } + }, + + "command_invoke": { + "type": "object", + "properties": { + "type": { "const": "command_invoke" }, + "payload": { + "type": "object", + "required": ["name", "kind", "via"], + "properties": { + "name": { + "type": "string", + "description": "User-visible identifier of the invoked capability. Leading slash for slash/builtin/custom_prompt commands (`/clear`); bare name for skills (`webapp-testing`)." + }, + "kind": { + "type": "string", + "description": "What kind of capability was invoked.", + "anyOf": [ + { "enum": ["slash", "builtin", "skill", "custom_prompt", "plugin"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "via": { + "type": "string", + "description": "How the invocation reached the agent. `auto_trigger` covers description-matched skill activation with no user action; adapters MAY synthesize it (set source.synthesized=true).", + "anyOf": [ + { "enum": ["user_typed", "auto_trigger", "agent_invoked"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "args": { "type": "object" }, + "expansion_text": { "type": "string" }, + "result_action": { + "description": "What the runtime did with the invocation. 
Either one of the reserved values, a vendor-namespaced extension of the form `x-<vendor>/<name>`, or null.", + "oneOf": [ + { + "type": "string", + "enum": ["compact", "clear", "expand", "load_skill", "noop"] + }, + { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + }, + { "type": "null" } + ] + } + }, + "additionalProperties": false + } + } + }, + + "capability_change": { + "type": "object", + "properties": { + "type": { "const": "capability_change" }, + "payload": { + "allOf": [ + { + "type": "object", + "required": ["scope", "reason"], + "properties": { + "scope": { + "type": "string", + "anyOf": [ + { "enum": ["tool", "skill", "mcp_server", "mcp_tool", "plugin"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "reason": { + "type": "string", + "anyOf": [ + { + "enum": [ + "initial", + "registered", + "deregistered", + "connected", + "disconnected", + "loaded", + "unloaded", + "error", + "instructions_updated" + ] + }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + }, + "added": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityAddedItem" } + }, + "removed": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityRemovedItem" } + }, + "changed": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityChangedItem" } + }, + "snapshot": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/capabilityAddedItem" } + } + }, + "additionalProperties": false + }, + { + "anyOf": [ + { "type": "object", "required": ["added"] }, + { "type": "object", "required": ["removed"] }, + { "type": "object", "required": ["changed"] }, + { "type": "object", "required": ["snapshot"] } + ] + } + ] + } + } + }, + + "session_metadata_update": { + "type": "object", + "properties": { + "type": { "const": "session_metadata_update" }, + "payload": { + "oneOf": [ + { + "type": "object", + "required": ["field", "value", 
"reason"], + "properties": { + "field": { + "type": "string", + "enum": ["name", "description", "agent.model_default", "vcs.branch"] + }, + "value": { "type": "string" }, + "previous_value": { "type": "string" }, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["field", "value", "reason"], + "properties": { + "field": { "const": "tags" }, + "value": { + "type": "array", + "items": { "type": "string" } + }, + "previous_value": { + "type": "array", + "items": { "type": "string" } + }, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["field", "value", "reason"], + "properties": { + "field": { "const": "vcs.worktree" }, + "value": { "$ref": "#/$defs/worktree" }, + "previous_value": { "$ref": "#/$defs/worktree" }, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + }, + { + "type": "object", + "required": ["field", "value", "reason"], + "properties": { + "field": { + "type": "string", + "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" + }, + "value": {}, + "previous_value": {}, + "reason": { + "type": "string", + "anyOf": [ + { "enum": ["ai_generated", "user_set", "runtime_inferred", "external"] }, + { "pattern": "^x-[a-z0-9]+(?:-[a-z0-9]+)*/[a-z0-9][a-z0-9_-]*$" } + ] + } + }, + "additionalProperties": false + } + ] + } + } + } + }, + + "capabilityAddedItem": { + "type": "object", + "required": ["name"], + "properties": 
{ + "name": { "type": "string" }, + "metadata": { "type": "object" } + }, + "additionalProperties": false + }, + + "capabilityRemovedItem": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "type": "string" } + }, + "additionalProperties": false + }, + + "capabilityChangedItem": { + "type": "object", + "required": ["name", "field"], + "properties": { + "name": { "type": "string" }, + "field": { "type": "string" }, + "from": {}, + "to": {} + }, + "additionalProperties": false + } + } +} diff --git a/spec.md b/spec.md new file mode 100644 index 0000000..15f7a48 --- /dev/null +++ b/spec.md @@ -0,0 +1,132 @@ +# Agent Trail Specification + +This file is a compatibility index for links that previously targeted the monorepo root `spec.md`. + +Frozen v0.1.0 content now lives in [`spec/v0.1.0/`](./spec/v0.1.0/). Draft content lives in [`spec/draft/`](./spec/draft/). + +## Canonical documents + +- [v0.1.0 specification](./spec/v0.1.0/) +- [Draft specification](./spec/draft/) + +## Legacy anchor aliases + +- [Agent Trail Specification](./spec/v0.1.0/) +- [1. Motivation](./spec/v0.1.0/01-motivation.md#1-motivation) +- [2. Goals and non-goals](./spec/v0.1.0/02-goals-and-non-goals.md#2-goals-and-non-goals) +- [Goals](./spec/v0.1.0/02-goals-and-non-goals.md#goals) +- [Non-goals](./spec/v0.1.0/02-goals-and-non-goals.md#non-goals) +- [2.1 Conformance and normativity](./spec/v0.1.0/02-goals-and-non-goals.md#21-conformance-and-normativity) +- [3. At a glance](./spec/v0.1.0/03-at-a-glance.md#3-at-a-glance) +- [4. Terminology](./spec/v0.1.0/04-terminology.md#4-terminology) +- [5. File format](./spec/v0.1.0/05-file-format.md#5-file-format) +- [5.1 File extension and MIME type](./spec/v0.1.0/05-file-format.md#51-file-extension-and-mime-type) +- [5.2 Encoding](./spec/v0.1.0/05-file-format.md#52-encoding) +- [5.3 File layout](./spec/v0.1.0/05-file-format.md#53-file-layout) +- [6. Versioning](./spec/v0.1.0/06-versioning.md#6-versioning) +- [7. 
Identity, artifacts, and content addressing](./spec/v0.1.0/07-identity-artifacts-and-content-addressing.md#7-identity-artifacts-and-content-addressing) +- [7.1 Session identity](./spec/v0.1.0/07-identity-artifacts-and-content-addressing.md#71-session-identity) +- [7.2 Artifact classes](./spec/v0.1.0/07-identity-artifacts-and-content-addressing.md#72-artifact-classes) +- [7.3 Content hash](./spec/v0.1.0/07-identity-artifacts-and-content-addressing.md#73-content-hash) +- [7.4 Two-tier identity](./spec/v0.1.0/07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) +- [7.4.1 Hash tier for `fork_from` and `redacted_from`](./spec/v0.1.0/07-identity-artifacts-and-content-addressing.md#741-hash-tier-for-fork_from-and-redacted_from) +- [7.5 Event identifiers](./spec/v0.1.0/07-identity-artifacts-and-content-addressing.md#75-event-identifiers) +- [8. The trail envelope](./spec/v0.1.0/08-the-trail-envelope.md#8-the-trail-envelope) +- [8.1 Schema](./spec/v0.1.0/08-the-trail-envelope.md#81-schema) +- [8.2 Fields](./spec/v0.1.0/08-the-trail-envelope.md#82-fields) +- [8.3 The `meta` extension convention](./spec/v0.1.0/08-the-trail-envelope.md#83-the-meta-extension-convention) +- [8.4 The `sessions` manifest](./spec/v0.1.0/08-the-trail-envelope.md#84-the-sessions-manifest) +- [8.5 File identity defaults when envelope is absent](./spec/v0.1.0/08-the-trail-envelope.md#85-file-identity-defaults-when-envelope-is-absent) +- [9. 
The session header](./spec/v0.1.0/09-the-session-header.md#9-the-session-header) +- [9.1 Schema](./spec/v0.1.0/09-the-session-header.md#91-schema) +- [9.2 Fields](./spec/v0.1.0/09-the-session-header.md#92-fields) +- [9.3 Example](./spec/v0.1.0/09-the-session-header.md#93-example) +- [9.4 Streaming and live capture](./spec/v0.1.0/09-the-session-header.md#94-streaming-and-live-capture) +- [9.5 Session segments (multi-segment sessions)](./spec/v0.1.0/09-the-session-header.md#95-session-segments-multi-segment-sessions) +- [Segment reconciliation](./spec/v0.1.0/09-the-session-header.md#segment-reconciliation) +- [Writer guidance](./spec/v0.1.0/09-the-session-header.md#writer-guidance) +- [Composition with multi-session files](./spec/v0.1.0/09-the-session-header.md#composition-with-multi-session-files) +- [9.6 Multi-session trail files](./spec/v0.1.0/09-the-session-header.md#96-multi-session-trail-files) +- [9.6.1 File grammar](./spec/v0.1.0/09-the-session-header.md#961-file-grammar) +- [9.6.2 Group boundaries and reader-tolerant recovery](./spec/v0.1.0/09-the-session-header.md#962-group-boundaries-and-reader-tolerant-recovery) +- [9.6.3 Per-group validation](./spec/v0.1.0/09-the-session-header.md#963-per-group-validation) +- [9.6.4 Per-group `content_hash`](./spec/v0.1.0/09-the-session-header.md#964-per-group-content_hash) +- [9.6.5 Cross-group references](./spec/v0.1.0/09-the-session-header.md#965-cross-group-references) +- [9.6.6 Order, divergence, and per-session metadata](./spec/v0.1.0/09-the-session-header.md#966-order-divergence-and-per-session-metadata) +- [9.6.7 Redaction of multi-session files](./spec/v0.1.0/09-the-session-header.md#967-redaction-of-multi-session-files) +- [9.6.8 No hard cap](./spec/v0.1.0/09-the-session-header.md#968-no-hard-cap) +- [10. 
Events](./spec/v0.1.0/10-events.md#10-events) +- [10.1 Base shape](./spec/v0.1.0/10-events.md#101-base-shape) +- [10.2 Mandatory event types](./spec/v0.1.0/10-events.md#102-mandatory-event-types) +- [`user_message`](./spec/v0.1.0/10-events.md#user_message) +- [`agent_message`](./spec/v0.1.0/10-events.md#agent_message) +- [`agent_message.payload.usage`](./spec/v0.1.0/10-events.md#agent_messagepayloadusage) +- [`task_plan_update`](./spec/v0.1.0/10-events.md#task_plan_update) +- [`tool_call`](./spec/v0.1.0/10-events.md#tool_call) +- [`tool_result`](./spec/v0.1.0/10-events.md#tool_result) +- [`tool_call_aborted`](./spec/v0.1.0/10-events.md#tool_call_aborted) +- [`tool_result.payload.meta` — structured outputs](./spec/v0.1.0/10-events.md#tool_resultpayloadmeta-structured-outputs) +- [`user_query`](./spec/v0.1.0/10-events.md#user_query) +- [`user_query_response`](./spec/v0.1.0/10-events.md#user_query_response) +- [`session_summary`](./spec/v0.1.0/10-events.md#session_summary) +- [10.3 Optional event types](./spec/v0.1.0/10-events.md#103-optional-event-types) +- [`session_metadata_update`](./spec/v0.1.0/10-events.md#session_metadata_update) +- [`system_event`](./spec/v0.1.0/10-events.md#system_event) +- [Reserved lifecycle vocabulary](./spec/v0.1.0/10-events.md#reserved-lifecycle-vocabulary) +- [Reserved source-signal vocabulary](./spec/v0.1.0/10-events.md#reserved-source-signal-vocabulary) +- [Reserved diagnostic vocabulary](./spec/v0.1.0/10-events.md#reserved-diagnostic-vocabulary) +- [Recommended `payload.data` shapes (permission kinds)](./spec/v0.1.0/10-events.md#recommended-payloaddata-shapes-permission-kinds) +- [Extension policy and promotion](./spec/v0.1.0/10-events.md#extension-policy-and-promotion) +- [`capability_change`](./spec/v0.1.0/10-events.md#capability_change) +- [`command_invoke`](./spec/v0.1.0/10-events.md#command_invoke) +- [`agent_thinking`](./spec/v0.1.0/10-events.md#agent_thinking) +- [`user_interrupt`](./spec/v0.1.0/10-events.md#user_interrupt) +- 
[`context_compact`](./spec/v0.1.0/10-events.md#context_compact) +- [`branch_point`](./spec/v0.1.0/10-events.md#branch_point) +- [`branch_summary`](./spec/v0.1.0/10-events.md#branch_summary) +- [`model_change`](./spec/v0.1.0/10-events.md#model_change) +- [`mode_change`](./spec/v0.1.0/10-events.md#mode_change) +- [`thinking_level_change`](./spec/v0.1.0/10-events.md#thinking_level_change) +- [`session_terminated`](./spec/v0.1.0/10-events.md#session_terminated) +- [`session_end`](./spec/v0.1.0/10-events.md#session_end) +- [10.4 Semantic linking](./spec/v0.1.0/10-events.md#104-semantic-linking) +- [10.5 Tool call terminal pairing](./spec/v0.1.0/10-events.md#105-tool-call-terminal-pairing) +- [10.6 Unknown event types](./spec/v0.1.0/10-events.md#106-unknown-event-types) +- [10.7 Source envelope referencing](./spec/v0.1.0/10-events.md#107-source-envelope-referencing) +- [11. Canonical tool taxonomy](./spec/v0.1.0/11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy) +- [11.1 `file_edit`](./spec/v0.1.0/11-canonical-tool-taxonomy.md#111-file_edit) +- [11.2 `file_patch`](./spec/v0.1.0/11-canonical-tool-taxonomy.md#112-file_patch) +- [11.3 `file_list`](./spec/v0.1.0/11-canonical-tool-taxonomy.md#113-file_list) +- [11.4 `shell_command`](./spec/v0.1.0/11-canonical-tool-taxonomy.md#114-shell_command) +- [11.5 `mcp_call`](./spec/v0.1.0/11-canonical-tool-taxonomy.md#115-mcp_call) +- [11.6 `subagent_invoke`](./spec/v0.1.0/11-canonical-tool-taxonomy.md#116-subagent_invoke) +- [11.7 The `other` escape hatch](./spec/v0.1.0/11-canonical-tool-taxonomy.md#117-the-other-escape-hatch) +- [12. Vendor extensions](./spec/v0.1.0/12-vendor-extensions.md#12-vendor-extensions) +- [12.1 Extension grammar](./spec/v0.1.0/12-vendor-extensions.md#121-extension-grammar) +- [13. 
Tree and branching](./spec/v0.1.0/13-tree-and-branching.md#13-tree-and-branching) +- [13.1 When to emit `parent_id`](./spec/v0.1.0/13-tree-and-branching.md#131-when-to-emit-parent_id) +- [13.2 Acyclicity](./spec/v0.1.0/13-tree-and-branching.md#132-acyclicity) +- [14. Canonical agent registry](./spec/v0.1.0/14-canonical-agent-registry.md#14-canonical-agent-registry) +- [15. Truncation, overflow, and raw source size](./spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md#15-truncation-overflow-and-raw-source-size) +- [15.1 `source.raw` elision and redaction](./spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction) +- [16. Redaction](./spec/v0.1.0/16-redaction.md#16-redaction) +- [17. Security Considerations](./spec/v0.1.0/17-security-considerations.md#17-security-considerations) +- [18. Validation](./spec/v0.1.0/18-validation.md#18-validation) +- [18.1 Writer schema](./spec/v0.1.0/18-validation.md#181-writer-schema) +- [18.2 Reader tolerance](./spec/v0.1.0/18-validation.md#182-reader-tolerance) +- [18.3 Conformance classes and diagnostics](./spec/v0.1.0/18-validation.md#183-conformance-classes-and-diagnostics) +- [Claiming conformance (non-normative)](./spec/v0.1.0/18-validation.md#claiming-conformance-non-normative) +- [Validation diagnostics](./spec/v0.1.0/18-validation.md#validation-diagnostics) +- [Conformance suite (non-normative)](./spec/v0.1.0/18-validation.md#conformance-suite-non-normative) +- [18.4 File graph checks](./spec/v0.1.0/18-validation.md#184-file-graph-checks) +- [18.4.1 Errors](./spec/v0.1.0/18-validation.md#1841-errors) +- [18.4.2 Warnings](./spec/v0.1.0/18-validation.md#1842-warnings) +- [18.4.3 Streaming-state rules](./spec/v0.1.0/18-validation.md#1843-streaming-state-rules) +- [19. Formal schema](./spec/v0.1.0/19-formal-schema.md#19-formal-schema) +- [20. 
Examples](./spec/v0.1.0/20-examples.md#20-examples) +- [Changelog](./spec/v0.1.0/changelog.md#changelog) +- [v0.1.0 (June 12, 2026)](./spec/v0.1.0/changelog.md#v010-june-12-2026) +- [Appendix A — Minimal valid record](./spec/v0.1.0/appendix-a-minimal-valid-record.md#appendix-a-minimal-valid-record) +- [Appendix A.1 — Minimal valid record with trail envelope](./spec/v0.1.0/appendix-a-minimal-valid-record.md#appendix-a1-minimal-valid-record-with-trail-envelope) +- [Appendix B — Content hash worked example](./spec/v0.1.0/appendix-b-content-hash-worked-example.md#appendix-b-content-hash-worked-example) +- [License](./spec/v0.1.0/license.md#license) diff --git a/spec/draft/01-motivation.md b/spec/draft/01-motivation.md new file mode 100644 index 0000000..8931d2d --- /dev/null +++ b/spec/draft/01-motivation.md @@ -0,0 +1,8 @@ +## 1. Motivation + +Engineers using multiple coding agents lose continuity between them. A debugging session in Claude Code is invisible from Cursor; an Aider conversation can't be shared with a colleague using Pi. Each tool stores sessions in its own format, and tools that try to bridge them re-implement the same parsing work. + +Agent Trail defines a portable file format for coding agent sessions, so any compliant tool can read and share sessions produced by any other. + +--- + diff --git a/spec/draft/02-goals-and-non-goals.md b/spec/draft/02-goals-and-non-goals.md new file mode 100644 index 0000000..83312f4 --- /dev/null +++ b/spec/draft/02-goals-and-non-goals.md @@ -0,0 +1,44 @@ +## 2. Goals and non-goals + +### Goals + +- Map common coding agents to one canonical event vocabulary with acceptable fidelity (~70%+ semantic fit on average across supported agents). +- Renderable in a generic viewer with no source-agent code. +- Searchable with standard text tooling. +- Trivially streamable, line by line. +- Trivially versionable, with graceful reader degradation. +- Content-addressable for safe sharing and deduplication. 
+ +### Non-goals + +- Replacing agents' native storage formats. +- Bit-perfect reproduction of source sessions. Use `source.raw` if needed. +- Encoding model internals (logits, sampling parameters, tokens). +- Cryptographic signing (deferred). +- Cross-segment `parent_id` references (deferred). +- Real-time bidirectional sync between agents. + +Deferred format surfaces: + +- A structured message-parts model for mixed human-authored and injected `user_message` content. +- Inline `data:` attachment payloads; v0.1.0 attachment `uri` values are references only. + +--- + +### 2.1 Conformance and normativity + +The normative Agent Trail contract is this specification plus `schema.json`. +`schema.json` is the canonical writer-strict machine-readable contract through +v1.0. + +The key words "MUST", "MUST NOT", "REQUIRED", "SHOULD", "SHOULD NOT", and "MAY" +are to be interpreted as described in BCP 14 when, and only when, they appear in +all capitals. + +Examples, notes, rationale, implementation guidance, adapter mappings, reader +display choices, CLI behavior, store layout, and redaction workflow are +non-normative unless explicitly stated otherwise. Implementation guidance lives +in `docs/implementation-semantics.md`. + +--- + diff --git a/spec/draft/03-at-a-glance.md b/spec/draft/03-at-a-glance.md new file mode 100644 index 0000000..332b0e1 --- /dev/null +++ b/spec/draft/03-at-a-glance.md @@ -0,0 +1,14 @@ +## 3. At a glance + +The smallest valid Agent Trail file: + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + +Line 1 is the header. Lines 2 and on are events. Everything else is optional structure layered on top. 
+ +--- + diff --git a/spec/draft/04-terminology.md b/spec/draft/04-terminology.md new file mode 100644 index 0000000..b9ba347 --- /dev/null +++ b/spec/draft/04-terminology.md @@ -0,0 +1,29 @@ +## 4. Terminology + +| Term | Definition | +|---|---| +| **Trail file** | A JSONL file conforming to this specification; contains one or more session groups. | +| **Trail envelope** | Optional `type:"trail"` record at line 1 carrying file-level metadata (producer, file label, file-scope hash, manifest, vendor extensions). Not part of the event graph. | +| **Header** | The session header (`type:"session"`). On line 1 when there is no envelope, on line 2 when the envelope is present. Not part of the event graph. | +| **Session group** | One `type:"session"` header plus the events after it until the next session header or EOF. | +| **Session bundle** | A trail file with one or more session groups. At session-group level the bundle is a forest; each group MAY itself be linear or tree-native. | +| **Child session** | A separate session group or external session spawned or forked from another session, linked by the child header's `fork_from`. | +| **Event** | Any object after the header line; one unit of session content. | +| **Turn** | One user-prompt-to-agent-completion cycle as delimited by the source. `turn_id` values are opaque source-correlation tokens; readers MUST NOT require them to resolve to any entry. | +| **File-level content hash** | SHA-256 of the canonical bytes covering the whole file with the trail envelope's `content_hash` pinned to the literal `""`. | +| **Session-level content hash** | SHA-256 of the canonical bytes covering ONLY the session header and its events (envelope excluded), with the session header's `content_hash` pinned to the literal `""`. | +| **Entry** | Equivalent to "event"; either term MAY appear. | +| **Adapter** | Software that reads a source agent's storage and emits a trail file. | +| **Linear session** | A session whose events do not use `parent_id`. 
Events are ordered by file position. | +| **Tree session** | A session where some events use `parent_id` to form a DAG. | +| **Canonical event** | One of the mandatory or optional event types in [§10.2](./10-events.md#102-mandatory-event-types) and [§10.3](./10-events.md#103-optional-event-types). | +| **Raw trail** | A local artifact preserving source fidelity as much as possible. | +| **Redacted trail** | A separate artifact produced from a raw trail for sharing. It has its own `content_hash`. | +| **Shared trail** | A redacted trail transported through a sharing mechanism. | +| **Synthesized event** | An event the adapter constructed from indirect source data (e.g., a git diff), not mapped from a real source event. Flagged with `source.synthesized: true`. | +| **Content hash** | SHA-256 of the exact artifact's canonical bytes (§7). | +| **Canonical bytes** | The file content normalized per §7 for hashing. | +| **Source escape hatch** | The `source.raw` field; preserves verbatim source-format data for lossless round-trip. | + +--- + diff --git a/spec/draft/05-file-format.md b/spec/draft/05-file-format.md new file mode 100644 index 0000000..4d3c275 --- /dev/null +++ b/spec/draft/05-file-format.md @@ -0,0 +1,34 @@ +## 5. File format + +### 5.1 File extension and MIME type + +- Recommended extension: `.trail.jsonl` +- Native compressed extension: `.trail.jsonl.gz` +- MIME type: `application/vnd.trail+jsonl`. The `vnd.` form is the intended canonical type and follows IANA conventions for vendor MIME types. IANA registration is deferred to v1.0; until then the type is documented here but not officially registered. +- Native compressed MIME type: `application/vnd.trail+jsonl+gzip`. +- The `+jsonl` suffix is provisional rather than an IANA-registered structured syntax suffix, and `+jsonl+gzip` is a nonstandard double suffix; these media types may be revised during registration. +- Editors render as JSON via the `.jsonl` suffix. 
A dedicated language extension MAY provide richer highlighting later. + +### 5.2 Encoding + +- UTF-8, no BOM. +- LF line endings (`\n`). CRLF is tolerated by readers; writers MUST NOT produce it. +- Each line is one self-contained JSON object. +- Empty lines are not allowed. +- A trailing newline at EOF is recommended but not REQUIRED. +- Writers MUST replace invalid UTF-8 bytes and unpaired surrogate escapes with U+FFFD at emission time. Emitted JSON strings MUST NOT contain unpaired surrogates. +- Writers MUST NOT emit JSON integer numbers outside the IEEE-754 exact-integer range (`-(2^53-1)` through `2^53-1`) anywhere in a trail file. Adapters that receive oversized source integers, such as snowflake ids or nanosecond timestamps in `source.raw`, MUST emit them as strings instead. Validator warnings use code `non_interoperable_number` at the offending JSON Pointer. +- `.trail.jsonl.gz` files are a whole-file gzip wrapper around the UTF-8 trail JSONL bytes above. Writers MUST NOT gzip individual JSONL lines independently. Readers MUST decompress `.trail.jsonl.gz` files before validation and processing. +- For `.trail.jsonl.gz`, `content_hash` is computed and verified by first decompressing the file to produce plain UTF-8 JSONL, then applying the canonical bytes procedure defined in §7.3 to the decompressed JSONL. The compressed bytes themselves are never hashed. + +### 5.3 File layout + +Every valid trail file has: + +1. **Optionally**, a trail envelope (`type:"trail"`) on line 1 (§8). +2. One **or more** session header groups in file order. Each group starts with a `type:"session"` record and continues with zero or more event lines until the next `type:"session"` record or EOF (§9.6). The first session header MUST appear on line 1 when there is no envelope, or on line 2 when an envelope is present. + +When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in §9.6. 
+ +--- + diff --git a/spec/draft/06-versioning.md b/spec/draft/06-versioning.md new file mode 100644 index 0000000..6dde7bb --- /dev/null +++ b/spec/draft/06-versioning.md @@ -0,0 +1,32 @@ +## 6. Versioning + +The header's `schema_version` is a SemVer string. The current version is `"0.1.0"`. Writers MUST emit the exact version they conform to. + +Agent Trail uses SemVer for the interoperability contract: + +| Change type | Version bump | Examples | +|---|---|---| +| Editorial-only change | no bump or patch | Typos, formatting, non-normative wording, examples that do not change validity or semantics. | +| Normative clarification with no behavior change | patch | Resolving ambiguity while preserving the same valid files and reader behavior. | +| Backward-compatible feature addition | minor | New optional field, new optional event type, new registered agent or tool kind that readers MAY ignore. | +| Breaking change | major | Required field changes, field removal, incompatible meaning changes, or changes that make existing valid trails invalid. | + +Before `1.0.0`, Agent Trail still uses this compatibility discipline conservatively: + +- `0.1.x` versions are the same feature family. Readers that support `0.1.0` SHOULD accept later `0.1.x` patch versions. +- `0.2.0` and later `0.x` versions MAY add backward-compatible features. Readers MAY accept them best-effort by skipping unknown event types and ignoring unknown payload fields. +- Breaking changes SHOULD be avoided before real adapter and reader experience proves they are necessary. If unavoidable, they MUST get a new minor while the spec is still pre-1.0, and the changelog MUST mark them explicitly as breaking. +- `1.0.0` is reserved for the first stable interoperability contract. + +Published spec and schema URLs are immutable. 
Local source files (`spec.md` and `schema.json`) represent the current working draft or next release candidate; released snapshots live at versioned URLs such as `/spec/v0.1.0` and `/schema/v0.1.0.json`. + +Writer schemas are exact per release: the v0.1.0 writer schema requires `schema_version: "0.1.0"`. Reader tolerance is runtime behavior, not permission for writers to emit a version other than the release they implement. + +| Source version | Reader behavior | +|---|---| +| Same `major.minor`, any patch | Fully supported if the reader supports that feature family. | +| Newer `0.x` minor | Best-effort: skip unknown event types, ignore unknown payload fields, preserve unknown records when round-tripping, and warn instead of aborting where possible. | +| New major version | Readers MAY reject unless they explicitly support that major version. | + +--- + diff --git a/spec/draft/07-identity-artifacts-and-content-addressing.md b/spec/draft/07-identity-artifacts-and-content-addressing.md new file mode 100644 index 0000000..077c5c6 --- /dev/null +++ b/spec/draft/07-identity-artifacts-and-content-addressing.md @@ -0,0 +1,65 @@ +## 7. Identity, artifacts, and content addressing + +### 7.1 Session identity + +Every session has a local identifier `id` in the header. Writers emit uppercase ULIDs (26 Crockford base32 chars) or lowercase UUIDs (RFC 4122, hyphenated or unhyphenated). The schema enforces this canonical casing so cross-segment reconciliation can dedup events by exact string equality; older v0.1 fixtures whose ids were free-form strings or non-canonical casing have been migrated. + +### 7.2 Artifact classes + +Agent Trail distinguishes local fidelity from shared safety: + +- **Raw trail:** the local artifact emitted by an adapter. It SHOULD preserve source fidelity, including `source.raw` where useful and safe. +- **Redacted trail:** a separate artifact produced from a raw trail for sharing. 
It removes or normalizes sensitive content and has its own `content_hash`. +- **Shared trail:** a redacted trail transported by a share tool. + +Redacted artifacts MAY include `redacted_from.content_hash` in the header to record provenance from the raw artifact. They MUST NOT expose the raw artifact's local path or local session identifier. + +### 7.3 Content hash + +Finalized artifacts SHOULD populate `content_hash` in the header. This is the SHA-256 of the artifact's canonical bytes, not a hash of the physical on-disk serialization and not a logical-session identifier shared across raw and redacted variants. + +Canonical bytes are defined as: + +- All JSONL lines in order. +- LF line endings. +- No trailing whitespace. +- A trailing newline at EOF. +- Each JSON object serialized using RFC 8785 JSON Canonicalization Scheme (JCS). +- Writer-valid strings are well-formed per §5.2, so canonical bytes remain pure JCS; hash-time string repair is not part of this procedure. + +Because the hash depends on the file content that includes the hash field, we use a two-pass approach: + +1. Serialize the file with the header's `content_hash` field set to the literal `""`. If the field is absent, insert `content_hash:""` into the header before canonicalization; this gives stamped and unstamped forms one digest for the same logical content. +2. Canonicalize per the rules above. +3. Compute SHA-256 of the canonicalized bytes. +4. Replace only the header's `content_hash` field with the resulting hex digest. + +Verifying a file's hash uses the same procedure: replace the present hash with `""`, canonicalize, hash, compare. + +Writers that produce streaming or in-progress files MAY omit `content_hash` or leave it as `""`. Readers MAY verify the hash but MUST NOT abort on mismatch — only warn. Strict validators MUST report a present but incorrect finalized `content_hash` as an error. 
+ +### 7.4 Two-tier identity + +When a trail envelope is present, the file carries two independent content hashes: + +- **Session-level `content_hash`** lives on the session header. It is SHA-256 over the canonical bytes covering only the session header and its events (the envelope record is excluded from the hashed input). In a multi-session file (§9.6) the slice for a session covers that session's header and the events between it and the next `type:"session"` record (or EOF). This makes each session's identity independent of whether it is wrapped in an envelope or sits beside sibling sessions — extracting one session from a multi-session file recomputes the same digest. +- **File-level `content_hash`** lives on the trail envelope. It is SHA-256 over the canonical bytes of the whole file, with the envelope's `content_hash` field replaced by `""` per the same two-pass procedure as §7.3. The session-level `content_hash`, if already populated, is treated as opaque file content. + +Writers that emit both hashes MUST stamp every session-level hash first, then compute and stamp the file-level hash. Readers verify them independently. Different consumers care about different scopes: extraction tools recompute the session hash; share/transport tools verify the file hash. + +#### 7.4.1 Hash tier for `fork_from` and `redacted_from` + +Lineage references mirror the tier of the linking context: + +- **Header-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **session-level** `content_hash` of the parent artifact (the forked-from session or the raw session that was redacted). This keeps session lineage independent of any envelope wrapper — extracting either side recomputes the same digest. +- **Envelope-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **file-level** `content_hash` of the parent file (envelope and all sessions included). Use these to link whole files rather than individual sessions. 
+- `segment.prev_content_hash` (§9.5) is always session-level, since segments chain at session grain. + +Writers MUST choose the matching tier; mixing tiers across a chain breaks verification. + +### 7.5 Event identifiers + +Event `id` values are globally unique. Writers emit uppercase ULIDs or lowercase UUIDs, matching §7.1 and the schema. Globally-unique canonical ids let a reconciler dedup events across segments by exact string equality. + +--- + diff --git a/spec/draft/08-the-trail-envelope.md b/spec/draft/08-the-trail-envelope.md new file mode 100644 index 0000000..74786fb --- /dev/null +++ b/spec/draft/08-the-trail-envelope.md @@ -0,0 +1,78 @@ +## 8. The trail envelope + +The trail envelope is an OPTIONAL record on line 1 that carries file-scope metadata distinct from per-session metadata. When absent, the session header occupies line 1 and behaviour matches earlier drafts. When present, the session header MUST follow on line 2 and at most one envelope is permitted per file. + +### 8.1 Schema + +```jsonc +{ + "type": "trail", + "schema_version": "0.1.0", + "id": "", + "name": "", // optional + "description": "", // optional + "ts": "", + "producer": "trail-cli/0.3.0", + "content_hash": "", // optional; populated at finalize + "tags": ["..."], // optional + "vcs": { "type": "git", "revision": "..." 
}, // optional; same shape as §9 vcs + "fork_from": { // optional; file-level fork link + "trail_id": "", // UUID or ULID id + "content_hash": "" // optional + }, + "redacted_from": { // optional; redacted artifacts only + "content_hash": "" + }, + "sessions": [ // optional manifest + { "id": "", "agent": "" } + ], + "meta": { // optional; see §8.3 + "x-entire/checkpoint_id": "ckpt-7" + } +} +``` + +### 8.2 Fields + +| Field | Required | Type | Notes | +|---|---|---|---| +| `type` | yes | literal `"trail"` | discriminator | +| `schema_version` | yes | string | currently `"0.1.0"` for the envelope shape — independent of session `schema_version` | +| `id` | yes | string | file-level identifier; distinct from any session `id` in the file | +| `name` | no | string | human label | +| `description` | no | string | free text | +| `ts` | yes | string | ISO-8601 timestamp when the file was assembled or exported | +| `producer` | yes | string | identifier of the writer (e.g., `trail-cli/0.3.0`) | +| `content_hash` | no | string | SHA-256 hex of the whole-file canonical bytes; see §7.4 | +| `tags` | no | string[] | free-form labels | +| `vcs` | no | object | working-tree context at file-assembly time | +| `fork_from` | no | object | reference to a parent file when forked; `trail_id` is a UUID or ULID id and `content_hash` is optional | +| `redacted_from` | no | object | provenance link from a redacted file to its raw counterpart | +| `sessions` | no | array | manifest of sessions in this file; validator warns on drift vs file content | +| `meta` | no | object | free-form vendor extensions (§8.3) | + +The envelope MUST NOT carry a `parent_id`. It is not part of the event graph. + +### 8.3 The `meta` extension convention + +The trail envelope (§8), the session header (§9), and every event entry (§10.1) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. 
Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-<vendor>/<name>` extension grammar (§12.1) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record (§7.4): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. + +For verbatim source-event preservation, use `source.raw` ([§10.1](#10-1-base-shape), [§10.7](#10-7-source-envelope-referencing), [§15.1](#15-1-source-raw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. + +This draft defines one standard event-entry `meta` key: `redaction_count` (§16). Other standard keys MAY be promoted in later minor bumps based on observed usage. + +### 8.4 The `sessions` manifest + +When `sessions` is present, the validator warns if the manifest disagrees with the file: + +- The manifest MUST list one entry per session group (§9.6) in file order. Each entry's `id` and `agent` MUST match the corresponding session header's `id` and `agent.name`. Length mismatch and per-entry drift both emit `envelope_sessions_manifest_drift` warnings — never errors, so renderers can still display the file. +- The manifest is an index/rendering hint only. It MUST NOT carry graph facts such as child-session role or follows edges; session headers are authoritative for lineage. + +### 8.5 File identity defaults when envelope is absent + +When no envelope is written, file-level identity defaults derive from the session: + +- File `id` = session `id`. +- File `name` is unset. +- The file-level content hash is unavailable; only the session content hash is meaningful.
+ diff --git a/spec/draft/09-the-session-header.md b/spec/draft/09-the-session-header.md new file mode 100644 index 0000000..f22988f --- /dev/null +++ b/spec/draft/09-the-session-header.md @@ -0,0 +1,248 @@ +## 9. The session header + +### 9.1 Schema + +```jsonc +{ + "type": "session", + "schema_version": "0.1.0", + "id": "", + "session_uid": "", // optional; stable across segments + "segment": { "seq": 1 }, // optional; multi-segment marker + "name": "", // optional + "description": "", // optional + "tags": ["feature", "debug"], // optional + "content_hash": "", // optional; populated at finalize + "ts": "", + "stream": { // optional; live-capture marker (§9.4) + "state": "open" | "closed", + "started_at": "" // optional + }, + "agent": { + "name": "", + "version": "", // optional + "model_default": "" // optional + }, + "cwd": "", // optional + "vcs": { // optional + "type": "git" | "jj" | "hg" | "svn" | "x-/", + "revision": "" | null, + "branch": "", // required when revision is null + "remote_url": "" // optional; see §9.2 + }, + "fork_from": { // optional + "session_id": "", + "content_hash": "", // optional + "entry_id": "" // optional + }, + "redacted_from": { // optional; redacted artifacts only + "content_hash": "" + }, + "parse_fidelity": { // optional; at-a-glance parse summary + "quarantined_count": 0, + "termination_reason": "truncated" // optional; when session_terminated exists + }, + "source": { // optional + "agent": "", + "path": "", + "format_version": "" + }, + "meta": { // optional; vendor extensions (§8.3 / §12) + "x-example/custom_field": "..." 
+ } +} +``` + +### 9.2 Fields + +| Field | Required | Type | Notes | +|---|---|---|---| +| `type` | yes | literal `"session"` | discriminator | +| `schema_version` | yes | string | currently `"0.1.0"` | +| `id` | yes | string | UUID or ULID per §7.1/§19 | +| `session_uid` | no (yes when `segment.seq >= 2`) | string | stable source-session identifier shared by all segments of one logical source session | +| `segment` | no | object | multi-segment marker; absent is equivalent to a single segment with `seq: 1` | +| `segment.seq` | yes (if `segment` present) | integer | 1-based segment sequence number | +| `segment.prev_content_hash` | yes when `segment.seq >= 2` | string \| null | previous segment's session-level `content_hash`; `null` marks an unverifiable chain break | +| `name` | no | string | human session label | +| `description` | no | string | free-text session description | +| `tags` | no | string[] | free-form session labels | +| `content_hash` | no | string | SHA-256 hex of this artifact; see §7.3 | +| `ts` | yes | string | ISO-8601 session start time; writers emit UTC `Z` with millisecond precision | +| `stream` | no | object | live-capture marker; see §9.4 | +| `agent.name` | yes | string | from the canonical registry (§14) | +| `agent.version` | no | string | source agent's version | +| `agent.model_default` | no | string | default model for the session | +| `cwd` | no | string | working directory; MAY be normalized for privacy | +| `vcs` | no | object | version control context at session time | +| `vcs.type` | yes (if `vcs` present) | enum or extension | `git`, `jj`, `hg`, `svn`, or `x-<vendor>/<name>` for non-reserved systems | +| `vcs.revision` | yes (if `vcs` present) | string \| null | commit SHA, change-id, revision identifier, or `null` for unborn HEAD repositories when `vcs.branch` is present | +| `vcs.remote_url` | no | string | canonical remote URL identifying the project across users, machines, and clones; see normalization rules below | +| `vcs.branch` | no (yes when `vcs.revision` is `null`) | string | active branch /
bookmark / topic name the session is running on (e.g., `feature/x`). Detached-HEAD sessions MAY omit. | +| `vcs.head_commit` | no (`vcs.revision` non-null only) | string | commit hash at session start (lowercase hex, 7–64 chars). For git with a committed HEAD, typically equals `vcs.revision`; the explicit field exists as a vcs-neutral alias. | +| `vcs.worktree` | no | object | worktree context when the session ran inside a working-tree clone or worktree (git worktree, jj workspace, etc.) | +| `vcs.worktree.name` | yes (if `vcs.worktree` present) | string | worktree short name | +| `vcs.worktree.path` | yes (if `vcs.worktree` present) | string | absolute path to the worktree | +| `vcs.worktree.original_cwd` | no | string | working directory of the parent repository at worktree-creation time | +| `vcs.worktree.original_branch` | no | string | branch the parent repository was on when the worktree was created | +| `vcs.worktree.original_head_commit` | no | string | commit the worktree was forked from (lowercase hex, 7–64 chars) | +| `fork_from` | no | object | reference to a parent session if forked | +| `redacted_from` | no | object | provenance link from a redacted artifact to the raw artifact hash | +| `parse_fidelity` | no | object | at-a-glance parse fidelity summary; absence means the writer did not provide a summary | +| `parse_fidelity.quarantined_count` | yes (if `parse_fidelity` present) | integer | number of `system_event` entries whose `payload.kind` is `x-*/unknown_record` in this session group | +| `parse_fidelity.termination_reason` | no | enum or extension | final `session_terminated.payload.reason`, when a `session_terminated` event is present | +| `source` | no | object | source-file metadata block (agent, path, format_version) | +| `meta` | no | object | vendor extensions; recommended keys use the `x-/` extension grammar (§8.3 / §12) | + +When `parse_fidelity` is present, validators MUST compare it against the session group's entries. 
`quarantined_count` MUST equal the count of quarantined unknown source records emitted as `system_event` entries with `payload.kind` matching `x-*/unknown_record`; see the §10.3 quarantine convention. `termination_reason`, when a `session_terminated` entry exists, MUST match the final `session_terminated.payload.reason`; if no `session_terminated` entry exists, writers MUST omit `termination_reason`. This field is denormalized for cheap listing/filtering only; the event stream remains authoritative. Quarantined records are suspect parse fidelity, not necessarily lossy, because the raw source record is preserved. + +`vcs.remote_url` provides a canonical project identifier that survives across users, machines, and clones — useful for cross-machine aggregation, profile filtering, and project-scoped analysis. Adapters that populate it: + +- MUST normalize SSH and HTTPS variants of the same repository to a single canonical form. The reference normalization maps `git@host:org/repo.git`, `ssh://git@host/org/repo.git`, and `https://host/org/repo.git` to `https://host/org/repo` (strip trailing `.git`, strip userinfo, rewrite SSH to HTTPS). +- MUST strip embedded credentials (`https://user:pass@host/...` → `https://host/...`) before emission. +- SHOULD populate when the source agent records repository location or when `cwd` is detectably a versioned working directory. When the source declares multiple remotes (e.g., git `origin` plus `upstream`), prefer `origin`. +- MUST omit the field when no remote is configured — do not fabricate one. +- For submodules and worktrees, emit the remote of the outermost working tree's toplevel; `cwd` and `vcs.revision` disambiguate within. + +Fresh repositories with an unborn HEAD MAY emit `vcs.revision:null` when a branch is known. A `vcs` block with `vcs.revision:null` MUST include `vcs.branch`, MUST omit `vcs.head_commit`, and writers MUST NOT emit an information-free VCS block. 
When `vcs.revision` is non-null for git, `vcs.head_commit` typically equals `vcs.revision`. + +Privacy: `remote_url` reveals repository identity and MAY identify a private repo. Redacted artifacts MAY strip or normalize it (§16). + +When a trail file carries both header-level `vcs` (session-time context) and envelope-level `vcs` (file-assembly-time context, §8), they represent different observation points. File-assembly tools SHOULD preserve both when present. For multi-segment reconciliation rules, see §9.5. + +### 9.3 Example + +```json +{"type":"session","schema_version":"0.1.0","id":"01HM7K5R9X2QZJ8VD6W4P3T1F0","content_hash":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","ts":"2026-05-17T14:02:00.000Z","agent":{"name":"claude-code","version":"2.1.42","model_default":"claude-sonnet-4-5"},"cwd":"","vcs":{"type":"git","revision":"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"}} +``` + +### 9.4 Streaming and live capture + +JSONL is append-friendly by design: trail files can be written event by event as a session unfolds, and readers can `tail -f` them. v0.1.x adds an explicit marker so writers and readers can agree on live-capture state without overloading other header fields. + +The optional header `stream` object: + +| Field | Required | Type | Notes | +|---|---|---|---| +| `stream.state` | yes (if `stream` present) | enum | `open` while the writer is actively appending; `closed` once finalized | +| `stream.started_at` | no | string | ISO-8601 timestamp when the stream began; matches the §9 `ts` semantics | + +Lifecycle: + +1. **Live phase.** Writer emits the header with `stream: { state: "open" }`. `content_hash` is omitted or set to `""`. Events are appended as they happen. +2. **Finalize.** Writer rewrites the header with `stream` either removed or set to `state: "closed"`, then computes `content_hash` per §7.3. Appending stops. +3. **Clean end.** Writer MAY append a `session_end` event (§10.3) to mark a normal conclusion before finalize. 
Abnormal ends still use `session_terminated`. + +Tail readers that observe `stream.state == "open"` SHOULD assume more events MAY arrive. Readers observing `stream` absent or `state == "closed"` SHOULD treat the file as a finalized artifact and verify `content_hash` when present. + +`stream` is absent in trail files produced by stream-unaware writers; readers MUST treat that case as equivalent to a finalized non-streaming artifact (existing v0.1.0 behavior). + +A live `system_event` heartbeat convention is described in §10.3. + +--- + +### 9.5 Session segments (multi-segment sessions) + +A single logical source session MAY be split across multiple trail-file artifacts — "segments" — when a long-running session is captured in chunks (e.g., a daemon writing periodically) or recovered after a writer is killed mid-session. The header carries three fields that let a reconciler group, order, and verify segment chains. All three are optional for single-segment trails, which can omit them; `session_uid` and `segment.prev_content_hash` become required once `segment.seq >= 2`, as described below. + +- `session_uid` — globally-unique source-session identifier. Stable across **all** segments of one source session. Reconcilers group segments by exact string equality on `session_uid`. Format: uppercase ULID (recommended, lexicographic time-prefix) or lowercase UUID (any RFC 4122 version, hyphenated or unhyphenated). Writers SHOULD emit `session_uid` even for single-segment trails, so a later segment can be reconciled against the first without rewriting the head. The schema enforces `session_uid` as REQUIRED when `segment.seq >= 2` (multi-segment continuation MUST be linkable). + +- `segment.seq` — 1-based integer identifying which segment of the session this file is. Single-segment trails MAY omit `segment` entirely, which is equivalent to `{seq: 1}`. + +- `segment.prev_content_hash` — the **session-level** `content_hash` (§7.3) of the previous segment's finalized bytes. Required when `seq >= 2`. Forms a verifiable chain (HLS / Postgres-WAL pattern).
If the previous segment was lost and the chain cannot be verified, writers MAY emit `null` and readers MUST emit a `segment_chain_break` warning. + +#### Segment reconciliation + +Segment reconciliation is implementation behavior. A conforming writer emits the +fields above; a conforming reader can validate each segment independently. Tools +that merge segments SHOULD preserve event order by `segment.seq`, verify +`segment.prev_content_hash` where present, deduplicate exact event `id` matches, +and emit a new finalized trail with freshly computed hashes. + +Implementation merge policy is documented in `docs/implementation-semantics.md`. + +Whole-file graph rules (§18) apply **within** a segment, not across. Cross-segment references are out of scope for v0.1 (event `parent_id` chains do not span segments). + +#### Writer guidance + +- Writers SHOULD generate `session_uid` once per source session and reuse it for every segment. +- Writers SHOULD finalize each segment normally before starting a new segment. +- To produce `segment.prev_content_hash` for segment N, finalize segment N-1 per §7.3 and copy its session-level `content_hash` verbatim into segment N's header. +- Recovered writers MAY emit `segment.prev_content_hash: null` when the previous segment is lost. + +#### Composition with multi-session files + +`session_uid` and `segment.*` sit at the **session-header** grain, not the file grain. A multi-session trail file (§9.6) MAY contain N session headers, each independently multi-segmentable. The trail envelope (§8) is unaffected. + +Within one file, two groups with the same `session_uid` SHOULD NOT claim the same normalized `segment.seq` value; a missing `segment` is equivalent to `seq: 1`. Duplicate pairs emit `duplicate_segment_seq` warnings. Groups for the same `session_uid` SHOULD appear in non-descending `segment.seq` order in file order; a descending sequence emits `out_of_order_segment_seq`. 
+ +--- + +### 9.6 Multi-session trail files + +A trail file MAY contain one or more `(session header, events*)` groups concatenated. Boundaries are positional: a group extends from a `type:"session"` record up to (but excluding) the next `type:"session"` record, or to EOF. Single-session trails are the N=1 case and are unchanged. + +A multi-session trail is a session bundle: a forest of session groups. Each group MAY be linear or tree-native. Branches represented inside one source session use `parent_id` within that group; separate spawned or forked transcripts use separate groups linked by `header.fork_from`. + +#### 9.6.1 File grammar + +```text +trail-file := envelope? group+ +envelope := <trail envelope record> on line 1 +group := <session header> events* +events := zero or more event records (§10) +``` + +The trail envelope (§8) remains optional even when N ≥ 2. When present with N ≥ 2 groups, the file-level `content_hash` on the envelope covers all N groups' already-stamped session hashes, applying the §7.4 two-pass procedure unchanged (every session hash stamped first; envelope hash stamped over the finalized record set). When absent, file-level identity defaults from §8.5 apply (no file-level `content_hash` is meaningful; only per-session hashes). + +#### 9.6.2 Group boundaries and reader-tolerant recovery + +Readers detect group boundaries by `type:"session"` alone. A record with `type:"session"` always opens a new group, regardless of `schema_version` value: this lets reader-tolerant parsers (§6) recover from a malformed mid-file header and continue parsing subsequent groups instead of treating the rest of the file as orphan events. The strict validator still errors on individual records that fail schema validation; recovery affects parsing structure, not per-record validity. + +Entries that appear before the first `type:"session"` record (and after any envelope) are not part of any group and are always invalid: `events_before_first_session_header`.
+ +#### 9.6.3 Per-group validation + +Whole-file graph rules (§18) apply **within** a group, not across: + +- `parent_id` resolution is scoped to the enclosing group. A `parent_id` that references an `id` in another group is treated as `unknown_parent_id` (cross-group references go through `fork_from`, not `parent_id`). +- `tool_call` / `tool_result` pairing (§10.5) runs per group. An unmatched `tool_call` in group A is not satisfied by a `tool_result` in group B. +- `session_end.payload.final_message_id`, `source.raw.envelope_ref`, `payload.usage` checks, and the `stream` consistency rule each run per group. + +Event `id` uniqueness (§7.5) remains **file-scoped**: every `id` (across every group's header and events) MUST be unique within the file. + +#### 9.6.4 Per-group `content_hash` + +Each group's session-level `content_hash` is computed over the canonical bytes of that group's slice only (header + its events, envelope and sibling groups excluded). This is the same procedure as §7.3 / §7.4 applied to the slice. As a consequence, extracting one session from a multi-session file (drop the envelope, drop sibling groups, write only that group's canonical bytes) reproduces the same digest as the in-file value. + +When a reader extracts a single session from a multi-session file outside writer-strict validation and the recomputed `content_hash` does not match the value stored in the in-file header, it SHOULD emit a warning rather than an error. Strict validation of a finalized trail file still treats an in-place finalized `content_hash` mismatch as an error (§18.4). + +#### 9.6.5 Cross-group references + +The only sanctioned cross-group reference primitive is the session header's `fork_from`: + +- `fork_from.session_id` MAY reference a sibling session within the same file or an external session. 
+- When `fork_from.session_id` matches a sibling's `id` in the same file and `fork_from.content_hash` is also present, the hash MUST match that sibling's session-level `content_hash`. Mismatch is a `cross_group_fork_from_hash_mismatch` warning. +- External references (`session_id` not matched in-file) are not validated here; if the referenced session's bytes are available, callers MAY verify the hash through their own resolver. + +`parent_id` is event-graph topology only and MUST NOT span groups. + +#### 9.6.6 Order, divergence, and per-session metadata + +- Sessions in a file SHOULD appear in chronological order by header `ts`. Out-of-order placement emits `out_of_order_session_headers` (warning, not error). +- Per-session `cwd` and `vcs` MAY diverge across sessions in the same file. Divergent `vcs.revision` across groups emits `vcs_revision_divergence` (warning, not error) — useful for spotting accidental cross-checkout bundling. +- `schema_version` is carried on every session header. Sessions in the same file are independently versioned (reader-tolerant patch acceptance per §6 applies per-header). +- Empty groups (a header with zero events) are legal — they represent "session started, nothing happened." + +#### 9.6.7 Redaction of multi-session files + +Redacting a multi-session trail produces a multi-session redacted trail with the same group count in the same order, redacted in place. The redactor resets `content_hash` to `""` on every session header (and on the envelope when present) before share/transport tooling re-stamps via the two-pass §7.4 procedure. + +When redaction changes bytes, lineage hashes that point to artifacts in the same redacted file MUST be rewritten to the target's redacted content hash, using the §7.4.1 hash tier. Header-level `fork_from.content_hash` is rewritten when `fork_from.session_id` names an in-file sibling. `segment.prev_content_hash` is rewritten when the previous `segment.seq` for the same `session_uid` is in the file.
When the lineage target is not in the redacted file, redactors MUST drop `fork_from.content_hash` while keeping id references, and MUST set `segment.prev_content_hash` to `null` for an unverifiable previous segment. `redacted_from.content_hash` remains raw-artifact provenance: header-level `redacted_from.content_hash` links the redacted session to its raw counterpart; envelope-level `redacted_from.content_hash` links the redacted file to its raw counterpart. + +#### 9.6.8 No hard cap + +This spec does not impose a maximum on the number of session groups per file. Consumers MAY apply their own limits. + +--- + diff --git a/spec/draft/10-events.md b/spec/draft/10-events.md new file mode 100644 index 0000000..fbba318 --- /dev/null +++ b/spec/draft/10-events.md @@ -0,0 +1,916 @@ +## 10. Events + +### 10.1 Base shape + +Every event entry has this base shape: + +```jsonc +{ + "type": "", + "id": "", + "parent_id": "", // optional; tree topology only + "ts": "", + "payload": { /* type-specific */ }, + "semantic": { // optional; see §10.4 + "group_id": "", + "call_id": "", + "tool_kind": "" + }, + "source": { // optional + "agent": "", + "original_type": "", + "schema_version": "", + "raw": { /* opaque source object; see §10.6 and §15 */ }, + "synthesized": false + }, + "meta": { // optional; vendor extensions (§8.3 / §12) + "x-example/field": "..." 
+ } +} +``` + +| Field | Required | Type | Notes | +|---|---|---|---| +| `type` | yes | string | event type; see §10.2-10.3 | +| `id` | yes | string | globally unique; ULID or UUID per §19 | +| `parent_id` | no | string | references another `id` for tree topology; absent = linear file order | +| `ts` | yes | string | ISO-8601 timestamp | +| `payload` | yes | object | type-specific data | +| `semantic` | no | object | linking metadata for fallback pairing | +| `source` | no | object | adapter-provided source metadata | +| `meta` | no | object | vendor extensions (§8.3 / §12) | + +### 10.2 Mandatory event types + +Every adapter MUST be able to emit these when the source data contains the corresponding semantics. Readers MUST support them. + +#### `user_message` + +A user-role message. By default this is text typed by the human user; `payload.origin` marks runtime-injected or mixed user-role content. + +```jsonc +{ + "type": "user_message", + "id": "...", + "ts": "...", + "payload": { + "text": "How do I parse a CSV in Python?", + "attachments": [ + { "kind": "image", "media_type": "image/png", "uri": "" } + ] + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `text` | yes | string | the user's input | +| `origin` | no | enum or extension | `user`, `injected`, `mixed`, or `x-/`. Absent means `user`. | +| `attachments` | no | array | images or files by reference | + +`origin:"user"` means the text was typed by the human. `origin:"injected"` means runtime-injected content (system reminders, attached-file blobs, hook output) carried as a user-role message. `origin:"mixed"` means both human-authored and injected content appear in one body. Structured part-level decomposition is deferred. + +Attachment entries require `kind` plus at least one of `uri` or `name`. `uri` values in v0.1.0 are references, not inline binary payloads. 
Writers MAY use `https:`, local `file:` references for private/local trails, or content-addressed references such as `sha256:`. Plain `http:` is deliberately excluded to avoid unauthenticated network fetches in shared trails. Inline `data:` payloads are deferred. + +#### `agent_message` + +A text response from the agent. + +```jsonc +{ + "type": "agent_message", + "id": "...", + "ts": "...", + "payload": { + "text": "You can use pandas:", + "model": "claude-sonnet-4-5", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 1234, + "output_tokens": 567, + "cache_read_tokens": 100, + "cache_creation_tokens": 50, + "reasoning_tokens": 200, + "context_input_tokens": 1384, + "context_window_tokens": 200000 + } + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `text` | yes | string | the agent's output | +| `model` | no | string | model that produced this message | +| `stop_reason` | no | string | source-specific stop reason | +| `usage` | no | object | token usage for the source envelope; see below | +| `attachments` | no | array | agent-side images or files by reference (e.g. a generated chart or vision output); same object shape as `user_message.payload.attachments` | + +`stop_reason` is source-specific and remains an opaque string. Writers SHOULD use this RECOMMENDED vocabulary when it matches the source semantics: `end_turn`, `max_tokens`, `tool_use`, `refusal`, `error`, `aborted`. Source-specific values remain legal; readers MUST treat unknown values as opaque. + +`attachments[]` entries share one object shape across `user_message`, `agent_message`, and `tool_result` (`kind` ∈ `image`/`file`/`other`, optional `media_type`, and at least one of `uri` or `name`). The same v0.1.0 `uri` reference policy applies: `https:`, local `file:`, or content-addressed `sha256:`; inline `data:` payloads are deferred. + +##### `agent_message.payload.usage` + +Captures token accounting emitted by the source agent for a model-response envelope. 
Optional. When the source provides no token data, writers MUST omit `usage` — fabricating zeros is not allowed. + +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `input_tokens` | conditional | integer ≥0 | delta for this envelope | +| `output_tokens` | conditional | integer ≥0 | delta for this envelope | +| `input_tokens_cumulative` | conditional | integer ≥0 | running total through this envelope | +| `output_tokens_cumulative` | conditional | integer ≥0 | running total through this envelope | +| `total_tokens` | conditional | integer ≥0 | source-reported inclusive total for this envelope | +| `total_tokens_cumulative` | conditional | integer ≥0 | source-reported inclusive running total through this envelope | +| `cache_read_tokens` | no | integer ≥0 | input tokens served from prompt cache; billed separately from `input_tokens` | +| `cache_creation_tokens` | no | integer ≥0 | input tokens written to prompt cache; billed separately from `input_tokens` | +| `reasoning_tokens` | no | integer ≥0 | output reasoning portion (Anthropic thinking, OpenAI reasoning) | +| `context_input_tokens` | no | integer ≥0 | prompt/context tokens submitted to the model for this request; cache-inclusive when the source exposes enough detail | +| `context_window_tokens` | no | integer ≥1 | model context-window size for this request, only when the source exposes it | + +When `usage` is present, writers MUST emit either input/output coverage or total-token coverage. Input/output coverage means at least one of (`input_tokens`, `input_tokens_cumulative`) AND at least one of (`output_tokens`, `output_tokens_cumulative`). Total-token coverage means at least one of (`total_tokens`, `total_tokens_cumulative`). These shapes are supported because sources differ. Readers SHOULD prefer delta fields and fall back to subtracting consecutive cumulative values. 
+ +Total token semantics: `total_tokens` and `total_tokens_cumulative` are source-reported inclusive totals for exact total-token analytics. Writers MUST NOT fabricate total-token fields by summing buckets. Readers that need exact total counts SHOULD prefer `total_tokens`, fall back to deriving a delta from consecutive `total_tokens_cumulative` values, and only then fall back to summing known bucket fields. + +Cache token semantics: `input_tokens` counts non-cached input only; `cache_read_tokens` and `cache_creation_tokens` are independent billing categories. Total billed input = `input_tokens + cache_read_tokens + cache_creation_tokens`. They are additive, not a subset of `input_tokens`. + +Context token semantics are for context-pressure analytics, not billing. Writers MAY emit `context_input_tokens` when the source exposes prompt/context tokens for the request, including cache-read and cache-creation tokens when those count against the context window. Writers MAY emit `context_window_tokens` when the source reports the model's positive context-window size for the request. Writers MUST NOT estimate either field from raw text or tokenizer assumptions, and MUST NOT fabricate a `context_window_tokens` value from model name alone. Consumers derive context pressure as `context_input_tokens / context_window_tokens` when both fields are present; otherwise the ratio is unavailable. + +Model identification for downstream cost analysis uses `payload.model` first, falls back to `header.agent.model_default`, and is otherwise unknown. The `usage` object does not carry its own model field. + +When a single source envelope fans out to multiple entries (text blocks, tool calls, thinking blocks sharing one API response), `usage` accounts for the whole envelope. Writers MUST attach it to the first derived entry whose payload supports `usage`, skip non-usage-capable derived entries, and MUST NOT repeat it on later derived entries. 
In v0.1.0, `usage` is valid on `agent_message`, `agent_thinking`, and `tool_call` payloads; if an envelope emits none of those entries, canonical `usage` is omitted. + +Monetary cost is intentionally not a canonical trail field or event. Analyzers compute cost from token usage, model identification, and their own pricing tables, and carry pricing provenance such as currency, pricing source, and effective date in analyzer output. If a source exposes a billing estimate, writers MAY preserve it as opaque source data under `x-/` keys on the entry's `meta` field (§8.3). Latency and wall-clock telemetry are deferred to a future minor version; sources rarely expose them consistently. + +#### `task_plan_update` + +The agent emitted a checklist or plan snapshot. This is the canonical representation for structured planning state. Writers MUST NOT represent these snapshots as `tool_call.payload.tool:"task_plan"`. + +```jsonc +{ + "type": "task_plan_update", + "id": "...", + "ts": "...", + "payload": { + "explanation": "optional note", + "items": [ + { + "id": "item-1", + "content": "Write failing test", + "status": "in_progress", + "active_form": "Writing failing test" + } + ], + "deltas": [ + { + "kind": "status_changed", + "item_id": "item-1", + "from_status": "pending", + "to_status": "in_progress" + } + ] + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `explanation` | no | string | source-provided explanation for this plan update, when present | +| `items` | yes | array | full current snapshot of plan items | +| `deltas` | no | array | best-effort differences from the previous `task_plan_update` in the same source session | + +Each `items[]` entry has: + +| Item field | Required | Type | Notes | +|---|---|---|---| +| `id` | yes | string | upstream item id if present; otherwise a deterministic adapter-synthesized id | +| `content` | yes | string | human-readable task text | +| `status` | yes | string | one of `pending`, `in_progress`, 
`completed`, `cancelled`, `blocked` | +| `active_form` | no | string | source-provided active/progressive wording | + +When the upstream source does not provide item ids, or provides empty or whitespace-only strings, adapters SHOULD synthesize deterministic ids. Empty and whitespace-only item ids are treated as missing. The synthesized id is derived per source session from normalized content plus that content's duplicate occurrence position in the snapshot. With synthesized ids, status deltas are reliable when normalized content remains stable; content changes are best-effort because the source did not provide stable identity. + +`deltas[]` entries are optional. When present, each has `kind` and `item_id` plus fields determined by `kind`: + +| Delta kind | Required fields | +|---|---| +| `added` | `to_content`, `to_status` | +| `removed` | `from_content`, `from_status` | +| `status_changed` | `from_status`, `to_status` | +| `content_changed` | `from_content`, `to_content` | + +`added` MAY include `to_active_form`; `removed` MAY include `from_active_form`. Sources that only report plan-completed notifications with no item status snapshot SHOULD preserve them as `system_event` records instead of inventing checklist state. + +#### `tool_call` + +The agent invoked a tool. Tool kinds use the taxonomy in [§11](#11-canonical-tool-taxonomy). 
+ +```jsonc +{ + "type": "tool_call", + "id": "...", + "ts": "...", + "payload": { + "tool": "file_read", + "args": { "path": "package.json" }, + "truncated": false, + "args_size": 23, + "overflow_ref": null + }, + "semantic": { + "call_id": "toolu_01abc" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `tool` | yes | string | canonical tool kind ([§11](#11-canonical-tool-taxonomy)) | +| `args` | yes | object | tool-specific args | +| `truncated` | no | boolean | true when `args` is a bounded excerpt rather than complete tool arguments | +| `args_size` | conditional | integer | original serialized argument byte size; REQUIRED when `truncated: true` | +| `overflow_ref` | no | string or null | optional content-addressed reference to full argument bytes when `args` is truncated; writer-strict values use `sha256:<64 lowercase hex>` | +| `usage` | no | object | token usage when this is the first entry derived from a source envelope; see [`payload.usage`](#agent_messagepayloadusage) | + +#### `tool_result` + +The result of a `tool_call`. References the call via `for_id`. Writers omit `for_id` when the source does not provide a reliable match. Readers MAY tolerate legacy/null values; when `for_id` is null or missing, see [§10.5](#10-5-tool-call-terminal-pairing). 
+ +```jsonc +{ + "type": "tool_result", + "id": "...", + "ts": "...", + "payload": { + "for_id": "<tool_call id>", + "ok": true, + "output": "<tool output text>", + "truncated": false, + "output_size": 12345, + "overflow_ref": null, + "error": null + }, + "semantic": { + "call_id": "toolu_01abc", + "tool_kind": "file_read" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `for_id` | no | string | id of the matching `tool_call`; omit when unknown | +| `ok` | yes | boolean | did the call succeed | +| `output` | no | string | textual output | +| `truncated` | no | boolean | true if `output` was truncated | +| `output_size` | conditional | integer ≥0 | UTF-8 byte length of the original output before truncation; REQUIRED when `truncated` is true | +| `overflow_ref` | no | string or null | reference to full output | +| `error` | no | string or null | error message if `ok` is false | +| `attachments` | no | array | non-MCP image / multi-part tool output by reference (e.g. a screenshot or plot tool returning an image that `output` flattens); same object shape as `user_message.payload.attachments` | +| `meta` | no | object | structured per-toolkind outputs; see below | + +`attachments[]` on `tool_result` carries image or binary tool results that `output` (a display string) cannot represent — typically from `tool: "other"` kinds such as a screenshot or plotting tool. MCP tools instead preserve their native block structure in `meta.mcp_call.content_blocks` (below); `attachments[]` is the generic escape hatch for everything else. + +#### `tool_call_aborted` + +The agent attempted or began a tool invocation, but the invocation was cancelled, blocked, timed out, denied, or otherwise stopped without a normal `tool_result`. Use this instead of inventing a failed `tool_result` when the source evidence says no result was produced. 
+ +```jsonc +{ + "type": "tool_call_aborted", + "id": "...", + "ts": "...", + "payload": { + "scope": "tool_call", + "reason": "hook_blocked", + "for_id": "<tool_call id>", + "blocked_by": "PreToolUse:Bash" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `scope` | yes | enum or extension | `tool_call` when a specific call is known; `turn` when the source only proves a turn-level abort. Extensions MUST use `x-<vendor>/<name>`. | +| `reason` | yes | enum or extension | One of `user_interrupt`, `hook_blocked`, `timeout`, `permission_denied`, `runtime_error`, or `x-<vendor>/<name>`. | +| `for_id` | when `scope:"tool_call"` | string | id of the matching `tool_call`; omitted for `scope:"turn"` and other non-call-specific scopes. | +| `blocked_by` | no | string | hook, policy, permission system, or runtime component that stopped the call. | + +Bare unknown `scope` and `reason` values are writer-strict errors. Readers are tolerant of unknown `x-*` extension values. + +##### `tool_result.payload.meta` — structured outputs + +`output` is a display string. When the source tool returned structured data, writers MAY also +populate `meta`, an object keyed by the originating `tool_call.tool` (the canonical tool kind, [§11](#11-canonical-tool-taxonomy)). +Consumers that understand a kind read `meta.<tool_kind>`; everyone else falls back to `output`. `meta` +is optional and additive — existing writers that emit only `output` stay valid. + +Registered keys are writer-strict (unknown fields inside a registered shape are rejected). Vendors +extend a registered tool kind by adding sibling keys to its object that match the `x-<vendor>/<name>` +pattern (e.g. `meta.mcp_call.x-acme/cache_hit`). Unregistered and future tool kinds are accepted as +opaque objects, so new kinds can be standardized in a later minor version without a schema migration. + +The v0.1 registry covers three tool kinds: + +`meta.mcp_call` — preserves MCP content-block structure that `output` flattens. 
+ +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `content_blocks` | no | array | MCP content blocks; each block has `type` (`text`/`image`/`resource`) plus `text`/`data`/`mime_type`/`uri` as applicable | +| `is_error` | no | boolean | MCP-protocol error flag. Distinct from envelope `payload.ok`: `is_error` is the tool's own success signal, `ok` is the trail-level call outcome | + +`meta.file_read` — read range and truncation metadata. + +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `range` | no | array | `[start_line, end_line]` requested | +| `total_lines` | no | integer ≥0 | total lines in the file | +| `encoding` | no | string | detected/used encoding | +| `truncated_at_line` | no | integer ≥0 \| null | line where output was cut, or null if untruncated | + +`meta.shell_command` — separated streams and exit status. + +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `stdout` | no | string | standard output stream | +| `stderr` | no | string | standard error stream | +| `exit_code` | no | integer \| null | process exit code; null when terminated by signal | +| `signal` | no | string \| null | terminating signal (e.g. `SIGKILL`), or null | +| `duration_ms` | no | integer ≥0 | wall-clock duration | + +`meta.shell_command.exit_code` is the canonical home for shell exit status; there is no generic +top-level `exit_code` on `tool_result`, because the concept does not apply to kinds like `mcp_call` +or `web_fetch`. + +Privacy: `meta` carries the same raw content as `output` (shell stdout, MCP block text), so the +redaction pipeline scrubs `meta` string leaves alongside `output` (§16). + +#### `user_query` + +The agent asks the user one or more structured questions and yields control until the user answers or dismisses the prompt. This is not a `tool_call`: no external tool executes. 
+ +```jsonc +{ + "type": "user_query", + "id": "...", + "ts": "...", + "payload": { + "questions": [ + { + "id": "ship", + "header": "Ship", + "question": "Ship it?", + "multi_select": false, + "is_secret": false, + "allow_other": true, + "options": [ + { "label": "yes", "description": "Ship now" }, + { "label": "no" } + ] + } + ] + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `questions` | yes | array | One or more structured questions. | + +| Question field | Required | Type | Notes | +|---|---|---|---| +| `id` | yes | string | Stable within this `user_query`; responses key answers by this value. | +| `question` | yes | string | Full prompt shown to the user. | +| `header` | no | string | Short label/chip. | +| `multi_select` | no | boolean | True when the user MAY select multiple options. Omitted means false. | +| `is_secret` | no | boolean | True when answers SHOULD be hidden and stripped by redaction. Omitted means false. | +| `allow_other` | no | boolean | True when free-form input beyond listed options is allowed. Omitted means false. | +| `options` | no | array | Option objects with REQUIRED `label`, optional stable `id`, and optional `description`. | + +#### `user_query_response` + +The user's response to a `user_query`. `payload.for_id` links to the query entry id. A dismissed prompt emits a response with an empty `answers` object. + +```jsonc +{ + "type": "user_query_response", + "id": "...", + "ts": "...", + "payload": { + "for_id": "", + "answers": { + "ship": { + "selected": ["yes"], + "other": "with changelog" + } + } + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `for_id` | yes | string | Entry id of the `user_query`. | +| `answers` | yes | object | Keys are `questions[].id`. May be empty for dismissed/unanswered prompts. 
| + +| Answer field | Required | Type | Notes | +|---|---|---|---| +| `selected` | yes | string[] | Selected option ids when that question's options carry ids, otherwise selected option labels. Use one value for single-select answers. | +| `other` | no | string | Free-form answer when `allow_other` was used. | + +Privacy: share-time redaction MUST strip answers for questions whose `is_secret` is true, regardless of pattern matching. + +#### `session_summary` + +A summary entry. Used for whole-session summaries. Branch and compaction summaries use `branch_summary` and `context_compact`. + +```jsonc +{ + "type": "session_summary", + "id": "...", + "ts": "...", + "payload": { + "scope": "session", + "text": "", + "model": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `scope` | yes | enum | `session` | +| `text` | yes | string | the summary | +| `model` | no | string | model that produced the summary | + +Multiple `session_summary` entries are allowed. The last one in file order is authoritative; position is unconstrained. + +### 10.3 Optional event types + +Part of the canonical vocabulary. Adapters need not emit them. Readers MUST tolerate them either way. + +#### `session_metadata_update` + +Post-creation update to logical session metadata. The session header carries the base value when it is known at write time; consumers that need effective session metadata start with the header value and then replay these events in file order, with the last update to a field winning. The header remains as-written, and the event is part of normal session content that contributes to the session-level `content_hash`. 
+ +```jsonc +{ + "type": "session_metadata_update", + "id": "...", + "ts": "...", + "payload": { + "field": "name", + "value": "Implement metadata updates", + "previous_value": "Old title", + "reason": "ai_generated" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `field` | yes | enum or extension | One of `name`, `description`, `tags`, `agent.model_default`, `vcs.branch`, `vcs.worktree`, or `x-<vendor>/<name>`. | +| `value` | yes | field-specific | Replacement value. Must match the field type: string for `name`/`description`/`agent.model_default`/`vcs.branch`, string array for `tags`, and the §9.2 worktree shape for `vcs.worktree`. Extension fields MAY carry any JSON value. | +| `previous_value` | no | field-specific | Prior value when the adapter knows it. Same type as `value`. | +| `reason` | yes | enum or extension | `ai_generated`, `user_set`, `runtime_inferred`, `external`, or `x-<vendor>/<name>`. | + +Writers MUST NOT use this event for immutable identity or cryptographic fields such as `id`, `session_uid`, `content_hash`, `redacted_from`, `vcs.revision`, or `vcs.head_commit`. Working-directory changes remain `system_event.kind:"cwd_change"`. + +#### `system_event` + +A meaningful source timeline record that is not a user message, agent message, tool call, tool result, summary, or known lifecycle event. Use this for source status/progress/bookkeeping records that SHOULD remain visible in a timeline. Do not use it as a dumping ground for high-volume internal state or records that map cleanly to a more specific canonical event. + +```jsonc +{ + "type": "system_event", + "id": "...", + "ts": "...", + "payload": { + "kind": "hook_fired", + "text": "Hook progress: PreToolUse", + "data": { "hook": "PreToolUse" } + } +} +``` + +`kind` is REQUIRED and writer-strict. It MUST be either one of the reserved cross-agent values below, or a vendor-namespaced extension of the form `x-<vendor>/<name>`. Bare unknown strings are rejected by writer-strict validation. 
Readers are tolerant of unknown `x-*` kinds and pass them through. `data` is curated structured metadata for rendering and search, not a replacement for `source.raw`. + +`context_compact`, `user_interrupt`, `model_change`, `mode_change`, `thinking_level_change`, and `session_end` are first-class record types ([§10.3](#10-3-optional-event-types)). Do not duplicate them under `system_event.kind`. + +##### Reserved lifecycle vocabulary + +| `kind` | When to use | +| --- | --- | +| `session_start` | Explicit mid-stream session-start marker (header already covers, useful for tooling that splits on events). | +| `turn_start` | User prompt accepted, agent begins work. | +| `turn_end` | Agent finishes a turn. | +| `subagent_start` | A spawned subagent begins. | +| `subagent_end` | A spawned subagent returns. | +| `pre_tool_use` | Tool about to fire (hook intercept point). | +| `post_tool_use` | Tool finished. | +| `hook_fired` | Generic adapter-emitted hook trace. | +| `permission_request` | Agent asked the user for tool approval. | +| `permission_decision` | User allowed/denied a specific tool invocation. | +| `cwd_change` | Working directory shifted. | +| `env_snapshot` | Shell/env state capture. | + +##### Reserved source-signal vocabulary + +| `kind` | When to use | Suggested `data` shape | +| --- | --- | --- | +| `task_started` | Source emits a structured task/step begin marker. | `{ task_id, title? }` | +| `task_completed` | Pair to `task_started`. May be synthesized at EOF for unclosed tasks (set `source.synthesized: true`). | `{ task_id, summary?, status? }` | +| `plan_completed` | Source emits a plan or todo completion marker without a full plan snapshot. | `{ plan_id, preview? }` | +| `turn_aborted` | Model or system stopped a turn for non-user reasons (length limit, refusal, error) with no tool in flight. Distinct from `user_interrupt`. | `{ reason }` | +| `tool_decision` | Source recorded a user approve/reject decision on a tool call. 
| `{ decision, tool_call_id }` | +| `context_injected` | Runtime injected standalone context that SHOULD remain visible outside a `user_message`. | `{ source_kind, name?, size_bytes? }` | +| `hook_progress` | Catch-all for source-emitted progress/hook/queue records that do not map to a more specific reserved lifecycle kind. Adapters SHOULD prefer `session_start` / `turn_end` / `pre_tool_use` / `post_tool_use` / `subagent_end` / `hook_fired` when the source signal is unambiguous, and fall back to `hook_progress` only for unrecognised progress streams. | `{ hook_event?, hook_name?, ... }` | +| `queue_operation` | Source recorded an enqueue or dequeue operation. | Free-form. | +| `heartbeat` | Periodic liveness ping during streaming capture (§9.4). Optional. Non-normative; readers MAY treat as informational. | `{ interval_ms? }` | +| `vcs_commit` | Adapter detected a VCS commit created during the session. | `{ sha, tool_call_id, branch?, message?, repo? }` | + +Use `tool_call_aborted{scope:"turn"}` for stops in a tool-invocation context where no specific call is identifiable. Use `system_event.kind:"turn_aborted"` for model/system-level turn stops with no tool in flight. + +##### Reserved diagnostic vocabulary + +Cross-agent diagnostic signals. Adapters MAY emit these to surface non-fatal errors, warnings, deprecations, routing decisions, and hook failures in the timeline. Out of scope: per-tool errors (those stay on `tool_result.error` + `tool_result.ok=false`). + +| `kind` | When to use | Suggested `data` shape | +| --- | --- | --- | +| `agent_error` | Agent-side error not tied to a specific tool call. | `{ severity?, code?, category?, blocking?, recovered?, source?, details? }` | +| `agent_warning` | Non-fatal agent-side warning. | `{ severity?, code?, category?, blocking?, recovered?, source?, details? }` | +| `api_error` | Upstream LLM/API failure surfaced to the user. | `{ severity?, code?, category?, source?, details? 
}` | +| `stream_error` | Streaming response interrupted or failed. | `{ severity?, code?, recovered?, details? }` | +| `deprecation_notice` | Source announced a feature or capability deprecation. | `{ feature?, replacement?, details? }` | +| `guardian_alert` | Safety rail, guardian system, or content moderation triggered. | `{ severity?, policy?, action?, details? }` | +| `model_rerouted` | Model fallback or capability re-routing decision. | `{ from?, to?, reason?, details? }` | +| `hook_failed` | Runtime hook execution failed, blocking or non-blocking. | `{ severity?, blocking?, hook_name?, code?, details? }` | + +**Severity vocabulary (informative).** When adapters include `data.severity`, recommended values are `info`, `warning`, `error`, `critical`. Not schema-enforced; readers SHOULD treat unknown severities as opaque. + +**Source vocabulary (informative).** When `data.source` is present, common values include `anthropic`, `openai`, `hook`, `guardian`, `runtime`. Free-form at the schema layer. + +##### Recommended `payload.data` shapes (permission kinds) + +`data` stays free-form at the schema layer. Adapters SHOULD use the shapes below so cross-agent consumers can render permission flow without per-adapter switches. Promote to schema-enforced once 2+ adapters converge. + +| `kind` | Recommended `data` | +| --- | --- | +| `permission_request` | `{ tool_call_id?: string, capability?: string, prompt?: string }` | +| `permission_decision` | `{ decision: "allow" \| "deny", tool_call_id?: string, capability?: string }` | + +##### Extension policy and promotion + +- Reserved values above are the only bare strings allowed by writer-strict validation. +- Anything else MUST use the `x-<vendor>/<name>` form, e.g. `x-claudecode/notification`. +- Readers are tolerant of unknown `x-*` kinds — they pass through with no diagnostic. +- Bare unknown strings (no `x-` prefix, not in the reserved set) are rejected by writer-strict validation. 
+- Adapters quarantining an unparseable source record MUST emit `system_event` with `kind:"x-<vendor>/unknown_record"` and preserve the record in `source.raw`; `parse_fidelity.quarantined_count` counts this pattern (§9.2). +- If an `x-*` kind proves cross-agent, promote it to the reserved enum in a minor format version bump. Document emitted kinds per adapter in `docs/parser-source-matrix.md`. + +#### `capability_change` + +A change in the set of capabilities available to the agent at a point in the session. Use this for tool, skill, plugin, MCP server, and MCP tool registry snapshots/deltas. This records availability changes, not tool invocations; calls still use `tool_call` / `tool_result`. + +```jsonc +{ + "type": "capability_change", + "id": "...", + "ts": "...", + "payload": { + "scope": "tool", + "reason": "registered", + "added": [{ "name": "Search", "metadata": { "namespace": "example" } }] + } +} +``` + +| Payload field | Required | Type | Notes | +| --- | --- | --- | --- | +| `scope` | yes | enum or extension | `tool` \| `skill` \| `mcp_server` \| `mcp_tool` \| `plugin` \| `x-<vendor>/<name>` | +| `reason` | yes | enum or extension | `initial` \| `registered` \| `deregistered` \| `connected` \| `disconnected` \| `loaded` \| `unloaded` \| `error` \| `instructions_updated` \| `x-<vendor>/<name>` | +| `added` | no | array | Non-empty array of `{ name, metadata? }`. | +| `removed` | no | array | Non-empty array of `{ name }`. | +| `changed` | no | array | Non-empty array of `{ name, field, from?, to? }`. | +| `snapshot` | no | array | Non-empty array of `{ name, metadata? }`; replaces accumulated state for this `scope` at this point. | + +Writer-strict validation requires at least one of `added`, `removed`, `changed`, or `snapshot`. + +Out of scope: full tool input/output schemas; they are static registry data and can be large or sensitive. Writers SHOULD keep only compact identifying metadata in `metadata`. 
+ +#### `command_invoke` + +A named capability invoked with optional arguments: a user-typed slash command, a built-in CLI affordance, a skill activation, a user-defined prompt template, or a plugin command. These surfaces share the "named capability invoked" semantic but vary along two orthogonal axes — `kind` records *what* was invoked, `via` records *how* it reached the agent. Without this event they leak as `user_message.text="/foo"`, `tool_call.tool=other` with `args.name="Skill"`, or get dropped. + +```jsonc +{ + "type": "command_invoke", + "id": "...", + "ts": "...", + "payload": { + "name": "/code-review", + "kind": "custom_prompt", + "via": "user_typed", + "args": { "target": "HEAD" }, + "expansion_text": "Review the diff against main.", + "result_action": "expand" + } +} +``` + +| Payload field | Required | Type | Notes | +| --- | --- | --- | --- | +| `name` | yes | string | User-visible identifier. Leading slash for slash/builtin/custom_prompt (`/clear`); bare name for skills (`webapp-testing`). | +| `kind` | yes | enum or extension | `slash` \| `builtin` \| `skill` \| `custom_prompt` \| `plugin` \| `x-/`. What kind of capability was invoked. | +| `via` | yes | enum or extension | `user_typed` \| `auto_trigger` \| `agent_invoked` \| `x-/`. How the invocation reached the agent. | +| `args` | no | object | Free-form invocation arguments. | +| `expansion_text` | no | string | Post-expansion prompt text the agent saw (for prompt-template commands). | +| `result_action` | no | string \| null | What the runtime did with it. Reserved value, `x-/` extension, or null. | + +`kind` discriminates the capability: skill activation → `skill`, built-in command → `builtin`, user-defined prompt template → `custom_prompt`, generic slash command → `slash`, extension/plugin command → `plugin`. + +`via=auto_trigger` covers description-matched skill activation with no user action. 
Adapters MAY synthesize it when they observe a skill load without a corresponding `Skill` tool call; set `source.synthesized: true` in that case. + +`result_action` helps analyzers correlate to subsequent `context_compact` or session resets without inferring from content. Reserved values: + +| `result_action` | When to use | +| --- | --- | +| `compact` | Invocation triggered a context compaction (`/compact`). | +| `clear` | Invocation reset the session (`/clear`). | +| `expand` | Prompt-template command expanded into agent input. | +| `load_skill` | A skill was loaded into context. | +| `noop` | Runtime accepted the command with no observable state change. | + +Beyond these, `result_action` accepts a vendor-namespaced extension of the form `x-/`, or `null`. Bare unknown strings are rejected by writer-strict validation; readers are tolerant of unknown `x-*` values. + +Out of scope: skill *contents* (static config, not session history); MCP server tools (covered by `tool_call.tool=mcp_call`); permission gates (covered by `system_event.kind=permission_request/decision`). + +#### `agent_thinking` + +Chain-of-thought or reasoning block. + +```jsonc +{ + "type": "agent_thinking", + "id": "...", + "ts": "...", + "payload": { "text": "...", "model": "...", "level": "medium" } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `text` | yes | string | reasoning content exposed by the source | +| `model` | no | string | model that produced this thinking block | +| `level` | no | string | non-empty source-defined string; readers MUST treat unknown level tokens as opaque | +| `usage` | no | object | token usage when this is the first entry derived from a source envelope; see [`payload.usage`](#agent_messagepayloadusage) | + +#### `user_interrupt` + +User interrupted an in-progress agent response. 
+ +```jsonc +{ + "type": "user_interrupt", + "id": "...", + "ts": "...", + "payload": { "reason": "<optional reason>" } +} +``` + +#### `context_compact` + +The session was compacted to free up context-window space. + +```jsonc +{ + "type": "context_compact", + "id": "...", + "ts": "...", + "payload": { + "summary": "<compaction summary>", + "trigger": "auto", + "tokens_before": 12000, + "tokens_after": 4000, + "replaced_message_ids": ["<entry id>", "<entry id>"] + } +} +``` + +`trigger`: `manual` | `auto` | `x-<vendor>/<name>`. + +`replaced_message_ids`: optional Agent Trail entry IDs folded or replaced by this +compaction summary, in source order. These IDs are provenance-only; readers MUST +validate their ID shape but MUST NOT require them to resolve to entries present in +the same trail file. + +#### `branch_point` + +Marks where a branch was made. + +```jsonc +{ + "type": "branch_point", + "id": "...", + "ts": "...", + "payload": { + "from_id": "<entry id>", + "reason": "<optional reason>" + } +} +``` + +#### `branch_summary` + +A summary of an abandoned branch, attached to the active branch. + +```jsonc +{ + "type": "branch_summary", + "id": "...", + "ts": "...", + "payload": { + "abandoned_branch_id": "<branch id>", + "summary": "<branch summary>", + "model": "..." + } +} +``` + +#### `model_change` + +Active model changed mid-session. 
+ +```jsonc +{ + "type": "model_change", + "id": "...", + "ts": "...", + "payload": { + "from_model": "", + "to_model": "", + "trigger": "runtime_inferred", + "turn_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `from_model` | no | string | previous model id; omit when the source did not track the prior model | +| `to_model` | yes | string | new active model id | +| `from_provider` | no | string | previous model provider when known | +| `to_provider` | no | string | new model provider when known | +| `reason` | no | string | source-provided or adapter-inferred reason | +| `trigger` | no | enum or extension | `initial`, `user_set`, `agent_set`, `runtime_inferred`, `auto_reroute`, `external`, or `x-<vendor>/<name>` | +| `turn_id` | no | string | source turn id associated with the observation | + +#### `mode_change` + +Active runtime mode changed or was first observed. Use this for common mode axes such as collaboration mode (`plan`, `auto`), permission mode, execution/sandbox mode, or UI mode. Per-tool approval still uses `system_event.kind:"permission_request"` / `"permission_decision"`. 
+ +```jsonc +{ + "type": "mode_change", + "id": "...", + "ts": "...", + "payload": { + "scope": "permission", + "from_mode": "default", + "to_mode": "acceptEdits", + "trigger": "runtime_inferred", + "turn_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `scope` | yes | enum or extension | `collaboration`, `permission`, `execution`, `ui`, or `x-<vendor>/<name>` | +| `from_mode` | no | string | previous mode token | +| `to_mode` | yes | string | new or initially observed mode token | +| `reason` | no | string | source-provided or adapter-inferred reason | +| `trigger` | no | enum or extension | `initial`, `user_set`, `agent_set`, `runtime_inferred`, `auto_reroute`, `external`, or `x-<vendor>/<name>` | +| `turn_id` | no | string | source turn id associated with the observation | +| `data` | no | object | curated adapter metadata for this mode axis | + +#### `thinking_level_change` + +Active reasoning/thinking level changed or was first observed. This records the selected thinking budget/effort level, not the model's private chain of thought. Reasoning text remains `agent_thinking`. + +```jsonc +{ + "type": "thinking_level_change", + "id": "...", + "ts": "...", + "payload": { + "from_level": "medium", + "to_level": "high", + "trigger": "runtime_inferred", + "turn_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `from_level` | no | string | previous thinking-level token | +| `to_level` | yes | string | new or initially observed thinking-level token | +| `reason` | no | string | source-provided or adapter-inferred reason | +| `trigger` | no | enum or extension | `initial`, `user_set`, `agent_set`, `runtime_inferred`, `auto_reroute`, `external`, or `x-<vendor>/<name>` | +| `turn_id` | no | string | source turn id associated with the observation | +| `data` | no | object | curated adapter metadata for this level axis | + +Recommended thinking-level vocabulary is `none`, `low`, `medium`, `high`, and `xhigh`. 
This vocabulary is not schema-enforced; source-defined tokens remain valid and opaque to readers. + +#### `session_terminated` + +Marks an incomplete session ending. Adapters MAY emit this synthetically at EOF when the source file ends with unmatched `tool_call` events (process killed mid-execution, file truncated, etc.). + +```jsonc +{ + "type": "session_terminated", + "id": "...", + "ts": "...", + "payload": { + "reason": "eof_with_open_tool_calls", + "open_call_ids": ["", ""] + }, + "source": { "synthesized": true } +} +``` + +`reason`: `eof_with_open_tool_calls` | `process_terminated` | `truncated` | `user_abort` | `x-<vendor>/<name>`. + +Synthesized instances MUST set `source.synthesized: true`. + +#### `session_end` + +Clean terminal marker. Distinct from `session_terminated` (abnormal). Optional; many writers won't emit it. When present at EOF, signals a normal conclusion of the session and suppresses the "unmatched tool calls at EOF" warning of §18.4. + +```jsonc +{ + "type": "session_end", + "id": "...", + "ts": "...", + "payload": { + "reason": "complete", + "final_message_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `reason` | yes | enum or extension | `complete` \| `user_quit` \| `agent_idle` \| `x-<vendor>/<name>` | +| `final_message_id` | no | string | optional reference to the last meaningful event | + +### 10.4 Semantic linking + +The `semantic` block on an event provides linking metadata when explicit `id` / `parent_id` / `for_id` references are unreliable (source has missing or null IDs). + +| Field | Type | Purpose | +|---|---|---| +| `semantic.group_id` | string | Groups events that belong to one logical unit. | +| `semantic.call_id` | string | Source format's native ID for a tool call. Used as fallback pairing key. | +| `semantic.tool_kind` | string | Canonical tool kind. Useful on `tool_result` events that don't carry it directly. 
| + +Writers SHOULD populate `semantic.call_id` on tool_call/tool_result pairs when the source has reliable native call IDs that are not Agent Trail entry IDs. + +### 10.5 Tool call terminal pairing + +`tool_result.payload.for_id` and `tool_call_aborted.payload.for_id` SHOULD reference the matching `tool_call`. Writers SHOULD populate `tool_result.payload.for_id` or `semantic.call_id` when the source records concurrent (overlapping) tool calls, and SHOULD populate one of them for every result. A `tool_call_aborted` only closes a call when `payload.scope == "tool_call"` and `payload.for_id` resolves to a `tool_call`; turn-level aborts do not close any specific call. + +When `tool_result.payload.for_id` is null, missing, or refers to a non-existent event, readers use these fallback rules in order: + +1. **Semantic match.** If both events have `semantic.call_id` and they're equal, pair them. +2. **Sequential match.** Pair the `tool_result` with the most recent prior unmatched `tool_call` in the same branch scope. Sequential fallback considers only calls in the same nearest `parent_id` ancestry as the result, so an inline subagent subtree cannot capture a parent timeline result and a parent timeline result cannot capture a child subtree call. Linear sessions without `parent_id` are unchanged. +3. **Heuristic match.** Readers MAY use further heuristics (timestamp proximity, payload shape) but MUST flag the pairing as uncertain in rendered output. + +Writers SHOULD avoid relying on fallbacks. Populate `for_id` when reliable; use `semantic.call_id` when the source's native ID doesn't map cleanly to event `id`. Do not use semantic or sequential fallback pairing for `tool_call_aborted`; if a source cannot identify the call, emit `scope:"turn"` without `for_id`. 
+ +Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning (§18.4): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. + +### 10.6 Unknown event types + +Readers MUST tolerate unknown types: + +- Preserve them when round-tripping. +- Render with a generic fallback. +- Do not abort parsing. + +Writers MUST NOT invent new top-level event types in v0.1 writer-strict output. Use the `other` tool kind ([§11](#11-canonical-tool-taxonomy)) or `source.raw` ([§10.1](#10-1-base-shape), [§15.1](#15-1-source-raw-elision-and-redaction)) for adapter-specific data, or `meta` ([§8.3](#8-3-the-meta-extension-convention) / [§12](#12-vendor-extensions)) for vendor extensions. Reader-tolerant parsing MAY preserve unknown future event types at runtime; this tolerance is not part of the writer schema. + +### 10.7 Source envelope referencing + +When a single source envelope produces multiple entries — for example, an assistant message envelope whose `content` array is split across one `agent_message`, one `agent_thinking`, and one `tool_call` entry — writers SHOULD NOT inline the full envelope on every derived entry. Use *inline-first / ref-subsequent* dedup: + +- The **first** entry derived from a given source envelope sets `source.raw.envelope` (and `source.raw.block`, `source.raw.block_index` if applicable). +- **Subsequent** entries derived from the same envelope set `source.raw.envelope_ref` to the first entry's `id`. They omit `source.raw.envelope` and keep `block` / `block_index`. 
+ +`source.raw.envelope_ref` is an optional string. Writers MUST ensure it references the `id` of an entry that appears **earlier** in the same file — the same envelope, inlined once. Forward references and dangling references are reader errors (`source_raw_envelope_ref_unresolved`, §18.4). The first-inline-then-ref shape is streaming-write friendly: readers resolve refs in a single pass without backtracking. + +This mechanism is additive over v0.1.0. Readers that do not understand `envelope_ref` will see it as an unknown raw-source field and ignore it; the entry's other fields (`type`, `payload`, `semantic`) remain fully self-describing. + +--- + diff --git a/spec/draft/11-canonical-tool-taxonomy.md b/spec/draft/11-canonical-tool-taxonomy.md new file mode 100644 index 0000000..7fb04cd --- /dev/null +++ b/spec/draft/11-canonical-tool-taxonomy.md @@ -0,0 +1,90 @@ +## 11. Canonical tool taxonomy + +The `tool_call.payload.tool` field uses these values. Each defines the expected shape of `args`. + +| Name | Args | +|---|---| +| `file_read` | `{ path, range? }` | +| `file_write` | `{ path, content }` | +| `file_edit` | `{ path, diff }` (unified diff) or `{ path, old, new, replace_all? }` | +| `file_patch` | `{ files: [{ path, diff }], atomic? }` | +| `file_list` | `{ path, recursive?, glob? }` | +| `file_search` | `{ query, path?, glob? }` | +| `shell_command` | `{ command, cwd?, timeout? }` | +| `shell_output` | `{ command_id? }` | +| `shell_input` | `{ input, session_id?, command_id? }` | +| `mcp_call` | `{ server, tool, args?, headers? }` | +| `web_fetch` | `{ url, method?, headers? }` | +| `web_search` | `{ query }` | +| `tool_search` | `{ query, limit? }` | +| `notebook_edit` | `{ path, cell_id?, diff?, content? }` | +| `subagent_invoke` | `{ task, agent_type?, session_id? }` | +| `other` | `{ name, args? }` | + +Checklist and plan snapshots use `task_plan_update` ([§10.2](#10-2-mandatory-event-types)) rather than `tool_call`. 
+ +### 11.1 `file_edit` + +`file_edit` has two exclusive argument forms: + +- `{ path, diff }` where `diff` is a unified diff. +- `{ path, old, new, replace_all? }` for sources that record only string replacement with no line context. + +Writers MUST prefer the diff form when a real unified diff is derivable from source data. Writers MUST NOT fabricate hunk headers to fake the diff form. + +The `diff` form uses a unified diff: + +```diff +--- a/src/main.ts ++++ b/src/main.ts +@@ -1,4 +1,4 @@ + unchanged +-removed ++added + unchanged +``` + +Writers with native before/after content MUST convert to a diff before emitting. Writers that synthesize the edit from indirect source data set `source.synthesized: true`. + +### 11.2 `file_patch` + +Use `file_patch` when one source tool call represents a patch touching one or more files, and +single-file `file_edit` would either lose the call's multi-file grouping or force consumers to +reconstruct it from synthesized sibling calls. Each `files[]` entry carries the affected `path` and a +per-file unified diff. Writers that split source-native patch text into per-file hunks SHOULD add +`---` and `+++` file headers when the source omits them, so generic consumers can render each file +without parsing the source-native patch envelope. For renames, `path` is the destination path and the +diff headers carry both source and destination paths. Set `atomic: true` when the source represented +the patch as one operation. + +### 11.3 `file_list` + +Use `file_list` when the agent inspected a directory or file tree. The result's display listing +lives in the matching `tool_result.payload.output`. Do not map directory listing to +`shell_command` unless the source only records a literal shell command. + +### 11.4 `shell_command` + +Full command in `command`; output in the corresponding `tool_result.payload.output`. Redactors SHOULD scrub env vars, `Authorization` headers in piped curls, etc. 
+ +### 11.5 `mcp_call` + +- `server` — MCP server identifier (e.g., `github`, `linear`). +- `tool` — tool name within that server. +- `headers` — SHOULD be redacted before writing: `Authorization`, `X-API-Key`, `Cookie`, `Bearer ...`. + +### 11.6 `subagent_invoke` + +Indicates a child conversation was spawned. Two cases: + +- **Inline subtree:** when the source stores child events inline in the same session, child events use this event's `id` as their root `parent_id`. +- **External child session:** when the source stores the child as a separate transcript, set `args.session_id` to the child session header `id`. The child MAY appear as a sibling group in the same session bundle or as an external trail. Do not use a content hash or source runtime id in `args.session_id`. + +When the external child appears in the same file, the child header SHOULD set `fork_from.session_id` to the parent session header `id` and `fork_from.entry_id` to the parent `subagent_invoke` event `id`. `fork_from.content_hash` is optional best-effort and refers to the parent session-level content hash. + +### 11.7 The `other` escape hatch + +For tools not covered above, use `tool: "other"` with `args: { name, args? }`. Readers render generically. These don't participate in cross-agent comparison. + +--- + diff --git a/spec/draft/12-vendor-extensions.md b/spec/draft/12-vendor-extensions.md new file mode 100644 index 0000000..e6d3fef --- /dev/null +++ b/spec/draft/12-vendor-extensions.md @@ -0,0 +1,39 @@ +## 12. Vendor extensions + +Implementations and vendors can add custom data via the `meta` field on the trail envelope, session header, or any event entry. Use the `x-<vendor>/<name>` extension grammar (§12.1) for keys to avoid collisions: + +```jsonc +"meta": { + "x-cursor/workspace_id": "ws-abc123", + "x-example/custom_flag": true, + "x-anthropic/usage": { "input_tokens": 1234, "output_tokens": 567 } +} +``` + +Readers MAY preserve, ignore, or render `meta` fields. They MUST NOT abort on unknown keys. 
+ +`entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see §16. + +The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](#15-1-source-raw-elision-and-redaction)) instead. See [§8.3](#8-3-the-meta-extension-convention) for the full convention. + +### 12.1 Extension grammar + +One extension grammar is used across extension surfaces: `x-<vendor>/<name>`. + +- `vendor`: lowercase alphanumeric with optional hyphen-separated segments, e.g. `acme` or `acme-labs`. +- `name`: starts with lowercase alphanumeric and MAY contain lowercase alphanumeric, `_`, or `-`. + +| Surface | Applies to | Example | +| --- | --- | --- | +| Envelope `meta` keys | Trail-level vendor annotations | `x-acme/build_id` | +| Header `meta` keys | Session-level vendor annotations | `x-acme/team` | +| Entry `meta` keys | Event-level vendor annotations | `x-acme/run_id` | +| `system_event.kind` | Non-reserved source signals | `x-claudecode/notification` | +| Enum extensions | Descriptive state vocabulary: `scope`, `reason`, `trigger`, `result_action`, `command_invoke.kind`, `command_invoke.via`, `session_metadata_update.field`, `vcs.type`, `user_message.origin` | `x-acme/custom_scope` | +| `tool_result.payload.meta` vendor keys | Sibling keys under registered tool-kind output objects | `meta.mcp_call.x-acme/cache_hit` | +| Custom `agent.name` | Unregistered source agents | `x-example/myagent` | + +Structural discriminators, including event `type`, delta `kind`, attachment `kind`, and `taskPlanStatus`, stay closed. Descriptive state vocabulary is extensible through the grammar above. + +--- + diff --git a/spec/draft/13-tree-and-branching.md b/spec/draft/13-tree-and-branching.md new file mode 100644 index 0000000..922fbac --- /dev/null +++ b/spec/draft/13-tree-and-branching.md @@ -0,0 +1,18 @@ +## 13. 
Tree and branching + +### 13.1 When to emit `parent_id` + +`parent_id` represents tree topology, not ordinary linear sequencing. Linear sessions use file order. Tool call/result pairing uses `tool_result.payload.for_id` and `semantic.call_id`, not `parent_id`. + +Writers SHOULD emit `parent_id` only when source data contains branch, fork, or inline child-event topology that can be mapped to Agent Trail event ids. + +`parent_id` is intra-group topology only. It MUST NOT span session groups. When source data stores a spawned or forked transcript as a separate session, use a child session with `header.fork_from` instead of cross-group `parent_id`. + +Reader display policies for linear and tree-aware renderers are implementation semantics, not wire-format rules. + +### 13.2 Acyclicity + +The `parent_id` graph MUST be acyclic. The header isn't part of the graph; nothing references it via `parent_id`. + +--- + diff --git a/spec/draft/14-canonical-agent-registry.md b/spec/draft/14-canonical-agent-registry.md new file mode 100644 index 0000000..8703891 --- /dev/null +++ b/spec/draft/14-canonical-agent-registry.md @@ -0,0 +1,12 @@ +## 14. Canonical agent registry + +Lowercase, hyphenated: + +`claude-code`, `pi`, `openclaw`, `codex-cli`, `cursor`, `opencode`, `aider`, `amp`, `cline`, `crush`, `kimi-code`, `qwen-code`, `factory`, `vibe`, `copilot-cli`, `copilot-chat`, `chatgpt`, `clawdbot`. + +The registry reserves canonical names. It does not imply adapter support. + +New agents MAY be added by amending this spec. Until registered, adapters MAY use a custom `x-<vendor>/<name>` agent name (e.g., `x-example/myagent`) to reduce collisions while keeping the vendor and agent name parseable. + +--- + diff --git a/spec/draft/15-truncation-overflow-and-raw-source-size.md b/spec/draft/15-truncation-overflow-and-raw-source-size.md new file mode 100644 index 0000000..5db9e52 --- /dev/null +++ b/spec/draft/15-truncation-overflow-and-raw-source-size.md @@ -0,0 +1,50 @@ +## 15. 
Truncation, overflow, and raw source size + +Writers MAY truncate large `tool_result` outputs to keep trails tractable. The wire format records truncation with three fields on `tool_result.payload`: + +| Field | Type | Notes | +|---|---|---| +| `truncated` | boolean | `true` when `output` was shortened from its original length | +| `output_size` | integer ≥0 | UTF-8 byte length of the original output before truncation; REQUIRED when `truncated` is true | +| `overflow_ref` | string or null | optional content-addressed reference to the full output (`sha256:<64 lowercase hex>`); colocated blob storage is implementation-defined | + +Specific inline-size thresholds, the truncation algorithm (e.g., head-only, head-and-tail, line-aligned), and the choice of overflow storage are writer policy and belong in writer documentation, not the format. + +Tool call arguments use the same top-level marker on `tool_call.payload`: + +| Field | Type | Notes | +|---|---|---| +| `truncated` | boolean | `true` when `args` was shortened from its original object | +| `args_size` | integer ≥0 | UTF-8 byte length of the JCS-serialized original `args` object before truncation; REQUIRED when `truncated` is true | +| `overflow_ref` | string or null | optional content-addressed reference to the full args object (`sha256:<64 lowercase hex>`) | + +The marker applies to the `args` object as a whole. Individual arg strings keep their declared per-toolkind shape, just shortened. Specific thresholds and algorithms remain writer policy. + +`source.raw` is optional. Writers SHOULD omit or summarize very large or sensitive raw source objects when they would make trail files unwieldy or unsafe. Share tools MUST inspect `source.raw` during redaction before producing a shared artifact. + +### 15.1 `source.raw` elision and redaction + +Writers MAY elide all or part of a `source.raw` value when it is unwieldy or unsafe to inline. 
Elision uses a single wire-format marker, in place of either the entire `source.raw` or any nested string leaf: + +```jsonc +{ "elided": true, "size_bytes": 41208 } +``` + +| Field | Type | Notes | +|---|---|---| +| `elided` | boolean `true` | sentinel; readers detect elided regions by this field | +| `size_bytes` | integer | UTF-8 byte length of the elided original (informational; readers MAY use it for display or budgeting) | + +Two placements are valid: + +- **Whole-value elide:** `source.raw` itself is the marker. The original envelope is fully omitted; only its byte size is recorded. +- **Leaf elide:** any nested string is replaced with the marker. The envelope's structural skeleton (ids, parent refs, role, timestamps, block kinds) stays intact; only the bulky string body is removed. + +Specific size thresholds, the algorithm a writer uses to choose which leaves to elide, and whether elision is gated by a hard cap are implementation policy — they belong in writer documentation, not the format. Validators MAY warn on entries whose `source.raw` exceeds an implementation-chosen size budget, but the wire format itself imposes no fixed limit. + +When elision happens at the first emission of a source envelope (§10.7), subsequent `envelope_ref` entries still resolve — the ref points at the elided entry's `id`, not at its inlined envelope. + +Adapters MUST redact known secret patterns in `source.raw` before writing — emission-time redaction is a writer responsibility, not a share-time concern. Validators emit `source_raw_unredacted_secret` (warning) when a string leaf in `source.raw` matches a known credential pattern (Authorization headers, Bearer tokens, JWT, vendor API keys, PEM private key blocks, ENV-style assignments). Share-time redaction (§16) layers additional normalization on top — paths, PII — and produces a separate artifact. 
+ +--- + diff --git a/spec/draft/16-redaction.md b/spec/draft/16-redaction.md new file mode 100644 index 0000000..1e0008b --- /dev/null +++ b/spec/draft/16-redaction.md @@ -0,0 +1,31 @@ +## 16. Redaction + +The raw file format does not mandate redaction. Sharing tools produce a separate redacted artifact before upload. Raw and redacted artifacts have different `content_hash` values. + +A complete redaction protocol is out of scope for the file format; it belongs to share tooling. Redacted artifacts MAY record `redacted_from.content_hash` to link back to the raw artifact without exposing local paths or raw local IDs. + +Share-time redactors MUST apply the privacy rules below before producing shared artifacts. They MAY normalize a field instead of deleting it only when the normalized value no longer exposes raw local paths, raw local session identifiers, credentials, or private repository identity. + +| Field or value | Share-time action | +|---|---| +| `cwd` | Normalize or strip. | +| `vcs.remote_url` | Strip or normalize per §9.2 unless the user explicitly opts in. | +| `system_event.payload.data.repo` for `vcs_commit` | Treat like `vcs.remote_url`; strip or normalize unless the user explicitly opts in. | +| `vcs.worktree.path`, `vcs.worktree.original_cwd` | Normalize or strip. | +| `source.path` | Normalize or strip. | +| `attachments[].uri` | Remove or rewrite local `file:` URIs. Rewrite to `sha256:` only when the referenced blob is content-addressed and transported with the share; otherwise remove `uri` and keep visible stub metadata such as `kind`, `name`, and `media_type`. | +| `tool_result.payload.overflow_ref` | Keep `sha256:` references when useful; strip every other scheme or implementation-local reference. When stripped, keep `truncated` and `output_size` unchanged. | +| `tool_call.payload.args.headers` for `mcp_call` and `web_fetch` | Strip or replace credential-bearing values with placeholders. 
| +| `name`, `description`, `tags`, message text, output strings, and `meta` string leaves | Scrub secret patterns and PII according to the redactor's configured policy. | + +Redactors MUST resolve each `user_query_response.payload.for_id` to a `user_query` in the same session group before preserving answers for questions marked `is_secret`. If the query is unresolvable, the redactor MUST strip the response's `answers` entirely (fail closed). +If a resolved response contains answer keys that do not appear on the referenced `user_query`, the redactor MUST strip those unknown answers and any raw source payload for that response. + +Share-time redactors SHOULD populate `entry.meta.redaction_count` on each changed event entry. The count is a non-negative integer equal to the number of redactor mutations applied to that entry. Existing numeric `redaction_count` values are additive when a redacted trail is redacted again; unchanged entries keep their existing value. + +When redaction changes bytes, lineage hashes are updated as described in §9.6.7. This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. + +Specific secret patterns, exact PII detectors, path-normalization strings, image preview behavior, token-usage policy, blob upload mechanics, and share workflow remain implementation semantics. + +--- + diff --git a/spec/draft/17-security-considerations.md b/spec/draft/17-security-considerations.md new file mode 100644 index 0000000..f6af7f0 --- /dev/null +++ b/spec/draft/17-security-considerations.md @@ -0,0 +1,22 @@ +## 17. Security Considerations + +Trail files are untrusted input. All string content, including messages, tool output, file paths, URIs, agent names, titles, and source metadata, can be attacker-controlled. 
Renderers SHOULD escape HTML, SHOULD NOT execute or auto-open rendered Markdown links, and CLI viewers SHOULD sanitize terminal control sequences before writing text to a terminal. + +Agent Trail intentionally has no format-level size caps. Consumers SHOULD enforce deployment-specific limits for maximum line length, file size, event count, graph depth, and decoded attachment or overflow bytes. Consumers SHOULD stream rather than buffer whole files where possible; JSONL is the interchange shape partly to make bounded streaming readers practical. + +Hostile files can contain invalid graph structure even though `parent_id` cycles and cross-group links are invalid (§13, §18.4). Validators MUST NOT loop indefinitely while checking graph topology, and tree renderers SHOULD bound recursion or use iterative traversal when displaying deep parent chains. + +`content_hash` provides byte integrity for the canonical artifact (§7.3, §7.4), not authorship, provenance, or trust. A trail claiming `agent.name: "claude-code"` proves only that the file contains that string. Agent Trail v0.1.0 has no signature or attestation mechanism; signing MAY be added by a future extension. + +In v0.1.0, `content_hash` values are bare 64-character SHA-256 hex strings (§7.3). Other content-addressed references, such as attachment URIs (§10.2) and `overflow_ref` values (§15), use `sha256:` references. Consumers that verify prefixed content-addressed references MUST reject unknown algorithm prefixes rather than treating the reference as verified. + +Attachment URIs and overflow references can identify local resources on the producer's machine. Viewers SHOULD NOT dereference `file:` URIs, `overflow_ref` values, or other external references automatically. Viewers MUST NOT dereference local `file:` URIs or non-`sha256:` overflow references from redacted or shared trails; §16 requires share-time redactors to remove or rewrite those values before transport. 
+ +Redaction reduces content exposure but does not make a shared trail private. Timestamps, event counts, tool names, model names, branch shape, unredacted file names, and remaining metadata can still reveal workflow information. Sharing a redacted trail SHOULD be treated as publishing it to anyone who can access the transport. + +Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. Sharing tools SHOULD scan headers and trail envelopes as well as event payloads (§16). + +The implementation-maintained detector catalog and rule pack schema live in [`docs/redaction-patterns.md`](./docs/redaction-patterns.md); this catalog is non-normative and does not change the trail file format. + +--- + diff --git a/spec/draft/18-validation.md b/spec/draft/18-validation.md new file mode 100644 index 0000000..c7eee92 --- /dev/null +++ b/spec/draft/18-validation.md @@ -0,0 +1,172 @@ +## 18. Validation + +Validation is layered because JSON Schema validates one line at a time, while several Agent Trail rules require whole-file context. + +### 18.1 Writer schema + +`schema.json` is the writer-strict schema for v0.1.0. It validates a single JSON object line and requires header and envelope records to use `schema_version: "0.1.0"`. It rejects unknown top-level event types. Writers use this schema for emitted envelope, header, and event lines. + +`schema.json` is the canonical format contract through v1.0. Generated types, validators, and packages MUST derive from it rather than maintaining a separate manual contract. + +### 18.2 Reader tolerance + +Readers MAY accept compatible future v0.x files best-effort: skip unknown event types, ignore unknown payload fields, preserve unknown records when round-tripping, and warn instead of aborting where possible. 
Reader tolerance is runtime behavior, not the writer-strict schema contract. + +### 18.3 Conformance classes and diagnostics + +Agent Trail defines named conformance classes so tools can describe the depth of +reader or writer support they implement. + +| Class | Name | Requirements | +|---|---|---| +| **R0** | Renderer | Reader-tolerant JSONL parsing per §6 and §18.2; renders the mandatory event types in §10.2, including user messages, agent messages, tool calls, tool results, and summaries; preserves or displays fallback output for unknown records it can parse; does not crash on valid or quarantinable input. | +| **R1** | Structural reader | R0 plus the non-hash whole-file layout, graph, pairing, streaming-state, and diagnostic checks in §18.4. R1 catches duplicate ids, unknown parents, parent cycles, unresolved `source.raw.envelope_ref`, tool-call pairing diagnostics, and other file-level checks that do not require recomputing content hashes or comparing segment-chain hashes. | +| **R2** | Verifying reader | R1 plus content-hash verification per §7.3 and §7.4, and segment-chain verification per §9.5. Readers in this class warn rather than abort on reader-tolerant hash mismatches, per §18.4.1. | +| **W** | Writer | Emits writer-strict records that validate against `schema.json` and satisfy the strict whole-file validation rules in §18.4. Writer conformance is about emitted trail files, not reader tolerance. | + +`@agent-trail/core` implements Class R2 reader behavior through its parsing, +validation, canonicalization, hashing, and multi-segment reconciliation APIs. + +The validation conformance suite manifest tags each fixture with the applicable +classes. The current validation suite does not tag fixtures as R0 because it +asserts validation outcomes, not rendering behavior. 
+ +#### Claiming conformance (non-normative) + +Projects MAY claim support using the class name they implement, for example +"Agent Trail R0 reader", "Agent Trail R2 reader", or "Agent Trail W writer". +Such claims mean the implementation passes the conformance checks tagged for +that class and follows the referenced normative sections. Agent Trail does not +define a certification registry or badge authority. + +#### Validation diagnostics + +Validators SHOULD report normalized diagnostics with `line`, `path` (JSON Pointer), `severity`, `code`, and `message`. Implementations MAY include extra fields, but these five fields are the portable diagnostic surface. + +Portable diagnostic code registry: + +| Code | Severity | Defining section | +|---|---|---| +| `ambiguous_sequential_pairing` | warning | §10.5 / §18.4.2 | +| `child_session_fork_from_mismatch` | warning | §18.4.2 | +| `child_session_parent_link_mismatch` | warning | §18.4.2 | +| `content_hash_invalid` | error | §7.3 / §18.4.1 | +| `content_hash_mismatch` | error (strict), warning (reader-tolerant) | §7.3 / §18.4.1 | +| `cross_group_fork_from_hash_mismatch` | warning | §9.6.5 | +| `duplicate_id` | error | §18.4.1 | +| `duplicate_option_labels` | warning | §10.2 / §18.4.2 | +| `duplicate_segment_seq` | warning | §9.5 / §18.4.2 | +| `duplicate_tool_result` | warning | §10.5 / §18.4.2 | +| `duplicate_user_query_question_id` | error | §10.2 | +| `envelope_has_parent_id` | error | §8 / §18.4.1 | +| `envelope_not_at_line_1` | error | §8 / §18.4.1 | +| `envelope_sessions_manifest_drift` | warning | §8.4 / §18.4.2 | +| `events_before_first_session_header` | error | §9.6 / §18.4.1 | +| `header_has_parent_id` | error | §9 / §18.4.1 | +| `ill_formed_string` | error (strict), warning (reader-tolerant) | §5.2 / §18.4.1 | +| `missing_header` | error | §9 / §18.4.1 | +| `missing_header_after_envelope` | error | §8 / §18.4.1 | +| `multiple_envelopes` | error | §8 / §18.4.1 | +| `non_interoperable_number` | warning | §5.2 / 
§18.4.2 | +| `non_monotonic_event_ts` | warning | §18.4.2 | +| `out_of_order_segment_seq` | warning | §9.5 / §18.4.2 | +| `out_of_order_session_headers` | warning | §9.6.6 | +| `parent_cycle` | error | §13.2 / §18.4.1 | +| `parse_fidelity_drift` | error | §9.2 / §18.4.1 | +| `reader_tolerant_schema_version` | warning | §6 / §18.2 | +| `reader_tolerant_unknown_payload_field` | warning | §18.2 | +| `reader_tolerant_unknown_record` | warning | §18.2 | +| `segment_chain_break` | warning | §9.5 | +| `source_raw_envelope_ref_unresolved` | error | §10.7 / §18.4.1 | +| `source_raw_unredacted_secret` | warning | §15.1 / §18.4.2 | +| `stream_open_with_content_hash` | warning | §18.4.3 | +| `stream_open_with_terminal_event` | warning | §18.4.3 | +| `tool_args_unredacted_secret` | warning | §16 / §18.4.2 | +| `tool_result_semantic_conflict` | warning | §10.5 / §18.4.2 | +| `unknown_abandoned_branch_id` | warning | §10.3 / §18.4.2 | +| `unknown_branch_point_from_id` | warning | §10.3 / §18.4.2 | +| `unknown_final_message_id` | warning | §10.3 / §18.4.2 | +| `unknown_parent_id` | error | §10.1 / §18.4.1 | +| `unknown_user_query_answer_key` | error | §10.2 | +| `unknown_user_query_for_id` | warning | §10.2 / §18.4.2 | +| `unmatched_tool_call_at_eof` | warning | §10.5 / §18.4.2 | +| `vcs_remote_url_with_credentials` | warning or error | §9.2 / §18.4 | +| `vcs_revision_divergence` | warning | §9.6.6 | + +#### Conformance suite (non-normative) + +The repository publishes a versioned validation conformance suite with the schema package. The canonical corpus lives under `tests/fixtures/validation/` and is mirrored into the `@agent-trail/schema` package under `conformance/`. + +The suite manifest uses three assertion tiers, plus class tags that scope each fixture: + +- Writer-strict validity verdicts and reader-tolerant cleanliness outcomes for every fixture. +- Portable diagnostic assertions (`severity`, `code`, `line`, `path`) only for spec-named diagnostic codes. 
+- Line-only assertions for schema-layer failures, because JSON Schema validator keyword vocabularies are implementation-specific. +- Class tags (`classes`) marking which conformance classes each fixture applies to. Validation fixtures use `W`, `R1`, and `R2`; R0 renderer conformance needs a separate rendering suite. + +### 18.4 File graph checks + +A v0.1.0-compliant trail file MUST also pass whole-file checks. + +#### 18.4.1 Errors + +1. The first line is either a trail envelope (`type: "trail"`, §8) or a session header (`type: "session"`, `schema_version: "0.1.0"`). When the envelope is present, the session header MUST occupy line 2. +2. Subsequent lines match an event schema (`type`, `id`, `ts`, `payload`). +3. All `id` values are unique within the file. +4. Every non-null `parent_id` references an `id` in the same file. +5. The `parent_id` graph is acyclic. +6. Writer timestamps are valid UTC `Z` ISO-8601 values with millisecond precision. Readers MAY tolerate broader ISO-8601 timestamps. +7. All string values are well-formed: no unpaired high or low surrogate code units. Violations are `ill_formed_string` diagnostics at the offending JSON Pointer. Strict validation reports an error; reader-tolerant validation reports a warning and does not repair the value. + +If `content_hash` is present: + +1. The value is 64 hex characters (SHA-256). Invalid hash shape emits `content_hash_invalid` at `/content_hash`. +2. Strict validators recompute and verify per §7.3. On mismatch, strict validation fails with `content_hash_mismatch` at `/content_hash`. Reader-tolerant parsers MAY warn but MUST NOT abort. + +Additional whole-file errors: + +- `parse_fidelity`, when present, MUST match the session group's entries (§9.2). Drift emits `parse_fidelity_drift` at the mismatched `parse_fidelity` field. +- A `user_query` question id MUST be unique within that query. Duplicate ids emit `duplicate_user_query_question_id` at the repeated question id. 
+- A `user_query_response.payload.answers` key not present in the resolved `user_query.payload.questions[].id` set emits `unknown_user_query_answer_key` at that answer key. +- `source.raw.envelope_ref`, when set, MUST reference the `id` of an earlier entry in the same file (§10.7). Dangling or forward references are errors with code `source_raw_envelope_ref_unresolved` at `/source/raw/envelope_ref`. +- Trail envelope position and uniqueness (§8): + - `envelope_not_at_line_1` (error): a `type:"trail"` record appears on a line other than line 1. + - `multiple_envelopes` (error): more than one envelope appears in the file. + - `missing_header_after_envelope` (error): an envelope at line 1 is not followed by a session header on line 2. + - `envelope_has_parent_id` (error): the trail envelope carries a `parent_id`. + +#### 18.4.2 Warnings + +- Each `tool_call.id` SHOULD be referenced by exactly one `tool_result.payload.for_id` (or paired via §10.5). +- Inline `subagent_invoke` events SHOULD have descendants in the same group, or external child invocations SHOULD set `args.session_id` to the child header `id` when known. +- When an in-file child session is present, the parent `subagent_invoke.args.session_id` and child `header.fork_from.{session_id,entry_id}` SHOULD agree. Mismatches are warnings, not errors, so partial bundles and external-only references remain readable. +- `branch_point.payload.from_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_branch_point_from_id` at `/payload/from_id`. +- `branch_summary.payload.abandoned_branch_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_abandoned_branch_id` at `/payload/abandoned_branch_id`. +- Writers SHOULD emit `session_terminated` if any `tool_call` remains unmatched at EOF. The warning code is `unmatched_tool_call_at_eof`. 
Suppression: + - A `session_end` event anywhere in the file suppresses this warning for every unmatched `tool_call` (clean conclusion, §10.3). + - A `session_terminated` event whose `payload.open_call_ids` lists a given `tool_call.id` suppresses the warning for that id only (explicit acknowledgement). A `session_terminated` event without `open_call_ids` does not suppress the warning. +- A `tool_result` paired by sequential fallback when two or more unmatched prior same-branch `tool_call` candidates existed emits `ambiguous_sequential_pairing` at `/payload`. +- A `user_query` question with duplicate option labels among options that do not carry stable option ids emits `duplicate_option_labels` at the repeated option's `/payload/questions/<question-index>/options/<option-index>/label`. +- `user_query_response.payload.for_id` SHOULD reference a `user_query` in the same session group. An unresolved reference emits `unknown_user_query_for_id` at `/payload/for_id`. +- `session_end.payload.final_message_id`, when present, SHOULD reference an `id` that appears in the same file (the session header or a prior event). A dangling reference is a warning with code `unknown_final_message_id` at `/payload/final_message_id`. +- An event's `ts` SHOULD NOT be earlier than its parent event's `ts` inside the same parent chain. Equal timestamps are allowed; sibling branches may interleave in wall-clock time. A strictly earlier child timestamp emits `non_monotonic_event_ts` (warning) at `/ts`. +- Validators MAY report implementation-defined size budgets for `source.raw`; specific numbers are writer policy (§15.1). +- `source.raw` SHOULD NOT contain unredacted credentials. A string leaf matching a known credential pattern emits `source_raw_unredacted_secret` (warning) at the matching JSON pointer. +- JSON integer numbers outside the IEEE-754 exact-integer range SHOULD be emitted as strings. Unsafe integer numbers emit `non_interoperable_number` (warning) at the offending JSON Pointer. 
+- Privacy-sensitive tool arguments SHOULD NOT contain unredacted credentials. A string leaf in `mcp_call` / `web_fetch` `tool_call.payload.args.headers` or `shell_command` `tool_call.payload.args.command` matching a known credential pattern emits `tool_args_unredacted_secret` (warning) at the matching JSON pointer. +- `envelope_sessions_manifest_drift` (warning): the envelope's `sessions` manifest length disagrees with the number of session groups, or a manifest entry disagrees with the matching session header's `id` or `agent.name`. +- Multi-segment consistency within one file (§9.5): + - `duplicate_segment_seq` (warning): two groups share the same `(session_uid, segment.seq)` pair, treating missing `segment` as `seq: 1`. + - `out_of_order_segment_seq` (warning): groups with the same `session_uid` appear with descending `segment.seq` in file order. + +#### 18.4.3 Streaming-state rules + +Streaming rules (§9.4) are evaluated against the *current* header `stream.state` at validation time — the validator reads the present value, not a history of transitions. Crash-recovery writers MUST finalize (`stream.state` to `"closed"` or remove `stream`) before appending terminal events; once the stream is no longer marked live, the rules below stop applying. + +10. If the current `header.stream.state == "open"`: + - **10a.** `content_hash` SHOULD be absent or `""`. A populated hex hash is a warning, since the canonical bytes are still in flux. + - **10b.** Terminal events (`session_end`, `session_terminated`) SHOULD NOT appear. A terminal event in a file whose current `header.stream.state == "open"` is a warning — the writer claims the stream is still open but has already emitted a terminal event. Finalize the header (set `stream.state` to `"closed"` or remove `stream`) before appending terminal events. +11. If the current `header.stream.state == "closed"` or `stream` is absent, finalized artifacts SHOULD populate `content_hash`. 
Readers MAY warn but MUST NOT abort when it is missing on otherwise complete files. Trail files produced by stream-unaware writers, or files appended across crashes and recoveries, MAY contain both `session_end` and `session_terminated` legitimately; rule 10b does not apply once the stream is no longer marked live. + +--- + diff --git a/spec/draft/19-formal-schema.md b/spec/draft/19-formal-schema.md new file mode 100644 index 0000000..f2b4a19 --- /dev/null +++ b/spec/draft/19-formal-schema.md @@ -0,0 +1,8 @@ +## 19. Formal schema + +The normative writer-strict JSON Schema lives in `schema.json` and is published at `https://agent-trail.dev/schema/v0.1.0.json`. + +This spec intentionally does not duplicate the full schema inline. Implementations SHOULD validate each JSONL line against `schema.json`, then run the whole-file checks in §18.4. Reader-tolerant parsing, including unknown future event preservation, is separate from writer-strict schema validation. + +--- + diff --git a/spec/draft/20-examples.md b/spec/draft/20-examples.md new file mode 100644 index 0000000..e5da8f6 --- /dev/null +++ b/spec/draft/20-examples.md @@ -0,0 +1,14 @@ +## 20. Examples + +More complete examples are maintained as validated conformance fixtures under `tests/fixtures/validation/valid/spec-example-*.trail.jsonl` and published with concrete package exports such as `@agent-trail/schema/conformance/manifest.json` and `@agent-trail/schema/conformance/fixtures/valid/spec-example-*.trail.jsonl`. This keeps examples executable without splitting normative meaning away from the schema and validation rules. 
+ +Minimal at-a-glance trail: + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + +--- + diff --git a/spec/draft/README.md b/spec/draft/README.md new file mode 100644 index 0000000..1b4731a --- /dev/null +++ b/spec/draft/README.md @@ -0,0 +1,37 @@ +# Agent Trail Specification v0.1.0 + + +**Version:** 0.1.0 +**Status:** Draft +**Date:** June 12, 2026 +**License:** Apache-2.0 +**Schema URL:** `https://agent-trail.dev/schema/v0.1.0.json` *(release snapshot; local source: `schema.json`)* + +--- + +## Contents + +- [1. Motivation](./01-motivation.md) +- [2. Goals and non-goals](./02-goals-and-non-goals.md) +- [3. At a glance](./03-at-a-glance.md) +- [4. Terminology](./04-terminology.md) +- [5. File format](./05-file-format.md) +- [6. Versioning](./06-versioning.md) +- [7. Identity, artifacts, and content addressing](./07-identity-artifacts-and-content-addressing.md) +- [8. The trail envelope](./08-the-trail-envelope.md) +- [9. The session header](./09-the-session-header.md) +- [10. Events](./10-events.md) +- [11. Canonical tool taxonomy](./11-canonical-tool-taxonomy.md) +- [12. Vendor extensions](./12-vendor-extensions.md) +- [13. Tree and branching](./13-tree-and-branching.md) +- [14. Canonical agent registry](./14-canonical-agent-registry.md) +- [15. Truncation, overflow, and raw source size](./15-truncation-overflow-and-raw-source-size.md) +- [16. Redaction](./16-redaction.md) +- [17. Security Considerations](./17-security-considerations.md) +- [18. Validation](./18-validation.md) +- [19. Formal schema](./19-formal-schema.md) +- [20. 
Examples](./20-examples.md) +- [Changelog](./changelog.md) +- [Appendix A — Minimal valid record](./appendix-a-minimal-valid-record.md) +- [Appendix B — Content hash worked example](./appendix-b-content-hash-worked-example.md) +- [License](./license.md) diff --git a/spec/draft/appendix-a-minimal-valid-record.md b/spec/draft/appendix-a-minimal-valid-record.md new file mode 100644 index 0000000..63690d2 --- /dev/null +++ b/spec/draft/appendix-a-minimal-valid-record.md @@ -0,0 +1,17 @@ +## Appendix A — Minimal valid record + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +``` + +A session with only a header is valid. Events are optional. + +### Appendix A.1 — Minimal valid record with trail envelope + +```jsonl +{"type":"trail","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0"} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +``` + +An envelope at line 1 followed by a session header at line 2 is valid. Events are optional. + diff --git a/spec/draft/appendix-b-content-hash-worked-example.md b/spec/draft/appendix-b-content-hash-worked-example.md new file mode 100644 index 0000000..5ca73b3 --- /dev/null +++ b/spec/draft/appendix-b-content-hash-worked-example.md @@ -0,0 +1,38 @@ +## Appendix B — Content hash worked example + +This example shows the §7.3 two-pass procedure for the +`hash-vectors/minimal-pending-roundtrip.trail.jsonl` conformance fixture. The +remaining canonicalization and two-tier identity cases are published in the +`hash-vectors/` fixture category. 
+ +Input file before stamping: + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000101","content_hash":"","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000101","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000102","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + +Canonical bytes hashed by SHA-256, shown as UTF-8 text with the required trailing +newline after the last line: + +```jsonl +{"agent":{"name":"codex-cli"},"content_hash":"","id":"01HSESS0000000000000000101","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000101","payload":{"text":"hello"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} +{"id":"01HEVTA0000000000000000102","payload":{"text":"hi"},"ts":"2026-05-17T14:00:07.000Z","type":"agent_message"} +``` + +Resulting session-level digest: + +```text +f215ed334d3928e1abde804f2c4a870431b18d4fa7d755ec94d94be2a6ddd06e +``` + +Stamped file: + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000101","content_hash":"f215ed334d3928e1abde804f2c4a870431b18d4fa7d755ec94d94be2a6ddd06e","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000101","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000102","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + diff --git a/spec/draft/changelog.md b/spec/draft/changelog.md new file mode 100644 index 0000000..7576f0e --- /dev/null +++ b/spec/draft/changelog.md @@ -0,0 +1,23 @@ +## Changelog + +### v0.1.0 (June 12, 2026) + +Initial public draft. 
v0.1.0 defines: + +- JSONL file layout, session header, core event envelope, mandatory event types, optional events, the canonical tool taxonomy, vendor `meta` extensions (§8.3), tree semantics, layered validation, and artifact-level content addressing. +- Stable local source filenames (`spec.md`, `schema.json`) with immutable hosted release snapshots at `/spec/v0.1.0` and `/schema/v0.1.0.json`. +- The optional trail envelope record `type:"trail"` at line 1 (§8) with Tier 1 fields (`id`, `name`, `description`, `ts`, `producer`, `content_hash`) and Tier 2 fields (`tags`, `vcs`, `fork_from`, `redacted_from`, `sessions`, `meta`), and two-tier identity (§7.4): session-level `content_hash` excludes the envelope, file-level `content_hash` covers the whole file. +- Session headers MAY carry base `name`, `description`, and `tags`; `session_metadata_update` events replay on top of those base values. `vcs.type` allows reserved systems or `x-/` extensions, and envelope `fork_from.trail_id` uses the standard id shape. +- Multi-segment session primitives (`session_uid`, `segment.seq`, `segment.prev_content_hash`) and reconciliation invariants (§9.5). +- The optional header `stream` field, the `session_end` event, and the recommended `system_event` heartbeat convention (§9.4, §10.3). +- Tool-surface fidelity for truncated tool-call args, string-replacement `file_edit`, branch-scoped pairing warnings, stable user-query option ids, stricter attachment identity, and tool-result meta key hygiene. +- The `source.raw.envelope_ref` inline-first / ref-subsequent envelope dedup convention (§10.7), the `{ elided: true, size_bytes: N }` elide marker for `source.raw` (§15.1), and the writer-side redaction requirement for credential patterns in `source.raw`. 
+- Normative share-time redaction rules for local attachment URIs, unsafe `overflow_ref` values, unresolved `user_query_response` answers, and privacy-sensitive field handling (§16), plus the `tool_args_unredacted_secret` validator warning (§18.4). +- Envelope-level `payload.usage` on the first entry derived from a source envelope, including `agent_message`, `agent_thinking`, and `tool_call` (§10.2). +- During the v0.1.0 draft cycle, planning snapshots moved from the legacy `tool_call.payload.tool:"task_plan"` shape to the canonical `task_plan_update` event. Final v0.1.0 writer-strict output MUST use `task_plan_update`; legacy `task_plan` tool calls are invalid. +- During the v0.1.0 draft cycle, duplicate `system_event` kinds for `session_end` and `permission_mode_change` were removed, thinking levels became source-defined strings, `user_message.origin` was added, and related vocabulary clarifications landed. +- During the v0.1.0 draft cycle, vendor extensions converged on one `x-/` grammar across `meta`, enum extensions, `system_event.kind`, `tool_result.payload.meta`, and custom `agent.name`. +- During the v0.1.0 draft cycle, writer-strict identity and encoding were hardened: ULIDs are uppercase, UUIDs are lowercase, timestamps carry schema `format:"date-time"` annotation, and strings with unpaired surrogates are invalid (`ill_formed_string`). + +--- + diff --git a/spec/draft/license.md b/spec/draft/license.md new file mode 100644 index 0000000..8656b12 --- /dev/null +++ b/spec/draft/license.md @@ -0,0 +1,7 @@ +## License + +This specification is released under Apache-2.0. + +--- + +*End of Agent Trail Specification v0.1.0* diff --git a/spec/v0.1.0/01-motivation.md b/spec/v0.1.0/01-motivation.md new file mode 100644 index 0000000..8931d2d --- /dev/null +++ b/spec/v0.1.0/01-motivation.md @@ -0,0 +1,8 @@ +## 1. Motivation + +Engineers using multiple coding agents lose continuity between them. 
A debugging session in Claude Code is invisible from Cursor; an Aider conversation can't be shared with a colleague using Pi. Each tool stores sessions in its own format, and tools that try to bridge them re-implement the same parsing work. + +Agent Trail defines a portable file format for coding agent sessions, so any compliant tool can read and share sessions produced by any other. + +--- + diff --git a/spec/v0.1.0/02-goals-and-non-goals.md b/spec/v0.1.0/02-goals-and-non-goals.md new file mode 100644 index 0000000..83312f4 --- /dev/null +++ b/spec/v0.1.0/02-goals-and-non-goals.md @@ -0,0 +1,44 @@ +## 2. Goals and non-goals + +### Goals + +- Map common coding agents to one canonical event vocabulary with acceptable fidelity (~70%+ semantic fit on average across supported agents). +- Renderable in a generic viewer with no source-agent code. +- Searchable with standard text tooling. +- Trivially streamable, line by line. +- Trivially versionable, with graceful reader degradation. +- Content-addressable for safe sharing and deduplication. + +### Non-goals + +- Replacing agents' native storage formats. +- Bit-perfect reproduction of source sessions. Use `source.raw` if needed. +- Encoding model internals (logits, sampling parameters, tokens). +- Cryptographic signing (deferred). +- Cross-segment `parent_id` references (deferred). +- Real-time bidirectional sync between agents. + +Deferred format surfaces: + +- A structured message-parts model for mixed human-authored and injected `user_message` content. +- Inline `data:` attachment payloads; v0.1.0 attachment `uri` values are references only. + +--- + +### 2.1 Conformance and normativity + +The normative Agent Trail contract is this specification plus `schema.json`. +`schema.json` is the canonical writer-strict machine-readable contract through +v1.0. 
+ +The key words "MUST", "MUST NOT", "REQUIRED", "SHOULD", "SHOULD NOT", and "MAY" +are to be interpreted as described in BCP 14 when, and only when, they appear in +all capitals. + +Examples, notes, rationale, implementation guidance, adapter mappings, reader +display choices, CLI behavior, store layout, and redaction workflow are +non-normative unless explicitly stated otherwise. Implementation guidance lives +in `docs/implementation-semantics.md`. + +--- + diff --git a/spec/v0.1.0/03-at-a-glance.md b/spec/v0.1.0/03-at-a-glance.md new file mode 100644 index 0000000..332b0e1 --- /dev/null +++ b/spec/v0.1.0/03-at-a-glance.md @@ -0,0 +1,14 @@ +## 3. At a glance + +A minimal Agent Trail file containing one user/agent exchange (a header-only file is also valid; see Appendix A): + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + +Line 1 is the header. Lines 2 and on are events. Everything else is optional structure layered on top. + +--- + diff --git a/spec/v0.1.0/04-terminology.md b/spec/v0.1.0/04-terminology.md new file mode 100644 index 0000000..b9ba347 --- /dev/null +++ b/spec/v0.1.0/04-terminology.md @@ -0,0 +1,29 @@ +## 4. Terminology + +| Term | Definition | +|---|---| +| **Trail file** | A JSONL file conforming to this specification; contains one or more session groups. | +| **Trail envelope** | Optional `type:"trail"` record at line 1 carrying file-level metadata (producer, file label, file-scope hash, manifest, vendor extensions). Not part of the event graph. | +| **Header** | The session header (`type:"session"`). On line 1 when there is no envelope, on line 2 when the envelope is present. Not part of the event graph. 
| +| **Session group** | One `type:"session"` header plus the events after it until the next session header or EOF. | +| **Session bundle** | A trail file with one or more session groups. At session-group level the bundle is a forest; each group MAY itself be linear or tree-native. | +| **Child session** | A separate session group or external session spawned or forked from another session, linked by the child header's `fork_from`. | +| **Event** | Any object after the header line; one unit of session content. | +| **Turn** | One user-prompt-to-agent-completion cycle as delimited by the source. `turn_id` values are opaque source-correlation tokens; readers MUST NOT require them to resolve to any entry. | +| **File-level content hash** | SHA-256 of the canonical bytes covering the whole file with the trail envelope's `content_hash` pinned to `""`. | +| **Session-level content hash** | SHA-256 of the canonical bytes covering ONLY the session header and its events (envelope excluded), with the session header's `content_hash` pinned to `""`. | +| **Entry** | Equivalent to "event"; either term MAY appear. | +| **Adapter** | Software that reads a source agent's storage and emits a trail file. | +| **Linear session** | A session whose events do not use `parent_id`. Events are ordered by file position. | +| **Tree session** | A session where some events use `parent_id` to form a DAG. | +| **Canonical event** | One of the mandatory or optional event types in [§10.2](#10-2-mandatory-event-types) and [§10.3](#10-3-optional-event-types). | +| **Raw trail** | A local artifact preserving source fidelity as much as possible. | +| **Redacted trail** | A separate artifact produced from a raw trail for sharing. It has its own `content_hash`. | +| **Shared trail** | A redacted trail transported through a sharing mechanism. | +| **Synthesized event** | An event the adapter constructed from indirect source data (e.g., a git diff), not mapped from a real source event. 
Flagged with `source.synthesized: true`. | +| **Content hash** | SHA-256 of the exact artifact's canonical bytes (§7). | +| **Canonical bytes** | The file content normalized per §7 for hashing. | +| **Source escape hatch** | The `source.raw` field; preserves verbatim source-format data for lossless round-trip. | + +--- + diff --git a/spec/v0.1.0/05-file-format.md b/spec/v0.1.0/05-file-format.md new file mode 100644 index 0000000..4d3c275 --- /dev/null +++ b/spec/v0.1.0/05-file-format.md @@ -0,0 +1,34 @@ +## 5. File format + +### 5.1 File extension and MIME type + +- Recommended extension: `.trail.jsonl` +- Native compressed extension: `.trail.jsonl.gz` +- MIME type: `application/vnd.trail+jsonl`. The `vnd.` form is the intended canonical type and follows IANA conventions for vendor MIME types. IANA registration is deferred to v1.0; until then the type is documented here but not officially registered. +- Native compressed MIME type: `application/vnd.trail+jsonl+gzip`. +- The `+jsonl` suffix is provisional rather than an IANA-registered structured syntax suffix, and `+jsonl+gzip` is a nonstandard double suffix; these media types may be revised during registration. +- Editors render as JSON via the `.jsonl` suffix. A dedicated language extension MAY provide richer highlighting later. + +### 5.2 Encoding + +- UTF-8, no BOM. +- LF line endings (`\n`). CRLF is tolerated by readers; writers MUST NOT produce it. +- Each line is one self-contained JSON object. +- Empty lines are not allowed. +- A trailing newline at EOF is recommended but not REQUIRED. +- Writers MUST replace invalid UTF-8 bytes and unpaired surrogate escapes with U+FFFD at emission time. Emitted JSON strings MUST NOT contain unpaired surrogates. +- Writers MUST NOT emit JSON integer numbers outside the IEEE-754 exact-integer range (`-(2^53-1)` through `2^53-1`) anywhere in a trail file. 
Adapters that receive oversized source integers, such as snowflake ids or nanosecond timestamps in `source.raw`, MUST emit them as strings instead. Validator warnings use code `non_interoperable_number` at the offending JSON Pointer. +- `.trail.jsonl.gz` files are a whole-file gzip wrapper around the UTF-8 trail JSONL bytes above. Writers MUST NOT gzip individual JSONL lines independently. Readers MUST decompress `.trail.jsonl.gz` files before validation and processing. +- For `.trail.jsonl.gz`, `content_hash` is computed and verified by first decompressing the file to produce plain UTF-8 JSONL, then applying the canonical bytes procedure defined in §7.3 to the decompressed JSONL. The compressed bytes themselves are never hashed. + +### 5.3 File layout + +Every valid trail file has: + +1. **Optionally**, a trail envelope (`type:"trail"`) on line 1 (§8). +2. One **or more** session header groups in file order. Each group starts with a `type:"session"` record and continues with zero or more event lines until the next `type:"session"` record or EOF (§9.6). The first session header MUST appear on line 1 when there is no envelope, or on line 2 when an envelope is present. + +When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in §9.6. + +--- + diff --git a/spec/v0.1.0/06-versioning.md b/spec/v0.1.0/06-versioning.md new file mode 100644 index 0000000..6dde7bb --- /dev/null +++ b/spec/v0.1.0/06-versioning.md @@ -0,0 +1,32 @@ +## 6. Versioning + +The header's `schema_version` is a SemVer string. The current version is `"0.1.0"`. Writers MUST emit the exact version they conform to. + +Agent Trail uses SemVer for the interoperability contract: + +| Change type | Version bump | Examples | +|---|---|---| +| Editorial-only change | no bump or patch | Typos, formatting, non-normative wording, examples that do not change validity or semantics. 
| +| Normative clarification with no behavior change | patch | Resolving ambiguity while preserving the same valid files and reader behavior. | +| Backward-compatible feature addition | minor | New optional field, new optional event type, new registered agent or tool kind that readers MAY ignore. | +| Breaking change | major | Required field changes, field removal, incompatible meaning changes, or changes that make existing valid trails invalid. | + +Before `1.0.0`, Agent Trail still uses this compatibility discipline conservatively: + +- `0.1.x` versions are the same feature family. Readers that support `0.1.0` SHOULD accept later `0.1.x` patch versions. +- `0.2.0` and later `0.x` versions MAY add backward-compatible features. Readers MAY accept them best-effort by skipping unknown event types and ignoring unknown payload fields. +- Breaking changes SHOULD be avoided before real adapter and reader experience proves they are necessary. If unavoidable, they MUST get a new minor while the spec is still pre-1.0, and the changelog MUST mark them explicitly as breaking. +- `1.0.0` is reserved for the first stable interoperability contract. + +Published spec and schema URLs are immutable. Local source files (`spec.md` and `schema.json`) represent the current working draft or next release candidate; released snapshots live at versioned URLs such as `/spec/v0.1.0` and `/schema/v0.1.0.json`. + +Writer schemas are exact per release: the v0.1.0 writer schema requires `schema_version: "0.1.0"`. Reader tolerance is runtime behavior, not permission for writers to emit a version other than the release they implement. + +| Source version | Reader behavior | +|---|---| +| Same `major.minor`, any patch | Fully supported if the reader supports that feature family. | +| Newer `0.x` minor | Best-effort: skip unknown event types, ignore unknown payload fields, preserve unknown records when round-tripping, and warn instead of aborting where possible. 
| +| New major version | Readers MAY reject unless they explicitly support that major version. | + +--- + diff --git a/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md b/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md new file mode 100644 index 0000000..077c5c6 --- /dev/null +++ b/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md @@ -0,0 +1,65 @@ +## 7. Identity, artifacts, and content addressing + +### 7.1 Session identity + +Every session has a local identifier `id` in the header. Writers emit uppercase ULIDs (26 Crockford base32 chars) or lowercase UUIDs (RFC 4122, hyphenated or unhyphenated). The schema enforces this canonical casing so cross-segment reconciliation can dedup events by exact string equality; older v0.1 fixtures whose ids were free-form strings or non-canonical casing have been migrated. + +### 7.2 Artifact classes + +Agent Trail distinguishes local fidelity from shared safety: + +- **Raw trail:** the local artifact emitted by an adapter. It SHOULD preserve source fidelity, including `source.raw` where useful and safe. +- **Redacted trail:** a separate artifact produced from a raw trail for sharing. It removes or normalizes sensitive content and has its own `content_hash`. +- **Shared trail:** a redacted trail transported by a share tool. + +Redacted artifacts MAY include `redacted_from.content_hash` in the header to record provenance from the raw artifact. They MUST NOT expose the raw artifact's local path or local session identifier. + +### 7.3 Content hash + +Finalized artifacts SHOULD populate `content_hash` in the header. This is the SHA-256 of the artifact's canonical bytes, not a hash of the physical on-disk serialization and not a logical-session identifier shared across raw and redacted variants. + +Canonical bytes are defined as: + +- All JSONL lines in order. +- LF line endings. +- No trailing whitespace. +- A trailing newline at EOF. 
+- Each JSON object serialized using RFC 8785 JSON Canonicalization Scheme (JCS). +- Writer-valid strings are well-formed per §5.2, so canonical bytes remain pure JCS; hash-time string repair is not part of this procedure. + +Because the hash depends on the file content that includes the hash field, we use a two-pass approach: + +1. Serialize the file with the header's `content_hash` field set to the literal `""`. If the field is absent, insert `content_hash:""` into the header before canonicalization; this gives stamped and unstamped forms one digest for the same logical content. +2. Canonicalize per the rules above. +3. Compute SHA-256 of the canonicalized bytes. +4. Replace only the header's `content_hash` field with the resulting hex digest. + +Verifying a file's hash uses the same procedure: replace the present hash with `""`, canonicalize, hash, compare. + +Writers that produce streaming or in-progress files MAY omit `content_hash` or leave it as `""`. Readers MAY verify the hash but MUST NOT abort on mismatch — only warn. Strict validators MUST report a present but incorrect finalized `content_hash` as an error. + +### 7.4 Two-tier identity + +When a trail envelope is present, the file carries two independent content hashes: + +- **Session-level `content_hash`** lives on the session header. It is SHA-256 over the canonical bytes covering only the session header and its events (the envelope record is excluded from the hashed input). In a multi-session file (§9.6) the slice for a session covers that session's header and the events between it and the next `type:"session"` record (or EOF). This makes each session's identity independent of whether it is wrapped in an envelope or sits beside sibling sessions — extracting one session from a multi-session file recomputes the same digest. +- **File-level `content_hash`** lives on the trail envelope. 
It is SHA-256 over the canonical bytes of the whole file, with the envelope's `content_hash` field replaced by `""` per the same two-pass procedure as §7.3. The session-level `content_hash`, if already populated, is treated as opaque file content. + +Writers that emit both hashes MUST stamp every session-level hash first, then compute and stamp the file-level hash. Readers verify them independently. Different consumers care about different scopes: extraction tools recompute the session hash; share/transport tools verify the file hash. + +#### 7.4.1 Hash tier for `fork_from` and `redacted_from` + +Lineage references mirror the tier of the linking context: + +- **Header-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **session-level** `content_hash` of the parent artifact (the forked-from session or the raw session that was redacted). This keeps session lineage independent of any envelope wrapper — extracting either side recomputes the same digest. +- **Envelope-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **file-level** `content_hash` of the parent file (envelope and all sessions included). Use these to link whole files rather than individual sessions. +- `segment.prev_content_hash` (§9.5) is always session-level, since segments chain at session grain. + +Writers MUST choose the matching tier; mixing tiers across a chain breaks verification. + +### 7.5 Event identifiers + +Event `id` values are globally unique. Writers emit uppercase ULIDs or lowercase UUIDs, matching §7.1 and the schema. Globally-unique canonical ids let a reconciler dedup events across segments by exact string equality. + +--- + diff --git a/spec/v0.1.0/08-the-trail-envelope.md b/spec/v0.1.0/08-the-trail-envelope.md new file mode 100644 index 0000000..74786fb --- /dev/null +++ b/spec/v0.1.0/08-the-trail-envelope.md @@ -0,0 +1,78 @@ +## 8. 
The trail envelope + +The trail envelope is an OPTIONAL record on line 1 that carries file-scope metadata distinct from per-session metadata. When absent, the session header occupies line 1 and behaviour matches earlier drafts. When present, the session header MUST follow on line 2 and at most one envelope is permitted per file. + +### 8.1 Schema + +```jsonc +{ + "type": "trail", + "schema_version": "0.1.0", + "id": "", + "name": "", // optional + "description": "", // optional + "ts": "", + "producer": "trail-cli/0.3.0", + "content_hash": "", // optional; populated at finalize + "tags": ["..."], // optional + "vcs": { "type": "git", "revision": "..." }, // optional; same shape as §9 vcs + "fork_from": { // optional; file-level fork link + "trail_id": "", // UUID or ULID id + "content_hash": "" // optional + }, + "redacted_from": { // optional; redacted artifacts only + "content_hash": "" + }, + "sessions": [ // optional manifest + { "id": "", "agent": "" } + ], + "meta": { // optional; see §8.3 + "x-entire/checkpoint_id": "ckpt-7" + } +} +``` + +### 8.2 Fields + +| Field | Required | Type | Notes | +|---|---|---|---| +| `type` | yes | literal `"trail"` | discriminator | +| `schema_version` | yes | string | currently `"0.1.0"` for the envelope shape — independent of session `schema_version` | +| `id` | yes | string | file-level identifier; distinct from any session `id` in the file | +| `name` | no | string | human label | +| `description` | no | string | free text | +| `ts` | yes | string | ISO-8601 timestamp when the file was assembled or exported | +| `producer` | yes | string | identifier of the writer (e.g., `trail-cli/0.3.0`) | +| `content_hash` | no | string | SHA-256 hex of the whole-file canonical bytes; see §7.4 | +| `tags` | no | string[] | free-form labels | +| `vcs` | no | object | working-tree context at file-assembly time | +| `fork_from` | no | object | reference to a parent file when forked; `trail_id` is a UUID or ULID id and `content_hash` is 
optional | +| `redacted_from` | no | object | provenance link from a redacted file to its raw counterpart | +| `sessions` | no | array | manifest of sessions in this file; validator warns on drift vs file content | +| `meta` | no | object | free-form vendor extensions (§8.3) | + +The envelope MUST NOT carry a `parent_id`. It is not part of the event graph. + +### 8.3 The `meta` extension convention + +The trail envelope (§8), the session header (§9), and every event entry (§10.1) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-/` extension grammar (§12.1) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record (§7.4): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. + +For verbatim source-event preservation, use `source.raw` ([§10.1](#10-1-base-shape), [§10.7](#10-7-source-envelope-referencing), [§15.1](#15-1-source-raw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. + +This draft defines one standard event-entry `meta` key: `redaction_count` (§16). Other standard keys MAY be promoted in later minor bumps based on observed usage. + +### 8.4 The `sessions` manifest + +When `sessions` is present, the validator warns if the manifest disagrees with the file: + +- The manifest MUST list one entry per session group (§9.6) in file order. Each entry's `id` and `agent` MUST match the corresponding session header's `id` and `agent.name`. Length mismatch and per-entry drift both emit `envelope_sessions_manifest_drift` warnings — never errors, so renderers can still display the file. 
+- The manifest is an index/rendering hint only. It MUST NOT carry graph facts such as child-session role or follows edges; session headers are authoritative for lineage. + +### 8.5 File identity defaults when envelope is absent + +When no envelope is written, file-level identity defaults derive from the session: + +- File `id` = session `id`. +- File `name` is unset. +- The file-level content hash is unavailable; only the session content hash is meaningful. + diff --git a/spec/v0.1.0/09-the-session-header.md b/spec/v0.1.0/09-the-session-header.md new file mode 100644 index 0000000..f22988f --- /dev/null +++ b/spec/v0.1.0/09-the-session-header.md @@ -0,0 +1,248 @@ +## 9. The session header + +### 9.1 Schema + +```jsonc +{ + "type": "session", + "schema_version": "0.1.0", + "id": "", + "session_uid": "", // optional; stable across segments + "segment": { "seq": 1 }, // optional; multi-segment marker + "name": "", // optional + "description": "", // optional + "tags": ["feature", "debug"], // optional + "content_hash": "", // optional; populated at finalize + "ts": "", + "stream": { // optional; live-capture marker (§9.4) + "state": "open" | "closed", + "started_at": "" // optional + }, + "agent": { + "name": "", + "version": "", // optional + "model_default": "" // optional + }, + "cwd": "", // optional + "vcs": { // optional + "type": "git" | "jj" | "hg" | "svn" | "x-/", + "revision": "" | null, + "branch": "", // required when revision is null + "remote_url": "" // optional; see §9.2 + }, + "fork_from": { // optional + "session_id": "", + "content_hash": "", // optional + "entry_id": "" // optional + }, + "redacted_from": { // optional; redacted artifacts only + "content_hash": "" + }, + "parse_fidelity": { // optional; at-a-glance parse summary + "quarantined_count": 0, + "termination_reason": "truncated" // optional; when session_terminated exists + }, + "source": { // optional + "agent": "", + "path": "", + "format_version": "" + }, + "meta": { // optional; 
vendor extensions (§8.3 / §12) + "x-example/custom_field": "..." + } +} +``` + +### 9.2 Fields + +| Field | Required | Type | Notes | +|---|---|---|---| +| `type` | yes | literal `"session"` | discriminator | +| `schema_version` | yes | string | currently `"0.1.0"` | +| `id` | yes | string | UUID or ULID per §7.1/§19 | +| `session_uid` | no | string | stable source-session identifier shared by all segments of one logical source session | +| `segment` | no | object | multi-segment marker; absent is equivalent to a single segment with `seq: 1` | +| `segment.seq` | yes (if `segment` present) | integer | 1-based segment sequence number | +| `segment.prev_content_hash` | yes when `segment.seq >= 2` | string \| null | previous segment's session-level `content_hash`; `null` marks an unverifiable chain break | +| `name` | no | string | human session label | +| `description` | no | string | free-text session description | +| `tags` | no | string[] | free-form session labels | +| `content_hash` | no | string | SHA-256 hex of this artifact; see §7.3 | +| `ts` | yes | string | ISO-8601 session start time; writers emit UTC `Z` with millisecond precision | +| `stream` | no | object | live-capture marker; see §9.4 | +| `agent.name` | yes | string | from the canonical registry (§14) | +| `agent.version` | no | string | source agent's version | +| `agent.model_default` | no | string | default model for the session | +| `cwd` | no | string | working directory; MAY be normalized for privacy | +| `vcs` | no | object | version control context at session time | +| `vcs.type` | yes (if `vcs` present) | enum or extension | `git`, `jj`, `hg`, `svn`, or `x-/` for non-reserved systems | +| `vcs.revision` | yes (if `vcs` present) | string \| null | commit SHA, change-id, revision identifier, or `null` for unborn HEAD repositories when `vcs.branch` is present | +| `vcs.remote_url` | no | string | canonical remote URL identifying the project across users, machines, and clones; see normalization 
rules below | +| `vcs.branch` | no (yes when `vcs.revision` is `null`) | string | active branch / bookmark / topic name the session is running on (e.g., `feature/x`). Detached-HEAD sessions MAY omit it. | +| `vcs.head_commit` | no (`vcs.revision` non-null only) | string | commit hash at session start (lowercase hex, 7–64 chars). For git with a committed HEAD, typically equals `vcs.revision`; the explicit field exists as a vcs-neutral alias. | +| `vcs.worktree` | no | object | worktree context when the session ran inside a working-tree clone or worktree (git worktree, jj workspace, etc.) | +| `vcs.worktree.name` | yes (if `vcs.worktree` present) | string | worktree short name | +| `vcs.worktree.path` | yes (if `vcs.worktree` present) | string | absolute path to the worktree | +| `vcs.worktree.original_cwd` | no | string | working directory of the parent repository at worktree-creation time | +| `vcs.worktree.original_branch` | no | string | branch the parent repository was on when the worktree was created | +| `vcs.worktree.original_head_commit` | no | string | commit the worktree was forked from (lowercase hex, 7–64 chars) | +| `fork_from` | no | object | reference to a parent session if forked | +| `redacted_from` | no | object | provenance link from a redacted artifact to the raw artifact hash | +| `parse_fidelity` | no | object | at-a-glance parse fidelity summary; absence means the writer did not provide a summary | +| `parse_fidelity.quarantined_count` | yes (if `parse_fidelity` present) | integer | number of `system_event` entries whose `payload.kind` is `x-*/unknown_record` in this session group | +| `parse_fidelity.termination_reason` | no | enum or extension | final `session_terminated.payload.reason`, when a `session_terminated` event is present | +| `source` | no | object | source-file metadata block (agent, path, format_version) | +| `meta` | no | object | vendor extensions; recommended keys use the `x-/` extension grammar (§8.3 / §12) | + +When `parse_fidelity` is present, validators MUST
compare it against the session group's entries. `quarantined_count` MUST equal the count of quarantined unknown source records emitted as `system_event` entries with `payload.kind` matching `x-*/unknown_record`; see the §10.3 quarantine convention. `termination_reason`, when a `session_terminated` entry exists, MUST match the final `session_terminated.payload.reason`; if no `session_terminated` entry exists, writers MUST omit `termination_reason`. This field is denormalized for cheap listing/filtering only; the event stream remains authoritative. Quarantined records are suspect parse fidelity, not necessarily lossy, because the raw source record is preserved. + +`vcs.remote_url` provides a canonical project identifier that survives across users, machines, and clones — useful for cross-machine aggregation, profile filtering, and project-scoped analysis. Adapters that populate it: + +- MUST normalize SSH and HTTPS variants of the same repository to a single canonical form. The reference normalization maps `git@host:org/repo.git`, `ssh://git@host/org/repo.git`, and `https://host/org/repo.git` to `https://host/org/repo` (strip trailing `.git`, strip userinfo, rewrite SSH to HTTPS). +- MUST strip embedded credentials (`https://user:pass@host/...` → `https://host/...`) before emission. +- SHOULD populate when the source agent records repository location or when `cwd` is detectably a versioned working directory. When the source declares multiple remotes (e.g., git `origin` plus `upstream`), prefer `origin`. +- MUST omit the field when no remote is configured — do not fabricate one. +- For submodules and worktrees, emit the remote of the outermost working tree's toplevel; `cwd` and `vcs.revision` disambiguate within. + +Fresh repositories with an unborn HEAD MAY emit `vcs.revision:null` when a branch is known. A `vcs` block with `vcs.revision:null` MUST include `vcs.branch`, MUST omit `vcs.head_commit`, and writers MUST NOT emit an information-free VCS block. 
When `vcs.revision` is non-null for git, `vcs.head_commit` typically equals `vcs.revision`. + +Privacy: `remote_url` reveals repository identity and MAY identify a private repo. Redacted artifacts MAY strip or normalize it (§16). + +When a trail file carries both header-level `vcs` (session-time context) and envelope-level `vcs` (file-assembly-time context, §8), they represent different observation points. File-assembly tools SHOULD preserve both when present. For multi-segment reconciliation rules, see §9.5. + +### 9.3 Example + +```json +{"type":"session","schema_version":"0.1.0","id":"01HM7K5R9X2QZJ8VD6W4P3T1F0","content_hash":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","ts":"2026-05-17T14:02:00.000Z","agent":{"name":"claude-code","version":"2.1.42","model_default":"claude-sonnet-4-5"},"cwd":"","vcs":{"type":"git","revision":"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"}} +``` + +### 9.4 Streaming and live capture + +JSONL is append-friendly by design: trail files can be written event by event as a session unfolds, and readers can `tail -f` them. v0.1.x adds an explicit marker so writers and readers can agree on live-capture state without overloading other header fields. + +The optional header `stream` object: + +| Field | Required | Type | Notes | +|---|---|---|---| +| `stream.state` | yes (if `stream` present) | enum | `open` while the writer is actively appending; `closed` once finalized | +| `stream.started_at` | no | string | ISO-8601 timestamp when the stream began; matches the §9 `ts` semantics | + +Lifecycle: + +1. **Live phase.** Writer emits the header with `stream: { state: "open" }`. `content_hash` is omitted or set to `""`. Events are appended as they happen. +2. **Finalize.** Writer rewrites the header with `stream` either removed or set to `state: "closed"`, then computes `content_hash` per §7.3. Appending stops. +3. **Clean end.** Writer MAY append a `session_end` event (§10.3) to mark a normal conclusion before finalize. 
Abnormal ends still use `session_terminated`. + +Tail readers that observe `stream.state == "open"` SHOULD assume more events can still arrive. Readers observing `stream` absent or `state == "closed"` SHOULD treat the file as a finalized artifact and verify `content_hash` when present. + +`stream` is absent in trail files produced by stream-unaware writers; readers MUST treat that case as equivalent to a finalized non-streaming artifact (existing v0.1.0 behavior). + +A live `system_event` heartbeat convention is described in §10.3. + +--- + +### 9.5 Session segments (multi-segment sessions) + +A single logical source session MAY be split across multiple trail-file artifacts — "segments" — when a long-running session is captured in chunks (e.g., a daemon writing periodically) or recovered after a writer is killed mid-session. The header carries three fields that let a reconciler group, order, and verify segment chains. All three are optional in v0.1 for single-segment trails, which MAY omit them; `session_uid` and `segment.prev_content_hash` become required when `segment.seq >= 2`, as described below. + +- `session_uid` — globally-unique source-session identifier. Stable across **all** segments of one source session. Reconcilers group segments by exact string equality on `session_uid`. Format: uppercase ULID (recommended, lexicographic time-prefix) or lowercase UUID (any RFC 4122 version, hyphenated or unhyphenated). Writers SHOULD emit `session_uid` even for single-segment trails, so a later segment can be reconciled against the first without rewriting the head. The schema enforces `session_uid` as REQUIRED when `segment.seq >= 2` (multi-segment continuation MUST be linkable). + +- `segment.seq` — 1-based integer identifying which segment of the session this file is. Single-segment trails MAY omit `segment` entirely, which is equivalent to `{seq: 1}`. + +- `segment.prev_content_hash` — the **session-level** `content_hash` (§7.3) of the previous segment's finalized bytes. Required when `seq >= 2`. Forms a verifiable chain (HLS / Postgres-WAL pattern).
If the previous segment was lost and the chain cannot be verified, writers MAY emit `null` and readers MUST emit a `segment_chain_break` warning. + +#### Segment reconciliation + +Segment reconciliation is implementation behavior. A conforming writer emits the +fields above; a conforming reader can validate each segment independently. Tools +that merge segments SHOULD preserve event order by `segment.seq`, verify +`segment.prev_content_hash` where present, deduplicate exact event `id` matches, +and emit a new finalized trail with freshly computed hashes. + +Implementation merge policy is documented in `docs/implementation-semantics.md`. + +Whole-file graph rules (§18) apply **within** a segment, not across. Cross-segment references are out of scope for v0.1 (event `parent_id` chains do not span segments). + +#### Writer guidance + +- Writers SHOULD generate `session_uid` once per source session and reuse it for every segment. +- Writers SHOULD finalize each segment normally before starting a new segment. +- To produce `segment.prev_content_hash` for segment N, finalize segment N-1 per §7.3 and copy its session-level `content_hash` verbatim into segment N's header. +- Recovered writers MAY emit `segment.prev_content_hash: null` when the previous segment is lost. + +#### Composition with multi-session files + +`session_uid` and `segment.*` sit at the **session-header** grain, not the file grain. A multi-session trail file (§9.6) MAY contain N session headers, each independently multi-segmentable. The trail envelope (§8) is unaffected. + +Within one file, two groups with the same `session_uid` SHOULD NOT claim the same normalized `segment.seq` value; a missing `segment` is equivalent to `seq: 1`. Duplicate pairs emit `duplicate_segment_seq` warnings. Groups for the same `session_uid` SHOULD appear in non-descending `segment.seq` order in file order; a descending sequence emits `out_of_order_segment_seq`. 
+ +--- + +### 9.6 Multi-session trail files + +A trail file MAY contain one or more `(session header, events*)` groups concatenated. Boundaries are positional: a group extends from a `type:"session"` record up to (but excluding) the next `type:"session"` record, or to EOF. Single-session trails are the N=1 case and are unchanged. + +A multi-session trail is a session bundle: a forest of session groups. Each group MAY be linear or tree-native. Branches represented inside one source session use `parent_id` within that group; separate spawned or forked transcripts use separate groups linked by `header.fork_from`. + +#### 9.6.1 File grammar + +```text +trail-file := envelope? group+ +envelope := <type:"trail" record> on line 1 +group := <type:"session" record> events* +events := zero or more event records (§10) +``` + +The trail envelope (§8) remains optional even when N ≥ 2. When present with N ≥ 2 groups, the file-level `content_hash` on the envelope covers all N groups' already-stamped session hashes, applying the §7.4 two-pass procedure unchanged (every session hash stamped first; envelope hash stamped over the finalized record set). When absent, file-level identity defaults from §8.5 apply (no file-level `content_hash` is meaningful; only per-session hashes). + +#### 9.6.2 Group boundaries and reader-tolerant recovery + +Readers detect group boundaries by `type:"session"` alone. A record with `type:"session"` always opens a new group, regardless of `schema_version` value: this lets reader-tolerant parsers (§6) recover from a malformed mid-file header and continue parsing subsequent groups instead of treating the rest of the file as orphan events. The strict validator still errors on individual records that fail schema validation; recovery affects parsing structure, not per-record validity. + +Entries that appear before the first `type:"session"` record (and after any envelope) are not part of any group and are always invalid: `events_before_first_session_header`.
+ +#### 9.6.3 Per-group validation + +Whole-file graph rules (§18) apply **within** a group, not across: + +- `parent_id` resolution is scoped to the enclosing group. A `parent_id` that references an `id` in another group is treated as `unknown_parent_id` (cross-group references go through `fork_from`, not `parent_id`). +- `tool_call` / `tool_result` pairing (§10.5) runs per group. An unmatched `tool_call` in group A is not satisfied by a `tool_result` in group B. +- `session_end.payload.final_message_id`, `source.raw.envelope_ref`, `payload.usage` checks, and the `stream` consistency rule each run per group. + +Event `id` uniqueness (§7.5) remains **file-scoped**: every `id` (across every group's header and events) MUST be unique within the file. + +#### 9.6.4 Per-group `content_hash` + +Each group's session-level `content_hash` is computed over the canonical bytes of that group's slice only (header + its events, envelope and sibling groups excluded). This is the same procedure as §7.3 / §7.4 applied to the slice. As a consequence, extracting one session from a multi-session file (drop the envelope, drop sibling groups, write only that group's canonical bytes) reproduces the same digest as the in-file value. + +When a reader extracts a single session from a multi-session file outside writer-strict validation and the recomputed `content_hash` does not match the value stored in the in-file header, it SHOULD emit a warning rather than an error. Strict validation of a finalized trail file still treats an in-place finalized `content_hash` mismatch as an error (§18.4). + +#### 9.6.5 Cross-group references + +The only sanctioned cross-group reference primitive is the session header's `fork_from`: + +- `fork_from.session_id` MAY reference a sibling session within the same file or an external session. 
+- When `fork_from.session_id` matches a sibling's `id` in the same file and `fork_from.content_hash` is also present, the hash MUST match that sibling's session-level `content_hash`. Mismatch is a `cross_group_fork_from_hash_mismatch` warning. +- External references (`session_id` not matched in-file) are not validated here; if the referenced session's bytes are available, callers MAY verify the hash through their own resolver. + +`parent_id` is event-graph topology only and MUST NOT span groups. + +#### 9.6.6 Order, divergence, and per-session metadata + +- Sessions in a file SHOULD appear in chronological order by header `ts`. Out-of-order placement emits `out_of_order_session_headers` (warning, not error). +- Per-session `cwd` and `vcs` MAY diverge across sessions in the same file. Divergent `vcs.revision` across groups emits `vcs_revision_divergence` (warning, not error) — useful for spotting accidental cross-checkout bundling. +- `schema_version` is carried on every session header. Sessions in the same file are independently versioned (reader-tolerant patch acceptance per §6 applies per-header). +- Empty groups (a header with zero events) are legal — they represent "session started, nothing happened." + +#### 9.6.7 Redaction of multi-session files + +Redacting a multi-session trail produces a multi-session redacted trail with the same group count in the same order, redacted in place. The redactor resets `content_hash` to `""` on every session header (and on the envelope when present) before share/transport tooling re-stamps via the two-pass §7.4 procedure. + +When redaction changes bytes, lineage hashes that point to artifacts in the same redacted file MUST be rewritten to the target's redacted content hash, using the §7.4.1 hash tier. Header-level `fork_from.content_hash` is rewritten when `fork_from.session_id` names an in-file sibling. `segment.prev_content_hash` is rewritten when the previous `segment.seq` for the same `session_uid` is in the file.
When the lineage target is not in the redacted file, redactors MUST drop `fork_from.content_hash` while keeping id references, and MUST set `segment.prev_content_hash` to `null` for an unverifiable previous segment. `redacted_from.content_hash` remains raw-artifact provenance: header-level `redacted_from.content_hash` links the redacted session to its raw counterpart; envelope-level `redacted_from.content_hash` links the redacted file to its raw counterpart. + +#### 9.6.8 No hard cap + +This spec does not impose a maximum on the number of session groups per file. Consumers MAY apply their own limits. + +--- + diff --git a/spec/v0.1.0/10-events.md b/spec/v0.1.0/10-events.md new file mode 100644 index 0000000..fbba318 --- /dev/null +++ b/spec/v0.1.0/10-events.md @@ -0,0 +1,916 @@ +## 10. Events + +### 10.1 Base shape + +Every event entry has this base shape: + +```jsonc +{ + "type": "", + "id": "", + "parent_id": "", // optional; tree topology only + "ts": "", + "payload": { /* type-specific */ }, + "semantic": { // optional; see §10.4 + "group_id": "", + "call_id": "", + "tool_kind": "" + }, + "source": { // optional + "agent": "", + "original_type": "", + "schema_version": "", + "raw": { /* opaque source object; see §10.6 and §15 */ }, + "synthesized": false + }, + "meta": { // optional; vendor extensions (§8.3 / §12) + "x-example/field": "..." 
+ } +} +``` + +| Field | Required | Type | Notes | +|---|---|---|---| +| `type` | yes | string | event type; see §10.2-10.3 | +| `id` | yes | string | globally unique; ULID or UUID per §19 | +| `parent_id` | no | string | references another `id` for tree topology; absent = linear file order | +| `ts` | yes | string | ISO-8601 timestamp | +| `payload` | yes | object | type-specific data | +| `semantic` | no | object | linking metadata for fallback pairing | +| `source` | no | object | adapter-provided source metadata | +| `meta` | no | object | vendor extensions (§8.3 / §12) | + +### 10.2 Mandatory event types + +Every adapter MUST be able to emit these when the source data contains the corresponding semantics. Readers MUST support them. + +#### `user_message` + +A user-role message. By default this is text typed by the human user; `payload.origin` marks runtime-injected or mixed user-role content. + +```jsonc +{ + "type": "user_message", + "id": "...", + "ts": "...", + "payload": { + "text": "How do I parse a CSV in Python?", + "attachments": [ + { "kind": "image", "media_type": "image/png", "uri": "" } + ] + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `text` | yes | string | the user's input | +| `origin` | no | enum or extension | `user`, `injected`, `mixed`, or `x-/`. Absent means `user`. | +| `attachments` | no | array | images or files by reference | + +`origin:"user"` means the text was typed by the human. `origin:"injected"` means runtime-injected content (system reminders, attached-file blobs, hook output) carried as a user-role message. `origin:"mixed"` means both human-authored and injected content appear in one body. Structured part-level decomposition is deferred. + +Attachment entries require `kind` plus at least one of `uri` or `name`. `uri` values in v0.1.0 are references, not inline binary payloads. 
Writers MAY use `https:`, local `file:` references for private/local trails, or content-addressed references such as `sha256:`. Plain `http:` is deliberately excluded to avoid unauthenticated network fetches in shared trails. Inline `data:` payloads are deferred. + +#### `agent_message` + +A text response from the agent. + +```jsonc +{ + "type": "agent_message", + "id": "...", + "ts": "...", + "payload": { + "text": "You can use pandas:", + "model": "claude-sonnet-4-5", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 1234, + "output_tokens": 567, + "cache_read_tokens": 100, + "cache_creation_tokens": 50, + "reasoning_tokens": 200, + "context_input_tokens": 1384, + "context_window_tokens": 200000 + } + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `text` | yes | string | the agent's output | +| `model` | no | string | model that produced this message | +| `stop_reason` | no | string | source-specific stop reason | +| `usage` | no | object | token usage for the source envelope; see below | +| `attachments` | no | array | agent-side images or files by reference (e.g. a generated chart or vision output); same object shape as `user_message.payload.attachments` | + +`stop_reason` is source-specific and remains an opaque string. Writers SHOULD use this RECOMMENDED vocabulary when it matches the source semantics: `end_turn`, `max_tokens`, `tool_use`, `refusal`, `error`, `aborted`. Source-specific values remain legal; readers MUST treat unknown values as opaque. + +`attachments[]` entries share one object shape across `user_message`, `agent_message`, and `tool_result` (`kind` ∈ `image`/`file`/`other`, optional `media_type`, and at least one of `uri` or `name`). The same v0.1.0 `uri` reference policy applies: `https:`, local `file:`, or content-addressed `sha256:`; inline `data:` payloads are deferred. + +##### `agent_message.payload.usage` + +Captures token accounting emitted by the source agent for a model-response envelope. 
Optional. When the source provides no token data, writers MUST omit `usage` — fabricating zeros is not allowed. + +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `input_tokens` | conditional | integer ≥0 | delta for this envelope | +| `output_tokens` | conditional | integer ≥0 | delta for this envelope | +| `input_tokens_cumulative` | conditional | integer ≥0 | running total through this envelope | +| `output_tokens_cumulative` | conditional | integer ≥0 | running total through this envelope | +| `total_tokens` | conditional | integer ≥0 | source-reported inclusive total for this envelope | +| `total_tokens_cumulative` | conditional | integer ≥0 | source-reported inclusive running total through this envelope | +| `cache_read_tokens` | no | integer ≥0 | input tokens served from prompt cache; billed separately from `input_tokens` | +| `cache_creation_tokens` | no | integer ≥0 | input tokens written to prompt cache; billed separately from `input_tokens` | +| `reasoning_tokens` | no | integer ≥0 | output reasoning portion (Anthropic thinking, OpenAI reasoning) | +| `context_input_tokens` | no | integer ≥0 | prompt/context tokens submitted to the model for this request; cache-inclusive when the source exposes enough detail | +| `context_window_tokens` | no | integer ≥1 | model context-window size for this request, only when the source exposes it | + +When `usage` is present, writers MUST emit either input/output coverage or total-token coverage. Input/output coverage means at least one of (`input_tokens`, `input_tokens_cumulative`) AND at least one of (`output_tokens`, `output_tokens_cumulative`). Total-token coverage means at least one of (`total_tokens`, `total_tokens_cumulative`). These shapes are supported because sources differ. Readers SHOULD prefer delta fields and fall back to subtracting consecutive cumulative values. 
+ +Total token semantics: `total_tokens` and `total_tokens_cumulative` are source-reported inclusive totals for exact total-token analytics. Writers MUST NOT fabricate total-token fields by summing buckets. Readers that need exact total counts SHOULD prefer `total_tokens`, fall back to deriving a delta from consecutive `total_tokens_cumulative` values, and only then fall back to summing known bucket fields. + +Cache token semantics: `input_tokens` counts non-cached input only; `cache_read_tokens` and `cache_creation_tokens` are independent billing categories. Total billed input = `input_tokens + cache_read_tokens + cache_creation_tokens`. They are additive, not a subset of `input_tokens`. + +Context token semantics are for context-pressure analytics, not billing. Writers MAY emit `context_input_tokens` when the source exposes prompt/context tokens for the request, including cache-read and cache-creation tokens when those count against the context window. Writers MAY emit `context_window_tokens` when the source reports the model's positive context-window size for the request. Writers MUST NOT estimate either field from raw text or tokenizer assumptions, and MUST NOT fabricate a `context_window_tokens` value from model name alone. Consumers derive context pressure as `context_input_tokens / context_window_tokens` when both fields are present; otherwise the ratio is unavailable. + +Model identification for downstream cost analysis uses `payload.model` first, falls back to `header.agent.model_default`, and is otherwise unknown. The `usage` object does not carry its own model field. + +When a single source envelope fans out to multiple entries (text blocks, tool calls, thinking blocks sharing one API response), `usage` accounts for the whole envelope. Writers MUST attach it to the first derived entry whose payload supports `usage`, skip non-usage-capable derived entries, and MUST NOT repeat it on later derived entries. 
In v0.1.0, `usage` is valid on `agent_message`, `agent_thinking`, and `tool_call` payloads; if an envelope emits none of those entries, canonical `usage` is omitted. + +Monetary cost is intentionally not a canonical trail field or event. Analyzers compute cost from token usage, model identification, and their own pricing tables, and carry pricing provenance such as currency, pricing source, and effective date in analyzer output. If a source exposes a billing estimate, writers MAY preserve it as opaque source data under `x-/` keys on the entry's `meta` field (§8.3). Latency and wall-clock telemetry are deferred to a future minor version; sources rarely expose them consistently. + +#### `task_plan_update` + +The agent emitted a checklist or plan snapshot. This is the canonical representation for structured planning state. Writers MUST NOT represent these snapshots as `tool_call.payload.tool:"task_plan"`. + +```jsonc +{ + "type": "task_plan_update", + "id": "...", + "ts": "...", + "payload": { + "explanation": "optional note", + "items": [ + { + "id": "item-1", + "content": "Write failing test", + "status": "in_progress", + "active_form": "Writing failing test" + } + ], + "deltas": [ + { + "kind": "status_changed", + "item_id": "item-1", + "from_status": "pending", + "to_status": "in_progress" + } + ] + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `explanation` | no | string | source-provided explanation for this plan update, when present | +| `items` | yes | array | full current snapshot of plan items | +| `deltas` | no | array | best-effort differences from the previous `task_plan_update` in the same source session | + +Each `items[]` entry has: + +| Item field | Required | Type | Notes | +|---|---|---|---| +| `id` | yes | string | upstream item id if present; otherwise a deterministic adapter-synthesized id | +| `content` | yes | string | human-readable task text | +| `status` | yes | string | one of `pending`, `in_progress`, 
`completed`, `cancelled`, `blocked` | +| `active_form` | no | string | source-provided active/progressive wording | + +When the upstream source does not provide item ids, or provides empty or whitespace-only strings, adapters SHOULD synthesize deterministic ids. Empty and whitespace-only item ids are treated as missing. The synthesized id is derived per source session from normalized content plus that content's duplicate occurrence position in the snapshot. With synthesized ids, status deltas are reliable when normalized content remains stable; content changes are best-effort because the source did not provide stable identity. + +`deltas[]` entries are optional. When present, each has `kind` and `item_id` plus fields determined by `kind`: + +| Delta kind | Required fields | +|---|---| +| `added` | `to_content`, `to_status` | +| `removed` | `from_content`, `from_status` | +| `status_changed` | `from_status`, `to_status` | +| `content_changed` | `from_content`, `to_content` | + +`added` MAY include `to_active_form`; `removed` MAY include `from_active_form`. Sources that only report plan-completed notifications with no item status snapshot SHOULD preserve them as `system_event` records instead of inventing checklist state. + +#### `tool_call` + +The agent invoked a tool. Tool kinds use the taxonomy in [§11](#11-canonical-tool-taxonomy). 
+ +```jsonc +{ + "type": "tool_call", + "id": "...", + "ts": "...", + "payload": { + "tool": "file_read", + "args": { "path": "package.json" }, + "truncated": false, + "args_size": 23, + "overflow_ref": null + }, + "semantic": { + "call_id": "toolu_01abc" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `tool` | yes | string | canonical tool kind ([§11](#11-canonical-tool-taxonomy)) | +| `args` | yes | object | tool-specific args | +| `truncated` | no | boolean | true when `args` is a bounded excerpt rather than complete tool arguments | +| `args_size` | conditional | integer | original serialized argument byte size; REQUIRED when `truncated: true` | +| `overflow_ref` | no | string or null | optional content-addressed reference to full argument bytes when `args` is truncated; writer-strict values use `sha256:<64 lowercase hex>` | +| `usage` | no | object | token usage when this is the first entry derived from a source envelope; see [`payload.usage`](#agent_messagepayloadusage) | + +#### `tool_result` + +The result of a `tool_call`. References the call via `for_id`. Writers omit `for_id` when the source does not provide a reliable match. Readers MAY tolerate legacy/null values; when `for_id` is null or missing, see [§10.5](#10-5-tool-call-terminal-pairing). 
+ +```jsonc +{ + "type": "tool_result", + "id": "...", + "ts": "...", + "payload": { + "for_id": "<tool_call id>", + "ok": true, + "output": "<textual output>", + "truncated": false, + "output_size": 12345, + "overflow_ref": null, + "error": null + }, + "semantic": { + "call_id": "toolu_01abc", + "tool_kind": "file_read" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `for_id` | no | string | id of the matching `tool_call`; omit when unknown | +| `ok` | yes | boolean | did the call succeed | +| `output` | no | string | textual output | +| `truncated` | no | boolean | true if `output` was truncated | +| `output_size` | conditional | integer ≥0 | UTF-8 byte length of the original output before truncation; REQUIRED when `truncated` is true | +| `overflow_ref` | no | string or null | reference to full output | +| `error` | no | string or null | error message if `ok` is false | +| `attachments` | no | array | non-MCP image / multi-part tool output by reference (e.g. a screenshot or plot tool returning an image that `output` flattens); same object shape as `user_message.payload.attachments` | +| `meta` | no | object | structured per-toolkind outputs; see below | + +`attachments[]` on `tool_result` carries image or binary results that `output` (a display string) cannot represent — typically from `tool: "other"` kinds such as a screenshot or plotting tool. MCP tools instead preserve their native block structure in `meta.mcp_call.content_blocks` (below); `attachments[]` is the generic escape hatch for everything else. + +#### `tool_call_aborted` + +The agent attempted or began a tool invocation, but the invocation was cancelled, blocked, timed out, denied, or otherwise stopped without a normal `tool_result`. Use this instead of inventing a failed `tool_result` when the source evidence says no result was produced. 
+ +```jsonc +{ + "type": "tool_call_aborted", + "id": "...", + "ts": "...", + "payload": { + "scope": "tool_call", + "reason": "hook_blocked", + "for_id": "<tool_call id>", + "blocked_by": "PreToolUse:Bash" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `scope` | yes | enum or extension | `tool_call` when a specific call is known; `turn` when the source only proves a turn-level abort. Extensions MUST use `x-<vendor>/<name>`. | +| `reason` | yes | enum or extension | One of `user_interrupt`, `hook_blocked`, `timeout`, `permission_denied`, `runtime_error`, or `x-<vendor>/<name>`. | +| `for_id` | when `scope:"tool_call"` | string | id of the matching `tool_call`; omitted for `scope:"turn"` and other non-call-specific scopes. | +| `blocked_by` | no | string | hook, policy, permission system, or runtime component that stopped the call. | + +Bare unknown `scope` and `reason` values are writer-strict errors. Readers are tolerant of unknown `x-*` extension values. + +##### `tool_result.payload.meta` — structured outputs + +`output` is a display string. When the source tool returned structured data, writers MAY also +populate `meta`, an object keyed by the originating `tool_call.tool` (the canonical tool kind, [§11](#11-canonical-tool-taxonomy)). +Consumers that understand a kind read `meta.<tool_kind>`; everyone else falls back to `output`. `meta` +is optional and additive — existing writers that emit only `output` stay valid. + +Registered keys are writer-strict (unknown fields inside a registered shape are rejected). Vendors +extend a registered tool kind by adding sibling keys to its object that match the `x-<vendor>/<name>` +pattern (e.g. `meta.mcp_call.x-acme/cache_hit`). Unregistered and future tool kinds are accepted as +opaque objects, so new kinds can be standardized in a later minor version without a schema migration. + +The v0.1 registry covers three tool kinds: + +`meta.mcp_call` — preserves MCP content-block structure that `output` flattens. 
+ +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `content_blocks` | no | array | MCP content blocks; each block has `type` (`text`/`image`/`resource`) plus `text`/`data`/`mime_type`/`uri` as applicable | +| `is_error` | no | boolean | MCP-protocol error flag. Distinct from envelope `payload.ok`: `is_error` is the tool's own success signal, `ok` is the trail-level call outcome | + +`meta.file_read` — read range and truncation metadata. + +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `range` | no | array | `[start_line, end_line]` requested | +| `total_lines` | no | integer ≥0 | total lines in the file | +| `encoding` | no | string | detected/used encoding | +| `truncated_at_line` | no | integer ≥0 \| null | line where output was cut, or null if untruncated | + +`meta.shell_command` — separated streams and exit status. + +| Sub-field | Required | Type | Notes | +|---|---|---|---| +| `stdout` | no | string | standard output stream | +| `stderr` | no | string | standard error stream | +| `exit_code` | no | integer \| null | process exit code; null when terminated by signal | +| `signal` | no | string \| null | terminating signal (e.g. `SIGKILL`), or null | +| `duration_ms` | no | integer ≥0 | wall-clock duration | + +`meta.shell_command.exit_code` is the canonical home for shell exit status; there is no generic +top-level `exit_code` on `tool_result`, because the concept does not apply to kinds like `mcp_call` +or `web_fetch`. + +Privacy: `meta` carries the same raw content as `output` (shell stdout, MCP block text), so the +redaction pipeline scrubs `meta` string leaves alongside `output` (§16). + +#### `user_query` + +The agent asks the user one or more structured questions and yields control until the user answers or dismisses the prompt. This is not a `tool_call`: no external tool executes. 
+ +```jsonc +{ + "type": "user_query", + "id": "...", + "ts": "...", + "payload": { + "questions": [ + { + "id": "ship", + "header": "Ship", + "question": "Ship it?", + "multi_select": false, + "is_secret": false, + "allow_other": true, + "options": [ + { "label": "yes", "description": "Ship now" }, + { "label": "no" } + ] + } + ] + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `questions` | yes | array | One or more structured questions. | + +| Question field | Required | Type | Notes | +|---|---|---|---| +| `id` | yes | string | Stable within this `user_query`; responses key answers by this value. | +| `question` | yes | string | Full prompt shown to the user. | +| `header` | no | string | Short label/chip. | +| `multi_select` | no | boolean | True when the user MAY select multiple options. Omitted means false. | +| `is_secret` | no | boolean | True when answers SHOULD be hidden and stripped by redaction. Omitted means false. | +| `allow_other` | no | boolean | True when free-form input beyond listed options is allowed. Omitted means false. | +| `options` | no | array | Option objects with REQUIRED `label`, optional stable `id`, and optional `description`. | + +#### `user_query_response` + +The user's response to a `user_query`. `payload.for_id` links to the query entry id. A dismissed prompt emits a response with an empty `answers` object. + +```jsonc +{ + "type": "user_query_response", + "id": "...", + "ts": "...", + "payload": { + "for_id": "", + "answers": { + "ship": { + "selected": ["yes"], + "other": "with changelog" + } + } + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `for_id` | yes | string | Entry id of the `user_query`. | +| `answers` | yes | object | Keys are `questions[].id`. May be empty for dismissed/unanswered prompts. 
| + +| Answer field | Required | Type | Notes | +|---|---|---|---| +| `selected` | yes | string[] | Selected option ids when that question's options carry ids, otherwise selected option labels. Use one value for single-select answers. | +| `other` | no | string | Free-form answer when `allow_other` was used. | + +Privacy: share-time redaction MUST strip answers for questions whose `is_secret` is true, regardless of pattern matching. + +#### `session_summary` + +A summary entry. Used for whole-session summaries. Branch and compaction summaries use `branch_summary` and `context_compact`. + +```jsonc +{ + "type": "session_summary", + "id": "...", + "ts": "...", + "payload": { + "scope": "session", + "text": "", + "model": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `scope` | yes | enum | `session` | +| `text` | yes | string | the summary | +| `model` | no | string | model that produced the summary | + +Multiple `session_summary` entries are allowed. The last one in file order is authoritative; position is unconstrained. + +### 10.3 Optional event types + +Part of the canonical vocabulary. Adapters need not emit them. Readers MUST tolerate them either way. + +#### `session_metadata_update` + +Post-creation update to logical session metadata. The session header carries the base value when it is known at write time; consumers that need effective session metadata start with the header value and then replay these events in file order, with the last update to a field winning. The header remains as-written, and the event is part of normal session content that contributes to the session-level `content_hash`. 
+ +```jsonc +{ + "type": "session_metadata_update", + "id": "...", + "ts": "...", + "payload": { + "field": "name", + "value": "Implement metadata updates", + "previous_value": "Old title", + "reason": "ai_generated" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `field` | yes | enum or extension | One of `name`, `description`, `tags`, `agent.model_default`, `vcs.branch`, `vcs.worktree`, or `x-/`. | +| `value` | yes | field-specific | Replacement value. Must match the field type: string for `name`/`description`/`agent.model_default`/`vcs.branch`, string array for `tags`, and the §9.2 worktree shape for `vcs.worktree`. Extension fields MAY carry any JSON value. | +| `previous_value` | no | field-specific | Prior value when the adapter knows it. Same type as `value`. | +| `reason` | yes | enum or extension | `ai_generated`, `user_set`, `runtime_inferred`, `external`, or `x-/`. | + +Writers MUST NOT use this event for immutable identity or cryptographic fields such as `id`, `session_uid`, `content_hash`, `redacted_from`, `vcs.revision`, or `vcs.head_commit`. Working-directory changes remain `system_event.kind:"cwd_change"`. + +#### `system_event` + +A meaningful source timeline record that is not a user message, agent message, tool call, tool result, summary, or known lifecycle event. Use this for source status/progress/bookkeeping records that SHOULD remain visible in a timeline. Do not use it as a dumping ground for high-volume internal state or records that map cleanly to a more specific canonical event. + +```jsonc +{ + "type": "system_event", + "id": "...", + "ts": "...", + "payload": { + "kind": "hook_fired", + "text": "Hook progress: PreToolUse", + "data": { "hook": "PreToolUse" } + } +} +``` + +`kind` is REQUIRED and writer-strict. It MUST be either one of the reserved cross-agent values below, or a vendor-namespaced extension of the form `x-/`. Bare unknown strings are rejected by writer-strict validation. 
Readers are tolerant of unknown `x-*` kinds and pass them through. `data` is curated structured metadata for rendering and search, not a replacement for `source.raw`. + +`context_compact`, `user_interrupt`, `model_change`, `mode_change`, `thinking_level_change`, and `session_end` are first-class record types ([§10.3](#10-3-optional-event-types)). Do not duplicate them under `system_event.kind`. + +##### Reserved lifecycle vocabulary + +| `kind` | When to use | +| --- | --- | +| `session_start` | Explicit mid-stream session-start marker; redundant with the session header, but useful for tooling that splits on events. | +| `turn_start` | User prompt accepted, agent begins work. | +| `turn_end` | Agent finishes a turn. | +| `subagent_start` | A spawned subagent begins. | +| `subagent_end` | A spawned subagent returns. | +| `pre_tool_use` | Tool about to fire (hook intercept point). | +| `post_tool_use` | Tool finished. | +| `hook_fired` | Generic adapter-emitted hook trace. | +| `permission_request` | Agent asked the user for tool approval. | +| `permission_decision` | User allowed/denied a specific tool invocation. | +| `cwd_change` | Working directory shifted. | +| `env_snapshot` | Shell/env state capture. | + +##### Reserved source-signal vocabulary + +| `kind` | When to use | Suggested `data` shape | +| --- | --- | --- | +| `task_started` | Source emits a structured task/step begin marker. | `{ task_id, title? }` | +| `task_completed` | Pair to `task_started`. May be synthesized at EOF for unclosed tasks (set `source.synthesized: true`). | `{ task_id, summary?, status? }` | +| `plan_completed` | Source emits a plan or todo completion marker without a full plan snapshot. | `{ plan_id, preview? }` | +| `turn_aborted` | Model or system stopped a turn for non-user reasons (length limit, refusal, error) with no tool in flight. Distinct from `user_interrupt`. | `{ reason }` | +| `tool_decision` | Source recorded a user approve/reject decision on a tool call. 
| `{ decision, tool_call_id }` | +| `context_injected` | Runtime injected standalone context that SHOULD remain visible outside a `user_message`. | `{ source_kind, name?, size_bytes? }` | +| `hook_progress` | Catch-all for source-emitted progress/hook/queue records that do not map to a more specific reserved lifecycle kind. Adapters SHOULD prefer `session_start` / `turn_end` / `pre_tool_use` / `post_tool_use` / `subagent_end` / `hook_fired` when the source signal is unambiguous, and fall back to `hook_progress` only for unrecognised progress streams. | `{ hook_event?, hook_name?, ... }` | +| `queue_operation` | Source recorded an enqueue or dequeue operation. | Free-form. | +| `heartbeat` | Periodic liveness ping during streaming capture (§9.4). Optional. Non-normative; readers MAY treat as informational. | `{ interval_ms? }` | +| `vcs_commit` | Adapter detected a VCS commit created during the session. | `{ sha, tool_call_id, branch?, message?, repo? }` | + +Use `tool_call_aborted{scope:"turn"}` for stops in a tool-invocation context where no specific call is identifiable. Use `system_event.kind:"turn_aborted"` for model/system-level turn stops with no tool in flight. + +##### Reserved diagnostic vocabulary + +Cross-agent diagnostic signals. Adapters MAY emit these to surface non-fatal errors, warnings, deprecations, routing decisions, and hook failures in the timeline. Out of scope: per-tool errors (those stay on `tool_result.error` + `tool_result.ok=false`). + +| `kind` | When to use | Suggested `data` shape | +| --- | --- | --- | +| `agent_error` | Agent-side error not tied to a specific tool call. | `{ severity?, code?, category?, blocking?, recovered?, source?, details? }` | +| `agent_warning` | Non-fatal agent-side warning. | `{ severity?, code?, category?, blocking?, recovered?, source?, details? }` | +| `api_error` | Upstream LLM/API failure surfaced to the user. | `{ severity?, code?, category?, source?, details? 
}` | +| `stream_error` | Streaming response interrupted or failed. | `{ severity?, code?, recovered?, details? }` | +| `deprecation_notice` | Source announced a feature or capability deprecation. | `{ feature?, replacement?, details? }` | +| `guardian_alert` | Safety rail, guardian system, or content moderation triggered. | `{ severity?, policy?, action?, details? }` | +| `model_rerouted` | Model fallback or capability re-routing decision. | `{ from?, to?, reason?, details? }` | +| `hook_failed` | Runtime hook execution failed, blocking or non-blocking. | `{ severity?, blocking?, hook_name?, code?, details? }` | + +**Severity vocabulary (informative).** When adapters include `data.severity`, recommended values are `info`, `warning`, `error`, `critical`. Not schema-enforced; readers SHOULD treat unknown severities as opaque. + +**Source vocabulary (informative).** When `data.source` is present, common values include `anthropic`, `openai`, `hook`, `guardian`, `runtime`. Free-form at the schema layer. + +##### Recommended `payload.data` shapes (permission kinds) + +`data` stays free-form at the schema layer. Adapters SHOULD use the shapes below so cross-agent consumers can render permission flow without per-adapter switches. These shapes are expected to become schema-enforced once two or more adapters converge on them. + +| `kind` | Recommended `data` | +| --- | --- | +| `permission_request` | `{ tool_call_id?: string, capability?: string, prompt?: string }` | +| `permission_decision` | `{ decision: "allow" \| "deny", tool_call_id?: string, capability?: string }` | + +##### Extension policy and promotion + +- Reserved values above are the only bare strings allowed by writer-strict validation. +- Anything else MUST use the `x-<vendor>/<name>` form, e.g. `x-claudecode/notification`. +- Readers are tolerant of unknown `x-*` kinds — they pass through with no diagnostic. +- Bare unknown strings (no `x-` prefix, not in the reserved set) are rejected by writer-strict validation. 
+- Adapters quarantining an unparseable source record MUST emit `system_event` with `kind:"x-<vendor>/unknown_record"` and preserve the record in `source.raw`; `parse_fidelity.quarantined_count` counts this pattern (§9.2). +- If an `x-*` kind proves cross-agent, promote it to the reserved enum in a minor format version bump. Document emitted kinds per adapter in `docs/parser-source-matrix.md`. + +#### `capability_change` + +A change in the set of capabilities available to the agent at a point in the session. Use this for tool, skill, plugin, MCP server, and MCP tool registry snapshots/deltas. This records availability changes, not tool invocations; calls still use `tool_call` / `tool_result`. + +```jsonc +{ + "type": "capability_change", + "id": "...", + "ts": "...", + "payload": { + "scope": "tool", + "reason": "registered", + "added": [{ "name": "Search", "metadata": { "namespace": "example" } }] + } +} +``` + +| Payload field | Required | Type | Notes | +| --- | --- | --- | --- | +| `scope` | yes | enum or extension | `tool` \| `skill` \| `mcp_server` \| `mcp_tool` \| `plugin` \| `x-<vendor>/<name>` | +| `reason` | yes | enum or extension | `initial` \| `registered` \| `deregistered` \| `connected` \| `disconnected` \| `loaded` \| `unloaded` \| `error` \| `instructions_updated` \| `x-<vendor>/<name>` | +| `added` | no | array | Non-empty array of `{ name, metadata? }`. | +| `removed` | no | array | Non-empty array of `{ name }`. | +| `changed` | no | array | Non-empty array of `{ name, field, from?, to? }`. | +| `snapshot` | no | array | Non-empty array of `{ name, metadata? }`; replaces accumulated state for this `scope` at this point. | + +Writer-strict validation requires at least one of `added`, `removed`, `changed`, or `snapshot`. + +Out of scope: full tool input/output schemas; they are static registry data and can be large or sensitive. Writers SHOULD keep only compact identifying metadata in `metadata`. 
+ +#### `command_invoke` + +A named capability invoked with optional arguments: a user-typed slash command, a built-in CLI affordance, a skill activation, a user-defined prompt template, or a plugin command. These surfaces share the "named capability invoked" semantic but vary along two orthogonal axes — `kind` records *what* was invoked, `via` records *how* it reached the agent. Without this event they leak as `user_message.text="/foo"`, `tool_call.tool=other` with `args.name="Skill"`, or get dropped. + +```jsonc +{ + "type": "command_invoke", + "id": "...", + "ts": "...", + "payload": { + "name": "/code-review", + "kind": "custom_prompt", + "via": "user_typed", + "args": { "target": "HEAD" }, + "expansion_text": "Review the diff against main.", + "result_action": "expand" + } +} +``` + +| Payload field | Required | Type | Notes | +| --- | --- | --- | --- | +| `name` | yes | string | User-visible identifier. Leading slash for slash/builtin/custom_prompt (`/clear`); bare name for skills (`webapp-testing`). | +| `kind` | yes | enum or extension | `slash` \| `builtin` \| `skill` \| `custom_prompt` \| `plugin` \| `x-/`. What kind of capability was invoked. | +| `via` | yes | enum or extension | `user_typed` \| `auto_trigger` \| `agent_invoked` \| `x-/`. How the invocation reached the agent. | +| `args` | no | object | Free-form invocation arguments. | +| `expansion_text` | no | string | Post-expansion prompt text the agent saw (for prompt-template commands). | +| `result_action` | no | string \| null | What the runtime did with it. Reserved value, `x-/` extension, or null. | + +`kind` discriminates the capability: skill activation → `skill`, built-in command → `builtin`, user-defined prompt template → `custom_prompt`, generic slash command → `slash`, extension/plugin command → `plugin`. + +`via=auto_trigger` covers description-matched skill activation with no user action. 
Adapters MAY synthesize it when they observe a skill load without a corresponding `Skill` tool call; set `source.synthesized: true` in that case. + +`result_action` helps analyzers correlate to subsequent `context_compact` or session resets without inferring from content. Reserved values: + +| `result_action` | When to use | +| --- | --- | +| `compact` | Invocation triggered a context compaction (`/compact`). | +| `clear` | Invocation reset the session (`/clear`). | +| `expand` | Prompt-template command expanded into agent input. | +| `load_skill` | A skill was loaded into context. | +| `noop` | Runtime accepted the command with no observable state change. | + +Beyond these, `result_action` accepts a vendor-namespaced extension of the form `x-/`, or `null`. Bare unknown strings are rejected by writer-strict validation; readers are tolerant of unknown `x-*` values. + +Out of scope: skill *contents* (static config, not session history); MCP server tools (covered by `tool_call.tool=mcp_call`); permission gates (covered by `system_event.kind=permission_request/decision`). + +#### `agent_thinking` + +Chain-of-thought or reasoning block. + +```jsonc +{ + "type": "agent_thinking", + "id": "...", + "ts": "...", + "payload": { "text": "...", "model": "...", "level": "medium" } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `text` | yes | string | reasoning content exposed by the source | +| `model` | no | string | model that produced this thinking block | +| `level` | no | string | non-empty source-defined string; readers MUST treat unknown level tokens as opaque | +| `usage` | no | object | token usage when this is the first entry derived from a source envelope; see [`payload.usage`](#agent_messagepayloadusage) | + +#### `user_interrupt` + +User interrupted an in-progress agent response. 
+ +```jsonc +{ + "type": "user_interrupt", + "id": "...", + "ts": "...", + "payload": { "reason": "" } +} +``` + +#### `context_compact` + +Session was compacted to free context window. + +```jsonc +{ + "type": "context_compact", + "id": "...", + "ts": "...", + "payload": { + "summary": "", + "trigger": "auto", + "tokens_before": 12000, + "tokens_after": 4000, + "replaced_message_ids": ["", ""] + } +} +``` + +`trigger`: `manual` | `auto` | `x-<vendor>/<name>`. + +`replaced_message_ids`: optional Agent Trail entry IDs folded or replaced by this +compaction summary, in source order. These IDs are provenance-only; readers MUST +validate their ID shape but MUST NOT require them to resolve to entries present in +the same trail file. + +#### `branch_point` + +Marks where a branch was made. + +```jsonc +{ + "type": "branch_point", + "id": "...", + "ts": "...", + "payload": { + "from_id": "", + "reason": "" + } +} +``` + +#### `branch_summary` + +A summary of an abandoned branch, attached to the active branch. + +```jsonc +{ + "type": "branch_summary", + "id": "...", + "ts": "...", + "payload": { + "abandoned_branch_id": "", + "summary": "", + "model": "..." + } +} +``` + +#### `model_change` + +Active model changed mid-session.
+ +```jsonc +{ + "type": "model_change", + "id": "...", + "ts": "...", + "payload": { + "from_model": "", + "to_model": "", + "trigger": "runtime_inferred", + "turn_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `from_model` | no | string | previous model id; omit when the source did not track the prior model | +| `to_model` | yes | string | new active model id | +| `from_provider` | no | string | previous model provider when known | +| `to_provider` | no | string | new model provider when known | +| `reason` | no | string | source-provided or adapter-inferred reason | +| `trigger` | no | enum or extension | `initial`, `user_set`, `agent_set`, `runtime_inferred`, `auto_reroute`, `external`, or `x-<vendor>/<name>` | +| `turn_id` | no | string | source turn id associated with the observation | + +#### `mode_change` + +Active runtime mode changed or was first observed. Use this for common mode axes such as collaboration mode (`plan`, `auto`), permission mode, execution/sandbox mode, or UI mode. Per-tool approval still uses `system_event.kind:"permission_request"` / `"permission_decision"`.
+ +```jsonc +{ + "type": "mode_change", + "id": "...", + "ts": "...", + "payload": { + "scope": "permission", + "from_mode": "default", + "to_mode": "acceptEdits", + "trigger": "runtime_inferred", + "turn_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `scope` | yes | enum or extension | `collaboration`, `permission`, `execution`, `ui`, or `x-<vendor>/<name>` | +| `from_mode` | no | string | previous mode token | +| `to_mode` | yes | string | new or initially observed mode token | +| `reason` | no | string | source-provided or adapter-inferred reason | +| `trigger` | no | enum or extension | `initial`, `user_set`, `agent_set`, `runtime_inferred`, `auto_reroute`, `external`, or `x-<vendor>/<name>` | +| `turn_id` | no | string | source turn id associated with the observation | +| `data` | no | object | curated adapter metadata for this mode axis | + +#### `thinking_level_change` + +Active reasoning/thinking level changed or was first observed. This records the selected thinking budget/effort level, not the model's private chain of thought. Reasoning text remains `agent_thinking`. + +```jsonc +{ + "type": "thinking_level_change", + "id": "...", + "ts": "...", + "payload": { + "from_level": "medium", + "to_level": "high", + "trigger": "runtime_inferred", + "turn_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `from_level` | no | string | previous thinking-level token | +| `to_level` | yes | string | new or initially observed thinking-level token | +| `reason` | no | string | source-provided or adapter-inferred reason | +| `trigger` | no | enum or extension | `initial`, `user_set`, `agent_set`, `runtime_inferred`, `auto_reroute`, `external`, or `x-<vendor>/<name>` | +| `turn_id` | no | string | source turn id associated with the observation | +| `data` | no | object | curated adapter metadata for this level axis | + +Recommended thinking-level vocabulary is `none`, `low`, `medium`, `high`, and `xhigh`.
This vocabulary is not schema-enforced; source-defined tokens remain valid and opaque to readers. + +#### `session_terminated` + +Marks an incomplete session ending. Adapters MAY emit this synthetically at EOF when the source file ends with unmatched `tool_call` events (process killed mid-execution, file truncated, etc.). + +```jsonc +{ + "type": "session_terminated", + "id": "...", + "ts": "...", + "payload": { + "reason": "eof_with_open_tool_calls", + "open_call_ids": ["", ""] + }, + "source": { "synthesized": true } +} +``` + +`reason`: `eof_with_open_tool_calls` | `process_terminated` | `truncated` | `user_abort` | `x-<vendor>/<name>`. + +Synthesized instances MUST set `source.synthesized: true`. + +#### `session_end` + +Clean terminal marker. Distinct from `session_terminated` (abnormal). Optional; many writers won't emit it. When present at EOF, signals a normal conclusion of the session and suppresses the "unmatched tool calls at EOF" warning of §18.4. + +```jsonc +{ + "type": "session_end", + "id": "...", + "ts": "...", + "payload": { + "reason": "complete", + "final_message_id": "" + } +} +``` + +| Payload field | Required | Type | Notes | +|---|---|---|---| +| `reason` | yes | enum or extension | `complete` \| `user_quit` \| `agent_idle` \| `x-<vendor>/<name>` | +| `final_message_id` | no | string | optional reference to the last meaningful event | + +### 10.4 Semantic linking + +The `semantic` block on an event provides linking metadata when explicit `id` / `parent_id` / `for_id` references are unreliable (source has missing or null IDs). + +| Field | Type | Purpose | +|---|---|---| +| `semantic.group_id` | string | Groups events that belong to one logical unit. | +| `semantic.call_id` | string | Source format's native ID for a tool call. Used as fallback pairing key. | +| `semantic.tool_kind` | string | Canonical tool kind. Useful on `tool_result` events that don't carry it directly.
| + +Writers SHOULD populate `semantic.call_id` on tool_call/tool_result pairs when the source has reliable native call IDs that are not Agent Trail entry IDs. + +### 10.5 Tool call terminal pairing + +`tool_result.payload.for_id` and `tool_call_aborted.payload.for_id` SHOULD reference the matching `tool_call`. Writers SHOULD populate `tool_result.payload.for_id` or `semantic.call_id` when the source records concurrent (overlapping) tool calls, and SHOULD populate one of them for every result. A `tool_call_aborted` only closes a call when `payload.scope == "tool_call"` and `payload.for_id` resolves to a `tool_call`; turn-level aborts do not close any specific call. + +When `tool_result.payload.for_id` is null, missing, or refers to a non-existent event, readers use these fallback rules in order: + +1. **Semantic match.** If both events have `semantic.call_id` and they're equal, pair them. +2. **Sequential match.** Pair the `tool_result` with the most recent prior unmatched `tool_call` in the same branch scope. Sequential fallback considers only calls in the same nearest `parent_id` ancestry as the result, so an inline subagent subtree cannot capture a parent timeline result and a parent timeline result cannot capture a child subtree call. Linear sessions without `parent_id` are unchanged. +3. **Heuristic match.** Readers MAY use further heuristics (timestamp proximity, payload shape) but MUST flag the pairing as uncertain in rendered output. + +Writers SHOULD avoid relying on fallbacks. Populate `for_id` when reliable; use `semantic.call_id` when the source's native ID doesn't map cleanly to event `id`. Do not use semantic or sequential fallback pairing for `tool_call_aborted`; if a source cannot identify the call, emit `scope:"turn"` without `for_id`. 
+ +Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning (§18.4): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. + +### 10.6 Unknown event types + +Readers MUST tolerate unknown types: + +- Preserve them when round-tripping. +- Render with a generic fallback. +- Do not abort parsing. + +Writers MUST NOT invent new top-level event types in v0.1 writer-strict output. Use the `other` tool kind ([§11](#11-canonical-tool-taxonomy)) or `source.raw` ([§10.1](#10-1-base-shape), [§15.1](#15-1-source-raw-elision-and-redaction)) for adapter-specific data, or `meta` ([§8.3](#8-3-the-meta-extension-convention) / [§12](#12-vendor-extensions)) for vendor extensions. Reader-tolerant parsing MAY preserve unknown future event types at runtime; this tolerance is not part of the writer schema. + +### 10.7 Source envelope referencing + +When a single source envelope produces multiple entries — for example, an assistant message envelope whose `content` array is split across one `agent_message`, one `agent_thinking`, and one `tool_call` entry — writers SHOULD NOT inline the full envelope on every derived entry. Use *inline-first / ref-subsequent* dedup: + +- The **first** entry derived from a given source envelope sets `source.raw.envelope` (and `source.raw.block`, `source.raw.block_index` if applicable). +- **Subsequent** entries derived from the same envelope set `source.raw.envelope_ref` to the first entry's `id`. They omit `source.raw.envelope` and keep `block` / `block_index`. 
+ +`source.raw.envelope_ref` is an optional string. Writers MUST ensure it references the `id` of an entry that appears **earlier** in the same file — the same envelope, inlined once. Forward references and dangling references are reader errors (`source_raw_envelope_ref_unresolved`, §18.4). The first-inline-then-ref shape is streaming-write friendly: readers resolve refs in a single pass without backtracking. + +This mechanism is additive over v0.1.0. Readers that do not understand `envelope_ref` will see it as an unknown raw-source field and ignore it; the entry's other fields (`type`, `payload`, `semantic`) remain fully self-describing. + +--- + diff --git a/spec/v0.1.0/11-canonical-tool-taxonomy.md b/spec/v0.1.0/11-canonical-tool-taxonomy.md new file mode 100644 index 0000000..7fb04cd --- /dev/null +++ b/spec/v0.1.0/11-canonical-tool-taxonomy.md @@ -0,0 +1,90 @@ +## 11. Canonical tool taxonomy + +The `tool_call.payload.tool` field uses these values. Each defines the expected shape of `args`. + +| Name | Args | +|---|---| +| `file_read` | `{ path, range? }` | +| `file_write` | `{ path, content }` | +| `file_edit` | `{ path, diff }` (unified diff) or `{ path, old, new, replace_all? }` | +| `file_patch` | `{ files: [{ path, diff }], atomic? }` | +| `file_list` | `{ path, recursive?, glob? }` | +| `file_search` | `{ query, path?, glob? }` | +| `shell_command` | `{ command, cwd?, timeout? }` | +| `shell_output` | `{ command_id? }` | +| `shell_input` | `{ input, session_id?, command_id? }` | +| `mcp_call` | `{ server, tool, args?, headers? }` | +| `web_fetch` | `{ url, method?, headers? }` | +| `web_search` | `{ query }` | +| `tool_search` | `{ query, limit? }` | +| `notebook_edit` | `{ path, cell_id?, diff?, content? }` | +| `subagent_invoke` | `{ task, agent_type?, session_id? }` | +| `other` | `{ name, args? }` | + +Checklist and plan snapshots use `task_plan_update` ([§10.2](#10-2-mandatory-event-types)) rather than `tool_call`. 
+ +### 11.1 `file_edit` + +`file_edit` has two exclusive argument forms: + +- `{ path, diff }` where `diff` is a unified diff. +- `{ path, old, new, replace_all? }` for sources that record only string replacement with no line context. + +Writers MUST prefer the diff form when a real unified diff is derivable from source data. Writers MUST NOT fabricate hunk headers to fake the diff form. + +The `diff` form uses a unified diff: + +```diff +--- a/src/main.ts ++++ b/src/main.ts +@@ -1,4 +1,4 @@ + unchanged +-removed ++added + unchanged +``` + +Writers with native before/after content MUST convert to a diff before emitting. Writers that synthesize the edit from indirect source data set `source.synthesized: true`. + +### 11.2 `file_patch` + +Use `file_patch` when one source tool call represents a patch touching one or more files, and +single-file `file_edit` would either lose the call's multi-file grouping or force consumers to +reconstruct it from synthesized sibling calls. Each `files[]` entry carries the affected `path` and a +per-file unified diff. Writers that split source-native patch text into per-file hunks SHOULD add +`---` and `+++` file headers when the source omits them, so generic consumers can render each file +without parsing the source-native patch envelope. For renames, `path` is the destination path and the +diff headers carry both source and destination paths. Set `atomic: true` when the source represented +the patch as one operation. + +### 11.3 `file_list` + +Use `file_list` when the agent inspected a directory or file tree. The result's display listing +lives in the matching `tool_result.payload.output`. Do not map directory listing to +`shell_command` unless the source only records a literal shell command. + +### 11.4 `shell_command` + +Full command in `command`; output in the corresponding `tool_result.payload.output`. Redactors SHOULD scrub env vars, `Authorization` headers in piped curls, etc. 
+ +### 11.5 `mcp_call` + +- `server` — MCP server identifier (e.g., `github`, `linear`). +- `tool` — tool name within that server. +- `headers` — SHOULD be redacted before writing: `Authorization`, `X-API-Key`, `Cookie`, `Bearer ...`. + +### 11.6 `subagent_invoke` + +Indicates a child conversation was spawned. Two cases: + +- **Inline subtree:** when the source stores child events inline in the same session, child events use this event's `id` as their root `parent_id`. +- **External child session:** when the source stores the child as a separate transcript, set `args.session_id` to the child session header `id`. The child MAY appear as a sibling group in the same session bundle or as an external trail. Do not use a content hash or source runtime id in `args.session_id`. + +When the external child appears in the same file, the child header SHOULD set `fork_from.session_id` to the parent session header `id` and `fork_from.entry_id` to the parent `subagent_invoke` event `id`. `fork_from.content_hash` is optional best-effort and refers to the parent session-level content hash. + +### 11.7 The `other` escape hatch + +For tools not covered above, use `tool: "other"` with `args: { name, args? }`. Readers render generically. These don't participate in cross-agent comparison. + +--- + diff --git a/spec/v0.1.0/12-vendor-extensions.md b/spec/v0.1.0/12-vendor-extensions.md new file mode 100644 index 0000000..e6d3fef --- /dev/null +++ b/spec/v0.1.0/12-vendor-extensions.md @@ -0,0 +1,39 @@ +## 12. Vendor extensions + +Implementations and vendors can add custom data via the `meta` field on the trail envelope, session header, or any event entry. Use the `x-<vendor>/<name>` extension grammar (§12.1) for keys to avoid collisions: + +```jsonc +"meta": { + "x-cursor/workspace_id": "ws-abc123", + "x-example/custom_flag": true, + "x-anthropic/usage": { "input_tokens": 1234, "output_tokens": 567 } +} +``` + +Readers MAY preserve, ignore, or render `meta` fields. They MUST NOT abort on unknown keys.
`entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see §16. + +The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](#15-1-source-raw-elision-and-redaction)) instead. See [§8.3](#8-3-the-meta-extension-convention) for the full convention. + +### 12.1 Extension grammar + +One extension grammar is used across extension surfaces: `x-<vendor>/<name>`. + +- `vendor`: lowercase alphanumeric with optional hyphen-separated segments, e.g. `acme` or `acme-labs`. +- `name`: starts with lowercase alphanumeric and MAY contain lowercase alphanumeric, `_`, or `-`. + +| Surface | Applies to | Example | +| --- | --- | --- | +| Envelope `meta` keys | Trail-level vendor annotations | `x-acme/build_id` | +| Header `meta` keys | Session-level vendor annotations | `x-acme/team` | +| Entry `meta` keys | Event-level vendor annotations | `x-acme/run_id` | +| `system_event.kind` | Non-reserved source signals | `x-claudecode/notification` | +| Enum extensions | Descriptive state vocabulary: `scope`, `reason`, `trigger`, `result_action`, `command_invoke.kind`, `command_invoke.via`, `session_metadata_update.field`, `vcs.type`, `user_message.origin` | `x-acme/custom_scope` | +| `tool_result.payload.meta` vendor keys | Sibling keys under registered tool-kind output objects | `meta.mcp_call.x-acme/cache_hit` | +| Custom `agent.name` | Unregistered source agents | `x-example/myagent` | + +Structural discriminators, including event `type`, delta `kind`, attachment `kind`, and `taskPlanStatus`, stay closed. Descriptive state vocabulary is extensible through the grammar above. + +--- + diff --git a/spec/v0.1.0/13-tree-and-branching.md b/spec/v0.1.0/13-tree-and-branching.md new file mode 100644 index 0000000..922fbac --- /dev/null +++ b/spec/v0.1.0/13-tree-and-branching.md @@ -0,0 +1,18 @@ +## 13.
Tree and branching + +### 13.1 When to emit `parent_id` + +`parent_id` represents tree topology, not ordinary linear sequencing. Linear sessions use file order. Tool call/result pairing uses `tool_result.payload.for_id` and `semantic.call_id`, not `parent_id`. + +Writers SHOULD emit `parent_id` only when source data contains branch, fork, or inline child-event topology that can be mapped to Agent Trail event ids. + +`parent_id` is intra-group topology only. It MUST NOT span session groups. When source data stores a spawned or forked transcript as a separate session, use a child session with `header.fork_from` instead of cross-group `parent_id`. + +Reader display policies for linear and tree-aware renderers are implementation semantics, not wire-format rules. + +### 13.2 Acyclicity + +The `parent_id` graph MUST be acyclic. The header isn't part of the graph; nothing references it via `parent_id`. + +--- + diff --git a/spec/v0.1.0/14-canonical-agent-registry.md b/spec/v0.1.0/14-canonical-agent-registry.md new file mode 100644 index 0000000..8703891 --- /dev/null +++ b/spec/v0.1.0/14-canonical-agent-registry.md @@ -0,0 +1,12 @@ +## 14. Canonical agent registry + +Lowercase, hyphenated: + +`claude-code`, `pi`, `openclaw`, `codex-cli`, `cursor`, `opencode`, `aider`, `amp`, `cline`, `crush`, `kimi-code`, `qwen-code`, `factory`, `vibe`, `copilot-cli`, `copilot-chat`, `chatgpt`, `clawdbot`. + +The registry reserves canonical names. It does not imply adapter support. + +New agents MAY be added by amending this spec. Until registered, adapters MAY use a custom `x-<vendor>/<name>` agent name (e.g., `x-example/myagent`) to reduce collisions while keeping the vendor and agent name parseable. + +--- + diff --git a/spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md b/spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md new file mode 100644 index 0000000..5db9e52 --- /dev/null +++ b/spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md @@ -0,0 +1,50 @@ +## 15.
Truncation, overflow, and raw source size + +Writers MAY truncate large `tool_result` outputs to keep trails tractable. The wire format records truncation with three fields on `tool_result.payload`: + +| Field | Type | Notes | +|---|---|---| +| `truncated` | boolean | `true` when `output` was shortened from its original length | +| `output_size` | integer ≥0 | UTF-8 byte length of the original output before truncation; REQUIRED when `truncated` is true | +| `overflow_ref` | string or null | optional content-addressed reference to the full output (`sha256:<64 lowercase hex>`); colocated blob storage is implementation-defined | + +Specific inline-size thresholds, the truncation algorithm (e.g., head-only, head-and-tail, line-aligned), and the choice of overflow storage are writer policy and belong in writer documentation, not the format. + +Tool call arguments use the same top-level marker on `tool_call.payload`: + +| Field | Type | Notes | +|---|---|---| +| `truncated` | boolean | `true` when `args` was shortened from its original object | +| `args_size` | integer ≥0 | UTF-8 byte length of the JCS-serialized original `args` object before truncation; REQUIRED when `truncated` is true | +| `overflow_ref` | string or null | optional content-addressed reference to the full args object (`sha256:<64 lowercase hex>`) | + +The marker applies to the `args` object as a whole. Individual arg strings keep their declared per-toolkind shape, just shortened. Specific thresholds and algorithms remain writer policy. + +`source.raw` is optional. Writers SHOULD omit or summarize very large or sensitive raw source objects when they would make trail files unwieldy or unsafe. Share tools MUST inspect `source.raw` during redaction before producing a shared artifact. + +### 15.1 `source.raw` elision and redaction + +Writers MAY elide all or part of a `source.raw` value when it is unwieldy or unsafe to inline. 
Elision uses a single wire-format marker, in place of either the entire `source.raw` or any nested string leaf: + +```jsonc +{ "elided": true, "size_bytes": 41208 } +``` + +| Field | Type | Notes | +|---|---|---| +| `elided` | boolean `true` | sentinel; readers detect elided regions by this field | +| `size_bytes` | integer | UTF-8 byte length of the elided original (informational; readers MAY use it for display or budgeting) | + +Two placements are valid: + +- **Whole-value elide:** `source.raw` itself is the marker. The original envelope is fully omitted; only its byte size is recorded. +- **Leaf elide:** any nested string is replaced with the marker. The envelope's structural skeleton (ids, parent refs, role, timestamps, block kinds) stays intact; only the bulky string body is removed. + +Specific size thresholds, the algorithm a writer uses to choose which leaves to elide, and whether elision is gated by a hard cap are implementation policy — they belong in writer documentation, not the format. Validators MAY warn on entries whose `source.raw` exceeds an implementation-chosen size budget, but the wire format itself imposes no fixed limit. + +When elision happens at the first emission of a source envelope (§10.7), subsequent `envelope_ref` entries still resolve — the ref points at the elided entry's `id`, not at its inlined envelope. + +Adapters MUST redact known secret patterns in `source.raw` before writing — emission-time redaction is a writer responsibility, not a share-time concern. Validators emit `source_raw_unredacted_secret` (warning) when a string leaf in `source.raw` matches a known credential pattern (Authorization headers, Bearer tokens, JWT, vendor API keys, PEM private key blocks, ENV-style assignments). Share-time redaction (§16) layers additional normalization on top — paths, PII — and produces a separate artifact. 
+ +--- + diff --git a/spec/v0.1.0/16-redaction.md b/spec/v0.1.0/16-redaction.md new file mode 100644 index 0000000..1e0008b --- /dev/null +++ b/spec/v0.1.0/16-redaction.md @@ -0,0 +1,31 @@ +## 16. Redaction + +The raw file format does not mandate redaction. Sharing tools produce a separate redacted artifact before upload. Raw and redacted artifacts have different `content_hash` values. + +A complete redaction protocol is out of scope for the file format; it belongs to share tooling. Redacted artifacts MAY record `redacted_from.content_hash` to link back to the raw artifact without exposing local paths or raw local IDs. + +Share-time redactors MUST apply the privacy rules below before producing shared artifacts. They MAY normalize a field instead of deleting it only when the normalized value no longer exposes raw local paths, raw local session identifiers, credentials, or private repository identity. + +| Field or value | Share-time action | +|---|---| +| `cwd` | Normalize or strip. | +| `vcs.remote_url` | Strip or normalize per §9.2 unless the user explicitly opts in. | +| `system_event.payload.data.repo` for `vcs_commit` | Treat like `vcs.remote_url`; strip or normalize unless the user explicitly opts in. | +| `vcs.worktree.path`, `vcs.worktree.original_cwd` | Normalize or strip. | +| `source.path` | Normalize or strip. | +| `attachments[].uri` | Remove or rewrite local `file:` URIs. Rewrite to `sha256:` only when the referenced blob is content-addressed and transported with the share; otherwise remove `uri` and keep visible stub metadata such as `kind`, `name`, and `media_type`. | +| `tool_result.payload.overflow_ref` | Keep `sha256:` references when useful; strip every other scheme or implementation-local reference. When stripped, keep `truncated` and `output_size` unchanged. | +| `tool_call.payload.args.headers` for `mcp_call` and `web_fetch` | Strip or replace credential-bearing values with placeholders. 
| +| `name`, `description`, `tags`, message text, output strings, and `meta` string leaves | Scrub secret patterns and PII according to the redactor's configured policy. | + +Redactors MUST resolve each `user_query_response.payload.for_id` to a `user_query` in the same session group before preserving answers for questions marked `is_secret`. If the query is unresolvable, the redactor MUST strip the response's `answers` entirely (fail closed). +If a resolved response contains answer keys that do not appear on the referenced `user_query`, the redactor MUST strip those unknown answers and any raw source payload for that response. + +Share-time redactors SHOULD populate `entry.meta.redaction_count` on each changed event entry. The count is a non-negative integer equal to the number of redactor mutations applied to that entry. Existing numeric `redaction_count` values are additive when a redacted trail is redacted again; unchanged entries keep their existing value. + +When redaction changes bytes, lineage hashes are updated as described in §9.6.7. This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. + +Specific secret patterns, exact PII detectors, path-normalization strings, image preview behavior, token-usage policy, blob upload mechanics, and share workflow remain implementation semantics. + +--- + diff --git a/spec/v0.1.0/17-security-considerations.md b/spec/v0.1.0/17-security-considerations.md new file mode 100644 index 0000000..f6af7f0 --- /dev/null +++ b/spec/v0.1.0/17-security-considerations.md @@ -0,0 +1,22 @@ +## 17. Security Considerations + +Trail files are untrusted input. All string content, including messages, tool output, file paths, URIs, agent names, titles, and source metadata, can be attacker-controlled. 
Renderers SHOULD escape HTML, SHOULD NOT execute or auto-open rendered Markdown links, and CLI viewers SHOULD sanitize terminal control sequences before writing text to a terminal. + +Agent Trail intentionally has no format-level size caps. Consumers SHOULD enforce deployment-specific limits for maximum line length, file size, event count, graph depth, and decoded attachment or overflow bytes. Consumers SHOULD stream rather than buffer whole files where possible; JSONL is the interchange shape partly to make bounded streaming readers practical. + +Hostile files can contain invalid graph structure even though `parent_id` cycles and cross-group links are invalid (§13, §18.4). Validators MUST NOT loop indefinitely while checking graph topology, and tree renderers SHOULD bound recursion or use iterative traversal when displaying deep parent chains. + +`content_hash` provides byte integrity for the canonical artifact (§7.3, §7.4), not authorship, provenance, or trust. A trail claiming `agent.name: "claude-code"` proves only that the file contains that string. Agent Trail v0.1.0 has no signature or attestation mechanism; signing MAY be added by a future extension. + +In v0.1.0, `content_hash` values are bare 64-character SHA-256 hex strings (§7.3). Other content-addressed references, such as attachment URIs (§10.2) and `overflow_ref` values (§15), use `sha256:` references. Consumers that verify prefixed content-addressed references MUST reject unknown algorithm prefixes rather than treating the reference as verified. + +Attachment URIs and overflow references can identify local resources on the producer's machine. Viewers SHOULD NOT dereference `file:` URIs, `overflow_ref` values, or other external references automatically. Viewers MUST NOT dereference local `file:` URIs or non-`sha256:` overflow references from redacted or shared trails; §16 requires share-time redactors to remove or rewrite those values before transport. 
+ +Redaction reduces content exposure but does not make a shared trail private. Timestamps, event counts, tool names, model names, branch shape, unredacted file names, and remaining metadata can still reveal workflow information. Sharing a redacted trail SHOULD be treated as publishing it to anyone who can access the transport. + +Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. Sharing tools SHOULD scan headers and trail envelopes as well as event payloads (§16). + +The implementation-maintained detector catalog and rule pack schema live in [`docs/redaction-patterns.md`](./docs/redaction-patterns.md); this catalog is non-normative and does not change the trail file format. + +--- + diff --git a/spec/v0.1.0/18-validation.md b/spec/v0.1.0/18-validation.md new file mode 100644 index 0000000..c7eee92 --- /dev/null +++ b/spec/v0.1.0/18-validation.md @@ -0,0 +1,172 @@ +## 18. Validation + +Validation is layered because JSON Schema validates one line at a time, while several Agent Trail rules require whole-file context. + +### 18.1 Writer schema + +`schema.json` is the writer-strict schema for v0.1.0. It validates a single JSON object line and requires header and envelope records to use `schema_version: "0.1.0"`. It rejects unknown top-level event types. Writers use this schema for emitted envelope, header, and event lines. + +`schema.json` is the canonical format contract through v1.0. Generated types, validators, and packages MUST derive from it rather than maintaining a separate manual contract. + +### 18.2 Reader tolerance + +Readers MAY accept compatible future v0.x files best-effort: skip unknown event types, ignore unknown payload fields, preserve unknown records when round-tripping, and warn instead of aborting where possible. 
Reader tolerance is runtime behavior, not the writer-strict schema contract. + +### 18.3 Conformance classes and diagnostics + +Agent Trail defines named conformance classes so tools can describe the depth of +reader or writer support they implement. + +| Class | Name | Requirements | +|---|---|---| +| **R0** | Renderer | Reader-tolerant JSONL parsing per §6 and §18.2; renders the mandatory event types in §10.2, including user messages, agent messages, tool calls, tool results, and summaries; preserves or displays fallback output for unknown records it can parse; does not crash on valid or quarantinable input. | +| **R1** | Structural reader | R0 plus the non-hash whole-file layout, graph, pairing, streaming-state, and diagnostic checks in §18.4. R1 catches duplicate ids, unknown parents, parent cycles, unresolved `source.raw.envelope_ref`, tool-call pairing diagnostics, and other file-level checks that do not require recomputing content hashes or comparing segment-chain hashes. | +| **R2** | Verifying reader | R1 plus content-hash verification per §7.3 and §7.4, and segment-chain verification per §9.5. Readers in this class warn rather than abort on reader-tolerant hash mismatches, per §18.4.1. | +| **W** | Writer | Emits writer-strict records that validate against `schema.json` and satisfy the strict whole-file validation rules in §18.4. Writer conformance is about emitted trail files, not reader tolerance. | + +`@agent-trail/core` implements Class R2 reader behavior through its parsing, +validation, canonicalization, hashing, and multi-segment reconciliation APIs. + +The validation conformance suite manifest tags each fixture with the applicable +classes. The current validation suite does not tag fixtures as R0 because it +asserts validation outcomes, not rendering behavior. 
+ +#### Claiming conformance (non-normative) + +Projects MAY claim support using the class name they implement, for example +"Agent Trail R0 reader", "Agent Trail R2 reader", or "Agent Trail W writer". +Such claims mean the implementation passes the conformance checks tagged for +that class and follows the referenced normative sections. Agent Trail does not +define a certification registry or badge authority. + +#### Validation diagnostics + +Validators SHOULD report normalized diagnostics with `line`, `path` (JSON Pointer), `severity`, `code`, and `message`. Implementations MAY include extra fields, but these five fields are the portable diagnostic surface. + +Portable diagnostic code registry: + +| Code | Severity | Defining section | +|---|---|---| +| `ambiguous_sequential_pairing` | warning | §10.5 / §18.4.2 | +| `child_session_fork_from_mismatch` | warning | §18.4.2 | +| `child_session_parent_link_mismatch` | warning | §18.4.2 | +| `content_hash_invalid` | error | §7.3 / §18.4.1 | +| `content_hash_mismatch` | error (strict), warning (reader-tolerant) | §7.3 / §18.4.1 | +| `cross_group_fork_from_hash_mismatch` | warning | §9.6.5 | +| `duplicate_id` | error | §18.4.1 | +| `duplicate_option_labels` | warning | §10.2 / §18.4.2 | +| `duplicate_segment_seq` | warning | §9.5 / §18.4.2 | +| `duplicate_tool_result` | warning | §10.5 / §18.4.2 | +| `duplicate_user_query_question_id` | error | §10.2 | +| `envelope_has_parent_id` | error | §8 / §18.4.1 | +| `envelope_not_at_line_1` | error | §8 / §18.4.1 | +| `envelope_sessions_manifest_drift` | warning | §8.4 / §18.4.2 | +| `events_before_first_session_header` | error | §9.6 / §18.4.1 | +| `header_has_parent_id` | error | §9 / §18.4.1 | +| `ill_formed_string` | error (strict), warning (reader-tolerant) | §5.2 / §18.4.1 | +| `missing_header` | error | §9 / §18.4.1 | +| `missing_header_after_envelope` | error | §8 / §18.4.1 | +| `multiple_envelopes` | error | §8 / §18.4.1 | +| `non_interoperable_number` | warning | §5.2 / 
§18.4.2 | +| `non_monotonic_event_ts` | warning | §18.4.2 | +| `out_of_order_segment_seq` | warning | §9.5 / §18.4.2 | +| `out_of_order_session_headers` | warning | §9.6.6 | +| `parent_cycle` | error | §13.2 / §18.4.1 | +| `parse_fidelity_drift` | error | §9.2 / §18.4.1 | +| `reader_tolerant_schema_version` | warning | §6 / §18.2 | +| `reader_tolerant_unknown_payload_field` | warning | §18.2 | +| `reader_tolerant_unknown_record` | warning | §18.2 | +| `segment_chain_break` | warning | §9.5 | +| `source_raw_envelope_ref_unresolved` | error | §10.7 / §18.4.1 | +| `source_raw_unredacted_secret` | warning | §15.1 / §18.4.2 | +| `stream_open_with_content_hash` | warning | §18.4.3 | +| `stream_open_with_terminal_event` | warning | §18.4.3 | +| `tool_args_unredacted_secret` | warning | §16 / §18.4.2 | +| `tool_result_semantic_conflict` | warning | §10.5 / §18.4.2 | +| `unknown_abandoned_branch_id` | warning | §10.3 / §18.4.2 | +| `unknown_branch_point_from_id` | warning | §10.3 / §18.4.2 | +| `unknown_final_message_id` | warning | §10.3 / §18.4.2 | +| `unknown_parent_id` | error | §10.1 / §18.4.1 | +| `unknown_user_query_answer_key` | error | §10.2 | +| `unknown_user_query_for_id` | warning | §10.2 / §18.4.2 | +| `unmatched_tool_call_at_eof` | warning | §10.5 / §18.4.2 | +| `vcs_remote_url_with_credentials` | warning or error | §9.2 / §18.4 | +| `vcs_revision_divergence` | warning | §9.6.6 | + +#### Conformance suite (non-normative) + +The repository publishes a versioned validation conformance suite with the schema package. The canonical corpus lives under `tests/fixtures/validation/` and is mirrored into the `@agent-trail/schema` package under `conformance/`. + +The suite manifest uses three assertion tiers, plus per-fixture class tags: + +- Writer-strict validity verdicts and reader-tolerant cleanliness outcomes for every fixture. +- Portable diagnostic assertions (`severity`, `code`, `line`, `path`) only for spec-named diagnostic codes. 
+- Line-only assertions for schema-layer failures, because JSON Schema validator keyword vocabularies are implementation-specific. +- Class tags (`classes`) marking which conformance classes each fixture applies to. Validation fixtures use `W`, `R1`, and `R2`; R0 renderer conformance needs a separate rendering suite. + +### 18.4 File graph checks + +A v0.1.0-compliant trail file MUST also pass whole-file checks. + +#### 18.4.1 Errors + +1. The first line is either a trail envelope (`type: "trail"`, §8) or a session header (`type: "session"`, `schema_version: "0.1.0"`). When the envelope is present, the session header MUST occupy line 2. +2. Subsequent lines match an event schema (`type`, `id`, `ts`, `payload`). +3. All `id` values are unique within the file. +4. Every non-null `parent_id` references an `id` in the same file. +5. The `parent_id` graph is acyclic. +6. Writer timestamps are valid UTC `Z` ISO-8601 values with millisecond precision. Readers MAY tolerate broader ISO-8601 timestamps. +7. All string values are well-formed: no unpaired high or low surrogate code units. Violations are `ill_formed_string` diagnostics at the offending JSON Pointer. Strict validation reports an error; reader-tolerant validation reports a warning and does not repair the value. + +If `content_hash` is present: + +1. The value is 64 hex characters (SHA-256). Invalid hash shape emits `content_hash_invalid` at `/content_hash`. +2. Strict validators recompute and verify per §7.3. On mismatch, strict validation fails with `content_hash_mismatch` at `/content_hash`. Reader-tolerant parsers MAY warn but MUST NOT abort. + +Additional whole-file errors: + +- `parse_fidelity`, when present, MUST match the session group's entries (§9.2). Drift emits `parse_fidelity_drift` at the mismatched `parse_fidelity` field. +- A `user_query` question id MUST be unique within that query. Duplicate ids emit `duplicate_user_query_question_id` at the repeated question id. 
+- A `user_query_response.payload.answers` key not present in the resolved `user_query.payload.questions[].id` set emits `unknown_user_query_answer_key` at that answer key. +- `source.raw.envelope_ref`, when set, MUST reference the `id` of an earlier entry in the same file (§10.7). Dangling or forward references are errors with code `source_raw_envelope_ref_unresolved` at `/source/raw/envelope_ref`. +- Trail envelope position and uniqueness (§8): + - `envelope_not_at_line_1` (error): a `type:"trail"` record appears on a line other than line 1. + - `multiple_envelopes` (error): more than one envelope appears in the file. + - `missing_header_after_envelope` (error): an envelope at line 1 is not followed by a session header on line 2. + - `envelope_has_parent_id` (error): the trail envelope carries a `parent_id`. + +#### 18.4.2 Warnings + +- Each `tool_call.id` SHOULD be referenced by exactly one `tool_result.payload.for_id` (or paired via §10.5). +- Inline `subagent_invoke` events SHOULD have descendants in the same group, or external child invocations SHOULD set `args.session_id` to the child header `id` when known. +- When an in-file child session is present, the parent `subagent_invoke.args.session_id` and child `header.fork_from.{session_id,entry_id}` SHOULD agree. Mismatches are warnings, not errors, so partial bundles and external-only references remain readable. +- `branch_point.payload.from_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_branch_point_from_id` at `/payload/from_id`. +- `branch_summary.payload.abandoned_branch_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_abandoned_branch_id` at `/payload/abandoned_branch_id`. +- Writers SHOULD emit `session_terminated` if any `tool_call` remains unmatched at EOF. The warning code is `unmatched_tool_call_at_eof`. 
Suppression: + - A `session_end` event anywhere in the file suppresses this warning for every unmatched `tool_call` (clean conclusion, §10.3). + - A `session_terminated` event whose `payload.open_call_ids` lists a given `tool_call.id` suppresses the warning for that id only (explicit acknowledgement). A `session_terminated` event without `open_call_ids` does not suppress the warning. +- A `tool_result` paired by sequential fallback when two or more unmatched prior same-branch `tool_call` candidates existed emits `ambiguous_sequential_pairing` at `/payload`. +- A `user_query` question with duplicate option labels among options that do not carry stable option ids emits `duplicate_option_labels` at the repeated option's `/payload/questions/<i>/options/<j>/label`. +- `user_query_response.payload.for_id` SHOULD reference a `user_query` in the same session group. An unresolved reference emits `unknown_user_query_for_id` at `/payload/for_id`. +- `session_end.payload.final_message_id`, when present, SHOULD reference an `id` that appears in the same file (the session header or a prior event). A dangling reference is a warning with code `unknown_final_message_id` at `/payload/final_message_id`. +- An event's `ts` SHOULD NOT be earlier than its parent event's `ts` inside the same parent chain. Equal timestamps are allowed; sibling branches may interleave in wall-clock time. A strictly earlier child timestamp emits `non_monotonic_event_ts` (warning) at `/ts`. +- Validators MAY report implementation-defined size budgets for `source.raw`; specific numbers are writer policy (§15.1). +- `source.raw` SHOULD NOT contain unredacted credentials. A string leaf matching a known credential pattern emits `source_raw_unredacted_secret` (warning) at the matching JSON pointer. +- JSON integer numbers outside the IEEE-754 exact-integer range SHOULD be emitted as strings. Unsafe integer numbers emit `non_interoperable_number` (warning) at the offending JSON Pointer. 
+- Privacy-sensitive tool arguments SHOULD NOT contain unredacted credentials. A string leaf in `mcp_call` / `web_fetch` `tool_call.payload.args.headers` or `shell_command` `tool_call.payload.args.command` matching a known credential pattern emits `tool_args_unredacted_secret` (warning) at the matching JSON pointer. +- `envelope_sessions_manifest_drift` (warning): the envelope's `sessions` manifest length disagrees with the number of session groups, or a manifest entry disagrees with the matching session header's `id` or `agent.name`. +- Multi-segment consistency within one file (§9.5): + - `duplicate_segment_seq` (warning): two groups share the same `(session_uid, segment.seq)` pair, treating missing `segment` as `seq: 1`. + - `out_of_order_segment_seq` (warning): groups with the same `session_uid` appear with descending `segment.seq` in file order. + +#### 18.4.3 Streaming-state rules + +Streaming rules (§9.4) are evaluated against the *current* header `stream.state` at validation time — the validator reads the present value, not a history of transitions. Crash-recovery writers MUST finalize the header (set `stream.state` to `"closed"` or remove `stream`) before appending terminal events; once the stream is no longer marked live, rule 10 below stops applying. + +10. If the current `header.stream.state == "open"`: + - **10a.** `content_hash` SHOULD be absent or `""`. A populated hex hash is a warning, since the canonical bytes are still in flux. + - **10b.** Terminal events (`session_end`, `session_terminated`) SHOULD NOT appear. A terminal event in a file whose current `header.stream.state == "open"` is a warning — the writer claims the stream is still open but has already emitted a terminal event. Finalize the header (set `stream.state` to `"closed"` or remove `stream`) before appending terminal events. +11. If the current `header.stream.state == "closed"` or `stream` is absent, finalized artifacts SHOULD populate `content_hash`. 
Readers MAY warn but MUST NOT abort when it is missing on otherwise complete files. Trail files produced by stream-unaware writers, or files appended across crashes and recoveries, MAY contain both `session_end` and `session_terminated` legitimately; rule 10b does not apply once the stream is no longer marked live. + +--- + diff --git a/spec/v0.1.0/19-formal-schema.md b/spec/v0.1.0/19-formal-schema.md new file mode 100644 index 0000000..f2b4a19 --- /dev/null +++ b/spec/v0.1.0/19-formal-schema.md @@ -0,0 +1,8 @@ +## 19. Formal schema + +The normative writer-strict JSON Schema lives in `schema.json` and is published at `https://agent-trail.dev/schema/v0.1.0.json`. + +This spec intentionally does not duplicate the full schema inline. Implementations SHOULD validate each JSONL line against `schema.json`, then run the whole-file checks in §18.4. Reader-tolerant parsing, including unknown future event preservation, is separate from writer-strict schema validation. + +--- + diff --git a/spec/v0.1.0/20-examples.md b/spec/v0.1.0/20-examples.md new file mode 100644 index 0000000..e5da8f6 --- /dev/null +++ b/spec/v0.1.0/20-examples.md @@ -0,0 +1,14 @@ +## 20. Examples + +More complete examples are maintained as validated conformance fixtures under `tests/fixtures/validation/valid/spec-example-*.trail.jsonl` and published with concrete package exports such as `@agent-trail/schema/conformance/manifest.json` and `@agent-trail/schema/conformance/fixtures/valid/spec-example-*.trail.jsonl`. This keeps examples executable without splitting normative meaning away from the schema and validation rules. 
+ +Minimal at-a-glance trail: + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + +--- + diff --git a/spec/v0.1.0/README.md b/spec/v0.1.0/README.md new file mode 100644 index 0000000..1b4731a --- /dev/null +++ b/spec/v0.1.0/README.md @@ -0,0 +1,37 @@ +# Agent Trail Specification v0.1.0 + + +**Version:** 0.1.0 +**Status:** Draft +**Date:** June 12, 2026 +**License:** Apache-2.0 +**Schema URL:** `https://agent-trail.dev/schema/v0.1.0.json` *(release snapshot; local source: `schema.json`)* + +--- + +## Contents + +- [1. Motivation](./01-motivation.md) +- [2. Goals and non-goals](./02-goals-and-non-goals.md) +- [3. At a glance](./03-at-a-glance.md) +- [4. Terminology](./04-terminology.md) +- [5. File format](./05-file-format.md) +- [6. Versioning](./06-versioning.md) +- [7. Identity, artifacts, and content addressing](./07-identity-artifacts-and-content-addressing.md) +- [8. The trail envelope](./08-the-trail-envelope.md) +- [9. The session header](./09-the-session-header.md) +- [10. Events](./10-events.md) +- [11. Canonical tool taxonomy](./11-canonical-tool-taxonomy.md) +- [12. Vendor extensions](./12-vendor-extensions.md) +- [13. Tree and branching](./13-tree-and-branching.md) +- [14. Canonical agent registry](./14-canonical-agent-registry.md) +- [15. Truncation, overflow, and raw source size](./15-truncation-overflow-and-raw-source-size.md) +- [16. Redaction](./16-redaction.md) +- [17. Security Considerations](./17-security-considerations.md) +- [18. Validation](./18-validation.md) +- [19. Formal schema](./19-formal-schema.md) +- [20. 
Examples](./20-examples.md) +- [Changelog](./changelog.md) +- [Appendix A — Minimal valid record](./appendix-a-minimal-valid-record.md) +- [Appendix B — Content hash worked example](./appendix-b-content-hash-worked-example.md) +- [License](./license.md) diff --git a/spec/v0.1.0/appendix-a-minimal-valid-record.md b/spec/v0.1.0/appendix-a-minimal-valid-record.md new file mode 100644 index 0000000..63690d2 --- /dev/null +++ b/spec/v0.1.0/appendix-a-minimal-valid-record.md @@ -0,0 +1,17 @@ +## Appendix A — Minimal valid record + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +``` + +A session with only a header is valid. Events are optional. + +### Appendix A.1 — Minimal valid record with trail envelope + +```jsonl +{"type":"trail","schema_version":"0.1.0","id":"00000000-0000-0000-0000-000000000001","ts":"2026-05-17T14:00:00.000Z","producer":"trail-cli/0.3.0"} +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +``` + +An envelope at line 1 followed by a session header at line 2 is valid. Events are optional. + diff --git a/spec/v0.1.0/appendix-b-content-hash-worked-example.md b/spec/v0.1.0/appendix-b-content-hash-worked-example.md new file mode 100644 index 0000000..5ca73b3 --- /dev/null +++ b/spec/v0.1.0/appendix-b-content-hash-worked-example.md @@ -0,0 +1,38 @@ +## Appendix B — Content hash worked example + +This example shows the §7.3 two-pass procedure for the +`hash-vectors/minimal-pending-roundtrip.trail.jsonl` conformance fixture. The +remaining canonicalization and two-tier identity cases are published in the +`hash-vectors/` fixture category. 
+ +Input file before stamping: + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000101","content_hash":"","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000101","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000102","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + +Canonical bytes hashed by SHA-256, shown as UTF-8 text with the required trailing +newline after the last line: + +```jsonl +{"agent":{"name":"codex-cli"},"content_hash":"","id":"01HSESS0000000000000000101","schema_version":"0.1.0","ts":"2026-05-17T14:00:00.000Z","type":"session"} +{"id":"01HEVTA0000000000000000101","payload":{"text":"hello"},"ts":"2026-05-17T14:00:05.000Z","type":"user_message"} +{"id":"01HEVTA0000000000000000102","payload":{"text":"hi"},"ts":"2026-05-17T14:00:07.000Z","type":"agent_message"} +``` + +Resulting session-level digest: + +```text +f215ed334d3928e1abde804f2c4a870431b18d4fa7d755ec94d94be2a6ddd06e +``` + +Stamped file: + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000101","content_hash":"f215ed334d3928e1abde804f2c4a870431b18d4fa7d755ec94d94be2a6ddd06e","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"user_message","id":"01HEVTA0000000000000000101","ts":"2026-05-17T14:00:05.000Z","payload":{"text":"hello"}} +{"type":"agent_message","id":"01HEVTA0000000000000000102","ts":"2026-05-17T14:00:07.000Z","payload":{"text":"hi"}} +``` + diff --git a/spec/v0.1.0/changelog.md b/spec/v0.1.0/changelog.md new file mode 100644 index 0000000..7576f0e --- /dev/null +++ b/spec/v0.1.0/changelog.md @@ -0,0 +1,23 @@ +## Changelog + +### v0.1.0 (June 12, 2026) + +Initial public draft. 
v0.1.0 defines: + +- JSONL file layout, session header, core event envelope, mandatory event types, optional events, the canonical tool taxonomy, vendor `meta` extensions (§8.3), tree semantics, layered validation, and artifact-level content addressing. +- Stable local source filenames (`spec.md`, `schema.json`) with immutable hosted release snapshots at `/spec/v0.1.0` and `/schema/v0.1.0.json`. +- The optional trail envelope record `type:"trail"` at line 1 (§8) with Tier 1 fields (`id`, `name`, `description`, `ts`, `producer`, `content_hash`) and Tier 2 fields (`tags`, `vcs`, `fork_from`, `redacted_from`, `sessions`, `meta`), and two-tier identity (§7.4): session-level `content_hash` excludes the envelope, file-level `content_hash` covers the whole file. +- Session headers MAY carry base `name`, `description`, and `tags`; `session_metadata_update` events replay on top of those base values. `vcs.type` allows reserved systems or `x-<vendor>/` extensions, and envelope `fork_from.trail_id` uses the standard id shape. +- Multi-segment session primitives (`session_uid`, `segment.seq`, `segment.prev_content_hash`) and reconciliation invariants (§9.5). +- The optional header `stream` field, the `session_end` event, and the recommended `system_event` heartbeat convention (§9.4, §10.3). +- Tool-surface fidelity for truncated tool-call args, string-replacement `file_edit`, branch-scoped pairing warnings, stable user-query option ids, stricter attachment identity, and tool-result meta key hygiene. +- The `source.raw.envelope_ref` inline-first / ref-subsequent envelope dedup convention (§10.7), the `{ elided: true, size_bytes: N }` elide marker for `source.raw` (§15.1), and the writer-side redaction requirement for credential patterns in `source.raw`. 
+- Normative share-time redaction rules for local attachment URIs, unsafe `overflow_ref` values, unresolved `user_query_response` answers, and privacy-sensitive field handling (§16), plus the `tool_args_unredacted_secret` validator warning (§18.4). +- Envelope-level `payload.usage` on the first entry derived from a source envelope, including `agent_message`, `agent_thinking`, and `tool_call` (§10.2). +- During the v0.1.0 draft cycle, planning snapshots moved from the legacy `tool_call.payload.tool:"task_plan"` shape to the canonical `task_plan_update` event. Final v0.1.0 writer-strict output MUST use `task_plan_update`; legacy `task_plan` tool calls are invalid. +- During the v0.1.0 draft cycle, duplicate `system_event` kinds for `session_end` and `permission_mode_change` were removed, thinking levels became source-defined strings, `user_message.origin` was added, and related vocabulary clarifications landed. +- During the v0.1.0 draft cycle, vendor extensions converged on one `x-<vendor>/` grammar across `meta`, enum extensions, `system_event.kind`, `tool_result.payload.meta`, and custom `agent.name`. +- During the v0.1.0 draft cycle, writer-strict identity and encoding were hardened: ULIDs are uppercase, UUIDs are lowercase, timestamps carry schema `format:"date-time"` annotation, and strings with unpaired surrogates are invalid (`ill_formed_string`). + +--- + diff --git a/spec/v0.1.0/license.md b/spec/v0.1.0/license.md new file mode 100644 index 0000000..8656b12 --- /dev/null +++ b/spec/v0.1.0/license.md @@ -0,0 +1,7 @@ +## License + +This specification is released under Apache-2.0. 
+ +--- + +*End of Agent Trail Specification v0.1.0* From 3b5c21cc02eb2cb7bf2776f64b75cc36a6857d05 Mon Sep 17 00:00:00 2001 From: Somasundaram Ayyappan <1802828+somus@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:48:46 +0530 Subject: [PATCH 2/4] docs(spec): add explanatory diagrams and examples --- spec/draft/05-file-format.md | 15 +++++++++++++ ...entity-artifacts-and-content-addressing.md | 15 +++++++++++++ spec/draft/09-the-session-header.md | 15 +++++++++++++ spec/draft/10-events.md | 22 +++++++++++++++++++ spec/draft/16-redaction.md | 10 +++++++++ spec/draft/18-validation.md | 13 +++++++++++ spec/draft/20-examples.md | 11 ++++++++++ spec/v0.1.0/05-file-format.md | 15 +++++++++++++ ...entity-artifacts-and-content-addressing.md | 15 +++++++++++++ spec/v0.1.0/09-the-session-header.md | 15 +++++++++++++ spec/v0.1.0/10-events.md | 22 +++++++++++++++++++ spec/v0.1.0/16-redaction.md | 10 +++++++++ spec/v0.1.0/18-validation.md | 13 +++++++++++ spec/v0.1.0/20-examples.md | 11 ++++++++++ 14 files changed, 202 insertions(+) diff --git a/spec/draft/05-file-format.md b/spec/draft/05-file-format.md index 4d3c275..43adb5d 100644 --- a/spec/draft/05-file-format.md +++ b/spec/draft/05-file-format.md @@ -30,5 +30,20 @@ Every valid trail file has: When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in §9.6. +> Non-normative diagram. 
+ +```mermaid +flowchart TD + A["Start of file"] --> B{"First record type"} + B -->|"trail"| C["Trail envelope (§8)"] + C --> D["Session header (§9)"] + B -->|"session"| D + D --> E["Events (§10)"] + E --> F{"Next record"} + F -->|"event"| E + F -->|"session"| D + F -->|"EOF"| G["End of file"] +``` + --- diff --git a/spec/draft/07-identity-artifacts-and-content-addressing.md b/spec/draft/07-identity-artifacts-and-content-addressing.md index 077c5c6..f3d8a3e 100644 --- a/spec/draft/07-identity-artifacts-and-content-addressing.md +++ b/spec/draft/07-identity-artifacts-and-content-addressing.md @@ -47,6 +47,21 @@ When a trail envelope is present, the file carries two independent content hashe Writers that emit both hashes MUST stamp every session-level hash first, then compute and stamp the file-level hash. Readers verify them independently. Different consumers care about different scopes: extraction tools recompute the session hash; share/transport tools verify the file hash. +> Non-normative diagram. + +```mermaid +flowchart TD + A["Session header + events"] --> B["Set session content_hash to empty string"] + B --> C["Canonicalize session bytes (§7.3)"] + C --> D["Stamp session-level hash"] + D --> E{"Trail envelope present?"} + E -->|"no"| F["Finalized session artifact"] + E -->|"yes"| G["Envelope + stamped session groups"] + G --> H["Set envelope content_hash to empty string"] + H --> I["Canonicalize whole file (§7.4)"] + I --> J["Stamp file-level hash"] +``` + #### 7.4.1 Hash tier for `fork_from` and `redacted_from` Lineage references mirror the tier of the linking context: diff --git a/spec/draft/09-the-session-header.md b/spec/draft/09-the-session-header.md index f22988f..661fff8 100644 --- a/spec/draft/09-the-session-header.md +++ b/spec/draft/09-the-session-header.md @@ -184,6 +184,21 @@ A trail file MAY contain one OR more `(session header, events*)` groups concaten A multi-session trail is a session bundle: a forest of session groups. Each group MAY be linear or tree-native. 
Branches represented inside one source session use `parent_id` within that group; separate spawned or forked transcripts use separate groups linked by `header.fork_from`. +> Non-normative diagram. + +```mermaid +flowchart TD + A["Trail file"] --> B["Session group A"] + A --> C["Session group B"] + B --> D["Header A (§9)"] + B --> E["Events A (§10)"] + C --> F["Header B (§9)"] + C --> G["Events B (§10)"] + F -->|"fork_from.session_id"| D + E -->|"parent_id stays inside group"| E + G -->|"parent_id stays inside group"| G +``` + #### 9.6.1 File grammar ```text diff --git a/spec/draft/10-events.md b/spec/draft/10-events.md index fbba318..353a43a 100644 --- a/spec/draft/10-events.md +++ b/spec/draft/10-events.md @@ -891,6 +891,28 @@ Writers SHOULD avoid relying on fallbacks. Populate `for_id` when reliable; use Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning (§18.4): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. +> Non-normative diagram. + +```mermaid +flowchart TD + A["tool_call"] --> B{"Terminal event"} + B -->|"tool_result.payload.for_id matches"| C["Matched explicitly"] + B -->|"tool_call_aborted scope=tool_call and for_id matches"| C + B -->|"matching semantic.call_id"| D["Matched by semantic fallback"] + B -->|"nearest unmatched prior call in same branch"| E["Matched by sequential fallback"] + B -->|"no deterministic match"| F["Unmatched at EOF warning (§18.4)"] +``` + +> Non-normative example. 
+ +Derived from `fixtures/validation/valid/tool-call-matched-by-for-id.trail.jsonl`. + +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"hi"}} +``` + ### 10.6 Unknown event types Readers MUST tolerate unknown types: diff --git a/spec/draft/16-redaction.md b/spec/draft/16-redaction.md index 1e0008b..dee593a 100644 --- a/spec/draft/16-redaction.md +++ b/spec/draft/16-redaction.md @@ -25,6 +25,16 @@ Share-time redactors SHOULD populate `entry.meta.redaction_count` on each change When redaction changes bytes, lineage hashes are updated as described in §9.6.7. This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. +> Non-normative diagram. + +```mermaid +flowchart LR + A["Raw trail"] -->|"share-time redaction"| B["Redacted trail"] + B -->|"transport"| C["Shared trail"] + A -.->|"redacted_from.content_hash"| B + B -->|"new content_hash"| D["Independent artifact identity (§7)"] +``` + Specific secret patterns, exact PII detectors, path-normalization strings, image preview behavior, token-usage policy, blob upload mechanics, and share workflow remain implementation semantics. 
--- diff --git a/spec/draft/18-validation.md b/spec/draft/18-validation.md index c7eee92..f1a2faa 100644 --- a/spec/draft/18-validation.md +++ b/spec/draft/18-validation.md @@ -104,6 +104,19 @@ The suite manifest uses three assertion tiers: - Line-only assertions for schema-layer failures, because JSON Schema validator keyword vocabularies are implementation-specific. - Class tags (`classes`) marking which conformance classes each fixture applies to. Validation fixtures use `W`, `R1`, and `R2`; R0 renderer conformance needs a separate rendering suite. +> Non-normative diagram. + +```mermaid +flowchart TD + A["JSONL records"] --> B["Writer schema (§18.1)"] + B --> C["Whole-file graph checks (§18.4)"] + C --> D["Content hash verification (§7)"] + D --> E["Conformance fixture assertions"] + B --> F["Schema diagnostics"] + C --> G["Structural diagnostics"] + D --> H["Hash diagnostics"] +``` + ### 18.4 File graph checks A v0.1.0-compliant trail file MUST also pass whole-file checks. diff --git a/spec/draft/20-examples.md b/spec/draft/20-examples.md index e5da8f6..49185d1 100644 --- a/spec/draft/20-examples.md +++ b/spec/draft/20-examples.md @@ -2,6 +2,17 @@ More complete examples are maintained as validated conformance fixtures under `tests/fixtures/validation/valid/spec-example-*.trail.jsonl` and published with concrete package exports such as `@agent-trail/schema/conformance/manifest.json` and `@agent-trail/schema/conformance/fixtures/valid/spec-example-*.trail.jsonl`. This keeps examples executable without splitting normative meaning away from the schema and validation rules. +> Non-normative example index. 
+ +Use these fixture-backed examples for larger scenarios: + +- `fixtures/validation/valid/spec-example-incomplete-session.trail.jsonl` +- `fixtures/validation/valid/spec-example-mcp-call.trail.jsonl` +- `fixtures/validation/valid/spec-example-synthesized-event.trail.jsonl` +- `fixtures/validation/valid/spec-example-tool-call-semantic-pairing.trail.jsonl` +- `fixtures/validation/valid/spec-example-tool-result-fallback-pairing.trail.jsonl` +- `fixtures/validation/valid/spec-example-tree-abandoned-branch.trail.jsonl` + Minimal at-a-glance trail: ```jsonl diff --git a/spec/v0.1.0/05-file-format.md b/spec/v0.1.0/05-file-format.md index 4d3c275..43adb5d 100644 --- a/spec/v0.1.0/05-file-format.md +++ b/spec/v0.1.0/05-file-format.md @@ -30,5 +30,20 @@ Every valid trail file has: When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in §9.6. +> Non-normative diagram. + +```mermaid +flowchart TD + A["Start of file"] --> B{"First record type"} + B -->|"trail"| C["Trail envelope (§8)"] + C --> D["Session header (§9)"] + B -->|"session"| D + D --> E["Events (§10)"] + E --> F{"Next record"} + F -->|"event"| E + F -->|"session"| D + F -->|"EOF"| G["End of file"] +``` + --- diff --git a/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md b/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md index 077c5c6..f3d8a3e 100644 --- a/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md +++ b/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md @@ -47,6 +47,21 @@ When a trail envelope is present, the file carries two independent content hashe Writers that emit both hashes MUST stamp every session-level hash first, then compute and stamp the file-level hash. Readers verify them independently. Different consumers care about different scopes: extraction tools recompute the session hash; share/transport tools verify the file hash. +> Non-normative diagram. 
+ +```mermaid +flowchart TD + A["Session header + events"] --> B["Set session content_hash to empty string"] + B --> C["Canonicalize session bytes (§7.3)"] + C --> D["Stamp session-level hash"] + D --> E{"Trail envelope present?"} + E -->|"no"| F["Finalized session artifact"] + E -->|"yes"| G["Envelope + stamped session groups"] + G --> H["Set envelope content_hash to empty string"] + H --> I["Canonicalize whole file (§7.4)"] + I --> J["Stamp file-level hash"] +``` + #### 7.4.1 Hash tier for `fork_from` and `redacted_from` Lineage references mirror the tier of the linking context: diff --git a/spec/v0.1.0/09-the-session-header.md b/spec/v0.1.0/09-the-session-header.md index f22988f..661fff8 100644 --- a/spec/v0.1.0/09-the-session-header.md +++ b/spec/v0.1.0/09-the-session-header.md @@ -184,6 +184,21 @@ A trail file MAY contain one OR more `(session header, events*)` groups concaten A multi-session trail is a session bundle: a forest of session groups. Each group MAY be linear or tree-native. Branches represented inside one source session use `parent_id` within that group; separate spawned or forked transcripts use separate groups linked by `header.fork_from`. +> Non-normative diagram. + +```mermaid +flowchart TD + A["Trail file"] --> B["Session group A"] + A --> C["Session group B"] + B --> D["Header A (§9)"] + B --> E["Events A (§10)"] + C --> F["Header B (§9)"] + C --> G["Events B (§10)"] + F -->|"fork_from.session_id"| D + E -->|"parent_id stays inside group"| E + G -->|"parent_id stays inside group"| G +``` + #### 9.6.1 File grammar ```text diff --git a/spec/v0.1.0/10-events.md b/spec/v0.1.0/10-events.md index fbba318..353a43a 100644 --- a/spec/v0.1.0/10-events.md +++ b/spec/v0.1.0/10-events.md @@ -891,6 +891,28 @@ Writers SHOULD avoid relying on fallbacks. 
Populate `for_id` when reliable; use Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning (§18.4): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. +> Non-normative diagram. + +```mermaid +flowchart TD + A["tool_call"] --> B{"Terminal event"} + B -->|"tool_result.payload.for_id matches"| C["Matched explicitly"] + B -->|"tool_call_aborted scope=tool_call and for_id matches"| C + B -->|"matching semantic.call_id"| D["Matched by semantic fallback"] + B -->|"nearest unmatched prior call in same branch"| E["Matched by sequential fallback"] + B -->|"no deterministic match"| F["Unmatched at EOF warning (§18.4)"] +``` + +> Non-normative example. + +Derived from `fixtures/validation/valid/tool-call-matched-by-for-id.trail.jsonl`. 
+ +```jsonl +{"type":"session","schema_version":"0.1.0","id":"01HSESS0000000000000000001","session_uid":"01HZZZZZZZZZZZZZZZZZZZZZ01","ts":"2026-05-17T14:00:00.000Z","agent":{"name":"codex-cli"}} +{"type":"tool_call","id":"01HEVTA0000000000000000001","ts":"2026-05-17T14:00:05.000Z","payload":{"tool":"file_read","args":{"path":"a.txt"}}} +{"type":"tool_result","id":"01HEVTA0000000000000000002","ts":"2026-05-17T14:00:06.000Z","payload":{"for_id":"01HEVTA0000000000000000001","ok":true,"output":"hi"}} +``` + ### 10.6 Unknown event types Readers MUST tolerate unknown types: diff --git a/spec/v0.1.0/16-redaction.md b/spec/v0.1.0/16-redaction.md index 1e0008b..dee593a 100644 --- a/spec/v0.1.0/16-redaction.md +++ b/spec/v0.1.0/16-redaction.md @@ -25,6 +25,16 @@ Share-time redactors SHOULD populate `entry.meta.redaction_count` on each change When redaction changes bytes, lineage hashes are updated as described in §9.6.7. This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. +> Non-normative diagram. + +```mermaid +flowchart LR + A["Raw trail"] -->|"share-time redaction"| B["Redacted trail"] + B -->|"transport"| C["Shared trail"] + A -.->|"redacted_from.content_hash"| B + B -->|"new content_hash"| D["Independent artifact identity (§7)"] +``` + Specific secret patterns, exact PII detectors, path-normalization strings, image preview behavior, token-usage policy, blob upload mechanics, and share workflow remain implementation semantics. --- diff --git a/spec/v0.1.0/18-validation.md b/spec/v0.1.0/18-validation.md index c7eee92..f1a2faa 100644 --- a/spec/v0.1.0/18-validation.md +++ b/spec/v0.1.0/18-validation.md @@ -104,6 +104,19 @@ The suite manifest uses three assertion tiers: - Line-only assertions for schema-layer failures, because JSON Schema validator keyword vocabularies are implementation-specific. 
- Class tags (`classes`) marking which conformance classes each fixture applies to. Validation fixtures use `W`, `R1`, and `R2`; R0 renderer conformance needs a separate rendering suite. +> Non-normative diagram. + +```mermaid +flowchart TD + A["JSONL records"] --> B["Writer schema (§18.1)"] + B --> C["Whole-file graph checks (§18.4)"] + C --> D["Content hash verification (§7)"] + D --> E["Conformance fixture assertions"] + B --> F["Schema diagnostics"] + C --> G["Structural diagnostics"] + D --> H["Hash diagnostics"] +``` + ### 18.4 File graph checks A v0.1.0-compliant trail file MUST also pass whole-file checks. diff --git a/spec/v0.1.0/20-examples.md b/spec/v0.1.0/20-examples.md index e5da8f6..49185d1 100644 --- a/spec/v0.1.0/20-examples.md +++ b/spec/v0.1.0/20-examples.md @@ -2,6 +2,17 @@ More complete examples are maintained as validated conformance fixtures under `tests/fixtures/validation/valid/spec-example-*.trail.jsonl` and published with concrete package exports such as `@agent-trail/schema/conformance/manifest.json` and `@agent-trail/schema/conformance/fixtures/valid/spec-example-*.trail.jsonl`. This keeps examples executable without splitting normative meaning away from the schema and validation rules. +> Non-normative example index. 
+ +Use these fixture-backed examples for larger scenarios: + +- `fixtures/validation/valid/spec-example-incomplete-session.trail.jsonl` +- `fixtures/validation/valid/spec-example-mcp-call.trail.jsonl` +- `fixtures/validation/valid/spec-example-synthesized-event.trail.jsonl` +- `fixtures/validation/valid/spec-example-tool-call-semantic-pairing.trail.jsonl` +- `fixtures/validation/valid/spec-example-tool-result-fallback-pairing.trail.jsonl` +- `fixtures/validation/valid/spec-example-tree-abandoned-branch.trail.jsonl` + Minimal at-a-glance trail: ```jsonl From ab3c286b7d4ae5abb50b54cee84acf3b880f11f8 Mon Sep 17 00:00:00 2001 From: Somasundaram Ayyappan <1802828+somus@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:53:59 +0530 Subject: [PATCH 3/4] docs(spec): fix split spec links --- spec/draft/04-terminology.md | 2 +- spec/draft/08-the-trail-envelope.md | 2 +- spec/draft/10-events.md | 12 ++++++------ spec/draft/11-canonical-tool-taxonomy.md | 2 +- spec/draft/12-vendor-extensions.md | 2 +- spec/draft/17-security-considerations.md | 2 +- spec/v0.1.0/04-terminology.md | 2 +- spec/v0.1.0/08-the-trail-envelope.md | 2 +- spec/v0.1.0/10-events.md | 12 ++++++------ spec/v0.1.0/11-canonical-tool-taxonomy.md | 2 +- spec/v0.1.0/12-vendor-extensions.md | 2 +- spec/v0.1.0/17-security-considerations.md | 2 +- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/spec/draft/04-terminology.md b/spec/draft/04-terminology.md index b9ba347..19e41cf 100644 --- a/spec/draft/04-terminology.md +++ b/spec/draft/04-terminology.md @@ -16,7 +16,7 @@ | **Adapter** | Software that reads a source agent's storage and emits a trail file. | | **Linear session** | A session whose events do not use `parent_id`. Events are ordered by file position. | | **Tree session** | A session where some events use `parent_id` to form a DAG. | -| **Canonical event** | One of the mandatory or optional event types in [§10.2](#10-2-mandatory-event-types) and [§10.3](#10-3-optional-event-types). 
| +| **Canonical event** | One of the mandatory or optional event types in [§10.2](./10-events.md#102-mandatory-event-types) and [§10.3](./10-events.md#103-optional-event-types). | | **Raw trail** | A local artifact preserving source fidelity as much as possible. | | **Redacted trail** | A separate artifact produced from a raw trail for sharing. It has its own `content_hash`. | | **Shared trail** | A redacted trail transported through a sharing mechanism. | diff --git a/spec/draft/08-the-trail-envelope.md b/spec/draft/08-the-trail-envelope.md index 74786fb..60bdc0a 100644 --- a/spec/draft/08-the-trail-envelope.md +++ b/spec/draft/08-the-trail-envelope.md @@ -57,7 +57,7 @@ The envelope MUST NOT carry a `parent_id`. It is not part of the event graph. The trail envelope (§8), the session header (§9), and every event entry (§10.1) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-<vendor>/<key>` extension grammar (§12.1) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record (§7.4): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. -For verbatim source-event preservation, use `source.raw` ([§10.1](#10-1-base-shape), [§10.7](#10-7-source-envelope-referencing), [§15.1](#15-1-source-raw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. 
+For verbatim source-event preservation, use `source.raw` ([§10.1](./10-events.md#101-base-shape), [§10.7](./10-events.md#107-source-envelope-referencing), [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. This draft defines one standard event-entry `meta` key: `redaction_count` (§16). Other standard keys MAY be promoted in later minor bumps based on observed usage. diff --git a/spec/draft/10-events.md b/spec/draft/10-events.md index 353a43a..f2115a8 100644 --- a/spec/draft/10-events.md +++ b/spec/draft/10-events.md @@ -203,7 +203,7 @@ When the upstream source does not provide item ids, or provides empty or whitesp #### `tool_call` -The agent invoked a tool. Tool kinds use the taxonomy in [§11](#11-canonical-tool-taxonomy). +The agent invoked a tool. Tool kinds use the taxonomy in [§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy). ```jsonc { @@ -225,7 +225,7 @@ The agent invoked a tool. Tool kinds use the taxonomy in [§11](#11-canonical-to | Payload field | Required | Type | Notes | |---|---|---|---| -| `tool` | yes | string | canonical tool kind ([§11](#11-canonical-tool-taxonomy)) | +| `tool` | yes | string | canonical tool kind ([§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy)) | | `args` | yes | object | tool-specific args | | `truncated` | no | boolean | true when `args` is a bounded excerpt rather than complete tool arguments | | `args_size` | conditional | integer | original serialized argument byte size; REQUIRED when `truncated: true` | @@ -234,7 +234,7 @@ The agent invoked a tool. Tool kinds use the taxonomy in [§11](#11-canonical-to #### `tool_result` -The result of a `tool_call`. References the call via `for_id`. Writers omit `for_id` when the source does not provide a reliable match. 
Readers MAY tolerate legacy/null values; when `for_id` is null or missing, see [§10.5](#10-5-tool-call-terminal-pairing). +The result of a `tool_call`. References the call via `for_id`. Writers omit `for_id` when the source does not provide a reliable match. Readers MAY tolerate legacy/null values; when `for_id` is null or missing, see [§10.5](#105-tool-call-terminal-pairing). ```jsonc { @@ -301,7 +301,7 @@ Bare unknown `scope` and `reason` values are writer-strict errors. Readers are t ##### `tool_result.payload.meta` — structured outputs `output` is a display string. When the source tool returned structured data, writers MAY also -populate `meta`, an object keyed by the originating `tool_call.tool` (the canonical tool kind, [§11](#11-canonical-tool-taxonomy)). +populate `meta`, an object keyed by the originating `tool_call.tool` (the canonical tool kind, [§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy)). Consumers that understand a kind read `meta.`; everyone else falls back to `output`. `meta` is optional and additive — existing writers that emit only `output` stay valid. @@ -495,7 +495,7 @@ A meaningful source timeline record that is not a user message, agent message, t `kind` is REQUIRED and writer-strict. It MUST be either one of the reserved cross-agent values below, or a vendor-namespaced extension of the form `x-/`. Bare unknown strings are rejected by writer-strict validation. Readers are tolerant of unknown `x-*` kinds and pass them through. `data` is curated structured metadata for rendering and search, not a replacement for `source.raw`. -`context_compact`, `user_interrupt`, `model_change`, `mode_change`, `thinking_level_change`, and `session_end` are first-class record types ([§10.3](#10-3-optional-event-types)). Do not duplicate them under `system_event.kind`. 
+`context_compact`, `user_interrupt`, `model_change`, `mode_change`, `thinking_level_change`, and `session_end` are first-class record types ([§10.3](./10-events.md#103-optional-event-types)). Do not duplicate them under `system_event.kind`. ##### Reserved lifecycle vocabulary @@ -921,7 +921,7 @@ Readers MUST tolerate unknown types: - Render with a generic fallback. - Do not abort parsing. -Writers MUST NOT invent new top-level event types in v0.1 writer-strict output. Use the `other` tool kind ([§11](#11-canonical-tool-taxonomy)) or `source.raw` ([§10.1](#10-1-base-shape), [§15.1](#15-1-source-raw-elision-and-redaction)) for adapter-specific data, or `meta` ([§8.3](#8-3-the-meta-extension-convention) / [§12](#12-vendor-extensions)) for vendor extensions. Reader-tolerant parsing MAY preserve unknown future event types at runtime; this tolerance is not part of the writer schema. +Writers MUST NOT invent new top-level event types in v0.1 writer-strict output. Use the `other` tool kind ([§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy)) or `source.raw` ([§10.1](./10-events.md#101-base-shape), [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) for adapter-specific data, or `meta` ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) / [§12](./12-vendor-extensions.md#12-vendor-extensions)) for vendor extensions. Reader-tolerant parsing MAY preserve unknown future event types at runtime; this tolerance is not part of the writer schema. ### 10.7 Source envelope referencing diff --git a/spec/draft/11-canonical-tool-taxonomy.md b/spec/draft/11-canonical-tool-taxonomy.md index 7fb04cd..aa1f870 100644 --- a/spec/draft/11-canonical-tool-taxonomy.md +++ b/spec/draft/11-canonical-tool-taxonomy.md @@ -21,7 +21,7 @@ The `tool_call.payload.tool` field uses these values. Each defines the expected | `subagent_invoke` | `{ task, agent_type?, session_id? }` | | `other` | `{ name, args? 
}` | -Checklist and plan snapshots use `task_plan_update` ([§10.2](#10-2-mandatory-event-types)) rather than `tool_call`. +Checklist and plan snapshots use `task_plan_update` ([§10.2](./10-events.md#102-mandatory-event-types)) rather than `tool_call`. ### 11.1 `file_edit` diff --git a/spec/draft/12-vendor-extensions.md b/spec/draft/12-vendor-extensions.md index e6d3fef..78ffb78 100644 --- a/spec/draft/12-vendor-extensions.md +++ b/spec/draft/12-vendor-extensions.md @@ -14,7 +14,7 @@ Readers MAY preserve, ignore, or render `meta` fields. They MUST NOT abort on un `entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see §16. -The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](#15-1-source-raw-elision-and-redaction)) instead. See [§8.3](#8-3-the-meta-extension-convention) for the full convention. +The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead. See [§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) for the full convention. ### 12.1 Extension grammar diff --git a/spec/draft/17-security-considerations.md b/spec/draft/17-security-considerations.md index f6af7f0..8dd372b 100644 --- a/spec/draft/17-security-considerations.md +++ b/spec/draft/17-security-considerations.md @@ -16,7 +16,7 @@ Redaction reduces content exposure but does not make a shared trail private. Tim Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. Sharing tools SHOULD scan headers and trail envelopes as well as event payloads (§16). 
-The implementation-maintained detector catalog and rule pack schema live in [`docs/redaction-patterns.md`](./docs/redaction-patterns.md); this catalog is non-normative and does not change the trail file format. +The implementation-maintained detector catalog and rule pack schema live in `docs/redaction-patterns.md`; this catalog is non-normative and does not change the trail file format. --- diff --git a/spec/v0.1.0/04-terminology.md b/spec/v0.1.0/04-terminology.md index b9ba347..19e41cf 100644 --- a/spec/v0.1.0/04-terminology.md +++ b/spec/v0.1.0/04-terminology.md @@ -16,7 +16,7 @@ | **Adapter** | Software that reads a source agent's storage and emits a trail file. | | **Linear session** | A session whose events do not use `parent_id`. Events are ordered by file position. | | **Tree session** | A session where some events use `parent_id` to form a DAG. | -| **Canonical event** | One of the mandatory or optional event types in [§10.2](#10-2-mandatory-event-types) and [§10.3](#10-3-optional-event-types). | +| **Canonical event** | One of the mandatory or optional event types in [§10.2](./10-events.md#102-mandatory-event-types) and [§10.3](./10-events.md#103-optional-event-types). | | **Raw trail** | A local artifact preserving source fidelity as much as possible. | | **Redacted trail** | A separate artifact produced from a raw trail for sharing. It has its own `content_hash`. | | **Shared trail** | A redacted trail transported through a sharing mechanism. | diff --git a/spec/v0.1.0/08-the-trail-envelope.md b/spec/v0.1.0/08-the-trail-envelope.md index 74786fb..60bdc0a 100644 --- a/spec/v0.1.0/08-the-trail-envelope.md +++ b/spec/v0.1.0/08-the-trail-envelope.md @@ -57,7 +57,7 @@ The envelope MUST NOT carry a `parent_id`. It is not part of the event graph. The trail envelope (§8), the session header (§9), and every event entry (§10.1) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. 
Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-<vendor>/<key>` extension grammar (§12.1) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record (§7.4): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. -For verbatim source-event preservation, use `source.raw` ([§10.1](#10-1-base-shape), [§10.7](#10-7-source-envelope-referencing), [§15.1](#15-1-source-raw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. +For verbatim source-event preservation, use `source.raw` ([§10.1](./10-events.md#101-base-shape), [§10.7](./10-events.md#107-source-envelope-referencing), [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. This draft defines one standard event-entry `meta` key: `redaction_count` (§16). Other standard keys MAY be promoted in later minor bumps based on observed usage. diff --git a/spec/v0.1.0/10-events.md b/spec/v0.1.0/10-events.md index 353a43a..f2115a8 100644 --- a/spec/v0.1.0/10-events.md +++ b/spec/v0.1.0/10-events.md @@ -203,7 +203,7 @@ When the upstream source does not provide item ids, or provides empty or whitesp #### `tool_call` -The agent invoked a tool. Tool kinds use the taxonomy in [§11](#11-canonical-tool-taxonomy). +The agent invoked a tool. Tool kinds use the taxonomy in [§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy). ```jsonc { @@ -225,7 +225,7 @@ The agent invoked a tool. 
Tool kinds use the taxonomy in [§11](#11-canonical-to | Payload field | Required | Type | Notes | |---|---|---|---| -| `tool` | yes | string | canonical tool kind ([§11](#11-canonical-tool-taxonomy)) | +| `tool` | yes | string | canonical tool kind ([§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy)) | | `args` | yes | object | tool-specific args | | `truncated` | no | boolean | true when `args` is a bounded excerpt rather than complete tool arguments | | `args_size` | conditional | integer | original serialized argument byte size; REQUIRED when `truncated: true` | @@ -234,7 +234,7 @@ The agent invoked a tool. Tool kinds use the taxonomy in [§11](#11-canonical-to #### `tool_result` -The result of a `tool_call`. References the call via `for_id`. Writers omit `for_id` when the source does not provide a reliable match. Readers MAY tolerate legacy/null values; when `for_id` is null or missing, see [§10.5](#10-5-tool-call-terminal-pairing). +The result of a `tool_call`. References the call via `for_id`. Writers omit `for_id` when the source does not provide a reliable match. Readers MAY tolerate legacy/null values; when `for_id` is null or missing, see [§10.5](#105-tool-call-terminal-pairing). ```jsonc { @@ -301,7 +301,7 @@ Bare unknown `scope` and `reason` values are writer-strict errors. Readers are t ##### `tool_result.payload.meta` — structured outputs `output` is a display string. When the source tool returned structured data, writers MAY also -populate `meta`, an object keyed by the originating `tool_call.tool` (the canonical tool kind, [§11](#11-canonical-tool-taxonomy)). +populate `meta`, an object keyed by the originating `tool_call.tool` (the canonical tool kind, [§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy)). Consumers that understand a kind read `meta.`; everyone else falls back to `output`. `meta` is optional and additive — existing writers that emit only `output` stay valid. 
@@ -495,7 +495,7 @@ A meaningful source timeline record that is not a user message, agent message, t `kind` is REQUIRED and writer-strict. It MUST be either one of the reserved cross-agent values below, or a vendor-namespaced extension of the form `x-/`. Bare unknown strings are rejected by writer-strict validation. Readers are tolerant of unknown `x-*` kinds and pass them through. `data` is curated structured metadata for rendering and search, not a replacement for `source.raw`. -`context_compact`, `user_interrupt`, `model_change`, `mode_change`, `thinking_level_change`, and `session_end` are first-class record types ([§10.3](#10-3-optional-event-types)). Do not duplicate them under `system_event.kind`. +`context_compact`, `user_interrupt`, `model_change`, `mode_change`, `thinking_level_change`, and `session_end` are first-class record types ([§10.3](./10-events.md#103-optional-event-types)). Do not duplicate them under `system_event.kind`. ##### Reserved lifecycle vocabulary @@ -921,7 +921,7 @@ Readers MUST tolerate unknown types: - Render with a generic fallback. - Do not abort parsing. -Writers MUST NOT invent new top-level event types in v0.1 writer-strict output. Use the `other` tool kind ([§11](#11-canonical-tool-taxonomy)) or `source.raw` ([§10.1](#10-1-base-shape), [§15.1](#15-1-source-raw-elision-and-redaction)) for adapter-specific data, or `meta` ([§8.3](#8-3-the-meta-extension-convention) / [§12](#12-vendor-extensions)) for vendor extensions. Reader-tolerant parsing MAY preserve unknown future event types at runtime; this tolerance is not part of the writer schema. +Writers MUST NOT invent new top-level event types in v0.1 writer-strict output. 
Use the `other` tool kind ([§11](./11-canonical-tool-taxonomy.md#11-canonical-tool-taxonomy)) or `source.raw` ([§10.1](./10-events.md#101-base-shape), [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) for adapter-specific data, or `meta` ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) / [§12](./12-vendor-extensions.md#12-vendor-extensions)) for vendor extensions. Reader-tolerant parsing MAY preserve unknown future event types at runtime; this tolerance is not part of the writer schema. ### 10.7 Source envelope referencing diff --git a/spec/v0.1.0/11-canonical-tool-taxonomy.md b/spec/v0.1.0/11-canonical-tool-taxonomy.md index 7fb04cd..aa1f870 100644 --- a/spec/v0.1.0/11-canonical-tool-taxonomy.md +++ b/spec/v0.1.0/11-canonical-tool-taxonomy.md @@ -21,7 +21,7 @@ The `tool_call.payload.tool` field uses these values. Each defines the expected | `subagent_invoke` | `{ task, agent_type?, session_id? }` | | `other` | `{ name, args? }` | -Checklist and plan snapshots use `task_plan_update` ([§10.2](#10-2-mandatory-event-types)) rather than `tool_call`. +Checklist and plan snapshots use `task_plan_update` ([§10.2](./10-events.md#102-mandatory-event-types)) rather than `tool_call`. ### 11.1 `file_edit` diff --git a/spec/v0.1.0/12-vendor-extensions.md b/spec/v0.1.0/12-vendor-extensions.md index e6d3fef..78ffb78 100644 --- a/spec/v0.1.0/12-vendor-extensions.md +++ b/spec/v0.1.0/12-vendor-extensions.md @@ -14,7 +14,7 @@ Readers MAY preserve, ignore, or render `meta` fields. They MUST NOT abort on un `entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see §16. -The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](#15-1-source-raw-elision-and-redaction)) instead. 
See [§8.3](#8-3-the-meta-extension-convention) for the full convention. +The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead. See [§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) for the full convention. ### 12.1 Extension grammar diff --git a/spec/v0.1.0/17-security-considerations.md b/spec/v0.1.0/17-security-considerations.md index f6af7f0..8dd372b 100644 --- a/spec/v0.1.0/17-security-considerations.md +++ b/spec/v0.1.0/17-security-considerations.md @@ -16,7 +16,7 @@ Redaction reduces content exposure but does not make a shared trail private. Tim Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. Sharing tools SHOULD scan headers and trail envelopes as well as event payloads (§16). -The implementation-maintained detector catalog and rule pack schema live in [`docs/redaction-patterns.md`](./docs/redaction-patterns.md); this catalog is non-normative and does not change the trail file format. +The implementation-maintained detector catalog and rule pack schema live in `docs/redaction-patterns.md`; this catalog is non-normative and does not change the trail file format. 
--- From 42d960b3b9525f85fc37e0d92609e5caef72d214 Mon Sep 17 00:00:00 2001 From: Somasundaram Ayyappan <1802828+somus@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:59:43 +0530 Subject: [PATCH 4/4] docs(spec): link split section references --- spec/draft/04-terminology.md | 4 +- spec/draft/05-file-format.md | 8 +- ...entity-artifacts-and-content-addressing.md | 10 +- spec/draft/08-the-trail-envelope.md | 10 +- spec/draft/09-the-session-header.md | 52 ++++---- spec/draft/10-events.md | 22 ++-- spec/draft/12-vendor-extensions.md | 4 +- ...truncation-overflow-and-raw-source-size.md | 4 +- spec/draft/16-redaction.md | 4 +- spec/draft/17-security-considerations.md | 10 +- spec/draft/18-validation.md | 118 +++++++++--------- spec/draft/19-formal-schema.md | 2 +- .../appendix-b-content-hash-worked-example.md | 2 +- spec/draft/changelog.md | 14 +-- spec/v0.1.0/04-terminology.md | 4 +- spec/v0.1.0/05-file-format.md | 8 +- ...entity-artifacts-and-content-addressing.md | 10 +- spec/v0.1.0/08-the-trail-envelope.md | 10 +- spec/v0.1.0/09-the-session-header.md | 52 ++++---- spec/v0.1.0/10-events.md | 22 ++-- spec/v0.1.0/12-vendor-extensions.md | 4 +- ...truncation-overflow-and-raw-source-size.md | 4 +- spec/v0.1.0/16-redaction.md | 4 +- spec/v0.1.0/17-security-considerations.md | 10 +- spec/v0.1.0/18-validation.md | 118 +++++++++--------- spec/v0.1.0/19-formal-schema.md | 2 +- .../appendix-b-content-hash-worked-example.md | 2 +- spec/v0.1.0/changelog.md | 14 +-- 28 files changed, 264 insertions(+), 264 deletions(-) diff --git a/spec/draft/04-terminology.md b/spec/draft/04-terminology.md index 19e41cf..db7cb6e 100644 --- a/spec/draft/04-terminology.md +++ b/spec/draft/04-terminology.md @@ -21,8 +21,8 @@ | **Redacted trail** | A separate artifact produced from a raw trail for sharing. It has its own `content_hash`. | | **Shared trail** | A redacted trail transported through a sharing mechanism. 
| | **Synthesized event** | An event the adapter constructed from indirect source data (e.g., a git diff), not mapped from a real source event. Flagged with `source.synthesized: true`. | -| **Content hash** | SHA-256 of the exact artifact's canonical bytes (§7). | -| **Canonical bytes** | The file content normalized per §7 for hashing. | +| **Content hash** | SHA-256 of the exact artifact's canonical bytes ([§7](./07-identity-artifacts-and-content-addressing.md#7-identity-artifacts-and-content-addressing)). | +| **Canonical bytes** | The file content normalized per [§7](./07-identity-artifacts-and-content-addressing.md#7-identity-artifacts-and-content-addressing) for hashing. | | **Source escape hatch** | The `source.raw` field; preserves verbatim source-format data for lossless round-trip. | --- diff --git a/spec/draft/05-file-format.md b/spec/draft/05-file-format.md index 43adb5d..4df7fd7 100644 --- a/spec/draft/05-file-format.md +++ b/spec/draft/05-file-format.md @@ -19,16 +19,16 @@ - Writers MUST replace invalid UTF-8 bytes and unpaired surrogate escapes with U+FFFD at emission time. Emitted JSON strings MUST NOT contain unpaired surrogates. - Writers MUST NOT emit JSON integer numbers outside the IEEE-754 exact-integer range (`-(2^53-1)` through `2^53-1`) anywhere in a trail file. Adapters that receive oversized source integers, such as snowflake ids or nanosecond timestamps in `source.raw`, MUST emit them as strings instead. Validator warnings use code `non_interoperable_number` at the offending JSON Pointer. - `.trail.jsonl.gz` files are a whole-file gzip wrapper around the UTF-8 trail JSONL bytes above. Writers MUST NOT gzip individual JSONL lines independently. Readers MUST decompress `.trail.jsonl.gz` files before validation and processing. -- For `.trail.jsonl.gz`, `content_hash` is computed and verified by first decompressing the file to produce plain UTF-8 JSONL, then applying the canonical bytes procedure defined in §7.3 to the decompressed JSONL. 
The compressed bytes themselves are never hashed. +- For `.trail.jsonl.gz`, `content_hash` is computed and verified by first decompressing the file to produce plain UTF-8 JSONL, then applying the canonical bytes procedure defined in [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) to the decompressed JSONL. The compressed bytes themselves are never hashed. ### 5.3 File layout Every valid trail file has: -1. **Optionally**, a trail envelope (`type:"trail"`) on line 1 (§8). -2. One **or more** session header groups in file order. Each group starts with a `type:"session"` record and continues with zero or more event lines until the next `type:"session"` record or EOF (§9.6). The first session header MUST appear on line 1 when there is no envelope, or on line 2 when an envelope is present. +1. **Optionally**, a trail envelope (`type:"trail"`) on line 1 ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)). +2. One **or more** session header groups in file order. Each group starts with a `type:"session"` record and continues with zero or more event lines until the next `type:"session"` record or EOF ([§9.6](./09-the-session-header.md#96-multi-session-trail-files)). The first session header MUST appear on line 1 when there is no envelope, or on line 2 when an envelope is present. -When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in §9.6. +When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in [§9.6](./09-the-session-header.md#96-multi-session-trail-files). > Non-normative diagram. 
diff --git a/spec/draft/07-identity-artifacts-and-content-addressing.md b/spec/draft/07-identity-artifacts-and-content-addressing.md index f3d8a3e..9a05542 100644 --- a/spec/draft/07-identity-artifacts-and-content-addressing.md +++ b/spec/draft/07-identity-artifacts-and-content-addressing.md @@ -25,7 +25,7 @@ Canonical bytes are defined as: - No trailing whitespace. - A trailing newline at EOF. - Each JSON object serialized using RFC 8785 JSON Canonicalization Scheme (JCS). -- Writer-valid strings are well-formed per §5.2, so canonical bytes remain pure JCS; hash-time string repair is not part of this procedure. +- Writer-valid strings are well-formed per [§5.2](./05-file-format.md#52-encoding), so canonical bytes remain pure JCS; hash-time string repair is not part of this procedure. Because the hash depends on the file content that includes the hash field, we use a two-pass approach: @@ -42,8 +42,8 @@ Writers that produce streaming or in-progress files MAY omit `content_hash` or l When a trail envelope is present, the file carries two independent content hashes: -- **Session-level `content_hash`** lives on the session header. It is SHA-256 over the canonical bytes covering only the session header and its events (the envelope record is excluded from the hashed input). In a multi-session file (§9.6) the slice for a session covers that session's header and the events between it and the next `type:"session"` record (or EOF). This makes each session's identity independent of whether it is wrapped in an envelope or sits beside sibling sessions — extracting one session from a multi-session file recomputes the same digest. -- **File-level `content_hash`** lives on the trail envelope. It is SHA-256 over the canonical bytes of the whole file, with the envelope's `content_hash` field replaced by `""` per the same two-pass procedure as §7.3. The session-level `content_hash`, if already populated, is treated as opaque file content. 
+- **Session-level `content_hash`** lives on the session header. It is SHA-256 over the canonical bytes covering only the session header and its events (the envelope record is excluded from the hashed input). In a multi-session file ([§9.6](./09-the-session-header.md#96-multi-session-trail-files)) the slice for a session covers that session's header and the events between it and the next `type:"session"` record (or EOF). This makes each session's identity independent of whether it is wrapped in an envelope or sits beside sibling sessions — extracting one session from a multi-session file recomputes the same digest. +- **File-level `content_hash`** lives on the trail envelope. It is SHA-256 over the canonical bytes of the whole file, with the envelope's `content_hash` field replaced by `""` per the same two-pass procedure as [§7.3](#73-content-hash). The session-level `content_hash`, if already populated, is treated as opaque file content. Writers that emit both hashes MUST stamp every session-level hash first, then compute and stamp the file-level hash. Readers verify them independently. Different consumers care about different scopes: extraction tools recompute the session hash; share/transport tools verify the file hash. @@ -68,13 +68,13 @@ Lineage references mirror the tier of the linking context: - **Header-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **session-level** `content_hash` of the parent artifact (the forked-from session or the raw session that was redacted). This keeps session lineage independent of any envelope wrapper — extracting either side recomputes the same digest. - **Envelope-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **file-level** `content_hash` of the parent file (envelope and all sessions included). Use these to link whole files rather than individual sessions. -- `segment.prev_content_hash` (§9.5) is always session-level, since segments chain at session grain. 
+- `segment.prev_content_hash` ([§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions)) is always session-level, since segments chain at session grain. Writers MUST choose the matching tier; mixing tiers across a chain breaks verification. ### 7.5 Event identifiers -Event `id` values are globally unique. Writers emit uppercase ULIDs or lowercase UUIDs, matching §7.1 and the schema. Globally-unique canonical ids let a reconciler dedup events across segments by exact string equality. +Event `id` values are globally unique. Writers emit uppercase ULIDs or lowercase UUIDs, matching [§7.1](#71-session-identity) and the schema. Globally-unique canonical ids let a reconciler dedup events across segments by exact string equality. --- diff --git a/spec/draft/08-the-trail-envelope.md b/spec/draft/08-the-trail-envelope.md index 60bdc0a..e365a35 100644 --- a/spec/draft/08-the-trail-envelope.md +++ b/spec/draft/08-the-trail-envelope.md @@ -43,29 +43,29 @@ The trail envelope is an OPTIONAL record on line 1 that carries file-scope metad | `description` | no | string | free text | | `ts` | yes | string | ISO-8601 timestamp when the file was assembled or exported | | `producer` | yes | string | identifier of the writer (e.g., `trail-cli/0.3.0`) | -| `content_hash` | no | string | SHA-256 hex of the whole-file canonical bytes; see §7.4 | +| `content_hash` | no | string | SHA-256 hex of the whole-file canonical bytes; see [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) | | `tags` | no | string[] | free-form labels | | `vcs` | no | object | working-tree context at file-assembly time | | `fork_from` | no | object | reference to a parent file when forked; `trail_id` is a UUID or ULID id and `content_hash` is optional | | `redacted_from` | no | object | provenance link from a redacted file to its raw counterpart | | `sessions` | no | array | manifest of sessions in this file; validator warns on drift vs file content | -| `meta` | no | 
object | free-form vendor extensions (§8.3) | +| `meta` | no | object | free-form vendor extensions ([§8.3](#83-the-meta-extension-convention)) | The envelope MUST NOT carry a `parent_id`. It is not part of the event graph. ### 8.3 The `meta` extension convention -The trail envelope (§8), the session header (§9), and every event entry (§10.1) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-/` extension grammar (§12.1) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record (§7.4): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. +The trail envelope ([§8](#8-the-trail-envelope)), the session header ([§9](./09-the-session-header.md#9-the-session-header)), and every event entry ([§10.1](./10-events.md#101-base-shape)) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-/` extension grammar ([§12.1](./12-vendor-extensions.md#121-extension-grammar)) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record ([§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity)): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. 
For verbatim source-event preservation, use `source.raw` ([§10.1](./10-events.md#101-base-shape), [§10.7](./10-events.md#107-source-envelope-referencing), [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. -This draft defines one standard event-entry `meta` key: `redaction_count` (§16). Other standard keys MAY be promoted in later minor bumps based on observed usage. +This draft defines one standard event-entry `meta` key: `redaction_count` ([§16](./16-redaction.md#16-redaction)). Other standard keys MAY be promoted in later minor bumps based on observed usage. ### 8.4 The `sessions` manifest When `sessions` is present, the validator warns if the manifest disagrees with the file: -- The manifest MUST list one entry per session group (§9.6) in file order. Each entry's `id` and `agent` MUST match the corresponding session header's `id` and `agent.name`. Length mismatch and per-entry drift both emit `envelope_sessions_manifest_drift` warnings — never errors, so renderers can still display the file. +- The manifest MUST list one entry per session group ([§9.6](./09-the-session-header.md#96-multi-session-trail-files)) in file order. Each entry's `id` and `agent` MUST match the corresponding session header's `id` and `agent.name`. Length mismatch and per-entry drift both emit `envelope_sessions_manifest_drift` warnings — never errors, so renderers can still display the file. - The manifest is an index/rendering hint only. It MUST NOT carry graph facts such as child-session role or follows edges; session headers are authoritative for lineage. 
### 8.5 File identity defaults when envelope is absent diff --git a/spec/draft/09-the-session-header.md b/spec/draft/09-the-session-header.md index 661fff8..aabdb5e 100644 --- a/spec/draft/09-the-session-header.md +++ b/spec/draft/09-the-session-header.md @@ -59,7 +59,7 @@ |---|---|---|---| | `type` | yes | literal `"session"` | discriminator | | `schema_version` | yes | string | currently `"0.1.0"` | -| `id` | yes | string | UUID or ULID per §7.1/§19 | +| `id` | yes | string | UUID or ULID per [§7.1](./07-identity-artifacts-and-content-addressing.md#71-session-identity)/[§19](./19-formal-schema.md#19-formal-schema) | | `session_uid` | no | string | stable source-session identifier shared by all segments of one logical source session | | `segment` | no | object | multi-segment marker; absent is equivalent to a single segment with `seq: 1` | | `segment.seq` | yes (if `segment` present) | integer | 1-based segment sequence number | @@ -67,10 +67,10 @@ | `name` | no | string | human session label | | `description` | no | string | free-text session description | | `tags` | no | string[] | free-form session labels | -| `content_hash` | no | string | SHA-256 hex of this artifact; see §7.3 | +| `content_hash` | no | string | SHA-256 hex of this artifact; see [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) | | `ts` | yes | string | ISO-8601 session start time; writers emit UTC `Z` with millisecond precision | -| `stream` | no | object | live-capture marker; see §9.4 | -| `agent.name` | yes | string | from the canonical registry (§14) | +| `stream` | no | object | live-capture marker; see [§9.4](#94-streaming-and-live-capture) | +| `agent.name` | yes | string | from the canonical registry ([§14](./14-canonical-agent-registry.md#14-canonical-agent-registry)) | | `agent.version` | no | string | source agent's version | | `agent.model_default` | no | string | default model for the session | | `cwd` | no | string | working directory; MAY be normalized 
for privacy | @@ -92,9 +92,9 @@ | `parse_fidelity.quarantined_count` | yes (if `parse_fidelity` present) | integer | number of `system_event` entries whose `payload.kind` is `x-*/unknown_record` in this session group | | `parse_fidelity.termination_reason` | no | enum or extension | final `session_terminated.payload.reason`, when a `session_terminated` event is present | | `source` | no | object | source-file metadata block (agent, path, format_version) | -| `meta` | no | object | vendor extensions; recommended keys use the `x-/` extension grammar (§8.3 / §12) | +| `meta` | no | object | vendor extensions; recommended keys use the `x-/` extension grammar ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) / [§12](./12-vendor-extensions.md#12-vendor-extensions)) | -When `parse_fidelity` is present, validators MUST compare it against the session group's entries. `quarantined_count` MUST equal the count of quarantined unknown source records emitted as `system_event` entries with `payload.kind` matching `x-*/unknown_record`; see the §10.3 quarantine convention. `termination_reason`, when a `session_terminated` entry exists, MUST match the final `session_terminated.payload.reason`; if no `session_terminated` entry exists, writers MUST omit `termination_reason`. This field is denormalized for cheap listing/filtering only; the event stream remains authoritative. Quarantined records are suspect parse fidelity, not necessarily lossy, because the raw source record is preserved. +When `parse_fidelity` is present, validators MUST compare it against the session group's entries. `quarantined_count` MUST equal the count of quarantined unknown source records emitted as `system_event` entries with `payload.kind` matching `x-*/unknown_record`; see the [§10.3](./10-events.md#103-optional-event-types) quarantine convention. 
`termination_reason`, when a `session_terminated` entry exists, MUST match the final `session_terminated.payload.reason`; if no `session_terminated` entry exists, writers MUST omit `termination_reason`. This field is denormalized for cheap listing/filtering only; the event stream remains authoritative. Quarantined records are suspect parse fidelity, not necessarily lossy, because the raw source record is preserved. `vcs.remote_url` provides a canonical project identifier that survives across users, machines, and clones — useful for cross-machine aggregation, profile filtering, and project-scoped analysis. Adapters that populate it: @@ -106,9 +106,9 @@ When `parse_fidelity` is present, validators MUST compare it against the session Fresh repositories with an unborn HEAD MAY emit `vcs.revision:null` when a branch is known. A `vcs` block with `vcs.revision:null` MUST include `vcs.branch`, MUST omit `vcs.head_commit`, and writers MUST NOT emit an information-free VCS block. When `vcs.revision` is non-null for git, `vcs.head_commit` typically equals `vcs.revision`. -Privacy: `remote_url` reveals repository identity and MAY identify a private repo. Redacted artifacts MAY strip or normalize it (§16). +Privacy: `remote_url` reveals repository identity and MAY identify a private repo. Redacted artifacts MAY strip or normalize it ([§16](./16-redaction.md#16-redaction)). -When a trail file carries both header-level `vcs` (session-time context) and envelope-level `vcs` (file-assembly-time context, §8), they represent different observation points. File-assembly tools SHOULD preserve both when present. For multi-segment reconciliation rules, see §9.5. +When a trail file carries both header-level `vcs` (session-time context) and envelope-level `vcs` (file-assembly-time context, [§8](./08-the-trail-envelope.md#8-the-trail-envelope)), they represent different observation points. File-assembly tools SHOULD preserve both when present. 
For multi-segment reconciliation rules, see [§9.5](#95-session-segments-multi-segment-sessions). ### 9.3 Example @@ -125,19 +125,19 @@ The optional header `stream` object: | Field | Required | Type | Notes | |---|---|---|---| | `stream.state` | yes (if `stream` present) | enum | `open` while the writer is actively appending; `closed` once finalized | -| `stream.started_at` | no | string | ISO-8601 timestamp when the stream began; matches the §9 `ts` semantics | +| `stream.started_at` | no | string | ISO-8601 timestamp when the stream began; matches the [§9](#9-the-session-header) `ts` semantics | Lifecycle: 1. **Live phase.** Writer emits the header with `stream: { state: "open" }`. `content_hash` is omitted or set to `""`. Events are appended as they happen. -2. **Finalize.** Writer rewrites the header with `stream` either removed or set to `state: "closed"`, then computes `content_hash` per §7.3. Appending stops. -3. **Clean end.** Writer MAY append a `session_end` event (§10.3) to mark a normal conclusion before finalize. Abnormal ends still use `session_terminated`. +2. **Finalize.** Writer rewrites the header with `stream` either removed or set to `state: "closed"`, then computes `content_hash` per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash). Appending stops. +3. **Clean end.** Writer MAY append a `session_end` event ([§10.3](./10-events.md#103-optional-event-types)) to mark a normal conclusion before finalize. Abnormal ends still use `session_terminated`. Tail readers that observe `stream.state == "open"` SHOULD assume more events MAY arrive. Readers observing `stream` absent or `state == "closed"` SHOULD treat the file as a finalized artifact and verify `content_hash` when present. `stream` is absent in trail files produced by stream-unaware writers; readers MUST treat that case as equivalent to a finalized non-streaming artifact (existing v0.1.0 behavior). -A live `system_event` heartbeat convention is described in §10.3. 
+A live `system_event` heartbeat convention is described in [§10.3](./10-events.md#103-optional-event-types). --- @@ -149,7 +149,7 @@ A single logical source session MAY be split across multiple trail-file artifact - `segment.seq` — 1-based integer identifying which segment of the session this file is. Single-segment trails MAY omit `segment` entirely, which is equivalent to `{seq: 1}`. -- `segment.prev_content_hash` — the **session-level** `content_hash` (§7.3) of the previous segment's finalized bytes. Required when `seq >= 2`. Forms a verifiable chain (HLS / Postgres-WAL pattern). If the previous segment was lost and the chain cannot be verified, writers MAY emit `null` and readers MUST emit a `segment_chain_break` warning. +- `segment.prev_content_hash` — the **session-level** `content_hash` ([§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash)) of the previous segment's finalized bytes. Required when `seq >= 2`. Forms a verifiable chain (HLS / Postgres-WAL pattern). If the previous segment was lost and the chain cannot be verified, writers MAY emit `null` and readers MUST emit a `segment_chain_break` warning. #### Segment reconciliation @@ -161,18 +161,18 @@ and emit a new finalized trail with freshly computed hashes. Implementation merge policy is documented in `docs/implementation-semantics.md`. -Whole-file graph rules (§18) apply **within** a segment, not across. Cross-segment references are out of scope for v0.1 (event `parent_id` chains do not span segments). +Whole-file graph rules ([§18](./18-validation.md#18-validation)) apply **within** a segment, not across. Cross-segment references are out of scope for v0.1 (event `parent_id` chains do not span segments). #### Writer guidance - Writers SHOULD generate `session_uid` once per source session and reuse it for every segment. - Writers SHOULD finalize each segment normally before starting a new segment. 
-- To produce `segment.prev_content_hash` for segment N, finalize segment N-1 per §7.3 and copy its session-level `content_hash` verbatim into segment N's header. +- To produce `segment.prev_content_hash` for segment N, finalize segment N-1 per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) and copy its session-level `content_hash` verbatim into segment N's header. - Recovered writers MAY emit `segment.prev_content_hash: null` when the previous segment is lost. #### Composition with multi-session files -`session_uid` and `segment.*` sit at the **session-header** grain, not the file grain. A multi-session trail file (§9.6) MAY contain N session headers, each independently multi-segmentable. The trail envelope (§8) is unaffected. +`session_uid` and `segment.*` sit at the **session-header** grain, not the file grain. A multi-session trail file ([§9.6](#96-multi-session-trail-files)) MAY contain N session headers, each independently multi-segmentable. The trail envelope ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)) is unaffected. Within one file, two groups with the same `session_uid` SHOULD NOT claim the same normalized `segment.seq` value; a missing `segment` is equivalent to `seq: 1`. Duplicate pairs emit `duplicate_segment_seq` warnings. Groups for the same `session_uid` SHOULD appear in non-descending `segment.seq` order in file order; a descending sequence emits `out_of_order_segment_seq`. @@ -208,29 +208,29 @@ group := events* events := zero or more event records (§10) ``` -The trail envelope (§8) remains optional even when N ≥ 2. When present with N ≥ 2 groups, the file-level `content_hash` on the envelope covers all N groups' already-stamped session hashes, applying the §7.4 two-pass procedure unchanged (every session hash stamped first; envelope hash stamped over the finalized record set). When absent, file-level identity defaults from §8.5 apply (no file-level `content_hash` is meaningful; only per-session hashes). 
+The trail envelope ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)) remains optional even when N ≥ 2. When present with N ≥ 2 groups, the file-level `content_hash` on the envelope covers all N groups' already-stamped session hashes, applying the [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) two-pass procedure unchanged (every session hash stamped first; envelope hash stamped over the finalized record set). When absent, file-level identity defaults from [§8.5](./08-the-trail-envelope.md#85-file-identity-defaults-when-envelope-is-absent) apply (no file-level `content_hash` is meaningful; only per-session hashes). #### 9.6.2 Group boundaries and reader-tolerant recovery -Readers detect group boundaries by `type:"session"` alone. A record with `type:"session"` always opens a new group, regardless of `schema_version` value: this lets reader-tolerant parsers (§6) recover from a malformed mid-file header and continue parsing subsequent groups instead of treating the rest of the file as orphan events. The strict validator still errors on individual records that fail schema validation; recovery affects parsing structure, not per-record validity. +Readers detect group boundaries by `type:"session"` alone. A record with `type:"session"` always opens a new group, regardless of `schema_version` value: this lets reader-tolerant parsers ([§6](./06-versioning.md#6-versioning)) recover from a malformed mid-file header and continue parsing subsequent groups instead of treating the rest of the file as orphan events. The strict validator still errors on individual records that fail schema validation; recovery affects parsing structure, not per-record validity. Entries that appear before the first `type:"session"` record (and after any envelope) are not part of any group and are always invalid: `events_before_first_session_header`. 
#### 9.6.3 Per-group validation -Whole-file graph rules (§18) apply **within** a group, not across: +Whole-file graph rules ([§18](./18-validation.md#18-validation)) apply **within** a group, not across: - `parent_id` resolution is scoped to the enclosing group. A `parent_id` that references an `id` in another group is treated as `unknown_parent_id` (cross-group references go through `fork_from`, not `parent_id`). -- `tool_call` / `tool_result` pairing (§10.5) runs per group. An unmatched `tool_call` in group A is not satisfied by a `tool_result` in group B. +- `tool_call` / `tool_result` pairing ([§10.5](./10-events.md#105-tool-call-terminal-pairing)) runs per group. An unmatched `tool_call` in group A is not satisfied by a `tool_result` in group B. - `session_end.payload.final_message_id`, `source.raw.envelope_ref`, `payload.usage` checks, and the `stream` consistency rule each run per group. -Event `id` uniqueness (§7.5) remains **file-scoped**: every `id` (across every group's header and events) MUST be unique within the file. +Event `id` uniqueness ([§7.5](./07-identity-artifacts-and-content-addressing.md#75-event-identifiers)) remains **file-scoped**: every `id` (across every group's header and events) MUST be unique within the file. #### 9.6.4 Per-group `content_hash` -Each group's session-level `content_hash` is computed over the canonical bytes of that group's slice only (header + its events, envelope and sibling groups excluded). This is the same procedure as §7.3 / §7.4 applied to the slice. As a consequence, extracting one session from a multi-session file (drop the envelope, drop sibling groups, write only that group's canonical bytes) reproduces the same digest as the in-file value. +Each group's session-level `content_hash` is computed over the canonical bytes of that group's slice only (header + its events, envelope and sibling groups excluded). 
This is the same procedure as [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) / [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) applied to the slice. As a consequence, extracting one session from a multi-session file (drop the envelope, drop sibling groups, write only that group's canonical bytes) reproduces the same digest as the in-file value. -When a reader extracts a single session from a multi-session file outside writer-strict validation and the recomputed `content_hash` does not match the value stored in the in-file header, it SHOULD emit a warning rather than an error. Strict validation of a finalized trail file still treats an in-place finalized `content_hash` mismatch as an error (§18.4). +When a reader extracts a single session from a multi-session file outside writer-strict validation and the recomputed `content_hash` does not match the value stored in the in-file header, it SHOULD emit a warning rather than an error. Strict validation of a finalized trail file still treats an in-place finalized `content_hash` mismatch as an error ([§18.4](./18-validation.md#184-file-graph-checks)). #### 9.6.5 Cross-group references @@ -246,14 +246,14 @@ The only sanctioned cross-group reference primitive is the session header's `for - Sessions in a file SHOULD appear in chronological order by header `ts`. Out-of-order placement emits `out_of_order_session_headers` (warning, not error). - Per-session `cwd` and `vcs` MAY diverge across sessions in the same file. Divergent `vcs.revision` across groups emits `vcs_revision_divergence` (warning, not error) — useful for spotting accidental cross-checkout bundling. -- `schema_version` is carried on every session header. Sessions in the same file are independently versioned (reader-tolerant patch acceptance per §6 applies per-header). +- `schema_version` is carried on every session header. 
Sessions in the same file are independently versioned (reader-tolerant patch acceptance per [§6](./06-versioning.md#6-versioning) applies per-header). - Empty groups (a header with zero events) are legal — they represent "session started, nothing happened." #### 9.6.7 Redaction of multi-session files -Redacting a multi-session trail produces a multi-session redacted trail with the same group count in the same order, redacted in place. The redactor resets `content_hash` to `` on every session header (and on the envelope when present) before share/transport tooling re-stamps via the two-pass §7.4 procedure. +Redacting a multi-session trail produces a multi-session redacted trail with the same group count in the same order, redacted in place. The redactor resets `content_hash` to `` on every session header (and on the envelope when present) before share/transport tooling re-stamps via the two-pass [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) procedure. -When redaction changes bytes, lineage hashes that point to artifacts in the same redacted file MUST be rewritten to the target's redacted content hash, using the §7.4.1 hash tier. Header-level `fork_from.content_hash` is rewritten when `fork_from.session_id` names an in-file sibling. `segment.prev_content_hash` is rewritten when the previous `segment.seq` for the same `session_uid` is in the file. When the lineage target is not in the redacted file, redactors MUST drop `fork_from.content_hash` while keeping id references, and MUST set `segment.prev_content_hash` to `null` for an unverifiable previous segment. `redacted_from.content_hash` remains raw-artifact provenance: header-level `redacted_from.content_hash` links the redacted session to its raw counterpart; envelope-level `redacted_from.content_hash` links the redacted file to its raw counterpart. 
+When redaction changes bytes, lineage hashes that point to artifacts in the same redacted file MUST be rewritten to the target's redacted content hash, using the [§7.4.1](./07-identity-artifacts-and-content-addressing.md#741-hash-tier-for-fork_from-and-redacted_from) hash tier. Header-level `fork_from.content_hash` is rewritten when `fork_from.session_id` names an in-file sibling. `segment.prev_content_hash` is rewritten when the previous `segment.seq` for the same `session_uid` is in the file. When the lineage target is not in the redacted file, redactors MUST drop `fork_from.content_hash` while keeping id references, and MUST set `segment.prev_content_hash` to `null` for an unverifiable previous segment. `redacted_from.content_hash` remains raw-artifact provenance: header-level `redacted_from.content_hash` links the redacted session to its raw counterpart; envelope-level `redacted_from.content_hash` links the redacted file to its raw counterpart. #### 9.6.8 No hard cap diff --git a/spec/draft/10-events.md b/spec/draft/10-events.md index f2115a8..072227d 100644 --- a/spec/draft/10-events.md +++ b/spec/draft/10-events.md @@ -31,14 +31,14 @@ Every event entry has this base shape: | Field | Required | Type | Notes | |---|---|---|---| -| `type` | yes | string | event type; see §10.2-10.3 | -| `id` | yes | string | globally unique; ULID or UUID per §19 | +| `type` | yes | string | event type; see [§10.2](#102-mandatory-event-types)-[10.3](#103-optional-event-types) | +| `id` | yes | string | globally unique; ULID or UUID per [§19](./19-formal-schema.md#19-formal-schema) | | `parent_id` | no | string | references another `id` for tree topology; absent = linear file order | | `ts` | yes | string | ISO-8601 timestamp | | `payload` | yes | object | type-specific data | | `semantic` | no | object | linking metadata for fallback pairing | | `source` | no | object | adapter-provided source metadata | -| `meta` | no | object | vendor extensions (§8.3 / §12) | +| `meta` | no | object | vendor extensions
([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) / [§12](./12-vendor-extensions.md#12-vendor-extensions)) | ### 10.2 Mandatory event types @@ -140,7 +140,7 @@ Model identification for downstream cost analysis uses `payload.model` first, fa When a single source envelope fans out to multiple entries (text blocks, tool calls, thinking blocks sharing one API response), `usage` accounts for the whole envelope. Writers MUST attach it to the first derived entry whose payload supports `usage`, skip non-usage-capable derived entries, and MUST NOT repeat it on later derived entries. In v0.1.0, `usage` is valid on `agent_message`, `agent_thinking`, and `tool_call` payloads; if an envelope emits none of those entries, canonical `usage` is omitted. -Monetary cost is intentionally not a canonical trail field or event. Analyzers compute cost from token usage, model identification, and their own pricing tables, and carry pricing provenance such as currency, pricing source, and effective date in analyzer output. If a source exposes a billing estimate, writers MAY preserve it as opaque source data under `x-<vendor>/<name>` keys on the entry's `meta` field (§8.3). Latency and wall-clock telemetry are deferred to a future minor version; sources rarely expose them consistently. +Monetary cost is intentionally not a canonical trail field or event. Analyzers compute cost from token usage, model identification, and their own pricing tables, and carry pricing provenance such as currency, pricing source, and effective date in analyzer output. If a source exposes a billing estimate, writers MAY preserve it as opaque source data under `x-<vendor>/<name>` keys on the entry's `meta` field ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention)). Latency and wall-clock telemetry are deferred to a future minor version; sources rarely expose them consistently. 
#### `task_plan_update` @@ -343,7 +343,7 @@ top-level `exit_code` on `tool_result`, because the concept does not apply to ki or `web_fetch`. Privacy: `meta` carries the same raw content as `output` (shell stdout, MCP block text), so the -redaction pipeline scrubs `meta` string leaves alongside `output` (§16). +redaction pipeline scrubs `meta` string leaves alongside `output` ([§16](./16-redaction.md#16-redaction)). #### `user_query` @@ -470,7 +470,7 @@ Post-creation update to logical session metadata. The session header carries the | Payload field | Required | Type | Notes | |---|---|---|---| | `field` | yes | enum or extension | One of `name`, `description`, `tags`, `agent.model_default`, `vcs.branch`, `vcs.worktree`, or `x-<vendor>/<name>`. | -| `value` | yes | field-specific | Replacement value. Must match the field type: string for `name`/`description`/`agent.model_default`/`vcs.branch`, string array for `tags`, and the §9.2 worktree shape for `vcs.worktree`. Extension fields MAY carry any JSON value. | +| `value` | yes | field-specific | Replacement value. Must match the field type: string for `name`/`description`/`agent.model_default`/`vcs.branch`, string array for `tags`, and the [§9.2](./09-the-session-header.md#92-fields) worktree shape for `vcs.worktree`. Extension fields MAY carry any JSON value. | | `previous_value` | no | field-specific | Prior value when the adapter knows it. Same type as `value`. | | `reason` | yes | enum or extension | `ai_generated`, `user_set`, `runtime_inferred`, `external`, or `x-<vendor>/<name>`. | @@ -526,7 +526,7 @@ A meaningful source timeline record that is not a user message, agent message, t | `context_injected` | Runtime injected standalone context that SHOULD remain visible outside a `user_message`. | `{ source_kind, name?, size_bytes? }` | | `hook_progress` | Catch-all for source-emitted progress/hook/queue records that do not map to a more specific reserved lifecycle kind. 
Adapters SHOULD prefer `session_start` / `turn_end` / `pre_tool_use` / `post_tool_use` / `subagent_end` / `hook_fired` when the source signal is unambiguous, and fall back to `hook_progress` only for unrecognised progress streams. | `{ hook_event?, hook_name?, ... }` | | `queue_operation` | Source recorded an enqueue or dequeue operation. | Free-form. | -| `heartbeat` | Periodic liveness ping during streaming capture (§9.4). Optional. Non-normative; readers MAY treat as informational. | `{ interval_ms? }` | +| `heartbeat` | Periodic liveness ping during streaming capture ([§9.4](./09-the-session-header.md#94-streaming-and-live-capture)). Optional. Non-normative; readers MAY treat as informational. | `{ interval_ms? }` | | `vcs_commit` | Adapter detected a VCS commit created during the session. | `{ sha, tool_call_id, branch?, message?, repo? }` | Use `tool_call_aborted{scope:"turn"}` for stops in a tool-invocation context where no specific call is identifiable. Use `system_event.kind:"turn_aborted"` for model/system-level turn stops with no tool in flight. @@ -565,7 +565,7 @@ Cross-agent diagnostic signals. Adapters MAY emit these to surface non-fatal err - Anything else MUST use `x-<vendor>/<name>` form, e.g. `x-claudecode/notification`. - Readers are tolerant of unknown `x-*` kinds — they pass through with no diagnostic. - Bare unknown strings (no `x-` prefix, not in the reserved set) are rejected by writer-strict validation. -- Adapters quarantining an unparseable source record MUST emit `system_event` with `kind:"x-<vendor>/unknown_record"` and preserve the record in `source.raw`; `parse_fidelity.quarantined_count` counts this pattern (§9.2). +- Adapters quarantining an unparseable source record MUST emit `system_event` with `kind:"x-<vendor>/unknown_record"` and preserve the record in `source.raw`; `parse_fidelity.quarantined_count` counts this pattern ([§9.2](./09-the-session-header.md#92-fields)).
- If an `x-*` kind proves cross-agent, promote it to the reserved enum in a minor format version bump. Document emitted kinds per adapter in `docs/parser-source-matrix.md`. #### `capability_change` @@ -846,7 +846,7 @@ Synthesized instances MUST set `source.synthesized: true`. #### `session_end` -Clean terminal marker. Distinct from `session_terminated` (abnormal). Optional; many writers won't emit it. When present at EOF, signals a normal conclusion of the session and suppresses the "unmatched tool calls at EOF" warning of §18.4. +Clean terminal marker. Distinct from `session_terminated` (abnormal). Optional; many writers won't emit it. When present at EOF, signals a normal conclusion of the session and suppresses the "unmatched tool calls at EOF" warning of [§18.4](./18-validation.md#184-file-graph-checks). ```jsonc { @@ -889,7 +889,7 @@ When `tool_result.payload.for_id` is null, missing, or refers to a non-existent Writers SHOULD avoid relying on fallbacks. Populate `for_id` when reliable; use `semantic.call_id` when the source's native ID doesn't map cleanly to event `id`. Do not use semantic or sequential fallback pairing for `tool_call_aborted`; if a source cannot identify the call, emit `scope:"turn"` without `for_id`. -Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning (§18.4): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. 
+Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning ([§18.4](./18-validation.md#184-file-graph-checks)): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. > Non-normative diagram. @@ -930,7 +930,7 @@ When a single source envelope produces multiple entries — for example, an assi - The **first** entry derived from a given source envelope sets `source.raw.envelope` (and `source.raw.block`, `source.raw.block_index` if applicable). - **Subsequent** entries derived from the same envelope set `source.raw.envelope_ref` to the first entry's `id`. They omit `source.raw.envelope` and keep `block` / `block_index`. -`source.raw.envelope_ref` is an optional string. Writers MUST ensure it references the `id` of an entry that appears **earlier** in the same file — the same envelope, inlined once. Forward references and dangling references are reader errors (`source_raw_envelope_ref_unresolved`, §18.4). The first-inline-then-ref shape is streaming-write friendly: readers resolve refs in a single pass without backtracking. +`source.raw.envelope_ref` is an optional string. Writers MUST ensure it references the `id` of an entry that appears **earlier** in the same file — the same envelope, inlined once. Forward references and dangling references are reader errors (`source_raw_envelope_ref_unresolved`, [§18.4](./18-validation.md#184-file-graph-checks)). The first-inline-then-ref shape is streaming-write friendly: readers resolve refs in a single pass without backtracking. 
This mechanism is additive over v0.1.0. Readers that do not understand `envelope_ref` will see it as an unknown raw-source field and ignore it; the entry's other fields (`type`, `payload`, `semantic`) remain fully self-describing. diff --git a/spec/draft/12-vendor-extensions.md b/spec/draft/12-vendor-extensions.md index 78ffb78..d036cd0 100644 --- a/spec/draft/12-vendor-extensions.md +++ b/spec/draft/12-vendor-extensions.md @@ -1,6 +1,6 @@ ## 12. Vendor extensions -Implementations and vendors can add custom data via the `meta` field on the trail envelope, session header, or any event entry. Use the `x-<vendor>/<name>` extension grammar (§12.1) for keys to avoid collisions: +Implementations and vendors can add custom data via the `meta` field on the trail envelope, session header, or any event entry. Use the `x-<vendor>/<name>` extension grammar ([§12.1](#121-extension-grammar)) for keys to avoid collisions: ```jsonc "meta": { @@ -12,7 +12,7 @@ Implementations and vendors can add custom data via the `meta` field on the trai Readers MAY preserve, ignore, or render `meta` fields. They MUST NOT abort on unknown keys. -`entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see §16. +`entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see [§16](./16-redaction.md#16-redaction). The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead. See [§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) for the full convention. 
diff --git a/spec/draft/15-truncation-overflow-and-raw-source-size.md b/spec/draft/15-truncation-overflow-and-raw-source-size.md index 5db9e52..f8ac3be 100644 --- a/spec/draft/15-truncation-overflow-and-raw-source-size.md +++ b/spec/draft/15-truncation-overflow-and-raw-source-size.md @@ -42,9 +42,9 @@ Two placements are valid: Specific size thresholds, the algorithm a writer uses to choose which leaves to elide, and whether elision is gated by a hard cap are implementation policy — they belong in writer documentation, not the format. Validators MAY warn on entries whose `source.raw` exceeds an implementation-chosen size budget, but the wire format itself imposes no fixed limit. -When elision happens at the first emission of a source envelope (§10.7), subsequent `envelope_ref` entries still resolve — the ref points at the elided entry's `id`, not at its inlined envelope. +When elision happens at the first emission of a source envelope ([§10.7](./10-events.md#107-source-envelope-referencing)), subsequent `envelope_ref` entries still resolve — the ref points at the elided entry's `id`, not at its inlined envelope. -Adapters MUST redact known secret patterns in `source.raw` before writing — emission-time redaction is a writer responsibility, not a share-time concern. Validators emit `source_raw_unredacted_secret` (warning) when a string leaf in `source.raw` matches a known credential pattern (Authorization headers, Bearer tokens, JWT, vendor API keys, PEM private key blocks, ENV-style assignments). Share-time redaction (§16) layers additional normalization on top — paths, PII — and produces a separate artifact. +Adapters MUST redact known secret patterns in `source.raw` before writing — emission-time redaction is a writer responsibility, not a share-time concern. 
Validators emit `source_raw_unredacted_secret` (warning) when a string leaf in `source.raw` matches a known credential pattern (Authorization headers, Bearer tokens, JWT, vendor API keys, PEM private key blocks, ENV-style assignments). Share-time redaction ([§16](./16-redaction.md#16-redaction)) layers additional normalization on top — paths, PII — and produces a separate artifact. --- diff --git a/spec/draft/16-redaction.md b/spec/draft/16-redaction.md index dee593a..fb56498 100644 --- a/spec/draft/16-redaction.md +++ b/spec/draft/16-redaction.md @@ -9,7 +9,7 @@ Share-time redactors MUST apply the privacy rules below before producing shared | Field or value | Share-time action | |---|---| | `cwd` | Normalize or strip. | -| `vcs.remote_url` | Strip or normalize per §9.2 unless the user explicitly opts in. | +| `vcs.remote_url` | Strip or normalize per [§9.2](./09-the-session-header.md#92-fields) unless the user explicitly opts in. | | `system_event.payload.data.repo` for `vcs_commit` | Treat like `vcs.remote_url`; strip or normalize unless the user explicitly opts in. | | `vcs.worktree.path`, `vcs.worktree.original_cwd` | Normalize or strip. | | `source.path` | Normalize or strip. | @@ -23,7 +23,7 @@ If a resolved response contains answer keys that do not appear on the referenced Share-time redactors SHOULD populate `entry.meta.redaction_count` on each changed event entry. The count is a non-negative integer equal to the number of redactor mutations applied to that entry. Existing numeric `redaction_count` values are additive when a redacted trail is redacted again; unchanged entries keep their existing value. -When redaction changes bytes, lineage hashes are updated as described in §9.6.7. This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. 
+When redaction changes bytes, lineage hashes are updated as described in [§9.6.7](./09-the-session-header.md#967-redaction-of-multi-session-files). This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. > Non-normative diagram. diff --git a/spec/draft/17-security-considerations.md b/spec/draft/17-security-considerations.md index 8dd372b..b9f29f1 100644 --- a/spec/draft/17-security-considerations.md +++ b/spec/draft/17-security-considerations.md @@ -4,17 +4,17 @@ Trail files are untrusted input. All string content, including messages, tool ou Agent Trail intentionally has no format-level size caps. Consumers SHOULD enforce deployment-specific limits for maximum line length, file size, event count, graph depth, and decoded attachment or overflow bytes. Consumers SHOULD stream rather than buffer whole files where possible; JSONL is the interchange shape partly to make bounded streaming readers practical. -Hostile files can contain invalid graph structure even though `parent_id` cycles and cross-group links are invalid (§13, §18.4). Validators MUST NOT loop indefinitely while checking graph topology, and tree renderers SHOULD bound recursion or use iterative traversal when displaying deep parent chains. +Hostile files can contain invalid graph structure even though `parent_id` cycles and cross-group links are invalid ([§13](./13-tree-and-branching.md#13-tree-and-branching), [§18.4](./18-validation.md#184-file-graph-checks)). Validators MUST NOT loop indefinitely while checking graph topology, and tree renderers SHOULD bound recursion or use iterative traversal when displaying deep parent chains. -`content_hash` provides byte integrity for the canonical artifact (§7.3, §7.4), not authorship, provenance, or trust. A trail claiming `agent.name: "claude-code"` proves only that the file contains that string. 
Agent Trail v0.1.0 has no signature or attestation mechanism; signing MAY be added by a future extension. +`content_hash` provides byte integrity for the canonical artifact ([§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash), [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity)), not authorship, provenance, or trust. A trail claiming `agent.name: "claude-code"` proves only that the file contains that string. Agent Trail v0.1.0 has no signature or attestation mechanism; signing MAY be added by a future extension. -In v0.1.0, `content_hash` values are bare 64-character SHA-256 hex strings (§7.3). Other content-addressed references, such as attachment URIs (§10.2) and `overflow_ref` values (§15), use `sha256:` references. Consumers that verify prefixed content-addressed references MUST reject unknown algorithm prefixes rather than treating the reference as verified. +In v0.1.0, `content_hash` values are bare 64-character SHA-256 hex strings ([§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash)). Other content-addressed references, such as attachment URIs ([§10.2](./10-events.md#102-mandatory-event-types)) and `overflow_ref` values ([§15](./15-truncation-overflow-and-raw-source-size.md#15-truncation-overflow-and-raw-source-size)), use `sha256:` references. Consumers that verify prefixed content-addressed references MUST reject unknown algorithm prefixes rather than treating the reference as verified. -Attachment URIs and overflow references can identify local resources on the producer's machine. Viewers SHOULD NOT dereference `file:` URIs, `overflow_ref` values, or other external references automatically. Viewers MUST NOT dereference local `file:` URIs or non-`sha256:` overflow references from redacted or shared trails; §16 requires share-time redactors to remove or rewrite those values before transport. +Attachment URIs and overflow references can identify local resources on the producer's machine. 
Viewers SHOULD NOT dereference `file:` URIs, `overflow_ref` values, or other external references automatically. Viewers MUST NOT dereference local `file:` URIs or non-`sha256:` overflow references from redacted or shared trails; [§16](./16-redaction.md#16-redaction) requires share-time redactors to remove or rewrite those values before transport. Redaction reduces content exposure but does not make a shared trail private. Timestamps, event counts, tool names, model names, branch shape, unredacted file names, and remaining metadata can still reveal workflow information. Sharing a redacted trail SHOULD be treated as publishing it to anyone who can access the transport. -Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. Sharing tools SHOULD scan headers and trail envelopes as well as event payloads (§16). +Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. Sharing tools SHOULD scan headers and trail envelopes as well as event payloads ([§16](./16-redaction.md#16-redaction)). The implementation-maintained detector catalog and rule pack schema live in `docs/redaction-patterns.md`; this catalog is non-normative and does not change the trail file format. diff --git a/spec/draft/18-validation.md b/spec/draft/18-validation.md index f1a2faa..9aa90d2 100644 --- a/spec/draft/18-validation.md +++ b/spec/draft/18-validation.md @@ -19,10 +19,10 @@ reader or writer support they implement. 
| Class | Name | Requirements | |---|---|---| -| **R0** | Renderer | Reader-tolerant JSONL parsing per §6 and §18.2; renders the mandatory event types in §10.2, including user messages, agent messages, tool calls, tool results, and summaries; preserves or displays fallback output for unknown records it can parse; does not crash on valid or quarantinable input. | -| **R1** | Structural reader | R0 plus the non-hash whole-file layout, graph, pairing, streaming-state, and diagnostic checks in §18.4. R1 catches duplicate ids, unknown parents, parent cycles, unresolved `source.raw.envelope_ref`, tool-call pairing diagnostics, and other file-level checks that do not require recomputing content hashes or comparing segment-chain hashes. | -| **R2** | Verifying reader | R1 plus content-hash verification per §7.3 and §7.4, and segment-chain verification per §9.5. Readers in this class warn rather than abort on reader-tolerant hash mismatches, per §18.4.1. | -| **W** | Writer | Emits writer-strict records that validate against `schema.json` and satisfy the strict whole-file validation rules in §18.4. Writer conformance is about emitted trail files, not reader tolerance. | +| **R0** | Renderer | Reader-tolerant JSONL parsing per [§6](./06-versioning.md#6-versioning) and [§18.2](#182-reader-tolerance); renders the mandatory event types in [§10.2](./10-events.md#102-mandatory-event-types), including user messages, agent messages, tool calls, tool results, and summaries; preserves or displays fallback output for unknown records it can parse; does not crash on valid or quarantinable input. | +| **R1** | Structural reader | R0 plus the non-hash whole-file layout, graph, pairing, streaming-state, and diagnostic checks in [§18.4](#184-file-graph-checks). 
R1 catches duplicate ids, unknown parents, parent cycles, unresolved `source.raw.envelope_ref`, tool-call pairing diagnostics, and other file-level checks that do not require recomputing content hashes or comparing segment-chain hashes. | +| **R2** | Verifying reader | R1 plus content-hash verification per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) and [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity), and segment-chain verification per [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions). Readers in this class warn rather than abort on reader-tolerant hash mismatches, per [§18.4.1](#1841-errors). | +| **W** | Writer | Emits writer-strict records that validate against `schema.json` and satisfy the strict whole-file validation rules in [§18.4](#184-file-graph-checks). Writer conformance is about emitted trail files, not reader tolerance. | `@agent-trail/core` implements Class R2 reader behavior through its parsing, validation, canonicalization, hashing, and multi-segment reconciliation APIs. 
@@ -47,51 +47,51 @@ Portable diagnostic code registry: | Code | Severity | Defining section | |---|---|---| -| `ambiguous_sequential_pairing` | warning | §10.5 / §18.4.2 | -| `child_session_fork_from_mismatch` | warning | §18.4.2 | -| `child_session_parent_link_mismatch` | warning | §18.4.2 | -| `content_hash_invalid` | error | §7.3 / §18.4.1 | -| `content_hash_mismatch` | error (strict), warning (reader-tolerant) | §7.3 / §18.4.1 | -| `cross_group_fork_from_hash_mismatch` | warning | §9.6.5 | -| `duplicate_id` | error | §18.4.1 | -| `duplicate_option_labels` | warning | §10.2 / §18.4.2 | -| `duplicate_segment_seq` | warning | §9.5 / §18.4.2 | -| `duplicate_tool_result` | warning | §10.5 / §18.4.2 | -| `duplicate_user_query_question_id` | error | §10.2 | -| `envelope_has_parent_id` | error | §8 / §18.4.1 | -| `envelope_not_at_line_1` | error | §8 / §18.4.1 | -| `envelope_sessions_manifest_drift` | warning | §8.4 / §18.4.2 | -| `events_before_first_session_header` | error | §9.6 / §18.4.1 | -| `header_has_parent_id` | error | §9 / §18.4.1 | -| `ill_formed_string` | error (strict), warning (reader-tolerant) | §5.2 / §18.4.1 | -| `missing_header` | error | §9 / §18.4.1 | -| `missing_header_after_envelope` | error | §8 / §18.4.1 | -| `multiple_envelopes` | error | §8 / §18.4.1 | -| `non_interoperable_number` | warning | §5.2 / §18.4.2 | -| `non_monotonic_event_ts` | warning | §18.4.2 | -| `out_of_order_segment_seq` | warning | §9.5 / §18.4.2 | -| `out_of_order_session_headers` | warning | §9.6.6 | -| `parent_cycle` | error | §13.2 / §18.4.1 | -| `parse_fidelity_drift` | error | §9.2 / §18.4.1 | -| `reader_tolerant_schema_version` | warning | §6 / §18.2 | -| `reader_tolerant_unknown_payload_field` | warning | §18.2 | -| `reader_tolerant_unknown_record` | warning | §18.2 | -| `segment_chain_break` | warning | §9.5 | -| `source_raw_envelope_ref_unresolved` | error | §10.7 / §18.4.1 | -| `source_raw_unredacted_secret` | warning | §15.1 / §18.4.2 | -| 
`stream_open_with_content_hash` | warning | §18.4.3 | -| `stream_open_with_terminal_event` | warning | §18.4.3 | -| `tool_args_unredacted_secret` | warning | §16 / §18.4.2 | -| `tool_result_semantic_conflict` | warning | §10.5 / §18.4.2 | -| `unknown_abandoned_branch_id` | warning | §10.3 / §18.4.2 | -| `unknown_branch_point_from_id` | warning | §10.3 / §18.4.2 | -| `unknown_final_message_id` | warning | §10.3 / §18.4.2 | -| `unknown_parent_id` | error | §10.1 / §18.4.1 | -| `unknown_user_query_answer_key` | error | §10.2 | -| `unknown_user_query_for_id` | warning | §10.2 / §18.4.2 | -| `unmatched_tool_call_at_eof` | warning | §10.5 / §18.4.2 | -| `vcs_remote_url_with_credentials` | warning or error | §9.2 / §18.4 | -| `vcs_revision_divergence` | warning | §9.6.6 | +| `ambiguous_sequential_pairing` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `child_session_fork_from_mismatch` | warning | [§18.4.2](#1842-warnings) | +| `child_session_parent_link_mismatch` | warning | [§18.4.2](#1842-warnings) | +| `content_hash_invalid` | error | [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) / [§18.4.1](#1841-errors) | +| `content_hash_mismatch` | error (strict), warning (reader-tolerant) | [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) / [§18.4.1](#1841-errors) | +| `cross_group_fork_from_hash_mismatch` | warning | [§9.6.5](./09-the-session-header.md#965-cross-group-references) | +| `duplicate_id` | error | [§18.4.1](#1841-errors) | +| `duplicate_option_labels` | warning | [§10.2](./10-events.md#102-mandatory-event-types) / [§18.4.2](#1842-warnings) | +| `duplicate_segment_seq` | warning | [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions) / [§18.4.2](#1842-warnings) | +| `duplicate_tool_result` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `duplicate_user_query_question_id` | error | 
[§10.2](./10-events.md#102-mandatory-event-types) | +| `envelope_has_parent_id` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `envelope_not_at_line_1` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `envelope_sessions_manifest_drift` | warning | [§8.4](./08-the-trail-envelope.md#84-the-sessions-manifest) / [§18.4.2](#1842-warnings) | +| `events_before_first_session_header` | error | [§9.6](./09-the-session-header.md#96-multi-session-trail-files) / [§18.4.1](#1841-errors) | +| `header_has_parent_id` | error | [§9](./09-the-session-header.md#9-the-session-header) / [§18.4.1](#1841-errors) | +| `ill_formed_string` | error (strict), warning (reader-tolerant) | [§5.2](./05-file-format.md#52-encoding) / [§18.4.1](#1841-errors) | +| `missing_header` | error | [§9](./09-the-session-header.md#9-the-session-header) / [§18.4.1](#1841-errors) | +| `missing_header_after_envelope` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `multiple_envelopes` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `non_interoperable_number` | warning | [§5.2](./05-file-format.md#52-encoding) / [§18.4.2](#1842-warnings) | +| `non_monotonic_event_ts` | warning | [§18.4.2](#1842-warnings) | +| `out_of_order_segment_seq` | warning | [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions) / [§18.4.2](#1842-warnings) | +| `out_of_order_session_headers` | warning | [§9.6.6](./09-the-session-header.md#966-order-divergence-and-per-session-metadata) | +| `parent_cycle` | error | [§13.2](./13-tree-and-branching.md#132-acyclicity) / [§18.4.1](#1841-errors) | +| `parse_fidelity_drift` | error | [§9.2](./09-the-session-header.md#92-fields) / [§18.4.1](#1841-errors) | +| `reader_tolerant_schema_version` | warning | [§6](./06-versioning.md#6-versioning) / [§18.2](#182-reader-tolerance) | +| 
`reader_tolerant_unknown_payload_field` | warning | [§18.2](#182-reader-tolerance) | +| `reader_tolerant_unknown_record` | warning | [§18.2](#182-reader-tolerance) | +| `segment_chain_break` | warning | [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions) | +| `source_raw_envelope_ref_unresolved` | error | [§10.7](./10-events.md#107-source-envelope-referencing) / [§18.4.1](#1841-errors) | +| `source_raw_unredacted_secret` | warning | [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction) / [§18.4.2](#1842-warnings) | +| `stream_open_with_content_hash` | warning | [§18.4.3](#1843-streaming-state-rules) | +| `stream_open_with_terminal_event` | warning | [§18.4.3](#1843-streaming-state-rules) | +| `tool_args_unredacted_secret` | warning | [§16](./16-redaction.md#16-redaction) / [§18.4.2](#1842-warnings) | +| `tool_result_semantic_conflict` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `unknown_abandoned_branch_id` | warning | [§10.3](./10-events.md#103-optional-event-types) / [§18.4.2](#1842-warnings) | +| `unknown_branch_point_from_id` | warning | [§10.3](./10-events.md#103-optional-event-types) / [§18.4.2](#1842-warnings) | +| `unknown_final_message_id` | warning | [§10.3](./10-events.md#103-optional-event-types) / [§18.4.2](#1842-warnings) | +| `unknown_parent_id` | error | [§10.1](./10-events.md#101-base-shape) / [§18.4.1](#1841-errors) | +| `unknown_user_query_answer_key` | error | [§10.2](./10-events.md#102-mandatory-event-types) | +| `unknown_user_query_for_id` | warning | [§10.2](./10-events.md#102-mandatory-event-types) / [§18.4.2](#1842-warnings) | +| `unmatched_tool_call_at_eof` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `vcs_remote_url_with_credentials` | warning or error | [§9.2](./09-the-session-header.md#92-fields) / [§18.4](#184-file-graph-checks) | +| `vcs_revision_divergence` | 
warning | [§9.6.6](./09-the-session-header.md#966-order-divergence-and-per-session-metadata) | #### Conformance suite (non-normative) @@ -123,7 +123,7 @@ A v0.1.0-compliant trail file MUST also pass whole-file checks. #### 18.4.1 Errors -1. The first line is either a trail envelope (`type: "trail"`, §8) or a session header (`type: "session"`, `schema_version: "0.1.0"`). When the envelope is present, the session header MUST occupy line 2. +1. The first line is either a trail envelope (`type: "trail"`, [§8](./08-the-trail-envelope.md#8-the-trail-envelope)) or a session header (`type: "session"`, `schema_version: "0.1.0"`). When the envelope is present, the session header MUST occupy line 2. 2. Subsequent lines match an event schema (`type`, `id`, `ts`, `payload`). 3. All `id` values are unique within the file. 4. Every non-null `parent_id` references an `id` in the same file. @@ -134,15 +134,15 @@ A v0.1.0-compliant trail file MUST also pass whole-file checks. If `content_hash` is present: 1. The value is 64 hex characters (SHA-256). Invalid hash shape emits `content_hash_invalid` at `/content_hash`. -2. Strict validators recompute and verify per §7.3. On mismatch, strict validation fails with `content_hash_mismatch` at `/content_hash`. Reader-tolerant parsers MAY warn but MUST NOT abort. +2. Strict validators recompute and verify per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash). On mismatch, strict validation fails with `content_hash_mismatch` at `/content_hash`. Reader-tolerant parsers MAY warn but MUST NOT abort. Additional whole-file errors: -- `parse_fidelity`, when present, MUST match the session group's entries (§9.2). Drift emits `parse_fidelity_drift` at the mismatched `parse_fidelity` field. +- `parse_fidelity`, when present, MUST match the session group's entries ([§9.2](./09-the-session-header.md#92-fields)). Drift emits `parse_fidelity_drift` at the mismatched `parse_fidelity` field. 
- A `user_query` question id MUST be unique within that query. Duplicate ids emit `duplicate_user_query_question_id` at the repeated question id. - A `user_query_response.payload.answers` key not present in the resolved `user_query.payload.questions[].id` set emits `unknown_user_query_answer_key` at that answer key. -- `source.raw.envelope_ref`, when set, MUST reference the `id` of an earlier entry in the same file (§10.7). Dangling or forward references are errors with code `source_raw_envelope_ref_unresolved` at `/source/raw/envelope_ref`. -- Trail envelope position and uniqueness (§8): +- `source.raw.envelope_ref`, when set, MUST reference the `id` of an earlier entry in the same file ([§10.7](./10-events.md#107-source-envelope-referencing)). Dangling or forward references are errors with code `source_raw_envelope_ref_unresolved` at `/source/raw/envelope_ref`. +- Trail envelope position and uniqueness ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)): - `envelope_not_at_line_1` (error): a `type:"trail"` record appears on a line other than line 1. - `multiple_envelopes` (error): more than one envelope appears in the file. - `missing_header_after_envelope` (error): an envelope at line 1 is not followed by a session header on line 2. @@ -150,31 +150,31 @@ Additional whole-file errors: #### 18.4.2 Warnings -- Each `tool_call.id` SHOULD be referenced by exactly one `tool_result.payload.for_id` (or paired via §10.5). +- Each `tool_call.id` SHOULD be referenced by exactly one `tool_result.payload.for_id` (or paired via [§10.5](./10-events.md#105-tool-call-terminal-pairing)). - Inline `subagent_invoke` events SHOULD have descendants in the same group, or external child invocations SHOULD set `args.session_id` to the child header `id` when known. - When an in-file child session is present, the parent `subagent_invoke.args.session_id` and child `header.fork_from.{session_id,entry_id}` SHOULD agree. 
Mismatches are warnings, not errors, so partial bundles and external-only references remain readable. - `branch_point.payload.from_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_branch_point_from_id` at `/payload/from_id`. - `branch_summary.payload.abandoned_branch_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_abandoned_branch_id` at `/payload/abandoned_branch_id`. - Writers SHOULD emit `session_terminated` if any `tool_call` remains unmatched at EOF. The warning code is `unmatched_tool_call_at_eof`. Suppression: - - A `session_end` event anywhere in the file suppresses this warning for every unmatched `tool_call` (clean conclusion, §10.3). + - A `session_end` event anywhere in the file suppresses this warning for every unmatched `tool_call` (clean conclusion, [§10.3](./10-events.md#103-optional-event-types)). - A `session_terminated` event whose `payload.open_call_ids` lists a given `tool_call.id` suppresses the warning for that id only (explicit acknowledgement). A `session_terminated` event without `open_call_ids` does not suppress the warning. - A `tool_result` paired by sequential fallback when two or more unmatched prior same-branch `tool_call` candidates existed emits `ambiguous_sequential_pairing` at `/payload`. - A `user_query` question with duplicate option labels among options that do not carry stable option ids emits `duplicate_option_labels` at the repeated option's `/payload/questions/<i>/options/<j>/label`. - `user_query_response.payload.for_id` SHOULD reference a `user_query` in the same session group. An unresolved reference emits `unknown_user_query_for_id` at `/payload/for_id`. - `session_end.payload.final_message_id`, when present, SHOULD reference an `id` that appears in the same file (the session header or a prior event). A dangling reference is a warning with code `unknown_final_message_id` at `/payload/final_message_id`.
- An event's `ts` SHOULD NOT be earlier than its parent event's `ts` inside the same parent chain. Equal timestamps are allowed; sibling branches may interleave in wall-clock time. A strictly earlier child timestamp emits `non_monotonic_event_ts` (warning) at `/ts`. -- Validators MAY report implementation-defined size budgets for `source.raw`; specific numbers are writer policy (§15.1). +- Validators MAY report implementation-defined size budgets for `source.raw`; specific numbers are writer policy ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)). - `source.raw` SHOULD NOT contain unredacted credentials. A string leaf matching a known credential pattern emits `source_raw_unredacted_secret` (warning) at the matching JSON pointer. - JSON integer numbers outside the IEEE-754 exact-integer range SHOULD be emitted as strings. Unsafe integer numbers emit `non_interoperable_number` (warning) at the offending JSON Pointer. - Privacy-sensitive tool arguments SHOULD NOT contain unredacted credentials. A string leaf in `mcp_call` / `web_fetch` `tool_call.payload.args.headers` or `shell_command` `tool_call.payload.args.command` matching a known credential pattern emits `tool_args_unredacted_secret` (warning) at the matching JSON pointer. - `envelope_sessions_manifest_drift` (warning): the envelope's `sessions` manifest length disagrees with the number of session groups, or a manifest entry disagrees with the matching session header's `id` or `agent.name`. -- Multi-segment consistency within one file (§9.5): +- Multi-segment consistency within one file ([§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions)): - `duplicate_segment_seq` (warning): two groups share the same `(session_uid, segment.seq)` pair, treating missing `segment` as `seq: 1`. - `out_of_order_segment_seq` (warning): groups with the same `session_uid` appear with descending `segment.seq` in file order. 
#### 18.4.3 Streaming-state rules -Streaming rules (§9.4) are evaluated against the *current* header `stream.state` at validation time — the validator reads the present value, not a history of transitions. Crash-recovery writers MUST finalize (`stream.state` to `"closed"` or remove `stream`) before appending terminal events; once the stream is no longer marked live, the rules below stop applying. +Streaming rules ([§9.4](./09-the-session-header.md#94-streaming-and-live-capture)) are evaluated against the *current* header `stream.state` at validation time — the validator reads the present value, not a history of transitions. Crash-recovery writers MUST finalize (`stream.state` to `"closed"` or remove `stream`) before appending terminal events; once the stream is no longer marked live, the rules below stop applying. 10. If the current `header.stream.state == "open"`: - **10a.** `content_hash` SHOULD be absent or `""`. A populated hex hash is a warning, since the canonical bytes are still in flux. diff --git a/spec/draft/19-formal-schema.md b/spec/draft/19-formal-schema.md index f2b4a19..fa4e8ec 100644 --- a/spec/draft/19-formal-schema.md +++ b/spec/draft/19-formal-schema.md @@ -2,7 +2,7 @@ The normative writer-strict JSON Schema lives in `schema.json` and is published at `https://agent-trail.dev/schema/v0.1.0.json`. -This spec intentionally does not duplicate the full schema inline. Implementations SHOULD validate each JSONL line against `schema.json`, then run the whole-file checks in §18.4. Reader-tolerant parsing, including unknown future event preservation, is separate from writer-strict schema validation. +This spec intentionally does not duplicate the full schema inline. Implementations SHOULD validate each JSONL line against `schema.json`, then run the whole-file checks in [§18.4](./18-validation.md#184-file-graph-checks). Reader-tolerant parsing, including unknown future event preservation, is separate from writer-strict schema validation. 
--- diff --git a/spec/draft/appendix-b-content-hash-worked-example.md b/spec/draft/appendix-b-content-hash-worked-example.md index 5ca73b3..3ca8d83 100644 --- a/spec/draft/appendix-b-content-hash-worked-example.md +++ b/spec/draft/appendix-b-content-hash-worked-example.md @@ -1,6 +1,6 @@ ## Appendix B — Content hash worked example -This example shows the §7.3 two-pass procedure for the +This example shows the [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) two-pass procedure for the `hash-vectors/minimal-pending-roundtrip.trail.jsonl` conformance fixture. The remaining canonicalization and two-tier identity cases are published in the `hash-vectors/` fixture category. diff --git a/spec/draft/changelog.md b/spec/draft/changelog.md index 7576f0e..df16223 100644 --- a/spec/draft/changelog.md +++ b/spec/draft/changelog.md @@ -4,16 +4,16 @@ Initial public draft. v0.1.0 defines: -- JSONL file layout, session header, core event envelope, mandatory event types, optional events, the canonical tool taxonomy, vendor `meta` extensions (§8.3), tree semantics, layered validation, and artifact-level content addressing. +- JSONL file layout, session header, core event envelope, mandatory event types, optional events, the canonical tool taxonomy, vendor `meta` extensions ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention)), tree semantics, layered validation, and artifact-level content addressing. - Stable local source filenames (`spec.md`, `schema.json`) with immutable hosted release snapshots at `/spec/v0.1.0` and `/schema/v0.1.0.json`. -- The optional trail envelope record `type:"trail"` at line 1 (§8) with Tier 1 fields (`id`, `name`, `description`, `ts`, `producer`, `content_hash`) and Tier 2 fields (`tags`, `vcs`, `fork_from`, `redacted_from`, `sessions`, `meta`), and two-tier identity (§7.4): session-level `content_hash` excludes the envelope, file-level `content_hash` covers the whole file. 
+- The optional trail envelope record `type:"trail"` at line 1 ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)) with Tier 1 fields (`id`, `name`, `description`, `ts`, `producer`, `content_hash`) and Tier 2 fields (`tags`, `vcs`, `fork_from`, `redacted_from`, `sessions`, `meta`), and two-tier identity ([§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity)): session-level `content_hash` excludes the envelope, file-level `content_hash` covers the whole file. - Session headers MAY carry base `name`, `description`, and `tags`; `session_metadata_update` events replay on top of those base values. `vcs.type` allows reserved systems or `x-/` extensions, and envelope `fork_from.trail_id` uses the standard id shape. -- Multi-segment session primitives (`session_uid`, `segment.seq`, `segment.prev_content_hash`) and reconciliation invariants (§9.5). -- The optional header `stream` field, the `session_end` event, and the recommended `system_event` heartbeat convention (§9.4, §10.3). +- Multi-segment session primitives (`session_uid`, `segment.seq`, `segment.prev_content_hash`) and reconciliation invariants ([§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions)). +- The optional header `stream` field, the `session_end` event, and the recommended `system_event` heartbeat convention ([§9.4](./09-the-session-header.md#94-streaming-and-live-capture), [§10.3](./10-events.md#103-optional-event-types)). - Tool-surface fidelity for truncated tool-call args, string-replacement `file_edit`, branch-scoped pairing warnings, stable user-query option ids, stricter attachment identity, and tool-result meta key hygiene. -- The `source.raw.envelope_ref` inline-first / ref-subsequent envelope dedup convention (§10.7), the `{ elided: true, size_bytes: N }` elide marker for `source.raw` (§15.1), and the writer-side redaction requirement for credential patterns in `source.raw`. 
-- Normative share-time redaction rules for local attachment URIs, unsafe `overflow_ref` values, unresolved `user_query_response` answers, and privacy-sensitive field handling (§16), plus the `tool_args_unredacted_secret` validator warning (§18.4). -- Envelope-level `payload.usage` on the first entry derived from a source envelope, including `agent_message`, `agent_thinking`, and `tool_call` (§10.2). +- The `source.raw.envelope_ref` inline-first / ref-subsequent envelope dedup convention ([§10.7](./10-events.md#107-source-envelope-referencing)), the `{ elided: true, size_bytes: N }` elide marker for `source.raw` ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)), and the writer-side redaction requirement for credential patterns in `source.raw`. +- Normative share-time redaction rules for local attachment URIs, unsafe `overflow_ref` values, unresolved `user_query_response` answers, and privacy-sensitive field handling ([§16](./16-redaction.md#16-redaction)), plus the `tool_args_unredacted_secret` validator warning ([§18.4](./18-validation.md#184-file-graph-checks)). +- Envelope-level `payload.usage` on the first entry derived from a source envelope, including `agent_message`, `agent_thinking`, and `tool_call` ([§10.2](./10-events.md#102-mandatory-event-types)). - During the v0.1.0 draft cycle, planning snapshots moved from the legacy `tool_call.payload.tool:"task_plan"` shape to the canonical `task_plan_update` event. Final v0.1.0 writer-strict output MUST use `task_plan_update`; legacy `task_plan` tool calls are invalid. - During the v0.1.0 draft cycle, duplicate `system_event` kinds for `session_end` and `permission_mode_change` were removed, thinking levels became source-defined strings, `user_message.origin` was added, and related vocabulary clarifications landed. 
- During the v0.1.0 draft cycle, vendor extensions converged on one `x-/` grammar across `meta`, enum extensions, `system_event.kind`, `tool_result.payload.meta`, and custom `agent.name`. diff --git a/spec/v0.1.0/04-terminology.md b/spec/v0.1.0/04-terminology.md index 19e41cf..db7cb6e 100644 --- a/spec/v0.1.0/04-terminology.md +++ b/spec/v0.1.0/04-terminology.md @@ -21,8 +21,8 @@ | **Redacted trail** | A separate artifact produced from a raw trail for sharing. It has its own `content_hash`. | | **Shared trail** | A redacted trail transported through a sharing mechanism. | | **Synthesized event** | An event the adapter constructed from indirect source data (e.g., a git diff), not mapped from a real source event. Flagged with `source.synthesized: true`. | -| **Content hash** | SHA-256 of the exact artifact's canonical bytes (§7). | -| **Canonical bytes** | The file content normalized per §7 for hashing. | +| **Content hash** | SHA-256 of the exact artifact's canonical bytes ([§7](./07-identity-artifacts-and-content-addressing.md#7-identity-artifacts-and-content-addressing)). | +| **Canonical bytes** | The file content normalized per [§7](./07-identity-artifacts-and-content-addressing.md#7-identity-artifacts-and-content-addressing) for hashing. | | **Source escape hatch** | The `source.raw` field; preserves verbatim source-format data for lossless round-trip. | --- diff --git a/spec/v0.1.0/05-file-format.md b/spec/v0.1.0/05-file-format.md index 43adb5d..4df7fd7 100644 --- a/spec/v0.1.0/05-file-format.md +++ b/spec/v0.1.0/05-file-format.md @@ -19,16 +19,16 @@ - Writers MUST replace invalid UTF-8 bytes and unpaired surrogate escapes with U+FFFD at emission time. Emitted JSON strings MUST NOT contain unpaired surrogates. - Writers MUST NOT emit JSON integer numbers outside the IEEE-754 exact-integer range (`-(2^53-1)` through `2^53-1`) anywhere in a trail file. 
Adapters that receive oversized source integers, such as snowflake ids or nanosecond timestamps in `source.raw`, MUST emit them as strings instead. Validator warnings use code `non_interoperable_number` at the offending JSON Pointer. - `.trail.jsonl.gz` files are a whole-file gzip wrapper around the UTF-8 trail JSONL bytes above. Writers MUST NOT gzip individual JSONL lines independently. Readers MUST decompress `.trail.jsonl.gz` files before validation and processing. -- For `.trail.jsonl.gz`, `content_hash` is computed and verified by first decompressing the file to produce plain UTF-8 JSONL, then applying the canonical bytes procedure defined in §7.3 to the decompressed JSONL. The compressed bytes themselves are never hashed. +- For `.trail.jsonl.gz`, `content_hash` is computed and verified by first decompressing the file to produce plain UTF-8 JSONL, then applying the canonical bytes procedure defined in [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) to the decompressed JSONL. The compressed bytes themselves are never hashed. ### 5.3 File layout Every valid trail file has: -1. **Optionally**, a trail envelope (`type:"trail"`) on line 1 (§8). -2. One **or more** session header groups in file order. Each group starts with a `type:"session"` record and continues with zero or more event lines until the next `type:"session"` record or EOF (§9.6). The first session header MUST appear on line 1 when there is no envelope, or on line 2 when an envelope is present. +1. **Optionally**, a trail envelope (`type:"trail"`) on line 1 ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)). +2. One **or more** session header groups in file order. Each group starts with a `type:"session"` record and continues with zero or more event lines until the next `type:"session"` record or EOF ([§9.6](./09-the-session-header.md#96-multi-session-trail-files)). 
The first session header MUST appear on line 1 when there is no envelope, or on line 2 when an envelope is present. -When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in §9.6. +When the file contains exactly one group, behaviour is unchanged from earlier drafts. Multi-group ("multi-session") files are described in [§9.6](./09-the-session-header.md#96-multi-session-trail-files). > Non-normative diagram. diff --git a/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md b/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md index f3d8a3e..9a05542 100644 --- a/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md +++ b/spec/v0.1.0/07-identity-artifacts-and-content-addressing.md @@ -25,7 +25,7 @@ Canonical bytes are defined as: - No trailing whitespace. - A trailing newline at EOF. - Each JSON object serialized using RFC 8785 JSON Canonicalization Scheme (JCS). -- Writer-valid strings are well-formed per §5.2, so canonical bytes remain pure JCS; hash-time string repair is not part of this procedure. +- Writer-valid strings are well-formed per [§5.2](./05-file-format.md#52-encoding), so canonical bytes remain pure JCS; hash-time string repair is not part of this procedure. Because the hash depends on the file content that includes the hash field, we use a two-pass approach: @@ -42,8 +42,8 @@ Writers that produce streaming or in-progress files MAY omit `content_hash` or l When a trail envelope is present, the file carries two independent content hashes: -- **Session-level `content_hash`** lives on the session header. It is SHA-256 over the canonical bytes covering only the session header and its events (the envelope record is excluded from the hashed input). In a multi-session file (§9.6) the slice for a session covers that session's header and the events between it and the next `type:"session"` record (or EOF). 
This makes each session's identity independent of whether it is wrapped in an envelope or sits beside sibling sessions — extracting one session from a multi-session file recomputes the same digest. -- **File-level `content_hash`** lives on the trail envelope. It is SHA-256 over the canonical bytes of the whole file, with the envelope's `content_hash` field replaced by `""` per the same two-pass procedure as §7.3. The session-level `content_hash`, if already populated, is treated as opaque file content. +- **Session-level `content_hash`** lives on the session header. It is SHA-256 over the canonical bytes covering only the session header and its events (the envelope record is excluded from the hashed input). In a multi-session file ([§9.6](./09-the-session-header.md#96-multi-session-trail-files)) the slice for a session covers that session's header and the events between it and the next `type:"session"` record (or EOF). This makes each session's identity independent of whether it is wrapped in an envelope or sits beside sibling sessions — extracting one session from a multi-session file recomputes the same digest. +- **File-level `content_hash`** lives on the trail envelope. It is SHA-256 over the canonical bytes of the whole file, with the envelope's `content_hash` field replaced by `""` per the same two-pass procedure as [§7.3](#73-content-hash). The session-level `content_hash`, if already populated, is treated as opaque file content. Writers that emit both hashes MUST stamp every session-level hash first, then compute and stamp the file-level hash. Readers verify them independently. Different consumers care about different scopes: extraction tools recompute the session hash; share/transport tools verify the file hash. 
@@ -68,13 +68,13 @@ Lineage references mirror the tier of the linking context: - **Header-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **session-level** `content_hash` of the parent artifact (the forked-from session or the raw session that was redacted). This keeps session lineage independent of any envelope wrapper — extracting either side recomputes the same digest. - **Envelope-level `fork_from.content_hash` and `redacted_from.content_hash`** refer to the **file-level** `content_hash` of the parent file (envelope and all sessions included). Use these to link whole files rather than individual sessions. -- `segment.prev_content_hash` (§9.5) is always session-level, since segments chain at session grain. +- `segment.prev_content_hash` ([§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions)) is always session-level, since segments chain at session grain. Writers MUST choose the matching tier; mixing tiers across a chain breaks verification. ### 7.5 Event identifiers -Event `id` values are globally unique. Writers emit uppercase ULIDs or lowercase UUIDs, matching §7.1 and the schema. Globally-unique canonical ids let a reconciler dedup events across segments by exact string equality. +Event `id` values are globally unique. Writers emit uppercase ULIDs or lowercase UUIDs, matching [§7.1](#71-session-identity) and the schema. Globally-unique canonical ids let a reconciler dedup events across segments by exact string equality. 
--- diff --git a/spec/v0.1.0/08-the-trail-envelope.md b/spec/v0.1.0/08-the-trail-envelope.md index 60bdc0a..e365a35 100644 --- a/spec/v0.1.0/08-the-trail-envelope.md +++ b/spec/v0.1.0/08-the-trail-envelope.md @@ -43,29 +43,29 @@ The trail envelope is an OPTIONAL record on line 1 that carries file-scope metad | `description` | no | string | free text | | `ts` | yes | string | ISO-8601 timestamp when the file was assembled or exported | | `producer` | yes | string | identifier of the writer (e.g., `trail-cli/0.3.0`) | -| `content_hash` | no | string | SHA-256 hex of the whole-file canonical bytes; see §7.4 | +| `content_hash` | no | string | SHA-256 hex of the whole-file canonical bytes; see [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) | | `tags` | no | string[] | free-form labels | | `vcs` | no | object | working-tree context at file-assembly time | | `fork_from` | no | object | reference to a parent file when forked; `trail_id` is a UUID or ULID id and `content_hash` is optional | | `redacted_from` | no | object | provenance link from a redacted file to its raw counterpart | | `sessions` | no | array | manifest of sessions in this file; validator warns on drift vs file content | -| `meta` | no | object | free-form vendor extensions (§8.3) | +| `meta` | no | object | free-form vendor extensions ([§8.3](#83-the-meta-extension-convention)) | The envelope MUST NOT carry a `parent_id`. It is not part of the event graph. ### 8.3 The `meta` extension convention -The trail envelope (§8), the session header (§9), and every event entry (§10.1) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-/` extension grammar (§12.1) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). 
The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record (§7.4): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. +The trail envelope ([§8](#8-the-trail-envelope)), the session header ([§9](./09-the-session-header.md#9-the-session-header)), and every event entry ([§10.1](./10-events.md#101-base-shape)) accept an optional `meta` object for vendor extensions, modelled on OCI image annotations and Kubernetes `metadata.annotations`. Object-typed values are allowed so nested data fits naturally. Keys SHOULD use the `x-<vendor>/<name>` extension grammar ([§12.1](./12-vendor-extensions.md#121-extension-grammar)) to avoid collisions (`x-example/team`, `x-acme/build_id`, `x-entire/checkpoint_id`). The validator treats `meta` as opaque; it contributes to whichever `content_hash` tier covers its host record ([§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity)): `meta` on the session header or any event entry feeds the session-level hash, and `meta` on the trail envelope feeds the file-level hash. For verbatim source-event preservation, use `source.raw` ([§10.1](./10-events.md#101-base-shape), [§10.7](./10-events.md#107-source-envelope-referencing), [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead — `meta` is for cross-cutting annotations, not for capturing the source envelope. -This draft defines one standard event-entry `meta` key: `redaction_count` (§16). Other standard keys MAY be promoted in later minor bumps based on observed usage. +This draft defines one standard event-entry `meta` key: `redaction_count` ([§16](./16-redaction.md#16-redaction)). Other standard keys MAY be promoted in later minor bumps based on observed usage.
### 8.4 The `sessions` manifest When `sessions` is present, the validator warns if the manifest disagrees with the file: -- The manifest MUST list one entry per session group (§9.6) in file order. Each entry's `id` and `agent` MUST match the corresponding session header's `id` and `agent.name`. Length mismatch and per-entry drift both emit `envelope_sessions_manifest_drift` warnings — never errors, so renderers can still display the file. +- The manifest MUST list one entry per session group ([§9.6](./09-the-session-header.md#96-multi-session-trail-files)) in file order. Each entry's `id` and `agent` MUST match the corresponding session header's `id` and `agent.name`. Length mismatch and per-entry drift both emit `envelope_sessions_manifest_drift` warnings — never errors, so renderers can still display the file. - The manifest is an index/rendering hint only. It MUST NOT carry graph facts such as child-session role or follows edges; session headers are authoritative for lineage. ### 8.5 File identity defaults when envelope is absent diff --git a/spec/v0.1.0/09-the-session-header.md b/spec/v0.1.0/09-the-session-header.md index 661fff8..aabdb5e 100644 --- a/spec/v0.1.0/09-the-session-header.md +++ b/spec/v0.1.0/09-the-session-header.md @@ -59,7 +59,7 @@ |---|---|---|---| | `type` | yes | literal `"session"` | discriminator | | `schema_version` | yes | string | currently `"0.1.0"` | -| `id` | yes | string | UUID or ULID per §7.1/§19 | +| `id` | yes | string | UUID or ULID per [§7.1](./07-identity-artifacts-and-content-addressing.md#71-session-identity)/[§19](./19-formal-schema.md#19-formal-schema) | | `session_uid` | no | string | stable source-session identifier shared by all segments of one logical source session | | `segment` | no | object | multi-segment marker; absent is equivalent to a single segment with `seq: 1` | | `segment.seq` | yes (if `segment` present) | integer | 1-based segment sequence number | @@ -67,10 +67,10 @@ | `name` | no | string | human 
session label | | `description` | no | string | free-text session description | | `tags` | no | string[] | free-form session labels | -| `content_hash` | no | string | SHA-256 hex of this artifact; see §7.3 | +| `content_hash` | no | string | SHA-256 hex of this artifact; see [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) | | `ts` | yes | string | ISO-8601 session start time; writers emit UTC `Z` with millisecond precision | -| `stream` | no | object | live-capture marker; see §9.4 | -| `agent.name` | yes | string | from the canonical registry (§14) | +| `stream` | no | object | live-capture marker; see [§9.4](#94-streaming-and-live-capture) | +| `agent.name` | yes | string | from the canonical registry ([§14](./14-canonical-agent-registry.md#14-canonical-agent-registry)) | | `agent.version` | no | string | source agent's version | | `agent.model_default` | no | string | default model for the session | | `cwd` | no | string | working directory; MAY be normalized for privacy | @@ -92,9 +92,9 @@ | `parse_fidelity.quarantined_count` | yes (if `parse_fidelity` present) | integer | number of `system_event` entries whose `payload.kind` is `x-*/unknown_record` in this session group | | `parse_fidelity.termination_reason` | no | enum or extension | final `session_terminated.payload.reason`, when a `session_terminated` event is present | | `source` | no | object | source-file metadata block (agent, path, format_version) | -| `meta` | no | object | vendor extensions; recommended keys use the `x-<vendor>/<name>` extension grammar (§8.3 / §12) | +| `meta` | no | object | vendor extensions; recommended keys use the `x-<vendor>/<name>` extension grammar ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) / [§12](./12-vendor-extensions.md#12-vendor-extensions)) | -When `parse_fidelity` is present, validators MUST compare it against the session group's entries.
`quarantined_count` MUST equal the count of quarantined unknown source records emitted as `system_event` entries with `payload.kind` matching `x-*/unknown_record`; see the §10.3 quarantine convention. `termination_reason`, when a `session_terminated` entry exists, MUST match the final `session_terminated.payload.reason`; if no `session_terminated` entry exists, writers MUST omit `termination_reason`. This field is denormalized for cheap listing/filtering only; the event stream remains authoritative. Quarantined records are suspect parse fidelity, not necessarily lossy, because the raw source record is preserved. +When `parse_fidelity` is present, validators MUST compare it against the session group's entries. `quarantined_count` MUST equal the count of quarantined unknown source records emitted as `system_event` entries with `payload.kind` matching `x-*/unknown_record`; see the [§10.3](./10-events.md#103-optional-event-types) quarantine convention. `termination_reason`, when a `session_terminated` entry exists, MUST match the final `session_terminated.payload.reason`; if no `session_terminated` entry exists, writers MUST omit `termination_reason`. This field is denormalized for cheap listing/filtering only; the event stream remains authoritative. Quarantined records are suspect parse fidelity, not necessarily lossy, because the raw source record is preserved. `vcs.remote_url` provides a canonical project identifier that survives across users, machines, and clones — useful for cross-machine aggregation, profile filtering, and project-scoped analysis. Adapters that populate it: @@ -106,9 +106,9 @@ When `parse_fidelity` is present, validators MUST compare it against the session Fresh repositories with an unborn HEAD MAY emit `vcs.revision:null` when a branch is known. A `vcs` block with `vcs.revision:null` MUST include `vcs.branch`, MUST omit `vcs.head_commit`, and writers MUST NOT emit an information-free VCS block. 
When `vcs.revision` is non-null for git, `vcs.head_commit` typically equals `vcs.revision`. -Privacy: `remote_url` reveals repository identity and MAY identify a private repo. Redacted artifacts MAY strip or normalize it (§16). +Privacy: `remote_url` reveals repository identity and MAY identify a private repo. Redacted artifacts MAY strip or normalize it ([§16](./16-redaction.md#16-redaction)). -When a trail file carries both header-level `vcs` (session-time context) and envelope-level `vcs` (file-assembly-time context, §8), they represent different observation points. File-assembly tools SHOULD preserve both when present. For multi-segment reconciliation rules, see §9.5. +When a trail file carries both header-level `vcs` (session-time context) and envelope-level `vcs` (file-assembly-time context, [§8](./08-the-trail-envelope.md#8-the-trail-envelope)), they represent different observation points. File-assembly tools SHOULD preserve both when present. For multi-segment reconciliation rules, see [§9.5](#95-session-segments-multi-segment-sessions). ### 9.3 Example @@ -125,19 +125,19 @@ The optional header `stream` object: | Field | Required | Type | Notes | |---|---|---|---| | `stream.state` | yes (if `stream` present) | enum | `open` while the writer is actively appending; `closed` once finalized | -| `stream.started_at` | no | string | ISO-8601 timestamp when the stream began; matches the §9 `ts` semantics | +| `stream.started_at` | no | string | ISO-8601 timestamp when the stream began; matches the [§9](#9-the-session-header) `ts` semantics | Lifecycle: 1. **Live phase.** Writer emits the header with `stream: { state: "open" }`. `content_hash` is omitted or set to `""`. Events are appended as they happen. -2. **Finalize.** Writer rewrites the header with `stream` either removed or set to `state: "closed"`, then computes `content_hash` per §7.3. Appending stops. -3. 
**Clean end.** Writer MAY append a `session_end` event (§10.3) to mark a normal conclusion before finalize. Abnormal ends still use `session_terminated`. +2. **Finalize.** Writer rewrites the header with `stream` either removed or set to `state: "closed"`, then computes `content_hash` per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash). Appending stops. +3. **Clean end.** Writer MAY append a `session_end` event ([§10.3](./10-events.md#103-optional-event-types)) to mark a normal conclusion before finalize. Abnormal ends still use `session_terminated`. Tail readers that observe `stream.state == "open"` SHOULD assume more events MAY arrive. Readers observing `stream` absent or `state == "closed"` SHOULD treat the file as a finalized artifact and verify `content_hash` when present. `stream` is absent in trail files produced by stream-unaware writers; readers MUST treat that case as equivalent to a finalized non-streaming artifact (existing v0.1.0 behavior). -A live `system_event` heartbeat convention is described in §10.3. +A live `system_event` heartbeat convention is described in [§10.3](./10-events.md#103-optional-event-types). --- @@ -149,7 +149,7 @@ A single logical source session MAY be split across multiple trail-file artifact - `segment.seq` — 1-based integer identifying which segment of the session this file is. Single-segment trails MAY omit `segment` entirely, which is equivalent to `{seq: 1}`. -- `segment.prev_content_hash` — the **session-level** `content_hash` (§7.3) of the previous segment's finalized bytes. Required when `seq >= 2`. Forms a verifiable chain (HLS / Postgres-WAL pattern). If the previous segment was lost and the chain cannot be verified, writers MAY emit `null` and readers MUST emit a `segment_chain_break` warning. +- `segment.prev_content_hash` — the **session-level** `content_hash` ([§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash)) of the previous segment's finalized bytes. 
Required when `seq >= 2`. Forms a verifiable chain (HLS / Postgres-WAL pattern). If the previous segment was lost and the chain cannot be verified, writers MAY emit `null` and readers MUST emit a `segment_chain_break` warning. #### Segment reconciliation @@ -161,18 +161,18 @@ and emit a new finalized trail with freshly computed hashes. Implementation merge policy is documented in `docs/implementation-semantics.md`. -Whole-file graph rules (§18) apply **within** a segment, not across. Cross-segment references are out of scope for v0.1 (event `parent_id` chains do not span segments). +Whole-file graph rules ([§18](./18-validation.md#18-validation)) apply **within** a segment, not across. Cross-segment references are out of scope for v0.1 (event `parent_id` chains do not span segments). #### Writer guidance - Writers SHOULD generate `session_uid` once per source session and reuse it for every segment. - Writers SHOULD finalize each segment normally before starting a new segment. -- To produce `segment.prev_content_hash` for segment N, finalize segment N-1 per §7.3 and copy its session-level `content_hash` verbatim into segment N's header. +- To produce `segment.prev_content_hash` for segment N, finalize segment N-1 per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) and copy its session-level `content_hash` verbatim into segment N's header. - Recovered writers MAY emit `segment.prev_content_hash: null` when the previous segment is lost. #### Composition with multi-session files -`session_uid` and `segment.*` sit at the **session-header** grain, not the file grain. A multi-session trail file (§9.6) MAY contain N session headers, each independently multi-segmentable. The trail envelope (§8) is unaffected. +`session_uid` and `segment.*` sit at the **session-header** grain, not the file grain. A multi-session trail file ([§9.6](#96-multi-session-trail-files)) MAY contain N session headers, each independently multi-segmentable. 
The trail envelope ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)) is unaffected. Within one file, two groups with the same `session_uid` SHOULD NOT claim the same normalized `segment.seq` value; a missing `segment` is equivalent to `seq: 1`. Duplicate pairs emit `duplicate_segment_seq` warnings. Groups for the same `session_uid` SHOULD appear in non-descending `segment.seq` order in file order; a descending sequence emits `out_of_order_segment_seq`. @@ -208,29 +208,29 @@ group := events* events := zero or more event records (§10) ``` -The trail envelope (§8) remains optional even when N ≥ 2. When present with N ≥ 2 groups, the file-level `content_hash` on the envelope covers all N groups' already-stamped session hashes, applying the §7.4 two-pass procedure unchanged (every session hash stamped first; envelope hash stamped over the finalized record set). When absent, file-level identity defaults from §8.5 apply (no file-level `content_hash` is meaningful; only per-session hashes). +The trail envelope ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)) remains optional even when N ≥ 2. When present with N ≥ 2 groups, the file-level `content_hash` on the envelope covers all N groups' already-stamped session hashes, applying the [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) two-pass procedure unchanged (every session hash stamped first; envelope hash stamped over the finalized record set). When absent, file-level identity defaults from [§8.5](./08-the-trail-envelope.md#85-file-identity-defaults-when-envelope-is-absent) apply (no file-level `content_hash` is meaningful; only per-session hashes). #### 9.6.2 Group boundaries and reader-tolerant recovery -Readers detect group boundaries by `type:"session"` alone. 
A record with `type:"session"` always opens a new group, regardless of `schema_version` value: this lets reader-tolerant parsers (§6) recover from a malformed mid-file header and continue parsing subsequent groups instead of treating the rest of the file as orphan events. The strict validator still errors on individual records that fail schema validation; recovery affects parsing structure, not per-record validity. +Readers detect group boundaries by `type:"session"` alone. A record with `type:"session"` always opens a new group, regardless of `schema_version` value: this lets reader-tolerant parsers ([§6](./06-versioning.md#6-versioning)) recover from a malformed mid-file header and continue parsing subsequent groups instead of treating the rest of the file as orphan events. The strict validator still errors on individual records that fail schema validation; recovery affects parsing structure, not per-record validity. Entries that appear before the first `type:"session"` record (and after any envelope) are not part of any group and are always invalid: `events_before_first_session_header`. #### 9.6.3 Per-group validation -Whole-file graph rules (§18) apply **within** a group, not across: +Whole-file graph rules ([§18](./18-validation.md#18-validation)) apply **within** a group, not across: - `parent_id` resolution is scoped to the enclosing group. A `parent_id` that references an `id` in another group is treated as `unknown_parent_id` (cross-group references go through `fork_from`, not `parent_id`). -- `tool_call` / `tool_result` pairing (§10.5) runs per group. An unmatched `tool_call` in group A is not satisfied by a `tool_result` in group B. +- `tool_call` / `tool_result` pairing ([§10.5](./10-events.md#105-tool-call-terminal-pairing)) runs per group. An unmatched `tool_call` in group A is not satisfied by a `tool_result` in group B. 
- `session_end.payload.final_message_id`, `source.raw.envelope_ref`, `payload.usage` checks, and the `stream` consistency rule each run per group. -Event `id` uniqueness (§7.5) remains **file-scoped**: every `id` (across every group's header and events) MUST be unique within the file. +Event `id` uniqueness ([§7.5](./07-identity-artifacts-and-content-addressing.md#75-event-identifiers)) remains **file-scoped**: every `id` (across every group's header and events) MUST be unique within the file. #### 9.6.4 Per-group `content_hash` -Each group's session-level `content_hash` is computed over the canonical bytes of that group's slice only (header + its events, envelope and sibling groups excluded). This is the same procedure as §7.3 / §7.4 applied to the slice. As a consequence, extracting one session from a multi-session file (drop the envelope, drop sibling groups, write only that group's canonical bytes) reproduces the same digest as the in-file value. +Each group's session-level `content_hash` is computed over the canonical bytes of that group's slice only (header + its events, envelope and sibling groups excluded). This is the same procedure as [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) / [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) applied to the slice. As a consequence, extracting one session from a multi-session file (drop the envelope, drop sibling groups, write only that group's canonical bytes) reproduces the same digest as the in-file value. -When a reader extracts a single session from a multi-session file outside writer-strict validation and the recomputed `content_hash` does not match the value stored in the in-file header, it SHOULD emit a warning rather than an error. Strict validation of a finalized trail file still treats an in-place finalized `content_hash` mismatch as an error (§18.4). 
+When a reader extracts a single session from a multi-session file outside writer-strict validation and the recomputed `content_hash` does not match the value stored in the in-file header, it SHOULD emit a warning rather than an error. Strict validation of a finalized trail file still treats an in-place finalized `content_hash` mismatch as an error ([§18.4](./18-validation.md#184-file-graph-checks)). #### 9.6.5 Cross-group references @@ -246,14 +246,14 @@ The only sanctioned cross-group reference primitive is the session header's `for - Sessions in a file SHOULD appear in chronological order by header `ts`. Out-of-order placement emits `out_of_order_session_headers` (warning, not error). - Per-session `cwd` and `vcs` MAY diverge across sessions in the same file. Divergent `vcs.revision` across groups emits `vcs_revision_divergence` (warning, not error) — useful for spotting accidental cross-checkout bundling. -- `schema_version` is carried on every session header. Sessions in the same file are independently versioned (reader-tolerant patch acceptance per §6 applies per-header). +- `schema_version` is carried on every session header. Sessions in the same file are independently versioned (reader-tolerant patch acceptance per [§6](./06-versioning.md#6-versioning) applies per-header). - Empty groups (a header with zero events) are legal — they represent "session started, nothing happened." #### 9.6.7 Redaction of multi-session files -Redacting a multi-session trail produces a multi-session redacted trail with the same group count in the same order, redacted in place. The redactor resets `content_hash` to `` on every session header (and on the envelope when present) before share/transport tooling re-stamps via the two-pass §7.4 procedure. +Redacting a multi-session trail produces a multi-session redacted trail with the same group count in the same order, redacted in place. 
The redactor resets `content_hash` to `` on every session header (and on the envelope when present) before share/transport tooling re-stamps via the two-pass [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity) procedure. -When redaction changes bytes, lineage hashes that point to artifacts in the same redacted file MUST be rewritten to the target's redacted content hash, using the §7.4.1 hash tier. Header-level `fork_from.content_hash` is rewritten when `fork_from.session_id` names an in-file sibling. `segment.prev_content_hash` is rewritten when the previous `segment.seq` for the same `session_uid` is in the file. When the lineage target is not in the redacted file, redactors MUST drop `fork_from.content_hash` while keeping id references, and MUST set `segment.prev_content_hash` to `null` for an unverifiable previous segment. `redacted_from.content_hash` remains raw-artifact provenance: header-level `redacted_from.content_hash` links the redacted session to its raw counterpart; envelope-level `redacted_from.content_hash` links the redacted file to its raw counterpart. +When redaction changes bytes, lineage hashes that point to artifacts in the same redacted file MUST be rewritten to the target's redacted content hash, using the [§7.4.1](./07-identity-artifacts-and-content-addressing.md#741-hash-tier-for-fork_from-and-redacted_from) hash tier. Header-level `fork_from.content_hash` is rewritten when `fork_from.session_id` names an in-file sibling. `segment.prev_content_hash` is rewritten when the previous `segment.seq` for the same `session_uid` is in the file. When the lineage target is not in the redacted file, redactors MUST drop `fork_from.content_hash` while keeping id references, and MUST set `segment.prev_content_hash` to `null` for an unverifiable previous segment. 
`redacted_from.content_hash` remains raw-artifact provenance: header-level `redacted_from.content_hash` links the redacted session to its raw counterpart; envelope-level `redacted_from.content_hash` links the redacted file to its raw counterpart. #### 9.6.8 No hard cap diff --git a/spec/v0.1.0/10-events.md b/spec/v0.1.0/10-events.md index f2115a8..072227d 100644 --- a/spec/v0.1.0/10-events.md +++ b/spec/v0.1.0/10-events.md @@ -31,14 +31,14 @@ Every event entry has this base shape: | Field | Required | Type | Notes | |---|---|---|---| -| `type` | yes | string | event type; see §10.2-10.3 | -| `id` | yes | string | globally unique; ULID or UUID per §19 | +| `type` | yes | string | event type; see [§10.2](#102-mandatory-event-types)-[§10.3](#103-optional-event-types) | +| `id` | yes | string | globally unique; ULID or UUID per [§19](./19-formal-schema.md#19-formal-schema) | | `parent_id` | no | string | references another `id` for tree topology; absent = linear file order | | `ts` | yes | string | ISO-8601 timestamp | | `payload` | yes | object | type-specific data | | `semantic` | no | object | linking metadata for fallback pairing | | `source` | no | object | adapter-provided source metadata | -| `meta` | no | object | vendor extensions (§8.3 / §12) | +| `meta` | no | object | vendor extensions ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) / [§12](./12-vendor-extensions.md#12-vendor-extensions)) | ### 10.2 Mandatory event types @@ -140,7 +140,7 @@ Model identification for downstream cost analysis uses `payload.model` first, fa When a single source envelope fans out to multiple entries (text blocks, tool calls, thinking blocks sharing one API response), `usage` accounts for the whole envelope. Writers MUST attach it to the first derived entry whose payload supports `usage`, skip non-usage-capable derived entries, and MUST NOT repeat it on later derived entries.
In v0.1.0, `usage` is valid on `agent_message`, `agent_thinking`, and `tool_call` payloads; if an envelope emits none of those entries, canonical `usage` is omitted. -Monetary cost is intentionally not a canonical trail field or event. Analyzers compute cost from token usage, model identification, and their own pricing tables, and carry pricing provenance such as currency, pricing source, and effective date in analyzer output. If a source exposes a billing estimate, writers MAY preserve it as opaque source data under `x-/` keys on the entry's `meta` field (§8.3). Latency and wall-clock telemetry are deferred to a future minor version; sources rarely expose them consistently. +Monetary cost is intentionally not a canonical trail field or event. Analyzers compute cost from token usage, model identification, and their own pricing tables, and carry pricing provenance such as currency, pricing source, and effective date in analyzer output. If a source exposes a billing estimate, writers MAY preserve it as opaque source data under `x-/` keys on the entry's `meta` field ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention)). Latency and wall-clock telemetry are deferred to a future minor version; sources rarely expose them consistently. #### `task_plan_update` @@ -343,7 +343,7 @@ top-level `exit_code` on `tool_result`, because the concept does not apply to ki or `web_fetch`. Privacy: `meta` carries the same raw content as `output` (shell stdout, MCP block text), so the -redaction pipeline scrubs `meta` string leaves alongside `output` (§16). +redaction pipeline scrubs `meta` string leaves alongside `output` ([§16](./16-redaction.md#16-redaction)). #### `user_query` @@ -470,7 +470,7 @@ Post-creation update to logical session metadata. 
The session header carries the | Payload field | Required | Type | Notes | |---|---|---|---| | `field` | yes | enum or extension | One of `name`, `description`, `tags`, `agent.model_default`, `vcs.branch`, `vcs.worktree`, or `x-/`. | -| `value` | yes | field-specific | Replacement value. Must match the field type: string for `name`/`description`/`agent.model_default`/`vcs.branch`, string array for `tags`, and the §9.2 worktree shape for `vcs.worktree`. Extension fields MAY carry any JSON value. | +| `value` | yes | field-specific | Replacement value. Must match the field type: string for `name`/`description`/`agent.model_default`/`vcs.branch`, string array for `tags`, and the [§9.2](./09-the-session-header.md#92-fields) worktree shape for `vcs.worktree`. Extension fields MAY carry any JSON value. | | `previous_value` | no | field-specific | Prior value when the adapter knows it. Same type as `value`. | | `reason` | yes | enum or extension | `ai_generated`, `user_set`, `runtime_inferred`, `external`, or `x-/`. | @@ -526,7 +526,7 @@ A meaningful source timeline record that is not a user message, agent message, t | `context_injected` | Runtime injected standalone context that SHOULD remain visible outside a `user_message`. | `{ source_kind, name?, size_bytes? }` | | `hook_progress` | Catch-all for source-emitted progress/hook/queue records that do not map to a more specific reserved lifecycle kind. Adapters SHOULD prefer `session_start` / `turn_end` / `pre_tool_use` / `post_tool_use` / `subagent_end` / `hook_fired` when the source signal is unambiguous, and fall back to `hook_progress` only for unrecognised progress streams. | `{ hook_event?, hook_name?, ... }` | | `queue_operation` | Source recorded an enqueue or dequeue operation. | Free-form. | -| `heartbeat` | Periodic liveness ping during streaming capture (§9.4). Optional. Non-normative; readers MAY treat as informational. | `{ interval_ms? 
}` | +| `heartbeat` | Periodic liveness ping during streaming capture ([§9.4](./09-the-session-header.md#94-streaming-and-live-capture)). Optional. Non-normative; readers MAY treat as informational. | `{ interval_ms? }` | | `vcs_commit` | Adapter detected a VCS commit created during the session. | `{ sha, tool_call_id, branch?, message?, repo? }` | Use `tool_call_aborted{scope:"turn"}` for stops in a tool-invocation context where no specific call is identifiable. Use `system_event.kind:"turn_aborted"` for model/system-level turn stops with no tool in flight. @@ -565,7 +565,7 @@ Cross-agent diagnostic signals. Adapters MAY emit these to surface non-fatal err - Anything else MUST use `x-/` form, e.g. `x-claudecode/notification`. - Readers are tolerant of unknown `x-*` kinds — they pass through with no diagnostic. - Bare unknown strings (no `x-` prefix, not in the reserved set) are rejected by writer-strict validation. -- Adapters quarantining an unparseable source record MUST emit `system_event` with `kind:"x-/unknown_record"` and preserve the record in `source.raw`; `parse_fidelity.quarantined_count` counts this pattern (§9.2). +- Adapters quarantining an unparseable source record MUST emit `system_event` with `kind:"x-/unknown_record"` and preserve the record in `source.raw`; `parse_fidelity.quarantined_count` counts this pattern ([§9.2](./09-the-session-header.md#92-fields)). - If an `x-*` kind proves cross-agent, promote it to the reserved enum in a minor format version bump. Document emitted kinds per adapter in `docs/parser-source-matrix.md`. #### `capability_change` @@ -846,7 +846,7 @@ Synthesized instances MUST set `source.synthesized: true`. #### `session_end` -Clean terminal marker. Distinct from `session_terminated` (abnormal). Optional; many writers won't emit it. When present at EOF, signals a normal conclusion of the session and suppresses the "unmatched tool calls at EOF" warning of §18.4. +Clean terminal marker. 
Distinct from `session_terminated` (abnormal). Optional; many writers won't emit it. When present at EOF, signals a normal conclusion of the session and suppresses the "unmatched tool calls at EOF" warning of [§18.4](./18-validation.md#184-file-graph-checks). ```jsonc { @@ -889,7 +889,7 @@ When `tool_result.payload.for_id` is null, missing, or refers to a non-existent Writers SHOULD avoid relying on fallbacks. Populate `for_id` when reliable; use `semantic.call_id` when the source's native ID doesn't map cleanly to event `id`. Do not use semantic or sequential fallback pairing for `tool_call_aborted`; if a source cannot identify the call, emit `scope:"turn"` without `for_id`. -Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning (§18.4): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. +Validators apply the deterministic pairing rules when computing the "unmatched `tool_call` at EOF" warning ([§18.4](./18-validation.md#184-file-graph-checks)): explicit `for_id` references from `tool_result` and call-scoped `tool_call_aborted` first, then fallback rules 1 and 2 above for `tool_result` only (semantic match, branch-scoped sequential match). The heuristic rule (3) is reader-only — it produces uncertain pairings that readers MUST flag in rendered output, so validators do not apply it. A `tool_call` is considered matched when one of these deterministic methods pairs it with a `tool_result` or call-scoped `tool_call_aborted`. > Non-normative diagram. 
@@ -930,7 +930,7 @@ When a single source envelope produces multiple entries — for example, an assi - The **first** entry derived from a given source envelope sets `source.raw.envelope` (and `source.raw.block`, `source.raw.block_index` if applicable). - **Subsequent** entries derived from the same envelope set `source.raw.envelope_ref` to the first entry's `id`. They omit `source.raw.envelope` and keep `block` / `block_index`. -`source.raw.envelope_ref` is an optional string. Writers MUST ensure it references the `id` of an entry that appears **earlier** in the same file — the same envelope, inlined once. Forward references and dangling references are reader errors (`source_raw_envelope_ref_unresolved`, §18.4). The first-inline-then-ref shape is streaming-write friendly: readers resolve refs in a single pass without backtracking. +`source.raw.envelope_ref` is an optional string. Writers MUST ensure it references the `id` of an entry that appears **earlier** in the same file — the same envelope, inlined once. Forward references and dangling references are reader errors (`source_raw_envelope_ref_unresolved`, [§18.4](./18-validation.md#184-file-graph-checks)). The first-inline-then-ref shape is streaming-write friendly: readers resolve refs in a single pass without backtracking. This mechanism is additive over v0.1.0. Readers that do not understand `envelope_ref` will see it as an unknown raw-source field and ignore it; the entry's other fields (`type`, `payload`, `semantic`) remain fully self-describing. diff --git a/spec/v0.1.0/12-vendor-extensions.md b/spec/v0.1.0/12-vendor-extensions.md index 78ffb78..d036cd0 100644 --- a/spec/v0.1.0/12-vendor-extensions.md +++ b/spec/v0.1.0/12-vendor-extensions.md @@ -1,6 +1,6 @@ ## 12. Vendor extensions -Implementations and vendors can add custom data via the `meta` field on the trail envelope, session header, or any event entry. 
Use the `x-/` extension grammar (§12.1) for keys to avoid collisions: +Implementations and vendors can add custom data via the `meta` field on the trail envelope, session header, or any event entry. Use the `x-/` extension grammar ([§12.1](#121-extension-grammar)) for keys to avoid collisions: ```jsonc "meta": { @@ -12,7 +12,7 @@ Implementations and vendors can add custom data via the `meta` field on the trai Readers MAY preserve, ignore, or render `meta` fields. They MUST NOT abort on unknown keys. -`entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see §16. +`entry.meta.redaction_count` is a standard optional non-negative integer convention for redacted artifacts. It counts how many redactor mutations were applied to that entry; see [§16](./16-redaction.md#16-redaction). The `meta` field is for fields outside the canonical vocabulary. For verbatim source-event preservation, use `source.raw` ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)) instead. See [§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention) for the full convention. diff --git a/spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md b/spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md index 5db9e52..f8ac3be 100644 --- a/spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md +++ b/spec/v0.1.0/15-truncation-overflow-and-raw-source-size.md @@ -42,9 +42,9 @@ Two placements are valid: Specific size thresholds, the algorithm a writer uses to choose which leaves to elide, and whether elision is gated by a hard cap are implementation policy — they belong in writer documentation, not the format. Validators MAY warn on entries whose `source.raw` exceeds an implementation-chosen size budget, but the wire format itself imposes no fixed limit. 
-When elision happens at the first emission of a source envelope (§10.7), subsequent `envelope_ref` entries still resolve — the ref points at the elided entry's `id`, not at its inlined envelope. +When elision happens at the first emission of a source envelope ([§10.7](./10-events.md#107-source-envelope-referencing)), subsequent `envelope_ref` entries still resolve — the ref points at the elided entry's `id`, not at its inlined envelope. -Adapters MUST redact known secret patterns in `source.raw` before writing — emission-time redaction is a writer responsibility, not a share-time concern. Validators emit `source_raw_unredacted_secret` (warning) when a string leaf in `source.raw` matches a known credential pattern (Authorization headers, Bearer tokens, JWT, vendor API keys, PEM private key blocks, ENV-style assignments). Share-time redaction (§16) layers additional normalization on top — paths, PII — and produces a separate artifact. +Adapters MUST redact known secret patterns in `source.raw` before writing — emission-time redaction is a writer responsibility, not a share-time concern. Validators emit `source_raw_unredacted_secret` (warning) when a string leaf in `source.raw` matches a known credential pattern (Authorization headers, Bearer tokens, JWT, vendor API keys, PEM private key blocks, ENV-style assignments). Share-time redaction ([§16](./16-redaction.md#16-redaction)) layers additional normalization on top — paths, PII — and produces a separate artifact. --- diff --git a/spec/v0.1.0/16-redaction.md b/spec/v0.1.0/16-redaction.md index dee593a..fb56498 100644 --- a/spec/v0.1.0/16-redaction.md +++ b/spec/v0.1.0/16-redaction.md @@ -9,7 +9,7 @@ Share-time redactors MUST apply the privacy rules below before producing shared | Field or value | Share-time action | |---|---| | `cwd` | Normalize or strip. | -| `vcs.remote_url` | Strip or normalize per §9.2 unless the user explicitly opts in. 
| +| `vcs.remote_url` | Strip or normalize per [§9.2](./09-the-session-header.md#92-fields) unless the user explicitly opts in. | | `system_event.payload.data.repo` for `vcs_commit` | Treat like `vcs.remote_url`; strip or normalize unless the user explicitly opts in. | | `vcs.worktree.path`, `vcs.worktree.original_cwd` | Normalize or strip. | | `source.path` | Normalize or strip. | @@ -23,7 +23,7 @@ If a resolved response contains answer keys that do not appear on the referenced Share-time redactors SHOULD populate `entry.meta.redaction_count` on each changed event entry. The count is a non-negative integer equal to the number of redactor mutations applied to that entry. Existing numeric `redaction_count` values are additive when a redacted trail is redacted again; unchanged entries keep their existing value. -When redaction changes bytes, lineage hashes are updated as described in §9.6.7. This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. +When redaction changes bytes, lineage hashes are updated as described in [§9.6.7](./09-the-session-header.md#967-redaction-of-multi-session-files). This prevents redacted session bundles and redacted segment chains from retaining raw-artifact hashes that can no longer verify against the shared redacted bytes. > Non-normative diagram. diff --git a/spec/v0.1.0/17-security-considerations.md b/spec/v0.1.0/17-security-considerations.md index 8dd372b..b9f29f1 100644 --- a/spec/v0.1.0/17-security-considerations.md +++ b/spec/v0.1.0/17-security-considerations.md @@ -4,17 +4,17 @@ Trail files are untrusted input. All string content, including messages, tool ou Agent Trail intentionally has no format-level size caps. Consumers SHOULD enforce deployment-specific limits for maximum line length, file size, event count, graph depth, and decoded attachment or overflow bytes. 
Consumers SHOULD stream rather than buffer whole files where possible; JSONL is the interchange shape partly to make bounded streaming readers practical. -Hostile files can contain invalid graph structure even though `parent_id` cycles and cross-group links are invalid (§13, §18.4). Validators MUST NOT loop indefinitely while checking graph topology, and tree renderers SHOULD bound recursion or use iterative traversal when displaying deep parent chains. +Hostile files can contain invalid graph structure even though `parent_id` cycles and cross-group links are invalid ([§13](./13-tree-and-branching.md#13-tree-and-branching), [§18.4](./18-validation.md#184-file-graph-checks)). Validators MUST NOT loop indefinitely while checking graph topology, and tree renderers SHOULD bound recursion or use iterative traversal when displaying deep parent chains. -`content_hash` provides byte integrity for the canonical artifact (§7.3, §7.4), not authorship, provenance, or trust. A trail claiming `agent.name: "claude-code"` proves only that the file contains that string. Agent Trail v0.1.0 has no signature or attestation mechanism; signing MAY be added by a future extension. +`content_hash` provides byte integrity for the canonical artifact ([§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash), [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity)), not authorship, provenance, or trust. A trail claiming `agent.name: "claude-code"` proves only that the file contains that string. Agent Trail v0.1.0 has no signature or attestation mechanism; signing MAY be added by a future extension. -In v0.1.0, `content_hash` values are bare 64-character SHA-256 hex strings (§7.3). Other content-addressed references, such as attachment URIs (§10.2) and `overflow_ref` values (§15), use `sha256:` references. Consumers that verify prefixed content-addressed references MUST reject unknown algorithm prefixes rather than treating the reference as verified. 
+In v0.1.0, `content_hash` values are bare 64-character SHA-256 hex strings ([§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash)). Other content-addressed references, such as attachment URIs ([§10.2](./10-events.md#102-mandatory-event-types)) and `overflow_ref` values ([§15](./15-truncation-overflow-and-raw-source-size.md#15-truncation-overflow-and-raw-source-size)), use `sha256:` references. Consumers that verify prefixed content-addressed references MUST reject unknown algorithm prefixes rather than treating the reference as verified. -Attachment URIs and overflow references can identify local resources on the producer's machine. Viewers SHOULD NOT dereference `file:` URIs, `overflow_ref` values, or other external references automatically. Viewers MUST NOT dereference local `file:` URIs or non-`sha256:` overflow references from redacted or shared trails; §16 requires share-time redactors to remove or rewrite those values before transport. +Attachment URIs and overflow references can identify local resources on the producer's machine. Viewers SHOULD NOT dereference `file:` URIs, `overflow_ref` values, or other external references automatically. Viewers MUST NOT dereference local `file:` URIs or non-`sha256:` overflow references from redacted or shared trails; [§16](./16-redaction.md#16-redaction) requires share-time redactors to remove or rewrite those values before transport. Redaction reduces content exposure but does not make a shared trail private. Timestamps, event counts, tool names, model names, branch shape, unredacted file names, and remaining metadata can still reveal workflow information. Sharing a redacted trail SHOULD be treated as publishing it to anyone who can access the transport. -Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. 
Sharing tools SHOULD scan headers and trail envelopes as well as event payloads (§16). +Header fields need the same privacy review as event payloads. `cwd`, `vcs.remote_url`, `vcs.worktree`, `name`, `description`, and `tags` commonly contain usernames, internal hostnames, private repository names, or project identifiers. Sharing tools SHOULD scan headers and trail envelopes as well as event payloads ([§16](./16-redaction.md#16-redaction)). The implementation-maintained detector catalog and rule pack schema live in `docs/redaction-patterns.md`; this catalog is non-normative and does not change the trail file format. diff --git a/spec/v0.1.0/18-validation.md b/spec/v0.1.0/18-validation.md index f1a2faa..9aa90d2 100644 --- a/spec/v0.1.0/18-validation.md +++ b/spec/v0.1.0/18-validation.md @@ -19,10 +19,10 @@ reader or writer support they implement. | Class | Name | Requirements | |---|---|---| -| **R0** | Renderer | Reader-tolerant JSONL parsing per §6 and §18.2; renders the mandatory event types in §10.2, including user messages, agent messages, tool calls, tool results, and summaries; preserves or displays fallback output for unknown records it can parse; does not crash on valid or quarantinable input. | -| **R1** | Structural reader | R0 plus the non-hash whole-file layout, graph, pairing, streaming-state, and diagnostic checks in §18.4. R1 catches duplicate ids, unknown parents, parent cycles, unresolved `source.raw.envelope_ref`, tool-call pairing diagnostics, and other file-level checks that do not require recomputing content hashes or comparing segment-chain hashes. | -| **R2** | Verifying reader | R1 plus content-hash verification per §7.3 and §7.4, and segment-chain verification per §9.5. Readers in this class warn rather than abort on reader-tolerant hash mismatches, per §18.4.1. | -| **W** | Writer | Emits writer-strict records that validate against `schema.json` and satisfy the strict whole-file validation rules in §18.4. 
Writer conformance is about emitted trail files, not reader tolerance. | +| **R0** | Renderer | Reader-tolerant JSONL parsing per [§6](./06-versioning.md#6-versioning) and [§18.2](#182-reader-tolerance); renders the mandatory event types in [§10.2](./10-events.md#102-mandatory-event-types), including user messages, agent messages, tool calls, tool results, and summaries; preserves or displays fallback output for unknown records it can parse; does not crash on valid or quarantinable input. | +| **R1** | Structural reader | R0 plus the non-hash whole-file layout, graph, pairing, streaming-state, and diagnostic checks in [§18.4](#184-file-graph-checks). R1 catches duplicate ids, unknown parents, parent cycles, unresolved `source.raw.envelope_ref`, tool-call pairing diagnostics, and other file-level checks that do not require recomputing content hashes or comparing segment-chain hashes. | +| **R2** | Verifying reader | R1 plus content-hash verification per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) and [§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity), and segment-chain verification per [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions). Readers in this class warn rather than abort on reader-tolerant hash mismatches, per [§18.4.1](#1841-errors). | +| **W** | Writer | Emits writer-strict records that validate against `schema.json` and satisfy the strict whole-file validation rules in [§18.4](#184-file-graph-checks). Writer conformance is about emitted trail files, not reader tolerance. | `@agent-trail/core` implements Class R2 reader behavior through its parsing, validation, canonicalization, hashing, and multi-segment reconciliation APIs. 
@@ -47,51 +47,51 @@ Portable diagnostic code registry: | Code | Severity | Defining section | |---|---|---| -| `ambiguous_sequential_pairing` | warning | §10.5 / §18.4.2 | -| `child_session_fork_from_mismatch` | warning | §18.4.2 | -| `child_session_parent_link_mismatch` | warning | §18.4.2 | -| `content_hash_invalid` | error | §7.3 / §18.4.1 | -| `content_hash_mismatch` | error (strict), warning (reader-tolerant) | §7.3 / §18.4.1 | -| `cross_group_fork_from_hash_mismatch` | warning | §9.6.5 | -| `duplicate_id` | error | §18.4.1 | -| `duplicate_option_labels` | warning | §10.2 / §18.4.2 | -| `duplicate_segment_seq` | warning | §9.5 / §18.4.2 | -| `duplicate_tool_result` | warning | §10.5 / §18.4.2 | -| `duplicate_user_query_question_id` | error | §10.2 | -| `envelope_has_parent_id` | error | §8 / §18.4.1 | -| `envelope_not_at_line_1` | error | §8 / §18.4.1 | -| `envelope_sessions_manifest_drift` | warning | §8.4 / §18.4.2 | -| `events_before_first_session_header` | error | §9.6 / §18.4.1 | -| `header_has_parent_id` | error | §9 / §18.4.1 | -| `ill_formed_string` | error (strict), warning (reader-tolerant) | §5.2 / §18.4.1 | -| `missing_header` | error | §9 / §18.4.1 | -| `missing_header_after_envelope` | error | §8 / §18.4.1 | -| `multiple_envelopes` | error | §8 / §18.4.1 | -| `non_interoperable_number` | warning | §5.2 / §18.4.2 | -| `non_monotonic_event_ts` | warning | §18.4.2 | -| `out_of_order_segment_seq` | warning | §9.5 / §18.4.2 | -| `out_of_order_session_headers` | warning | §9.6.6 | -| `parent_cycle` | error | §13.2 / §18.4.1 | -| `parse_fidelity_drift` | error | §9.2 / §18.4.1 | -| `reader_tolerant_schema_version` | warning | §6 / §18.2 | -| `reader_tolerant_unknown_payload_field` | warning | §18.2 | -| `reader_tolerant_unknown_record` | warning | §18.2 | -| `segment_chain_break` | warning | §9.5 | -| `source_raw_envelope_ref_unresolved` | error | §10.7 / §18.4.1 | -| `source_raw_unredacted_secret` | warning | §15.1 / §18.4.2 | -| 
`stream_open_with_content_hash` | warning | §18.4.3 | -| `stream_open_with_terminal_event` | warning | §18.4.3 | -| `tool_args_unredacted_secret` | warning | §16 / §18.4.2 | -| `tool_result_semantic_conflict` | warning | §10.5 / §18.4.2 | -| `unknown_abandoned_branch_id` | warning | §10.3 / §18.4.2 | -| `unknown_branch_point_from_id` | warning | §10.3 / §18.4.2 | -| `unknown_final_message_id` | warning | §10.3 / §18.4.2 | -| `unknown_parent_id` | error | §10.1 / §18.4.1 | -| `unknown_user_query_answer_key` | error | §10.2 | -| `unknown_user_query_for_id` | warning | §10.2 / §18.4.2 | -| `unmatched_tool_call_at_eof` | warning | §10.5 / §18.4.2 | -| `vcs_remote_url_with_credentials` | warning or error | §9.2 / §18.4 | -| `vcs_revision_divergence` | warning | §9.6.6 | +| `ambiguous_sequential_pairing` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `child_session_fork_from_mismatch` | warning | [§18.4.2](#1842-warnings) | +| `child_session_parent_link_mismatch` | warning | [§18.4.2](#1842-warnings) | +| `content_hash_invalid` | error | [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) / [§18.4.1](#1841-errors) | +| `content_hash_mismatch` | error (strict), warning (reader-tolerant) | [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) / [§18.4.1](#1841-errors) | +| `cross_group_fork_from_hash_mismatch` | warning | [§9.6.5](./09-the-session-header.md#965-cross-group-references) | +| `duplicate_id` | error | [§18.4.1](#1841-errors) | +| `duplicate_option_labels` | warning | [§10.2](./10-events.md#102-mandatory-event-types) / [§18.4.2](#1842-warnings) | +| `duplicate_segment_seq` | warning | [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions) / [§18.4.2](#1842-warnings) | +| `duplicate_tool_result` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `duplicate_user_query_question_id` | error | 
[§10.2](./10-events.md#102-mandatory-event-types) | +| `envelope_has_parent_id` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `envelope_not_at_line_1` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `envelope_sessions_manifest_drift` | warning | [§8.4](./08-the-trail-envelope.md#84-the-sessions-manifest) / [§18.4.2](#1842-warnings) | +| `events_before_first_session_header` | error | [§9.6](./09-the-session-header.md#96-multi-session-trail-files) / [§18.4.1](#1841-errors) | +| `header_has_parent_id` | error | [§9](./09-the-session-header.md#9-the-session-header) / [§18.4.1](#1841-errors) | +| `ill_formed_string` | error (strict), warning (reader-tolerant) | [§5.2](./05-file-format.md#52-encoding) / [§18.4.1](#1841-errors) | +| `missing_header` | error | [§9](./09-the-session-header.md#9-the-session-header) / [§18.4.1](#1841-errors) | +| `missing_header_after_envelope` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `multiple_envelopes` | error | [§8](./08-the-trail-envelope.md#8-the-trail-envelope) / [§18.4.1](#1841-errors) | +| `non_interoperable_number` | warning | [§5.2](./05-file-format.md#52-encoding) / [§18.4.2](#1842-warnings) | +| `non_monotonic_event_ts` | warning | [§18.4.2](#1842-warnings) | +| `out_of_order_segment_seq` | warning | [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions) / [§18.4.2](#1842-warnings) | +| `out_of_order_session_headers` | warning | [§9.6.6](./09-the-session-header.md#966-order-divergence-and-per-session-metadata) | +| `parent_cycle` | error | [§13.2](./13-tree-and-branching.md#132-acyclicity) / [§18.4.1](#1841-errors) | +| `parse_fidelity_drift` | error | [§9.2](./09-the-session-header.md#92-fields) / [§18.4.1](#1841-errors) | +| `reader_tolerant_schema_version` | warning | [§6](./06-versioning.md#6-versioning) / [§18.2](#182-reader-tolerance) | +| 
`reader_tolerant_unknown_payload_field` | warning | [§18.2](#182-reader-tolerance) | +| `reader_tolerant_unknown_record` | warning | [§18.2](#182-reader-tolerance) | +| `segment_chain_break` | warning | [§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions) | +| `source_raw_envelope_ref_unresolved` | error | [§10.7](./10-events.md#107-source-envelope-referencing) / [§18.4.1](#1841-errors) | +| `source_raw_unredacted_secret` | warning | [§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction) / [§18.4.2](#1842-warnings) | +| `stream_open_with_content_hash` | warning | [§18.4.3](#1843-streaming-state-rules) | +| `stream_open_with_terminal_event` | warning | [§18.4.3](#1843-streaming-state-rules) | +| `tool_args_unredacted_secret` | warning | [§16](./16-redaction.md#16-redaction) / [§18.4.2](#1842-warnings) | +| `tool_result_semantic_conflict` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `unknown_abandoned_branch_id` | warning | [§10.3](./10-events.md#103-optional-event-types) / [§18.4.2](#1842-warnings) | +| `unknown_branch_point_from_id` | warning | [§10.3](./10-events.md#103-optional-event-types) / [§18.4.2](#1842-warnings) | +| `unknown_final_message_id` | warning | [§10.3](./10-events.md#103-optional-event-types) / [§18.4.2](#1842-warnings) | +| `unknown_parent_id` | error | [§10.1](./10-events.md#101-base-shape) / [§18.4.1](#1841-errors) | +| `unknown_user_query_answer_key` | error | [§10.2](./10-events.md#102-mandatory-event-types) | +| `unknown_user_query_for_id` | warning | [§10.2](./10-events.md#102-mandatory-event-types) / [§18.4.2](#1842-warnings) | +| `unmatched_tool_call_at_eof` | warning | [§10.5](./10-events.md#105-tool-call-terminal-pairing) / [§18.4.2](#1842-warnings) | +| `vcs_remote_url_with_credentials` | warning or error | [§9.2](./09-the-session-header.md#92-fields) / [§18.4](#184-file-graph-checks) | +| `vcs_revision_divergence` | 
warning | [§9.6.6](./09-the-session-header.md#966-order-divergence-and-per-session-metadata) | #### Conformance suite (non-normative) @@ -123,7 +123,7 @@ A v0.1.0-compliant trail file MUST also pass whole-file checks. #### 18.4.1 Errors -1. The first line is either a trail envelope (`type: "trail"`, §8) or a session header (`type: "session"`, `schema_version: "0.1.0"`). When the envelope is present, the session header MUST occupy line 2. +1. The first line is either a trail envelope (`type: "trail"`, [§8](./08-the-trail-envelope.md#8-the-trail-envelope)) or a session header (`type: "session"`, `schema_version: "0.1.0"`). When the envelope is present, the session header MUST occupy line 2. 2. Subsequent lines match an event schema (`type`, `id`, `ts`, `payload`). 3. All `id` values are unique within the file. 4. Every non-null `parent_id` references an `id` in the same file. @@ -134,15 +134,15 @@ A v0.1.0-compliant trail file MUST also pass whole-file checks. If `content_hash` is present: 1. The value is 64 hex characters (SHA-256). Invalid hash shape emits `content_hash_invalid` at `/content_hash`. -2. Strict validators recompute and verify per §7.3. On mismatch, strict validation fails with `content_hash_mismatch` at `/content_hash`. Reader-tolerant parsers MAY warn but MUST NOT abort. +2. Strict validators recompute and verify per [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash). On mismatch, strict validation fails with `content_hash_mismatch` at `/content_hash`. Reader-tolerant parsers MAY warn but MUST NOT abort. Additional whole-file errors: -- `parse_fidelity`, when present, MUST match the session group's entries (§9.2). Drift emits `parse_fidelity_drift` at the mismatched `parse_fidelity` field. +- `parse_fidelity`, when present, MUST match the session group's entries ([§9.2](./09-the-session-header.md#92-fields)). Drift emits `parse_fidelity_drift` at the mismatched `parse_fidelity` field. 
- A `user_query` question id MUST be unique within that query. Duplicate ids emit `duplicate_user_query_question_id` at the repeated question id. - A `user_query_response.payload.answers` key not present in the resolved `user_query.payload.questions[].id` set emits `unknown_user_query_answer_key` at that answer key. -- `source.raw.envelope_ref`, when set, MUST reference the `id` of an earlier entry in the same file (§10.7). Dangling or forward references are errors with code `source_raw_envelope_ref_unresolved` at `/source/raw/envelope_ref`. -- Trail envelope position and uniqueness (§8): +- `source.raw.envelope_ref`, when set, MUST reference the `id` of an earlier entry in the same file ([§10.7](./10-events.md#107-source-envelope-referencing)). Dangling or forward references are errors with code `source_raw_envelope_ref_unresolved` at `/source/raw/envelope_ref`. +- Trail envelope position and uniqueness ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)): - `envelope_not_at_line_1` (error): a `type:"trail"` record appears on a line other than line 1. - `multiple_envelopes` (error): more than one envelope appears in the file. - `missing_header_after_envelope` (error): an envelope at line 1 is not followed by a session header on line 2. @@ -150,31 +150,31 @@ Additional whole-file errors: #### 18.4.2 Warnings -- Each `tool_call.id` SHOULD be referenced by exactly one `tool_result.payload.for_id` (or paired via §10.5). +- Each `tool_call.id` SHOULD be referenced by exactly one `tool_result.payload.for_id` (or paired via [§10.5](./10-events.md#105-tool-call-terminal-pairing)). - Inline `subagent_invoke` events SHOULD have descendants in the same group, or external child invocations SHOULD set `args.session_id` to the child header `id` when known. - When an in-file child session is present, the parent `subagent_invoke.args.session_id` and child `header.fork_from.{session_id,entry_id}` SHOULD agree. 
Mismatches are warnings, not errors, so partial bundles and external-only references remain readable. - `branch_point.payload.from_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_branch_point_from_id` at `/payload/from_id`. - `branch_summary.payload.abandoned_branch_id` SHOULD reference a prior event in the same session group. A dangling or forward reference emits `unknown_abandoned_branch_id` at `/payload/abandoned_branch_id`. - Writers SHOULD emit `session_terminated` if any `tool_call` remains unmatched at EOF. The warning code is `unmatched_tool_call_at_eof`. Suppression: - - A `session_end` event anywhere in the file suppresses this warning for every unmatched `tool_call` (clean conclusion, §10.3). + - A `session_end` event anywhere in the file suppresses this warning for every unmatched `tool_call` (clean conclusion, [§10.3](./10-events.md#103-optional-event-types)). - A `session_terminated` event whose `payload.open_call_ids` lists a given `tool_call.id` suppresses the warning for that id only (explicit acknowledgement). A `session_terminated` event without `open_call_ids` does not suppress the warning. - A `tool_result` paired by sequential fallback when two or more unmatched prior same-branch `tool_call` candidates existed emits `ambiguous_sequential_pairing` at `/payload`. - A `user_query` question with duplicate option labels among options that do not carry stable option ids emits `duplicate_option_labels` at the repeated option's `/payload/questions/<i>/options/<j>/label`. - `user_query_response.payload.for_id` SHOULD reference a `user_query` in the same session group. An unresolved reference emits `unknown_user_query_for_id` at `/payload/for_id`. - `session_end.payload.final_message_id`, when present, SHOULD reference an `id` that appears in the same file (the session header or a prior event). A dangling reference is a warning with code `unknown_final_message_id` at `/payload/final_message_id`.
- An event's `ts` SHOULD NOT be earlier than its parent event's `ts` inside the same parent chain. Equal timestamps are allowed; sibling branches may interleave in wall-clock time. A strictly earlier child timestamp emits `non_monotonic_event_ts` (warning) at `/ts`. -- Validators MAY report implementation-defined size budgets for `source.raw`; specific numbers are writer policy (§15.1). +- Validators MAY report implementation-defined size budgets for `source.raw`; specific numbers are writer policy ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)). - `source.raw` SHOULD NOT contain unredacted credentials. A string leaf matching a known credential pattern emits `source_raw_unredacted_secret` (warning) at the matching JSON pointer. - JSON integer numbers outside the IEEE-754 exact-integer range SHOULD be emitted as strings. Unsafe integer numbers emit `non_interoperable_number` (warning) at the offending JSON Pointer. - Privacy-sensitive tool arguments SHOULD NOT contain unredacted credentials. A string leaf in `mcp_call` / `web_fetch` `tool_call.payload.args.headers` or `shell_command` `tool_call.payload.args.command` matching a known credential pattern emits `tool_args_unredacted_secret` (warning) at the matching JSON pointer. - `envelope_sessions_manifest_drift` (warning): the envelope's `sessions` manifest length disagrees with the number of session groups, or a manifest entry disagrees with the matching session header's `id` or `agent.name`. -- Multi-segment consistency within one file (§9.5): +- Multi-segment consistency within one file ([§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions)): - `duplicate_segment_seq` (warning): two groups share the same `(session_uid, segment.seq)` pair, treating missing `segment` as `seq: 1`. - `out_of_order_segment_seq` (warning): groups with the same `session_uid` appear with descending `segment.seq` in file order. 
#### 18.4.3 Streaming-state rules -Streaming rules (§9.4) are evaluated against the *current* header `stream.state` at validation time — the validator reads the present value, not a history of transitions. Crash-recovery writers MUST finalize (`stream.state` to `"closed"` or remove `stream`) before appending terminal events; once the stream is no longer marked live, the rules below stop applying. +Streaming rules ([§9.4](./09-the-session-header.md#94-streaming-and-live-capture)) are evaluated against the *current* header `stream.state` at validation time — the validator reads the present value, not a history of transitions. Crash-recovery writers MUST finalize (`stream.state` to `"closed"` or remove `stream`) before appending terminal events; once the stream is no longer marked live, the rules below stop applying. 10. If the current `header.stream.state == "open"`: - **10a.** `content_hash` SHOULD be absent or `""`. A populated hex hash is a warning, since the canonical bytes are still in flux. diff --git a/spec/v0.1.0/19-formal-schema.md b/spec/v0.1.0/19-formal-schema.md index f2b4a19..fa4e8ec 100644 --- a/spec/v0.1.0/19-formal-schema.md +++ b/spec/v0.1.0/19-formal-schema.md @@ -2,7 +2,7 @@ The normative writer-strict JSON Schema lives in `schema.json` and is published at `https://agent-trail.dev/schema/v0.1.0.json`. -This spec intentionally does not duplicate the full schema inline. Implementations SHOULD validate each JSONL line against `schema.json`, then run the whole-file checks in §18.4. Reader-tolerant parsing, including unknown future event preservation, is separate from writer-strict schema validation. +This spec intentionally does not duplicate the full schema inline. Implementations SHOULD validate each JSONL line against `schema.json`, then run the whole-file checks in [§18.4](./18-validation.md#184-file-graph-checks). Reader-tolerant parsing, including unknown future event preservation, is separate from writer-strict schema validation. 
--- diff --git a/spec/v0.1.0/appendix-b-content-hash-worked-example.md b/spec/v0.1.0/appendix-b-content-hash-worked-example.md index 5ca73b3..3ca8d83 100644 --- a/spec/v0.1.0/appendix-b-content-hash-worked-example.md +++ b/spec/v0.1.0/appendix-b-content-hash-worked-example.md @@ -1,6 +1,6 @@ ## Appendix B — Content hash worked example -This example shows the §7.3 two-pass procedure for the +This example shows the [§7.3](./07-identity-artifacts-and-content-addressing.md#73-content-hash) two-pass procedure for the `hash-vectors/minimal-pending-roundtrip.trail.jsonl` conformance fixture. The remaining canonicalization and two-tier identity cases are published in the `hash-vectors/` fixture category. diff --git a/spec/v0.1.0/changelog.md b/spec/v0.1.0/changelog.md index 7576f0e..df16223 100644 --- a/spec/v0.1.0/changelog.md +++ b/spec/v0.1.0/changelog.md @@ -4,16 +4,16 @@ Initial public draft. v0.1.0 defines: -- JSONL file layout, session header, core event envelope, mandatory event types, optional events, the canonical tool taxonomy, vendor `meta` extensions (§8.3), tree semantics, layered validation, and artifact-level content addressing. +- JSONL file layout, session header, core event envelope, mandatory event types, optional events, the canonical tool taxonomy, vendor `meta` extensions ([§8.3](./08-the-trail-envelope.md#83-the-meta-extension-convention)), tree semantics, layered validation, and artifact-level content addressing. - Stable local source filenames (`spec.md`, `schema.json`) with immutable hosted release snapshots at `/spec/v0.1.0` and `/schema/v0.1.0.json`. -- The optional trail envelope record `type:"trail"` at line 1 (§8) with Tier 1 fields (`id`, `name`, `description`, `ts`, `producer`, `content_hash`) and Tier 2 fields (`tags`, `vcs`, `fork_from`, `redacted_from`, `sessions`, `meta`), and two-tier identity (§7.4): session-level `content_hash` excludes the envelope, file-level `content_hash` covers the whole file. 
+- The optional trail envelope record `type:"trail"` at line 1 ([§8](./08-the-trail-envelope.md#8-the-trail-envelope)) with Tier 1 fields (`id`, `name`, `description`, `ts`, `producer`, `content_hash`) and Tier 2 fields (`tags`, `vcs`, `fork_from`, `redacted_from`, `sessions`, `meta`), and two-tier identity ([§7.4](./07-identity-artifacts-and-content-addressing.md#74-two-tier-identity)): session-level `content_hash` excludes the envelope, file-level `content_hash` covers the whole file. - Session headers MAY carry base `name`, `description`, and `tags`; `session_metadata_update` events replay on top of those base values. `vcs.type` allows reserved systems or `x-<vendor>/<name>` extensions, and envelope `fork_from.trail_id` uses the standard id shape. -- Multi-segment session primitives (`session_uid`, `segment.seq`, `segment.prev_content_hash`) and reconciliation invariants (§9.5). -- The optional header `stream` field, the `session_end` event, and the recommended `system_event` heartbeat convention (§9.4, §10.3). +- Multi-segment session primitives (`session_uid`, `segment.seq`, `segment.prev_content_hash`) and reconciliation invariants ([§9.5](./09-the-session-header.md#95-session-segments-multi-segment-sessions)). +- The optional header `stream` field, the `session_end` event, and the recommended `system_event` heartbeat convention ([§9.4](./09-the-session-header.md#94-streaming-and-live-capture), [§10.3](./10-events.md#103-optional-event-types)). - Tool-surface fidelity for truncated tool-call args, string-replacement `file_edit`, branch-scoped pairing warnings, stable user-query option ids, stricter attachment identity, and tool-result meta key hygiene. -- The `source.raw.envelope_ref` inline-first / ref-subsequent envelope dedup convention (§10.7), the `{ elided: true, size_bytes: N }` elide marker for `source.raw` (§15.1), and the writer-side redaction requirement for credential patterns in `source.raw`.
-- Normative share-time redaction rules for local attachment URIs, unsafe `overflow_ref` values, unresolved `user_query_response` answers, and privacy-sensitive field handling (§16), plus the `tool_args_unredacted_secret` validator warning (§18.4). -- Envelope-level `payload.usage` on the first entry derived from a source envelope, including `agent_message`, `agent_thinking`, and `tool_call` (§10.2). +- The `source.raw.envelope_ref` inline-first / ref-subsequent envelope dedup convention ([§10.7](./10-events.md#107-source-envelope-referencing)), the `{ elided: true, size_bytes: N }` elide marker for `source.raw` ([§15.1](./15-truncation-overflow-and-raw-source-size.md#151-sourceraw-elision-and-redaction)), and the writer-side redaction requirement for credential patterns in `source.raw`. +- Normative share-time redaction rules for local attachment URIs, unsafe `overflow_ref` values, unresolved `user_query_response` answers, and privacy-sensitive field handling ([§16](./16-redaction.md#16-redaction)), plus the `tool_args_unredacted_secret` validator warning ([§18.4](./18-validation.md#184-file-graph-checks)). +- Envelope-level `payload.usage` on the first entry derived from a source envelope, including `agent_message`, `agent_thinking`, and `tool_call` ([§10.2](./10-events.md#102-mandatory-event-types)). - During the v0.1.0 draft cycle, planning snapshots moved from the legacy `tool_call.payload.tool:"task_plan"` shape to the canonical `task_plan_update` event. Final v0.1.0 writer-strict output MUST use `task_plan_update`; legacy `task_plan` tool calls are invalid. - During the v0.1.0 draft cycle, duplicate `system_event` kinds for `session_end` and `permission_mode_change` were removed, thinking levels became source-defined strings, `user_message.origin` was added, and related vocabulary clarifications landed. 
- During the v0.1.0 draft cycle, vendor extensions converged on one `x-/` grammar across `meta`, enum extensions, `system_event.kind`, `tool_result.payload.meta`, and custom `agent.name`.