From d7527ca6356bb2f2f7d0cc7a70333e67d59386ac Mon Sep 17 00:00:00 2001 From: Thormatt Date: Fri, 12 Jun 2026 12:52:24 -0400 Subject: [PATCH 1/6] =?UTF-8?q?feat(cli):=20orc=20report=20=E2=80=94=20ren?= =?UTF-8?q?der=20traces=20as=20HTML=20artifacts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A trace is only defensible if a reviewer can read it without installing orc. `orc report RUN_ID... [-o PATH] [--open]` renders one or more run traces into a single self-contained HTML file — CSS and JS inlined from packaged copies of site/trace.{css,js} so the artifact matches the public site's design and survives email, archival, and air-gapped review with zero external requests. Every trace-derived string is html.escape()d: evidence text is untrusted corpus content and must not become markup in the report. Sparse traces (failed runs, non-verify skills) render rather than crash, and unknown verdict labels fall back to the neutral "nf" pill. Wheel build verified to include the assets via the existing hatch packages config. Co-Authored-By: Claude Fable 5 --- src/orc/cli.py | 2 + src/orc/cli_commands/report.py | 51 ++ src/orc/rendering/__init__.py | 1 + src/orc/rendering/assets/__init__.py | 7 + src/orc/rendering/assets/trace.css | 1028 ++++++++++++++++++++++++++ src/orc/rendering/assets/trace.js | 162 ++++ src/orc/rendering/trace_html.py | 261 +++++++ tests/unit/test_report_cli.py | 156 ++++ tests/unit/test_trace_html.py | 179 +++++ 9 files changed, 1847 insertions(+) create mode 100644 src/orc/cli_commands/report.py create mode 100644 src/orc/rendering/__init__.py create mode 100644 src/orc/rendering/assets/__init__.py create mode 100644 src/orc/rendering/assets/trace.css create mode 100644 src/orc/rendering/assets/trace.js create mode 100644 src/orc/rendering/trace_html.py create mode 100644 tests/unit/test_report_cli.py create mode 100644 tests/unit/test_trace_html.py diff --git a/src/orc/cli.py b/src/orc/cli.py index e21657c..39ce9ad 100644 --- a/src/orc/cli.py +++ b/src/orc/cli.py @@ -8,6 +8,7 @@ from orc.cli_commands import mcp as mcp_cmd from orc.cli_commands import propose as propose_cmd from orc.cli_commands import replay as replay_cmd +from orc.cli_commands import report as report_cmd from orc.cli_commands import research as research_cmd from orc.cli_commands import search as search_cmd from orc.cli_commands import trace as trace_cmd @@ -29,6 +30,7 @@ def main() -> None: main.add_command(research_cmd.research_command) main.add_command(trace_cmd.trace_group) main.add_command(replay_cmd.replay_command) +main.add_command(report_cmd.report_command) main.add_command(approve_cmd.approve_group) main.add_command(propose_cmd.propose_command) main.add_command(execute_cmd.execute_command) diff --git a/src/orc/cli_commands/report.py b/src/orc/cli_commands/report.py new file mode 100644 index 0000000..4399f5f --- /dev/null +++ b/src/orc/cli_commands/report.py @@ -0,0 +1,51 @@ +"""`orc report RUN_ID...` — render traces as a self-contained HTML artifact.""" + +from __future__ import annotations + +from pathlib import Path + +import click + +from orc.errors import TraceNotFoundError +from orc.rendering.trace_html import build_report_html +from orc.storage.trace_store import load_trace + + +@click.command("report") +@click.argument("run_ids", nargs=-1, required=True) +@click.option( + "-o", + "--output", + "output_path", + type=click.Path(dir_okay=False, writable=True, path_type=Path), + default=None, + help="Write the report to PATH instead of stdout.", +) +@click.option( + "--open", + "open_after", + is_flag=True, + help="Open the written report in the default browser (requires -o).", +) +def report_command( + run_ids: tuple[str, ...], + output_path: Path | None, + open_after: bool, +) -> None: + """Render one or more run traces as a self-contained HTML report.""" + # Fail before rendering: there is no file to open when writing to stdout, + # and silently ignoring the flag would hide a typo in the invocation. + if open_after and output_path is None: + raise click.ClickException("--open requires -o/--output (stdout cannot be opened)") + try: + traces = [load_trace(run_id) for run_id in run_ids] + except TraceNotFoundError as exc: + raise click.ClickException(str(exc)) from exc + html_doc = build_report_html(traces) + if output_path is None: + click.echo(html_doc) + return + output_path.write_text(html_doc, encoding="utf-8") + click.echo(str(output_path)) + if open_after: + click.launch(str(output_path)) diff --git a/src/orc/rendering/__init__.py b/src/orc/rendering/__init__.py new file mode 100644 index 0000000..fdf49db --- /dev/null +++ b/src/orc/rendering/__init__.py @@ -0,0 +1 @@ +"""Rendering: turn persisted trace JSON into human-facing artifacts.""" diff --git a/src/orc/rendering/assets/__init__.py b/src/orc/rendering/assets/__init__.py new file mode 100644 index 0000000..9887cbd --- /dev/null +++ b/src/orc/rendering/assets/__init__.py @@ -0,0 +1,7 @@ +"""Static assets (trace.css, trace.js) inlined into generated reports. + +A real package (not bare data files) so importlib.resources can locate the +assets from a wheel, a zipapp, or an editable install alike. trace.css and +trace.js are verbatim copies of site/trace.css and site/trace.js — the report +artifact and the public site must render traces identically. +""" diff --git a/src/orc/rendering/assets/trace.css b/src/orc/rendering/assets/trace.css new file mode 100644 index 0000000..97821fc --- /dev/null +++ b/src/orc/rendering/assets/trace.css @@ -0,0 +1,1028 @@ +/* ━━━ orc trace · landing-as-audit ━━━━━━━━━━━━━━━━━━━━━━━━━━ */ +:root { + --paper: #f4f0e6; + --paper-2: #ebe5d5; + --paper-3: #ddd5bf; + --ink: #1a1714; + --ink-2: #4a443d; + --ink-3: #8a8479; + --ink-4: #b8ad94; + --rule: rgba(26,23,20,0.12); + --rule-2: rgba(26,23,20,0.25); + --rule-3: rgba(26,23,20,0.5); + + --accent: #7a1d2a; + --supported: #355c3a; + --partial: #8a5a14; + --contradicted: #7a1d2a; + --notfound: #5a544d; + --verifying: #8a8479; + + --mono: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, monospace; + + --col: 1320px; + --pad: 48px; + --main: 820px; + --led: 280px; + --gap: 56px; +} + +* { box-sizing: border-box; } +html { background: var(--paper); } +body { + margin: 0; + background: + repeating-linear-gradient( + to bottom, + transparent 0 31px, + rgba(26,23,20,0.018) 31px 32px + ), + var(--paper); + color: var(--ink); + font-family: var(--mono); + font-size: 13.5px; + line-height: 1.65; + font-feature-settings: "ss01","ss02","cv11","zero"; + -webkit-font-smoothing: antialiased; +} +::selection { background: var(--ink); color: var(--paper); } +a { color: inherit; } + +.wrap { max-width: var(--col); margin: 0 auto; padding: 0 var(--pad); } + +/* ─── topbar ───────────────────────────────────────────── */ +.topbar { + position: sticky; top: 0; z-index: 60; + background: rgba(244,240,230,0.95); + border-bottom: 1px solid var(--ink); + backdrop-filter: blur(6px) saturate(140%); +} +.topbar-inner { + display: flex; align-items: center; gap: 18px; + height: 56px; +} +.tb-mark { + font-weight: 600; + font-size: 16px; + letter-spacing: -0.01em; + color: var(--ink); + text-decoration: none; + flex-shrink: 0; +} +.tb-mark::after { content: "."; color: var(--accent); } +.tb-meta { + flex: 1; + font-size: 11.5px; + color: var(--ink-3); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} +.tb-meta b { color: var(--ink); font-weight: 500; } +.tb-meta .sep { color: var(--rule-2); margin: 0 6px; } +.tb-counters { + display: flex; gap: 6px; align-items: center; + flex-shrink: 0; +} +.tb-count { + display: inline-flex; align-items: center; gap: 5px; + padding: 3px 8px; + border: 1px solid currentColor; + font-size: 11px; + letter-spacing: 0.02em; + line-height: 1; +} +.tb-count::before { + content: ""; width: 5px; height: 5px; background: currentColor; border-radius: 50%; +} +.tb-count.ok { color: var(--supported); } +.tb-count.warn{ color: var(--partial); } +.tb-count.bad { color: var(--contradicted); } +.tb-count.nf { color: var(--notfound); display: none; } +.tb-actions { display: flex; gap: 8px; align-items: center; flex-shrink: 0; } +.tb-btn { + display: inline-flex; align-items: center; gap: 6px; + padding: 6px 10px; + border: 1px solid var(--ink); + text-decoration: none; + color: var(--ink); + background: transparent; + font: inherit; + font-size: 11.5px; + cursor: pointer; + text-transform: uppercase; + letter-spacing: 0.06em; +} +.tb-btn:hover { background: var(--ink); color: var(--paper); } +.tb-btn.primary { background: var(--ink); color: var(--paper); } +.tb-btn.primary:hover { background: var(--accent); border-color: var(--accent); } + +/* ─── layout grid ──────────────────────────────────────── */ +.grid { + display: grid; + grid-template-columns: minmax(0, 1fr) var(--led); + gap: var(--gap); + padding: 36px 0 80px; + align-items: start; +} +.main { min-width: 0; } +.ledger-wrap { + position: sticky; top: 76px; + align-self: start; + height: fit-content; + max-height: calc(100vh - 96px); + overflow-y: auto; + scrollbar-width: thin; + scrollbar-color: var(--rule-2) transparent; +} + +/* ─── trace header ─────────────────────────────────────── */ +.thead { + border-top: 2px solid var(--ink); + border-bottom: 1px solid var(--ink); + padding: 24px 0 28px; + margin-bottom: 36px; +} +.thead .cmd-line { + font-size: 14px; + color: var(--ink); + display: flex; flex-wrap: wrap; align-items: baseline; gap: 8px; + margin-bottom: 14px; +} +.thead .cmd-line .prompt { color: var(--ink-3); } +.thead .cmd-line .cmd { color: var(--ink); } +.thead .cmd-line .arg { color: var(--accent); } +.thead .cmd-line .rid { + margin-left: auto; + font-size: 11px; + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.06em; +} +.thead .cmd-line .rid b { color: var(--ink); font-weight: 500; } +.thead h1 { + font-size: 48px; + font-weight: 500; + letter-spacing: -0.035em; + line-height: 1.0; + margin: 18px 0 18px; + color: var(--ink); + font-feature-settings: "ss01","ss02","cv11"; + max-width: 22ch; + text-wrap: balance; +} +.thead h1 .em { color: var(--accent); } +.thead h1 .strike { + text-decoration: line-through; + text-decoration-color: var(--contradicted); + text-decoration-thickness: 2px; + color: var(--ink-3); +} +.thead .lede { + font-size: 14.5px; + color: var(--ink-2); + max-width: 70ch; + margin: 0 0 24px; +} +.thead .lede strong { color: var(--ink); font-weight: 500; } +.thead .lede em { font-style: normal; color: var(--accent); } + +.thead-meta { + display: grid; + grid-template-columns: max-content 1fr; + column-gap: 16px; + row-gap: 4px; + font-size: 12px; + padding-top: 18px; + border-top: 1px dashed var(--rule-2); +} +.thead-meta dt { + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.08em; + font-size: 10.5px; +} +.thead-meta dd { margin: 0; color: var(--ink); } +.thead-meta dd .dim { color: var(--ink-3); } + +/* summary tick row */ +.summary { + margin-top: 24px; + padding-top: 18px; + border-top: 1px dashed var(--rule-2); +} +.summary .label { + font-size: 10.5px; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--ink-3); + margin-bottom: 10px; +} +.ticks { + display: flex; gap: 3px; flex-wrap: wrap; +} +.tick { + width: 14px; height: 22px; + background: var(--paper-2); + border: 1px solid var(--rule); + cursor: pointer; + position: relative; + transition: transform .12s; +} +.tick:hover { transform: translateY(-2px); border-color: var(--ink); } +.tick.ok { background: var(--supported); border-color: var(--supported); } +.tick.warn { background: var(--partial); border-color: var(--partial); } +.tick.bad { background: var(--contradicted); border-color: var(--contradicted); } +.tick.nf { background: var(--notfound); border-color: var(--notfound); } + + +/* ─── claim block ──────────────────────────────────────── */ +.claim { + border-top: 1px solid var(--rule); + padding: 28px 0 28px; + scroll-margin-top: 76px; +} +.claim:first-of-type { border-top: 0; padding-top: 8px; } +.claim-head { + display: flex; align-items: center; gap: 14px; + margin-bottom: 16px; + flex-wrap: wrap; +} +.claim-id { + font-size: 11px; + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.1em; + font-weight: 500; +} +.claim-id::before { content: "─ "; color: var(--rule-3); } +.claim-rule { + flex: 1; + height: 0; + border-top: 1px solid var(--rule); +} +.verdict { + display: inline-flex; align-items: center; gap: 7px; + padding: 3px 9px 3px 7px; + font-size: 11px; + letter-spacing: 0.08em; + text-transform: uppercase; + border: 1px solid currentColor; + background: var(--paper); + font-weight: 500; + white-space: nowrap; +} +.verdict::before { + content: ""; + display: inline-block; + width: 6px; height: 6px; + background: currentColor; + border-radius: 50%; +} +.verdict.ok { color: var(--supported); } +.verdict.warn { color: var(--partial); } +.verdict.bad { color: var(--contradicted); } +.verdict.nf { color: var(--notfound); } +/* pending pills are hidden until JS resolves them; if JS fails they + stay invisible rather than showing "verifying" forever. */ +.verdict.pending { visibility: hidden; } + +.claim-title { + font-size: 22px; + font-weight: 500; + line-height: 1.25; + margin: 0 0 18px; + letter-spacing: -0.01em; + text-wrap: balance; + max-width: 38ch; + color: var(--ink); +} +.claim-title em { color: var(--accent); font-style: normal; } +.claim-title.quoted::before { content: "“"; color: var(--ink-3); margin-right: 2px; } +.claim-title.quoted::after { content: "”"; color: var(--ink-3); margin-left: 2px; } + +.claim-body { + font-size: 14px; + color: var(--ink-2); + max-width: 72ch; + margin: 0 0 18px; +} +.claim-body p { margin: 0 0 10px; } +.claim-body p:last-child { margin-bottom: 0; } +.claim-body strong { color: var(--ink); font-weight: 500; } +.claim-body em { color: var(--accent); font-style: normal; } +.claim-body code { background: var(--paper-2); padding: 1px 5px; border: 1px solid var(--rule); } + +/* reasoning + evidence */ +.reasoning { + display: grid; + grid-template-columns: 100px 1fr; + gap: 14px; + font-size: 12.5px; + color: var(--ink-2); + border-left: 2px solid var(--rule-2); + padding: 6px 0 6px 14px; + margin: 0 0 16px; + max-width: 72ch; +} +.reasoning .k { + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.08em; + font-size: 10.5px; + padding-top: 3px; +} +.reasoning .v { color: var(--ink-2); } +.reasoning .v em { color: var(--accent); font-style: normal; } + +.chunks { + display: flex; flex-direction: column; + gap: 8px; + margin: 0 0 0; +} +.chunk { + background: var(--paper-2); + border: 1px solid var(--rule); + border-left: 3px solid var(--supported); + padding: 10px 14px; + font-size: 12px; + color: var(--ink-2); +} +.chunk.warn { border-left-color: var(--partial); } +.chunk.bad { border-left-color: var(--contradicted); } +.chunk-head { + display: flex; justify-content: space-between; align-items: baseline; + font-size: 10.5px; + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.06em; + margin-bottom: 6px; + gap: 12px; + flex-wrap: wrap; +} +.chunk-head .cid { color: var(--ink); font-weight: 500; } +.chunk-head .src { color: var(--ink-2); } +.chunk-quote { + font-size: 12.5px; + color: var(--ink); + line-height: 1.55; + font-style: italic; +} +.chunk-quote::before { content: "“"; color: var(--ink-3); } +.chunk-quote::after { content: "”"; color: var(--ink-3); } + +/* sub-claim rows (for nested verdicts like stats, audiences, invariants) */ +.subclaim-list { + display: flex; flex-direction: column; + margin: 0 0 0; + border-top: 1px solid var(--rule); +} +.subclaim { + display: grid; + grid-template-columns: 80px 1fr auto; + gap: 18px; + align-items: baseline; + padding: 16px 0; + border-bottom: 1px solid var(--rule); +} +.subclaim .sid { + font-size: 11px; + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.08em; + padding-top: 2px; +} +.subclaim .sid b { color: var(--accent); font-weight: 500; } +.subclaim .sbody { color: var(--ink); } +.subclaim .sbody .stitle { + font-size: 15px; + font-weight: 500; + color: var(--ink); + margin-bottom: 4px; + letter-spacing: -0.005em; +} +.subclaim .sbody .sdesc { font-size: 12.5px; color: var(--ink-2); max-width: 68ch; } +.subclaim .sbody .sdesc code { background: var(--paper-2); padding: 1px 5px; border: 1px solid var(--rule); } +.subclaim .sbody .scite { + margin-top: 6px; + font-size: 10.5px; + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.06em; +} + +/* stat sub-claim variant: bigger number */ +.subclaim.stat { + grid-template-columns: 200px 1fr auto; +} +.subclaim.stat .sid .num { + display: block; + font-size: 44px; + color: var(--ink); + margin-top: 4px; + letter-spacing: -0.035em; + line-height: 1; + text-transform: none; + font-weight: 500; +} +.subclaim.stat .sid .num .u { font-size: 0.5em; color: var(--ink-3); } + +/* ─── CONTRADICTED block (the punch) ───────────────────── */ +.claim.contradicted { + background: linear-gradient(to bottom, rgba(122,29,42,0.03), transparent); + padding: 32px 0 36px; +} +.claim.contradicted .claim-title.original { + text-decoration: line-through; + text-decoration-color: var(--contradicted); + text-decoration-thickness: 2px; + color: var(--ink-3); + font-size: 18px; + margin-bottom: 12px; + font-weight: 400; +} +.contradicted-box { + border: 1px solid var(--contradicted); + background: rgba(122,29,42,0.05); + padding: 16px 18px; + margin: 0 0 18px; + font-size: 12.5px; + color: var(--ink-2); + max-width: 72ch; +} +.contradicted-box .label { + display: flex; align-items: center; gap: 8px; + font-size: 10.5px; + text-transform: uppercase; + letter-spacing: 0.1em; + color: var(--contradicted); + font-weight: 500; + margin-bottom: 10px; +} +.contradicted-box .label::before { + content: "✗"; display: inline-block; + width: 16px; height: 16px; + text-align: center; line-height: 16px; + background: var(--contradicted); color: var(--paper); + font-size: 11px; +} +.contradicted-box p { margin: 0 0 8px; } +.contradicted-box p:last-child { margin-bottom: 0; } +.contradicted-box em { color: var(--ink); font-style: normal; font-weight: 500; } + +.published-line { + display: grid; + grid-template-columns: 100px 1fr; + gap: 14px; + font-size: 14px; + margin: 0 0 16px; + max-width: 72ch; +} +.published-line .k { + color: var(--supported); + text-transform: uppercase; + letter-spacing: 0.08em; + font-size: 10.5px; + padding-top: 4px; + font-weight: 500; +} +.published-line .k::before { content: "✓ "; } +.published-line .v { + color: var(--ink); + font-size: 17px; + font-weight: 500; + line-height: 1.3; + letter-spacing: -0.005em; +} +.correction-meta { + font-size: 11px; + color: var(--ink-3); + margin-top: 8px; + display: flex; flex-wrap: wrap; gap: 14px; +} +.correction-meta b { color: var(--ink); font-weight: 500; } + +/* ─── exhibit (Sarah Chen) ─────────────────────────────── */ +.exhibit-attach { + margin-top: 16px; + border: 1px solid var(--ink); + background: var(--paper); +} +.exhibit-attach .ex-head { + display: flex; justify-content: space-between; align-items: baseline; + padding: 12px 16px; + background: var(--ink); + color: var(--paper); + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0.1em; +} +.exhibit-attach .ex-head .id { color: var(--ink-4); } +.exhibit-attach .ex-body { padding: 24px 24px 28px; } +.exhibit-attach .ex-title { + font-size: 22px; + font-weight: 500; + margin: 0 0 10px; + letter-spacing: -0.01em; + line-height: 1.2; + text-wrap: balance; + max-width: 38ch; +} +.exhibit-attach .ex-deck { + font-size: 13px; + color: var(--ink-2); + margin: 0 0 22px; + max-width: 68ch; +} +.exhibit-attach .ex-deck em { color: var(--accent); font-style: normal; } +.ex-meta { + display: grid; + grid-template-columns: repeat(4, 1fr); + border-top: 1px solid var(--rule); + border-left: 1px solid var(--rule); + margin-bottom: 24px; +} +.ex-meta .m { + padding: 14px 16px; + border-right: 1px solid var(--rule); + border-bottom: 1px solid var(--rule); +} +.ex-meta .m .v { + font-size: 22px; + font-weight: 500; + letter-spacing: -0.02em; + color: var(--ink); +} +.ex-meta .m .k { + font-size: 10.5px; + color: var(--ink-3); + text-transform: uppercase; + letter-spacing: 0.08em; + margin-top: 2px; +} + +/* scrolly inside exhibit */ +.ex-scrolly { + display: grid; + grid-template-columns: 1fr; + gap: 28px; +} +.ex-step { + border-left: 2px solid var(--rule-2); + padding: 0 0 4px 20px; + position: relative; + scroll-margin-top: 100px; +} +.ex-step::before { + content: ""; + position: absolute; + left: -7px; top: 4px; + width: 12px; height: 12px; + background: var(--paper); border: 2px solid var(--rule-2); + border-radius: 50%; +} +.ex-step.path-a::before { border-color: var(--contradicted); } +.ex-step.path-b::before { border-color: var(--supported); } +.ex-step.active::before { background: var(--accent); border-color: var(--accent); } +.ex-step .step-head { + display: flex; gap: 12px; align-items: baseline; + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--ink-3); + margin-bottom: 10px; + flex-wrap: wrap; +} +.ex-step .step-head .t { color: var(--accent); font-weight: 500; } +.ex-step .step-head .path { + padding: 1px 7px; + border: 1px solid currentColor; + margin-left: auto; +} +.ex-step .step-head .path.a { color: var(--contradicted); } +.ex-step .step-head .path.b { color: var(--supported); } +.ex-step h4 { + font-size: 17px; + font-weight: 500; + margin: 0 0 10px; + letter-spacing: -0.005em; + line-height: 1.3; + text-wrap: balance; + max-width: 36ch; +} +.ex-step p { + font-size: 13px; + color: var(--ink-2); + margin: 0 0 10px; + max-width: 68ch; +} +.ex-step p em { color: var(--accent); font-style: normal; } +.ex-term { + background: #1a1714; + color: #e9e3d4; + padding: 12px 14px; + font-size: 12px; + line-height: 1.6; + margin: 10px 0; + border: 1px solid #2c2620; + white-space: pre-wrap; +} +.ex-term .pr { color: #a89a78; } +.ex-term .cmd { color: #f6efdd; } +.ex-term .ok { color: #7bb88a; } +.ex-term .part { color: #d8a868; } +.ex-term .bad { color: #e58a8a; } +.ex-term .dim { color: #6c6557; } +.ex-pair { + display: grid; + grid-template-columns: 1fr 1fr; + margin-top: 10px; + border: 1px solid var(--rule); +} +.ex-pair .side { padding: 12px 14px; } +.ex-pair .side.a { + background: rgba(122,29,42,0.04); + border-right: 1px solid var(--rule); +} +.ex-pair .side.b { background: rgba(53,92,58,0.04); } +.ex-pair .side .label { + font-size: 10.5px; + text-transform: uppercase; + letter-spacing: 0.1em; + margin-bottom: 6px; +} +.ex-pair .side.a .label { color: var(--contradicted); } +.ex-pair .side.b .label { color: var(--supported); } +.ex-pair .side p { font-size: 12.5px; color: var(--ink-2); margin: 0; } + +/* ─── how-it-works terminal block ──────────────────────── */ +.term { + background: #1a1714; + color: #e9e3d4; + padding: 28px 32px; + border: 1px solid #2c2620; + font-size: 12.5px; + line-height: 1.75; + white-space: pre-wrap; + margin: 0; +} +.term .cmt { color: #8a7e64; } +.term .pr { color: #a89a78; } +.term .cmd { color: #f6efdd; } +.term .out { color: #cfc4ad; } +.term .ok { color: #7bb88a; } +.term .part { color: #d8a868; } +.term .bad { color: #e58a8a; } +.term .nf { color: #9c958a; } +.term .dim { color: #6c6557; } + +/* ─── pricing rendered as sub-claims ───────────────────── */ +.tiers { + display: grid; + grid-template-columns: repeat(3, 1fr); + border-top: 1px solid var(--rule); + border-left: 1px solid var(--rule); +} +.tier { + border-right: 1px solid var(--rule); + border-bottom: 1px solid var(--rule); + padding: 22px 20px; + display: flex; flex-direction: column; +} +.tier .tn { + font-size: 10.5px; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--accent); + margin-bottom: 10px; +} +.tier .tname { + font-size: 17px; + font-weight: 500; + letter-spacing: -0.005em; + margin: 0 0 4px; +} +.tier .tprice { + font-size: 28px; + font-weight: 500; + letter-spacing: -0.03em; + line-height: 1; + margin-bottom: 10px; +} +.tier .tprice .per { font-size: 12px; color: var(--ink-3); } +.tier .tdesc { font-size: 12px; color: var(--ink-2); margin: 0 0 14px; min-height: 60px; } +.tier ul { list-style: none; padding: 0; margin: 0 0 16px; font-size: 11.5px; color: var(--ink-2); } +.tier ul li { + padding: 5px 0 5px 16px; + position: relative; + border-bottom: 1px dashed var(--rule); + line-height: 1.45; +} +.tier ul li::before { content: "✓"; color: var(--supported); position: absolute; left: 0; top: 5px; } +.tier .tcta { margin-top: auto; } +.tcta .tb-btn { width: 100%; justify-content: center; } + +/* ─── trace footer ─────────────────────────────────────── */ +.tfoot { + border-top: 2px solid var(--ink); + margin-top: 56px; + padding: 28px 0 24px; +} +.tfoot .pass-line { + display: flex; align-items: center; gap: 12px; + font-size: 13px; + margin-bottom: 18px; + flex-wrap: wrap; +} +.tfoot .pass-line .pass { + display: inline-flex; align-items: center; gap: 7px; + padding: 4px 10px; + background: var(--supported); + color: var(--paper); + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0.1em; + font-weight: 500; +} +.tfoot .pass-line .pass::before { content: "✓"; font-size: 11px; } +.tfoot .pass-line .desc { color: var(--ink-2); } +.tfoot .pass-line .desc b { color: var(--ink); font-weight: 500; } + +.signature { + font-size: 11px; + color: var(--ink-3); + font-family: var(--mono); + margin: 0 0 24px; + display: grid; + grid-template-columns: max-content 1fr; + gap: 8px 18px; +} +.signature dt { color: var(--ink-3); text-transform: uppercase; letter-spacing: 0.06em; } +.signature dd { margin: 0; color: var(--ink); } + +.install-row { + background: #1a1714; + color: #e9e3d4; + padding: 18px 22px; + margin: 0 0 16px; + display: grid; + grid-template-columns: 1fr auto; + gap: 16px; align-items: center; +} +.install-row .cmds { font-size: 13px; line-height: 1.7; } +.install-row .cmds .pr { color: #a89a78; } +.install-row .cmds .cmd { color: #f6efdd; } +.install-row .cmds .cmt { color: #8a7e64; font-size: 11px; } +.install-row .tb-btn { background: var(--paper); color: var(--ink); border-color: var(--paper); } +.install-row .tb-btn:hover { background: var(--accent); color: var(--paper); border-color: var(--accent); } + +.colofon { + display: flex; justify-content: space-between; font-size: 11px; + color: var(--ink-3); + border-top: 1px dashed var(--rule-2); + padding-top: 16px; + flex-wrap: wrap; gap: 12px; +} +.colofon a { color: var(--ink-3); text-decoration: none; } +.colofon a + a { margin-left: 12px; } +.colofon a:hover { color: var(--ink); } + +/* ─── ledger (sticky right) ────────────────────────────── */ +.ledger { + border: 1px solid var(--ink); + background: var(--paper); + font-size: 11.5px; +} +.ledger-head { + padding: 10px 14px; + background: var(--ink); + color: var(--paper); + font-size: 10.5px; + text-transform: uppercase; + letter-spacing: 0.12em; + display: flex; justify-content: space-between; align-items: baseline; +} +.ledger-head .progress { color: var(--ink-4); } +.ledger ul { list-style: none; margin: 0; padding: 0; display: block; } +.ledger .led { + display: grid; + grid-template-columns: 16px 36px 1fr; + gap: 8px; + padding: 6px 12px; + align-items: baseline; + border-bottom: 1px solid var(--rule); + cursor: pointer; + transition: background .12s, color .12s; + text-decoration: none; + color: var(--ink-2); + font-size: 11.5px; +} +.ledger .led:last-child { border-bottom: 0; } +.ledger .led:hover { background: var(--paper-2); color: var(--ink); } +.ledger .led.active { background: var(--paper-3); color: var(--ink); } +.ledger .led .glyph { + font-size: 10px; + text-align: center; + line-height: 14px; + color: var(--ink-3); +} +.ledger .led.ok .glyph { color: var(--supported); font-weight: 600; } +.ledger .led.warn .glyph { color: var(--partial); font-weight: 600; } +.ledger .led.bad .glyph { color: var(--contradicted); font-weight: 600; } +.ledger .led.nf .glyph { color: var(--notfound); font-weight: 600; } + +.ledger .led .cid { + font-size: 10px; + color: var(--ink-3); + letter-spacing: 0.04em; +} +.ledger .led .lbl { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } + +.ledger-foot { + padding: 10px 14px; + font-size: 10px; + color: var(--ink-3); + border-top: 1px solid var(--rule); + text-transform: uppercase; + letter-spacing: 0.06em; +} + +/* ─── section-of-claims wrapper (for visual grouping) ──── */ +.section { + padding: 32px 0 12px; +} +.section + .section { border-top: 1px solid var(--ink); } +.section-title { + font-size: 10.5px; + letter-spacing: 0.14em; + text-transform: uppercase; + color: var(--ink-3); + margin: 0 0 8px; +} +.section-title b { color: var(--ink); font-weight: 500; letter-spacing: 0.1em; } + +/* misc */ +code { font-family: var(--mono); font-size: 0.95em; } +b, strong { font-weight: 500; } + +/* ─── pre-publication amendment ────────────────────────── */ +.amend { + border: 1px solid var(--rule-2); + background: var(--paper-2); + padding: 18px 22px 22px; + margin: 18px 0 22px; +} +.amend-row { + display: flex; + justify-content: space-between; + align-items: center; + font-size: 10.5px; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--ink-3); + padding: 14px 0 4px; + border-top: 1px solid var(--rule); +} +.amend-row:first-child { border-top: 0; padding-top: 2px; } +.amend-row .amend-label { color: var(--ink-3); } +.amend-tag { + padding: 2px 9px; + border: 1px solid currentColor; + font-size: 9.5px; + letter-spacing: 0.12em; + font-weight: 500; +} +.amend-tag.struck { color: var(--contradicted); } +.amend-tag.reason { color: var(--ink-2); } +.amend-tag.corrected { color: var(--supported); } +.amend p { + margin: 6px 0 0; + font-size: 13.5px; + line-height: 1.65; +} +.amend-original { + text-decoration: line-through; + text-decoration-color: var(--contradicted); + text-decoration-thickness: 1px; + color: var(--ink-3); + padding-left: 14px; + border-left: 2px solid var(--contradicted); +} +.amend-reason { + color: var(--ink-2); + padding-left: 14px; +} +.amend-corrected { + color: var(--ink); + padding-left: 14px; + border-left: 2px solid var(--supported); +} + +/* ─── install block (post-trace, in trace aesthetic) ───── */ +.install-block { + margin-top: 56px; + padding-top: 32px; + border-top: 2px solid var(--ink); +} +.install-block .section-title { margin-bottom: 18px; } +.install-title { + font-family: var(--mono); + font-size: 22px; + font-weight: 500; + letter-spacing: -0.005em; + margin: 12px 0 12px; + color: var(--ink); + line-height: 1.25; +} +.install-lede { + font-size: 13.5px; + color: var(--ink-2); + max-width: 64ch; + margin: 0 0 20px; + line-height: 1.7; +} +.install-term { + font-size: 13px; + line-height: 1.85; +} +.install-meta { + font-size: 11px; + color: var(--ink-3); + margin: 16px 0 0; + letter-spacing: 0.04em; +} + +/* ─── trace footer ─────────────────────────────────────── */ +.tfoot { + margin-top: 72px; + padding: 28px 0 12px; + border-top: 1px solid var(--ink); + font-size: 12px; + color: var(--ink-2); +} +.tfoot-rule { + text-align: center; + font-size: 10px; + letter-spacing: 0.32em; + color: var(--ink-4); + margin-bottom: 22px; + text-transform: uppercase; +} +.tfoot-grid { + display: grid; + grid-template-columns: 96px 1fr; + column-gap: 24px; + row-gap: 6px; + margin: 0 0 24px; +} +.tfoot-grid dt { + font-size: 10.5px; + letter-spacing: 0.12em; + text-transform: uppercase; + color: var(--ink-3); + padding-top: 1px; +} +.tfoot-grid dd { + margin: 0; + color: var(--ink); +} +.tfoot-grid dd .dim { color: var(--ink-3); } +.tfoot-grid code { + background: var(--paper-2); + padding: 1px 6px; + border: 1px solid var(--rule); +} +.tfoot-mark { + padding-top: 16px; + border-top: 1px solid var(--rule); + display: flex; + justify-content: space-between; + flex-wrap: wrap; + gap: 12px; + font-size: 11.5px; + color: var(--ink-3); +} +.tfoot-mark b { color: var(--ink); font-weight: 600; } +.tfoot-mark .tfoot-meta { font-style: italic; } + +/* ─── small screens ────────────────────────────────────── */ +@media (max-width: 1080px) { + :root { --pad: 24px; --gap: 32px; --led: 240px; } + .thead h1 { font-size: 36px; } + .subclaim, .subclaim.stat { grid-template-columns: 1fr; gap: 8px; } + .ex-meta { grid-template-columns: repeat(2, 1fr); } + .ex-pair { grid-template-columns: 1fr; } + .ex-pair .side.a { border-right: 0; border-bottom: 1px solid var(--rule); } + .tiers { grid-template-columns: 1fr; } + .install-row { grid-template-columns: 1fr; } +} +@media (max-width: 820px) { + .grid { grid-template-columns: 1fr; } + .ledger-wrap { position: static; max-height: none; order: 2; } + .topbar-inner { gap: 8px; } + .tb-meta { display: none; } + .tb-counters .tb-count { padding: 2px 5px; font-size: 10px; } + .tfoot-grid { grid-template-columns: 1fr; row-gap: 2px; } + .tfoot-grid dt { padding-top: 8px; } + .tfoot-mark { flex-direction: column; gap: 6px; } +} diff --git a/src/orc/rendering/assets/trace.js b/src/orc/rendering/assets/trace.js new file mode 100644 index 0000000..9f85548 --- /dev/null +++ b/src/orc/rendering/assets/trace.js @@ -0,0 +1,162 @@ +/* ━━━ orc trace ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + * The page is the artifact of an already-completed run. No + * "verifying…" theatre — every verdict is final on load. JS + * resolves pending pills, builds the summary tick row and the + * sticky ledger from the claim DOM. + * ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ */ + +const VERDICTS = { + ok: { label: 'SUPPORTED', cls: 'ok', glyph: '✓' }, + warn: { label: 'PARTIAL', cls: 'warn', glyph: '~' }, + bad: { label: 'CONTRADICTED', cls: 'bad', glyph: '✗' }, + nf: { label: 'NOT_FOUND', cls: 'nf', glyph: '·' }, +}; + +function verdictFor(el) { + // climb until we find a node with data-verdict (claim) or data-sub (subclaim) + let n = el; + while (n && n !== document.body) { + if (n.dataset.verdict) return { kind: n.dataset.verdict, score: n.dataset.score }; + if (n.dataset.sub) return { kind: n.dataset.sub, score: n.dataset.score }; + n = n.parentElement; + } + return { kind: 'ok', score: null }; +} + +/* ─── resolve every pending verdict pill ─────────────────── */ +function resolveVerdicts() { + document.querySelectorAll('.verdict.pending').forEach((pill) => { + const { kind, score } = verdictFor(pill); + const v = VERDICTS[kind] || VERDICTS.ok; + pill.classList.remove('pending'); + pill.classList.add(v.cls); + const scoreTxt = score ? ` · ${score}` : ''; + pill.innerHTML = `${v.label}${scoreTxt}`; + }); +} + +/* ─── claim metadata: title, short label, verdict, id ───── */ +function collectClaims() { + return [...document.querySelectorAll('.claim[data-claim]')].map((el) => { + const titleEl = el.querySelector('.claim-title'); + return { + id: el.id, + n: el.dataset.claim, + kind: el.dataset.verdict, + title: titleEl ? titleEl.textContent.trim().replace(/\s+/g, ' ') : '', + }; + }); +} + +/* ─── summary tick row ───────────────────────────────────── */ +function buildTicks(claims) { + const host = document.getElementById('ticks'); + if (!host) return; + host.innerHTML = ''; + claims.forEach((c) => { + const t = document.createElement('a'); + t.href = `#${c.id}`; + t.className = `tick ${c.kind}`; + t.title = `claim_${c.n} · ${VERDICTS[c.kind].label} · ${c.title}`; + host.appendChild(t); + }); +} + +/* ─── ledger ─────────────────────────────────────────────── */ +function buildLedger(claims) { + const list = document.getElementById('ledger-list'); + if (!list) return; + list.innerHTML = ''; + claims.forEach((c) => { + const li = document.createElement('a'); + li.href = `#${c.id}`; + li.className = `led ${c.kind}`; + const v = VERDICTS[c.kind]; + li.innerHTML = ` + ${v.glyph} + ${c.n} + ${c.title} + `; + list.appendChild(li); + }); + const total = document.getElementById('led-total'); + const prog = document.getElementById('led-progress'); + if (total) total.textContent = claims.length; + if (prog) prog.textContent = claims.length; +} + +/* ─── top-bar counters ───────────────────────────────────── */ +function updateCounters(claims) { + const counts = { ok: 0, warn: 0, bad: 0, nf: 0 }; + claims.forEach((c) => counts[c.kind] = (counts[c.kind] || 0) + 1); + const set = (id, n) => { + const el = document.getElementById(id); + if (el) el.textContent = n; + }; + set('cnt-ok', counts.ok); + set('cnt-warn', counts.warn); + set('cnt-bad', counts.bad); +} + +/* ─── active-claim tracking (ledger highlight) ──────────── */ +function trackActive(claims) { + if (!('IntersectionObserver' in window)) return; + const ledgerItems = [...document.querySelectorAll('#ledger-list a')]; + const byId = Object.fromEntries(ledgerItems.map((a) => [a.getAttribute('href').slice(1), a])); + const io = new IntersectionObserver((entries) => { + entries.forEach((e) => { + const item = byId[e.target.id]; + if (item) item.classList.toggle('active', e.isIntersecting); + }); + }, { rootMargin: '-25% 0px -55% 0px', threshold: 0 }); + document.querySelectorAll('.claim[data-claim]').forEach((c) => io.observe(c)); +} + +/* ─── replay button: copy the real command, don't fake a result ── + * Earlier versions of this button toggled to "✓ identical (31.4s)" + * after a setTimeout — i.e. faked a verification result in a tool + * whose point is that you don't fake verification results. The + * honest behavior: hand the visitor the command they would run + * themselves, since the page header already states the run_id. */ +function wireReplay() { + const btn = document.getElementById('replay-btn'); + if (!btn) return; + const RUN_ID = '01KR1NHTQR8B'; + const CMD = `orc replay ${RUN_ID} --workspace public`; + btn.title = `copy: ${CMD}`; + btn.addEventListener('click', async () => { + const orig = btn.textContent; + let ok = false; + try { + if (navigator.clipboard && navigator.clipboard.writeText) { + await navigator.clipboard.writeText(CMD); + ok = true; + } + } catch (_) { /* clipboard blocked — fall back to visible ack only */ } + btn.textContent = ok ? '✓ command copied' : '↻ ' + CMD; + btn.style.background = 'var(--ink)'; + btn.style.color = 'var(--paper)'; + setTimeout(() => { + btn.textContent = orig; + btn.style.background = ''; + btn.style.color = ''; + }, 1800); + }); +} + +/* ─── boot ───────────────────────────────────────────────── */ +function boot() { + resolveVerdicts(); + const claims = collectClaims(); + buildTicks(claims); + buildLedger(claims); + updateCounters(claims); + trackActive(claims); + wireReplay(); +} + +if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', boot); +} else { + boot(); +} diff --git a/src/orc/rendering/trace_html.py b/src/orc/rendering/trace_html.py new file mode 100644 index 0000000..201c435 --- /dev/null +++ b/src/orc/rendering/trace_html.py @@ -0,0 +1,261 @@ +"""Render trace JSON into a self-contained HTML report. + +The report is a single file with inlined CSS/JS so it can be attached to an +email, filed with a compliance package, or archived — no server, no CDN, no +external requests. Structure mirrors site/index.html (the designed mockup); +verdict pills, ticks, counters, and the sticky ledger are resolved +client-side by the same trace.js the site uses. +""" + +from __future__ import annotations + +import html +from importlib.resources import files +from typing import Any + +# Verdict labels come from the verify_claim output contract; CSS classes come +# from site/trace.css. Unknown labels fall back to "nf" — the neutral verdict — +# rather than failing the whole report over one odd trace. +_LABEL_TO_VERDICT = { + "supported": "ok", + "partial": "warn", + "contradicted": "bad", + "not_found": "nf", +} + + +def _asset(name: str) -> str: + """Read a packaged asset (works from wheels and editable installs alike).""" + return files("orc.rendering.assets").joinpath(name).read_text(encoding="utf-8") + + +def _esc(value: Any) -> str: + """Escape a trace-derived value for HTML. + + Evidence text is untrusted corpus content and claims are caller input — + everything read from a trace goes through here before hitting the page. + """ + return html.escape(str(value), quote=True) + + +def build_report_html(traces: list[dict[str, Any]]) -> str: + """Render one or more trace dicts as a self-contained HTML document.""" + articles = [ + _claim_article(trace, index=i) for i, trace in enumerate(traces, start=1) + ] + run_ids = [str(t.get("run_id", "?")) for t in traces] + title = _esc("orc — trace " + ", ".join(run_ids)) + return "\n".join( + [ + "", + '', + "", + '', + '', + f"{title}", + f"", + "", + "", + _topbar(traces, run_ids=run_ids), + '
', + '
', + _thead(traces, run_ids=run_ids), + *articles, + _footer(traces), + "
", + _ledger_aside(), + "
", + # At the bottom so the claim DOM exists when trace.js boots and + # builds the ticks, counters, and ledger from it. + f"", + "", + "", + ] + ) + + +def _uniq(values: list[str]) -> str: + """Join distinct non-empty values, preserving order — multi-trace reports + can span workspaces or models and the header must not pretend otherwise.""" + seen = [v for i, v in enumerate(values) if v and v not in values[:i]] + return ", ".join(seen) if seen else "?" + + +def _topbar(traces: list[dict[str, Any]], *, run_ids: list[str]) -> str: + workspaces = _uniq([str(t.get("workspace", "")) for t in traces]) + models = _uniq([str(t.get("model") or "") for t in traces]) + corpora = _uniq([str(t.get("corpus_version", "")) for t in traces]) + sep = '·' + meta = sep.join( + [ + f"trace {_esc(', '.join(run_ids))}", + f"workspace={_esc(workspaces)}", + f"model={_esc(models)}", + f"corpus v{_esc(corpora)}", + ] + ) + return "\n".join( + [ + '
', + '
', + 'orc', + f'
{meta}
', + '
', + '0 supported', + '0 partial', + '0 contradicted', + "
", + "
", + "
", + ] + ) + + +def _thead(traces: list[dict[str, Any]], *, run_ids: list[str]) -> str: + first = traces[0] if traces else {} + started = first.get("started_at") or "?" + ended = first.get("ended_at") or "?" + cmd_args = " ".join(f'{_esc(rid)}' for rid in run_ids) + return "\n".join( + [ + '
', + '
', + '$', + f'orc report {cmd_args}', + "
", + '
', + f"
runs
{_esc(', '.join(run_ids))}
", + f"
started
{_esc(started)} " + f'· ended {_esc(ended)}
', + "
", + '
', + '
claim-by-claim summary
', + '
', + "
", + "
", + ] + ) + + +def _ledger_aside() -> str: + # Empty containers by design: trace.js builds the ledger rows from the + # .claim DOM, exactly as the public site does. + return "\n".join( + [ + '", + ] + ) + + +def _footer(traces: list[dict[str, Any]]) -> str: + calls = [call for t in traces for call in (t.get("llm_calls") or [])] + total_in = sum(int(c.get("input_tokens") or 0) for c in calls) + total_out = sum(int(c.get("output_tokens") or 0) for c in calls) + rows = [ + "
tokens
" + f"
{total_in:,} in · {total_out:,} out " + f'· across {len(calls)} llm call(s)
' + ] + for trace in traces: + replay_of = (trace.get("inputs") or {}).get("_replay_of") + if replay_of: + rows.append( + "
lineage
" + f"
run {_esc(trace.get('run_id', '?'))} is a replay of " + f"{_esc(replay_of)}
" + ) + return "\n".join( + [ + '
', + '
━━━ end of trace ━━━
', + '
', + *rows, + "
", + '
', + "orc. · the verification runtime", + '— generated by orc report —', + "
", + "
", + ] + ) + + +def _claim_article(trace: dict[str, Any], *, index: int) -> str: + output = trace.get("output") or {} + verdict = _LABEL_TO_VERDICT.get(output.get("label"), "nf") + confidence = output.get("confidence") + score = f' data-score="{float(confidence):.2f}"' if confidence is not None else "" + claim = output.get("claim") or trace.get("inputs", {}).get("claim") or "(no claim recorded)" + n = f"{index:02d}" + run_id = trace.get("run_id", "?") + return "\n".join( + [ + f'
', + '
', + f'claim_{n} · run {_esc(run_id)}', + '', + 'verifying', + "
", + f'

{_esc(claim)}

', + *_reasoning_block(output), + *_chunks_block(output), + "
", + ] + ) + + +def _reasoning_block(output: dict[str, Any]) -> list[str]: + lines: list[str] = [] + reasoning = output.get("reasoning") + if reasoning: + lines.append( + '
reasoning' + f'{_esc(reasoning)}
' + ) + missing = output.get("missing_information") + if missing: + lines.append( + '
missing' + f'{_esc(missing)}
' + ) + return lines + + +def _chunks_block(output: dict[str, Any]) -> list[str]: + supporting = output.get("supporting_chunks") or [] + contradicting = output.get("contradicting_chunks") or [] + if not supporting and not contradicting: + return [] + lines = ['
'] + lines.extend(_chunk_div(c, role="supporting") for c in supporting) + lines.extend(_chunk_div(c, role="contradicting") for c in contradicting) + lines.append("
") + return lines + + +def _chunk_div(chunk: dict[str, Any], *, role: str) -> str: + # `.chunk.bad` carries the contradicted border color in trace.css. + css = "chunk bad" if role == "contradicting" else "chunk" + source = chunk.get("evidence_source_path") or chunk.get("evidence_title") or "" + return "\n".join( + [ + f'
', + '
', + f'{_esc(chunk.get("chunk_id", "?"))}', + f'{_esc(source)}', + "
", + f'
{_esc(chunk.get("text", ""))}
', + "
", + ] + ) diff --git a/tests/unit/test_report_cli.py b/tests/unit/test_report_cli.py new file mode 100644 index 0000000..4082c96 --- /dev/null +++ b/tests/unit/test_report_cli.py @@ -0,0 +1,156 @@ +"""`orc report` CLI tests. + +Most tests write trace JSON straight into the workspace traces dir (the +cheapest fixture that load_trace can find); one end-to-end test drives the +real verify pipeline the same way test_trace_cli.py does. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import pytest +from click.testing import CliRunner + +from orc.cli import main +from orc.ingest.pipeline import ingest as do_ingest +from orc.llm import client as client_module +from orc.runs import open_run +from orc.storage import workspace as ws_module +from orc.storage.trace_store import write_trace_json +from tests._fake_llm import FakeAnthropic, make_verdict_response + + +def _trace_dict(run_id: str, *, claim: str = "Skills API shipped in 2025.") -> dict[str, Any]: + return { + "schema_version": 2, + "run_id": run_id, + "directive": "research", + "skill": "verify_claim", + "workspace": "demo", + "corpus_version": 1, + "started_at": "2026-06-01T08:00:00Z", + "ended_at": "2026-06-01T08:00:31Z", + "status": "ok", + "model": "claude-sonnet-4-6", + "inputs": {"claim": claim}, + "effective_kwargs": {"k": 6}, + "events": [], + "retrieval": {"method": "bm25", "candidates_considered": 4, "returned": []}, + "llm_calls": [ + { + "model": "claude-sonnet-4-6", + "input_tokens": 500, + "output_tokens": 80, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + "elapsed_ms": 700, + } + ], + "output": { + "claim": claim, + "label": "supported", + "confidence": 0.9, + "reasoning": "Stated verbatim in the corpus.", + "supporting_chunks": [], + "contradicting_chunks": [], + "missing_information": None, + }, + "error_message": None, + } + + +def _seed_trace(run_id: str) -> None: + payload = _trace_dict(run_id) + write_trace_json("demo", run_id, payload["started_at"], payload) + + +def test_report_writes_html_to_stdout_by_default(orc_home: Path) -> None: + _seed_trace("01STDOUTRUN") + + result = CliRunner().invoke(main, ["report", "01STDOUTRUN"]) + + assert result.exit_code == 0, result.output + assert result.output.startswith("") + assert "01STDOUTRUN" in result.output + + +def test_report_o_writes_file_and_echoes_path(orc_home: Path, tmp_path: Path) -> None: + _seed_trace("01FILEDRUN") + out = tmp_path / "report.html" + + result = CliRunner().invoke(main, ["report", "01FILEDRUN", "-o", str(out)]) + + assert result.exit_code == 0, result.output + assert out.exists() + assert "01FILEDRUN" in out.read_text() + assert str(out) in result.output + assert "" not in result.output + + +def test_report_multiple_run_ids_renders_multi_claim_report(orc_home: Path) -> None: + _seed_trace("01MULTIAAA") + _seed_trace("01MULTIBBB") + + result = CliRunner().invoke(main, ["report", "01MULTIAAA", "01MULTIBBB"]) + + assert result.exit_code == 0, result.output + assert "01MULTIAAA" in result.output + assert "01MULTIBBB" in result.output + assert result.output.count('
None: + _seed_trace("01KNOWNRUN") + + result = CliRunner().invoke(main, ["report", "01NOSUCHRUN"]) + + assert result.exit_code != 0 + assert result.exception is None or isinstance(result.exception, SystemExit) + assert "Error" in result.output + assert "01NOSUCHRUN" in result.output + + +def test_report_open_without_output_errors(orc_home: Path) -> None: + _seed_trace("01OPENRUN") + + result = CliRunner().invoke(main, ["report", "01OPENRUN", "--open"]) + + assert result.exit_code != 0 + assert "--open requires -o" in result.output + + +def test_report_cli_end_to_end( + orc_home: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + # Same seeding approach as test_trace_cli.py: a real workspace, a real + # ingest, and a verify run against a fake LLM — the report renders a + # trace produced by the actual pipeline, not a hand-built dict. + from orc import directives + + ws = ws_module.create("demo") + corpus = tmp_path / "c" + corpus.mkdir() + (corpus / "a.md").write_text("# Doc A\n\nSkills API October 2025.\n") + do_ingest(ws, str(corpus)) + + # "supported" with no cited chunk ids would be structurally downgraded by + # verify_claim; "not_found" is the honest verdict a chunkless fake can return. + fake = FakeAnthropic(responses=[make_verdict_response(label="not_found", confidence=0.5)]) + monkeypatch.setattr(client_module, "_client", fake) + monkeypatch.setattr(client_module, "_factory", None) + + skill = directives.get("research").skills["verify_claim"] + with open_run(ws, directive="research", skill="verify_claim", inputs={"claim": "x"}) as run: + result_out = skill.run(workspace=ws, run=run, claim="skills api") + run.close(output=result_out) + + out = tmp_path / "e2e.html" + result = CliRunner().invoke(main, ["report", run.run_id, "-o", str(out)]) + + assert result.exit_code == 0, result.output + html_doc = out.read_text() + assert run.run_id in html_doc + assert 'data-verdict="nf"' in html_doc + assert "", + # Overrides on top of the verbatim mockup asset: real traces carry + # long unbreakable tokens (URLs, DOIs, file paths, run ids) the + # mockup never had — without word-breaking, the centered .grid's + # min-content width exceeds the viewport and clips off the LEFT + # edge, unreachable by scrolling. + "", "", "", _topbar(traces, run_ids=run_ids), @@ -81,6 +91,13 @@ def _uniq(values: list[str]) -> str: return ", ".join(seen) if seen else "?" +def _run_label(run_ids: list[str]) -> str: + """Topbar-sized run reference: one id verbatim, a count beyond that.""" + if len(run_ids) == 1: + return f"trace {_esc(run_ids[0])}" + return f"{len(run_ids)} runs {_esc(run_ids[0])} +{len(run_ids) - 1}" + + def _topbar(traces: list[dict[str, Any]], *, run_ids: list[str]) -> str: workspaces = _uniq([str(t.get("workspace", "")) for t in traces]) models = _uniq([str(t.get("model") or "") for t in traces]) @@ -88,7 +105,7 @@ def _topbar(traces: list[dict[str, Any]], *, run_ids: list[str]) -> str: sep = '·' meta = sep.join( [ - f"trace {_esc(', '.join(run_ids))}", + f"{_run_label(run_ids)}", f"workspace={_esc(workspaces)}", f"model={_esc(models)}", f"corpus v{_esc(corpora)}", diff --git a/tests/unit/test_trace_html.py b/tests/unit/test_trace_html.py index 91ad377..9e22b07 100644 --- a/tests/unit/test_trace_html.py +++ b/tests/unit/test_trace_html.py @@ -177,3 +177,18 @@ def test_inline_css_and_js_are_embedded() -> None: assert "resolveVerdicts" in html_doc assert 'href="trace.css"' not in html_doc assert 'src="trace.js"' not in html_doc + + +def test_report_handles_unbreakable_tokens_and_many_runs() -> None: + """Real traces carry long unbreakable tokens (URLs, DOIs, file paths) the + mockup never had; without word-breaking overrides the centered grid + overflows and clips off the LEFT viewport edge. And a 13-run report must + not dump every run id into the topbar.""" + traces = [make_trace(run_id=f"01KTYCD5EZSNV3APT9DZA9M7Y{i}") for i in range(5)] + html = build_report_html(traces) + # word-break overrides present after the verbatim asset css + assert "overflow-wrap" in html + # topbar summarizes instead of listing all five ids + head = html[: html.index(" Date: Fri, 12 Jun 2026 13:21:43 -0400 Subject: [PATCH 5/6] fix(report): close DOM-XSS in the bundled trace.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build_report_html escapes every trace field into the HTML, but trace.js read the claim title back via .textContent (which decodes those entities) and re-injected it through innerHTML when building the ledger and the verdict pill — re-opening exactly the injection the server-side escaping closed. A claim of `` executed arbitrary JS when the report was opened; since reports are meant to be emailed/filed as trustworthy compliance artifacts and claim text is attacker-influenceable (verified web/corpus content), this was a real shipping defect. The ledger row and pill are now built with createElement + textContent only. A test pins the contract: no innerHTML in trace.js may carry trace text (bare container clears excepted). Verified end-to-end in a real browser — the payload now renders as inert text. Co-Authored-By: Claude Fable 5 --- src/orc/rendering/assets/trace.js | 19 +++++++++++++------ tests/unit/test_trace_html.py | 13 +++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/orc/rendering/assets/trace.js b/src/orc/rendering/assets/trace.js index 9f85548..788a65d 100644 --- a/src/orc/rendering/assets/trace.js +++ b/src/orc/rendering/assets/trace.js @@ -31,7 +31,10 @@ function resolveVerdicts() { pill.classList.remove('pending'); pill.classList.add(v.cls); const scoreTxt = score ? ` · ${score}` : ''; - pill.innerHTML = `${v.label}${scoreTxt}`; + const vt = document.createElement('span'); + vt.className = 'vt'; + vt.textContent = `${v.label}${scoreTxt}`; + pill.replaceChildren(vt); }); } @@ -72,11 +75,15 @@ function buildLedger(claims) { li.href = `#${c.id}`; li.className = `led ${c.kind}`; const v = VERDICTS[c.kind]; - li.innerHTML = ` - ${v.glyph} - ${c.n} - ${c.title} - `; + // c.title came from .textContent (entities already decoded) — injecting + // it through innerHTML would re-open the XSS the server-side escaping + // closed. Build the row with createElement/textContent only. + for (const [cls, text] of [["glyph", v.glyph], ["cid", c.n], ["lbl", c.title]]) { + const span = document.createElement('span'); + span.className = cls; + span.textContent = text; + li.appendChild(span); + } list.appendChild(li); }); const total = document.getElementById('led-total'); diff --git a/tests/unit/test_trace_html.py b/tests/unit/test_trace_html.py index 9e22b07..91c0882 100644 --- a/tests/unit/test_trace_html.py +++ b/tests/unit/test_trace_html.py @@ -192,3 +192,16 @@ def test_report_handles_unbreakable_tokens_and_many_runs() -> None: head = html[: html.index(" None: + """trace.js reads claim titles back via .textContent (entities decoded); + re-injecting them through innerHTML re-opens the XSS that server-side + escaping closed — demonstrated live before this fix. The only allowed + innerHTML uses are bare container clears.""" + from importlib.resources import files + + js = files("orc.rendering.assets").joinpath("trace.js").read_text(encoding="utf-8") + for line in js.splitlines(): + if "innerHTML" in line and not line.strip().startswith("//"): + assert line.strip().endswith("innerHTML = '';"), f"unsafe innerHTML: {line.strip()}" From 1d38a182a494ed614c88d0c64a5fd34afd3fae82 Mon Sep 17 00:00:00 2001 From: Thormatt Date: Fri, 12 Jun 2026 13:23:58 -0400 Subject: [PATCH 6/6] docs: correct release status and stale roadmap entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CHANGELOG claimed "[0.2.0] First PyPI release" but orc-ai was never published, tagged, or released — a falsifiable release claim in a project whose pitch is "every claim is traceable." Mark 0.2.0 unreleased and describe the publish trigger accurately. Add the shipped-but-unreleased wave-3 work (hybrid retrieval, orc propose, orc report) to an Added section instead of leaving hybrid retrieval under Planned in the same tree that implements it. Refresh the README roadmap (hybrid retrieval is shipped opt-in) and roadmap.md's code-state line (v0.1.4 -> v0.2.0). Co-Authored-By: Claude Fable 5 --- CHANGELOG.md | 22 +++++++++++++++++----- README.md | 4 ++-- docs/business/roadmap.md | 2 +- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a0fa97..da15434 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,20 +7,32 @@ Version numbers follow [SemVer](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added (not yet released) + +- **Hybrid retrieval** — opt-in BM25 + dense-vector retrieval fused with + Reciprocal Rank Fusion. Local `sentence-transformers` embedder by default + (no API key), pluggable `Embedder` protocol. `orc workspace create + --embeddings`, `orc workspace embed` backfill. BM25 stays the default. +- **`orc propose`** — stage an allow-listed effect for human approval from the + CLI (the approval queue's producer surface); `orc approve list --json`. +- **`orc report ...`** — render trace(s) into a self-contained HTML + artifact reusing the trace design language. + ### Planned - `gads` directive (Google Ads agentic analysis: lens-based decomposition, read-only MCP integration, evidence-bound recommendation verification). - `orc eval consistency|perturb|retrieval|regression` reliability commands. -- Voyage-AI or local-`sentence-transformers` embeddings + hybrid retrieval (RRF over BM25 + vector). +- Voyage-AI / OpenAI embedding backends behind the existing `Embedder` protocol. - Hosted runtime (scheduled triggers, web dashboard, team workspaces). - Decomposition + arithmetic combined for DROP-shaped multi-step claims. -## [0.2.0] — 2026-06-11 +## [0.2.0] — unreleased -First PyPI release. The distribution is named **`orc-ai`** — `orc` is taken on -PyPI by an unrelated project — but the import package (`import orc`) and the -CLI command (`orc`) are unchanged. +Packaged for PyPI as **`orc-ai`** (`orc` is taken by an unrelated project); +the import package (`import orc`) and CLI command (`orc`) are unchanged. The +release workflow publishes on a `v0.2.0` tag once the trusted publisher is +configured — not yet tagged or published. ### Added diff --git a/README.md b/README.md index 41188f6..c6899cb 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ git clone https://github.com/Thormatt/orc.git cd orc uv sync --extra dev -uv run pytest # 260+ tests, <5s +uv run pytest # 360+ tests, <5s uv run ruff check src tests uv run orc --version ``` @@ -185,8 +185,8 @@ Live LLM tests are gated behind `ORC_TEST_ALLOW_LIVE_LLM=1` and require a real A ## Roadmap -- Embedding-based retrieval (hybrid BM25 + vector via `sqlite-vec`) - OCR for scanned/image-only PDFs +- Voyage/OpenAI embedding backends (the `Embedder` protocol is pluggable; local `sentence-transformers` hybrid retrieval shipped as opt-in) - Long-running directives (scheduled triggers, cloud execution) - `marketing` directive (assisted-only at first, autonomous behind approval gates later) - `legal` / `gads` / `code-review` directives — same runtime, new skill packages diff --git a/docs/business/roadmap.md b/docs/business/roadmap.md index 9f07a84..b2f839e 100644 --- a/docs/business/roadmap.md +++ b/docs/business/roadmap.md @@ -7,7 +7,7 @@ validated against real customer demand. Stage 0 is "land 3 pilots and learn what to charge for"; everything past Stage 1 will be revised based on what those pilots teach us. -Last updated: 2026-05-19. Code state: v0.1.4 (F1 = 0.864 on a stratified +Last updated: 2026-05-19. Code state: v0.2.0 — hybrid retrieval, PDF ingest, propose/report CLIs shipped (unreleased). Benchmark F1 = 0.864 on a stratified 504-item HaluBench subsample — competitive with Lynx-70B's published home-court 0.85, not a same-set head-to-head; see [competitive.md](../positioning/competitive.md) for caveats).