diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b77a6713..06ae0ace 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,14 @@ jobs: - name: Install @rheo packages uses: ./.github/actions/install-rheo-packages + - name: Clone rheo-tests as sibling + shell: bash + run: | + git clone --depth 1 https://github.com/freecomputinglab/rheo-tests.git ../rheo-tests + - name: Run tests run: cargo test --all-targets --all-features + working-directory: ../rheo-tests env: + RHEO_MANIFEST: ${{ github.workspace }}/Cargo.toml TYPST_IGNORE_SYSTEM_FONTS: "1" diff --git a/.github/workflows/compat.yml b/.github/workflows/compat.yml index 820fd2bf..00c744b8 100644 --- a/.github/workflows/compat.yml +++ b/.github/workflows/compat.yml @@ -26,8 +26,15 @@ jobs: - name: Install @rheo packages uses: ./.github/actions/install-rheo-packages + - name: Clone rheo-tests as sibling + shell: bash + run: | + git clone --depth 1 https://github.com/freecomputinglab/rheo-tests.git ../rheo-tests + - name: Run compatibility tests run: cargo test --test compat + working-directory: ../rheo-tests env: RUN_COMPAT_TESTS: "1" TYPST_IGNORE_SYSTEM_FONTS: "1" + RHEO_MANIFEST: ${{ github.workspace }}/Cargo.toml diff --git a/CLAUDE.md b/CLAUDE.md index 1064bc5d..1197a590 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,10 +10,11 @@ - `crates/html/` — HTML plugin (dev server, CSS/JS injection) - `crates/pdf/` — PDF plugin - `crates/epub/` — EPUB plugin -- `crates/tests/` — Integration tests and harness - `src/typ/rheo.typ` — Core Typst template (auto-injected) - `build/` — Output dir (gitignored): `pdf/`, `html/`, `epub/` +Integration tests and examples live in [freecomputinglab/rheo-tests](https://github.com/freecomputinglab/rheo-tests), cloned side-by-side at `../rheo-tests` for sibling path dependencies. + ## Development Commands ```bash @@ -26,7 +27,9 @@ cargo run -- clean RUST_LOG=rheo=trace cargo run -- compile ... 
# debug logging # Tests -cargo test # run all tests +cargo test # run unit tests only +# Integration tests run from ../rheo-tests with: +RHEO_MANIFEST=../rheo/Cargo.toml cargo test --test harness See [TESTING.md](TESTING.md) for more test commands and options. cargo fmt && cargo clippy -- -D warnings ``` @@ -111,6 +114,8 @@ Feed variables: **NEVER run `jj git push` (or any push) — the user always pushes themselves.** Prepare commits and bookmarks, then stop and let the user push. +**NEVER run `git` commands, not even read-only ones** (`git log`, `git show`, `git status`, `git diff`). Always use the jj equivalents (`jj log`, `jj show`, `jj status`, `jj diff`, `jj file show`). This applies in sibling repos too (e.g. `../rheo-tests`). + ```bash jj status / jj diff / jj log / jj show jj commit -m "message" / jj describe -m "message" diff --git a/Cargo.lock b/Cargo.lock index d041c78b..fbeb9e1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,17 +8,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" -[[package]] -name = "aes" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" -dependencies = [ - "cfg-if", - "cipher", - "cpufeatures 0.2.17", -] - [[package]] name = "aho-corasick" version = "1.1.4" @@ -275,33 +264,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "block-buffer" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" -dependencies = [ - "hybrid-array", -] - -[[package]] -name = 
"block-padding" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" -dependencies = [ - "generic-array", -] - [[package]] name = "bstr" version = "1.12.1" @@ -357,15 +319,6 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" -[[package]] -name = "cbc" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" -dependencies = [ - "cipher", -] - [[package]] name = "cc" version = "1.2.63" @@ -382,17 +335,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "chacha20" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" -dependencies = [ - "cfg-if", - "cpufeatures 0.3.0", - "rand_core 0.10.1", -] - [[package]] name = "chinese-number" version = "0.7.8" @@ -452,16 +394,6 @@ dependencies = [ "half", ] -[[package]] -name = "cipher" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" -dependencies = [ - "crypto-common 0.1.7", - "inout", -] - [[package]] name = "citationberg" version = "0.6.1" @@ -584,12 +516,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "const-oid" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" - [[package]] name = "core-foundation" version = "0.10.1" @@ -615,24 +541,6 @@ dependencies = [ "libm", ] -[[package]] -name = "cpufeatures" -version = "0.2.17" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - -[[package]] -name = "cpufeatures" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" -dependencies = [ - "libc", -] - [[package]] name = "crc32fast" version = "1.5.0" @@ -673,25 +581,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" -[[package]] -name = "crypto-common" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "crypto-common" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" -dependencies = [ - "hybrid-array", -] - [[package]] name = "csv" version = "1.4.0" @@ -794,27 +683,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer 0.10.4", - "crypto-common 0.1.7", -] - -[[package]] -name = "digest" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" -dependencies = [ - "block-buffer 0.12.0", - "const-oid", - "crypto-common 0.2.2", -] - [[package]] name = "diligent-date-parser" version = "0.1.5" @@ -856,15 +724,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "ecb" -version = "0.1.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7" -dependencies = [ - "cipher", -] - [[package]] name = "ecow" version = "0.2.6" @@ -1155,16 +1014,6 @@ dependencies = [ "slab", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.17" @@ -1185,7 +1034,6 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "rand_core 0.10.1", "wasip2", "wasip3", ] @@ -1440,15 +1288,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "hybrid-array" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" -dependencies = [ - "typenum", -] - [[package]] name = "hyper" version = "1.10.1" @@ -1855,16 +1694,6 @@ dependencies = [ "libc", ] -[[package]] -name = "inout" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" -dependencies = [ - "block-padding", - "generic-array", -] - [[package]] name = "iref" version = "4.0.0" @@ -1900,47 +1729,6 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" -[[package]] -name = "jiff" -version = "0.2.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4603d3033e49e2b0e31229fcab20a5d40089c607d975cd9c80551dc69eed9102" -dependencies = [ - "jiff-static", - "jiff-tzdb-platform", - "log", - "portable-atomic", - "portable-atomic-util", - 
"serde_core", - "windows-link", -] - -[[package]] -name = "jiff-static" -version = "0.2.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "782d32378dddf207193ac91cefb848ad41abb58195c95168e1291227a0832b47" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "jiff-tzdb" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" - -[[package]] -name = "jiff-tzdb-platform" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" -dependencies = [ - "jiff-tzdb", -] - [[package]] name = "jni" version = "0.22.4" @@ -2162,7 +1950,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "636860251af8963cc40f6b4baadee105f02e21b28131d76eba8e40ce84ab8064" dependencies = [ - "rand 0.8.6", + "rand", "rand_chacha", ] @@ -2196,37 +1984,6 @@ version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" -[[package]] -name = "lopdf" -version = "0.41.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67513274c50a2b51e5f75d9e682fcf4ab064a8a9c9ae2c3c59309084882bb24d" -dependencies = [ - "aes", - "bitflags 2.13.0", - "cbc", - "chrono", - "ecb", - "encoding_rs", - "flate2", - "getrandom 0.4.2", - "indexmap", - "itoa", - "jiff", - "log", - "md-5", - "nom", - "rand 0.10.1", - "rangemap", - "rayon", - "sha2 0.10.9", - "stringprep", - "thiserror 2.0.18", - "time", - "ttf-parser", - "weezl", -] - [[package]] name = "markup5ever" version = "0.39.0" @@ -2265,16 +2022,6 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" -[[package]] -name = 
"md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest 0.10.7", -] - [[package]] name = "memchr" version = "2.8.1" @@ -2383,15 +2130,6 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" -[[package]] -name = "nom" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" -dependencies = [ - "memchr", -] - [[package]] name = "normpath" version = "1.5.1" @@ -2428,39 +2166,6 @@ dependencies = [ "bitflags 2.13.0", ] -[[package]] -name = "ntest" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54d1aa56874c2152c24681ed0df95ee155cc06c5c61b78e2d1e8c0cae8bc5326" -dependencies = [ - "ntest_test_cases", - "ntest_timeout", -] - -[[package]] -name = "ntest_test_cases" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6913433c6319ef9b2df316bb8e3db864a41724c2bb8f12555e07dc4ec69d3db1" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "ntest_timeout" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9224be3459a0c1d6e9b0f42ab0e76e98b29aef5aba33c0487dfcf47ea08b5150" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -2821,15 +2526,6 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" -[[package]] -name = "portable-atomic-util" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" -dependencies = [ - "portable-atomic", -] - [[package]] name = "postcard" version = "1.1.3" @@ -2882,15 +2578,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "proc-macro-crate" -version = "3.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" -dependencies = [ - "toml_edit 0.25.12+spec-1.1.0", -] - [[package]] name = "proc-macro-error" version = "1.0.4" @@ -2999,18 +2686,7 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" dependencies = [ - "rand_core 0.6.4", -] - -[[package]] -name = "rand" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" -dependencies = [ - "chacha20", - "getrandom 0.4.2", - "rand_core 0.10.1", + "rand_core", ] [[package]] @@ -3020,7 +2696,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -3029,18 +2705,6 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -[[package]] -name = "rand_core" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" - -[[package]] -name = "rangemap" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" - [[package]] name = "rayon" version = "1.12.0" @@ -3276,29 +2940,6 @@ dependencies = [ "tracing", ] -[[package]] 
-name = "rheo-tests" -version = "0.3.1" -dependencies = [ - "html5ever", - "lopdf", - "markup5ever_rcdom", - "ntest", - "paste", - "rheo-core", - "rheo-epub", - "rheo-html", - "rheo-pdf", - "serde", - "serde-xml-rs", - "serde_json", - "sha2 0.11.0", - "similar", - "tempfile", - "walkdir", - "zip", -] - [[package]] name = "roman-numerals-rs" version = "3.1.0" @@ -3541,28 +3182,6 @@ dependencies = [ "unsafe-libyaml", ] -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures 0.2.17", - "digest 0.10.7", -] - -[[package]] -name = "sha2" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" -dependencies = [ - "cfg-if", - "cpufeatures 0.3.0", - "digest 0.11.3", -] - [[package]] name = "sharded-slab" version = "0.1.7" @@ -3610,15 +3229,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" -[[package]] -name = "similar" -version = "3.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6505efef05804732ed8a3f2d4f279429eb485bd69d5b0cc6b19cc02005cda16" -dependencies = [ - "bstr", -] - [[package]] name = "simplecss" version = "0.2.2" @@ -3788,17 +3398,6 @@ dependencies = [ "quote", ] -[[package]] -name = "stringprep" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - [[package]] name = "strsim" version = "0.11.1" @@ -3855,7 +3454,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", - "quote", "unicode-ident", ] @@ -4161,7 +3759,7 @@ dependencies = [ "serde", "serde_spanned 0.6.9", "toml_datetime 0.6.11", - "toml_edit 0.22.27", + "toml_edit", ] [[package]] @@ -4211,18 +3809,6 @@ dependencies = [ "winnow 0.7.15", ] -[[package]] -name = "toml_edit" -version = "0.25.12+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" -dependencies = [ - "indexmap", - "toml_datetime 1.1.1+spec-1.1.0", - "toml_parser", - "winnow 1.0.3", -] - [[package]] name = "toml_parser" version = "1.1.2+spec-1.1.0" @@ -4392,12 +3978,6 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" -[[package]] -name = "typenum" -version = "1.20.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" - [[package]] name = "typst" version = "0.14.2" @@ -5354,9 +4934,6 @@ name = "winnow" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" -dependencies = [ - "memchr", -] [[package]] name = "wit-bindgen" diff --git a/Cargo.toml b/Cargo.toml index 5ac57bcb..26342f0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["crates/core", "crates/html", "crates/pdf", "crates/epub", "crates/cli", "crates/tests"] +members = ["crates/core", "crates/html", "crates/pdf", "crates/epub", "crates/cli"] [workspace.package] version = "0.3.1" diff --git a/README.md b/README.md index a59491c6..73d24112 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,9 @@ Rheo is a standalone CLI tool that includes a development server for rapid websi Compile all `.typ` 
files in a directory to PDF, HTML, and EPUB and recompile on change: ```bash -rheo watch examples/blog_site --open +# Clone the examples repo first +git clone https://github.com/freecomputinglab/rheo-tests.git ../rheo-tests +rheo watch ../rheo-tests/examples/blog_site --open ``` The `--open` flag starts a development server at `http://localhost:3000` with automatic browser refresh. @@ -19,10 +21,10 @@ Use additional flags for customization: ```bash # Custom config file location -rheo compile examples/blog_site --config /path/to/custom.toml +rheo compile ../rheo-tests/examples/blog_site --config /path/to/custom.toml # Custom build directory -rheo compile examples/blog_site --build-dir /tmp/build +rheo compile ../rheo-tests/examples/blog_site --build-dir /tmp/build ``` See [the documentation](https://rheo.ohrg.org) for more information regarding which flags are available. @@ -105,13 +107,15 @@ rheo compile my_project --pdf --html # PDF and HTML Watch mode automatically recompiles files when they change, perfect for iterative development: ```bash -rheo watch examples/blog_site +# Clone the examples repo first +git clone https://github.com/freecomputinglab/rheo-tests.git ../rheo-tests +rheo watch ../rheo-tests/examples/blog_site ``` Add the `--open` flag to launch a development server with automatic browser refresh: ```bash -rheo watch examples/blog_site --open +rheo watch ../rheo-tests/examples/blog_site --open ``` The development server: diff --git a/TESTING.md b/TESTING.md index 74a15636..d21b482f 100644 --- a/TESTING.md +++ b/TESTING.md @@ -1,10 +1,12 @@ # Testing -## Run All Tests +Integration tests live in [freecomputinglab/rheo-tests](https://github.com/freecomputinglab/rheo-tests), cloned side-by-side at `../rheo-tests`. All integration test commands require `RHEO_MANIFEST=../rheo/Cargo.toml` to resolve path dependencies. 
+ +## Run All Tests (Unit Tests Only) ```bash cargo test ``` -Runs unit tests, integration tests, compat tests (skip unless `RUN_COMPAT_TESTS=1`), and doc tests. +Runs unit tests for all crates in the main repo. Integration tests run from the sibling rheo-tests clone. ## Run Individual Test Suites @@ -21,26 +23,30 @@ cargo test --lib -p rheo ### Integration Tests (harness) ```bash -cargo test -p rheo-tests --test harness +cd ../rheo-tests +RHEO_MANIFEST=../rheo/Cargo.toml cargo test --test harness ``` -Runs 42 integration tests against example projects and test cases. +Runs integration tests against example projects and test cases. ### Compat Tests (skip by default) ```bash -cargo test -p rheo-tests --test compat +cd ../rheo-tests +RHEO_MANIFEST=../rheo/Cargo.toml cargo test --test compat ``` -Runs 5 compatibility tests against external Rheo projects (skip immediately unless `RUN_COMPAT_TESTS=1`). +Runs compatibility tests against external Rheo projects (skip immediately unless `RUN_COMPAT_TESTS=1`). ### Compat Tests (actually execute) ```bash -RUN_COMPAT_TESTS=1 cargo test -p rheo-tests --test compat +cd ../rheo-tests +RUN_COMPAT_TESTS=1 RHEO_MANIFEST=../rheo/Cargo.toml cargo test --test compat ``` -Clones 5 external repos and compiles them (~7 seconds). +Clones external repos and compiles them (~7 seconds). ## Format-Specific Tests ```bash -RUN_HTML_TESTS=1 cargo test -p rheo-tests --test harness -RUN_PDF_TESTS=1 cargo test -p rheo-tests --test harness -RUN_EPUB_TESTS=1 cargo test -p rheo-tests --test harness +cd ../rheo-tests +RHEO_MANIFEST=../rheo/Cargo.toml RUN_HTML_TESTS=1 cargo test --test harness +RHEO_MANIFEST=../rheo/Cargo.toml RUN_PDF_TESTS=1 cargo test --test harness +RHEO_MANIFEST=../rheo/Cargo.toml RUN_EPUB_TESTS=1 cargo test --test harness ``` Only run tests targeting HTML/PDF/EPUB formats respectively. 
diff --git a/crates/html/src/lib.rs b/crates/html/src/lib.rs index bce948df..21f161af 100644 --- a/crates/html/src/lib.rs +++ b/crates/html/src/lib.rs @@ -56,8 +56,8 @@ impl FormatPlugin for HtmlPlugin { // The stylesheet included with the template mirrors the default stylesheet, so that // users can build from it or start from scratch as they wish. ("style.css", include_str!("templates/style.css")), - // A demonstrative JS file that just logs to console. See the examples/ directory for - // how to use Rheo with bundled JS. + // A demonstrative JS file that just logs to console. Use JS files in your project + // to add client-side interactivity to Rheo output. ("index.js", include_str!("templates/index.js")), ], options_toml: Some(include_str!("templates/init/rheo_section.toml")), diff --git a/crates/tests/Cargo.toml b/crates/tests/Cargo.toml deleted file mode 100644 index 1b36992e..00000000 --- a/crates/tests/Cargo.toml +++ /dev/null @@ -1,42 +0,0 @@ -[package] -name = "rheo-tests" -version.workspace = true -edition.workspace = true -publish = false - -[lib] -test = true - -[[test]] -name = "compat" -path = "tests/compat.rs" - -[dependencies] -# Internal crates -rheo-core = { workspace = true } -rheo-html = { workspace = true } -rheo-pdf = { workspace = true } -rheo-epub = { workspace = true } - -# Workspace dependencies -tempfile = { workspace = true } -walkdir = { workspace = true } -serde = { workspace = true } - -# Test-specific dependencies -ntest = "0.9" -similar = "3.1" -serde_json = "1.0" -paste = "1" - -# PDF metadata extraction -lopdf = "0.41" -sha2 = "0.11" - -# EPUB handling -zip = { workspace = true } -html5ever = { workspace = true } -markup5ever_rcdom = { workspace = true } - -# XML parsing (for EPUB metadata) -serde-xml-rs = { workspace = true } diff --git a/crates/tests/README.md b/crates/tests/README.md deleted file mode 100644 index 1854556d..00000000 --- a/crates/tests/README.md +++ /dev/null @@ -1,404 +0,0 @@ -# Rheo Integration Test Suite - 
-This directory contains the integration test suite for rheo compilation. The tests verify that rheo correctly compiles Typst projects to PDF, HTML and EPUB formats. - -## Test Structure - -``` -tests/ -├── harness.rs # Main test file with #[test_case] declarations -├── helpers/ # Test helper modules -│ ├── mod.rs # Module declarations -│ ├── fixtures.rs # TestCase types (Directory and SingleFile) -│ ├── comparison.rs # HTML/PDF comparison and validation -│ ├── reference.rs # Reference generation -│ └── markers.rs # Test marker parser for .typ files -├── ref/ # Reference outputs (committed to git) -│ ├── examples/ # Project tests -│ │ ├── blog_site/ -│ │ │ ├── html/ # Reference HTML outputs -│ │ │ ├── pdf/ # Reference PDF metadata (*.metadata.json) -│ │ │ └── epub/ # Reference EPUB metadata -│ │ ├── blog_post/ -│ │ └── init_template/ -│ ├── cases/ # Custom project tests -│ └── files/ # Single-file tests -│ └── / # Hash-based directory for each file -│ └── / -│ ├── html/ -│ └── pdf/ -└── store/ # Temporary test outputs (gitignored) -``` - -## Running Tests - -### Run all tests -```bash -cargo test -``` - -### Run integration tests only -```bash -cargo test --test harness -``` - -### Run with verbose output -```bash -cargo test -- --nocapture -``` - -## Font Consistency - -To ensure tests produce identical output across different environments (local machines and CI), tests automatically use only Typst's embedded fonts. This prevents font-related rendering differences that cause page count and layout variations. 
- -**The test harness automatically sets `TYPST_IGNORE_SYSTEM_FONTS=1`**, so you can simply run: -```bash -cargo test --test harness -``` - -**Why this matters:** -- Different machines have different system fonts installed -- Font metrics (line height, character width) affect text layout -- Layout differences cause page breaks to vary → different page counts -- CI (Ubuntu) has different fonts than macOS/Windows - -**Typst's embedded fonts** (New Computer Modern, Libertinus) are: -- Bundled with the Typst compiler -- Identical across all platforms -- Deterministic in rendering behavior - -**Implementation:** The environment variable is automatically passed to all `cargo run` subprocess invocations in `tests/harness.rs`. - -**Note:** This only affects tests. Normal `rheo compile` commands still use system fonts as expected. - -## Updating Reference Outputs - -When you make intentional changes to rheo's output, update the reference files: - -### Update all references -```bash -UPDATE_REFERENCES=1 cargo test --test harness -``` - -This will: -1. Compile all test projects -2. Copy HTML outputs to `tests/ref//html/` -3. Extract PDF metadata to `tests/ref//pdf/*.metadata.json` - -After updating, commit the changed reference files to git. 
- -## Test Filtering - -### Run only HTML tests (across all projects that support HTML) -```bash -RUN_HTML_TESTS=1 cargo test --test harness -``` - -### Run only PDF tests (across all projects that support PDF) -```bash -RUN_PDF_TESTS=1 cargo test --test harness -``` - -### Run both formats explicitly -```bash -RUN_HTML_TESTS=1 RUN_PDF_TESTS=1 cargo test -``` - -### Increase diff output limit (default: 2000 chars) -```bash -RHEO_TEST_DIFF_LIMIT=10000 cargo test -- --nocapture -``` - -**Behavior**: -- **Default** (no env vars): Run all formats specified by project's `rheo.toml` -- **With env vars**: Filter to specified formats, respecting project capabilities -- Environment variables override project defaults but still respect what the project supports - -## How Tests Work - -Rheo supports two test modes: **Directory Tests** (full projects) and **Single-File Tests** (individual .typ files). - -### Directory Mode Tests - -1. **Discovery**: Finds all `examples/*/rheo.toml` projects -2. **Compilation**: Runs `cargo run -- compile ` for each project -3. **Verification**: Compares output against reference files: - - **HTML**: Byte-for-byte comparison of HTML content and asset validation - - **PDF**: Metadata comparison (page count exact, file size within 10% tolerance) - -### Single-File Mode Tests (NEW) - -Single-file tests allow testing individual .typ files without creating a full project structure. - -**Adding test markers to .typ files**: -```typst -// @rheo:test -// @rheo:formats html,pdf -// @rheo:description Main blog index page with post listings - -= My Document -Content here... -``` - -**Test marker syntax**: -- `// @rheo:test` (required) - Marks file as test case -- `// @rheo:formats ` (optional) - Comma-separated formats (html, pdf, epub). 
Default: html,pdf -- `// @rheo:description ` (optional) - Human-readable test description -- `// @rheo:expect error` (optional) - Indicates test expects compilation to fail -- `// @rheo:error-patterns "pattern1", "pattern2", ...` (optional) - Required substrings in error output - -#### Error Case Testing - -Tests can validate that compilation fails with expected error messages: - -**Example error test:** -```typst -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "cannot add integer and string", "│" -// @rheo:formats pdf - -= Type Error Test -#let x = 5 + "hello" // This will fail -``` - -**How it works:** -- Compilation must fail (non-zero exit code) -- All patterns in `@rheo:error-patterns` must appear in stderr -- Reference comparison is skipped for error cases -- Useful for testing error formatting and diagnostic quality - -**Pattern format:** -- Comma-separated list of double-quoted strings -- Patterns are case-sensitive substrings -- Common patterns to check: - - `"error"` - Confirms error-level diagnostic - - Filename (e.g., `"type_error.typ"`) - Confirms error location - - Error message text - Confirms diagnostic content - - `"│"` - Confirms codespan-reporting format with source context - -**Note:** Tests expecting success don't need `@rheo:expect` - that's the default behavior. Only use `@rheo:expect error` for tests that should fail compilation. - -**Declaring single-file tests** in `tests/harness.rs`: -```rust -#[test_case("file:examples/blog_site/content/index.typ")] -#[test_case("file:examples/blog_site/content/severance-ep-1.typ")] -fn run_test_case(name: &str) { ... 
} -``` - -**Reference storage**: -- Projects: `tests/ref/examples//html/` -- Single files: `tests/ref/files///html/` -- Hash prevents conflicts between files with the same name - -**Running single-file tests**: -```bash -# Run specific single-file test -cargo test run_test_case_file_colonexamples_slashblog_site_slashcontent_slashindex_full_stoptyp - -# Update references for single-file test -UPDATE_REFERENCES=1 cargo test run_test_case_file_colonexamples_slashblog_site_slashcontent_slashindex_full_stoptyp -``` - -### HTML Verification - -- Compares HTML content byte-for-byte using unified diffs -- Validates that all expected assets (images, .typ files, CSS) are present -- Checks that no unexpected files appear in output -- Verifies exclusion patterns (e.g., blog_site excludes non-.typ files per `rheo.toml`) - -### PDF Verification - -- Extracts metadata: page count and file size -- Compares page count (must match exactly) -- Compares file size (must be within 10% tolerance) -- Verifies exclusion patterns (e.g., blog_site excludes `index.typ` from PDF) - -### EPUB Testing - -EPUB reference testing validates the structure and metadata of generated EPUB files using a lightweight metadata approach. 
- -#### Approach - -Unlike HTML (full content comparison) and similar to PDF (metadata only), EPUB testing uses metadata validation: - -**What's Validated:** -- Title (from config or inferred from filename/directory) -- Language (from document metadata) -- Spine files (ordered list of content files, exact match) -- Navigation file existence (nav.xhtml) -- File size (10% tolerance) - -**What's NOT Validated:** -- Timestamps (dcterms:modified changes every build) -- UUIDs (generated fresh each time) -- Exact XHTML content (already tested via HTML tests) - -**Rationale:** -- EPUB content is derived from HTML compilation -- XHTML conversion is deterministic and unit tested -- Focus on structural integrity and configuration correctness -- Lightweight (no binary files in repo) - -#### Reference Files - -EPUB metadata stored as JSON: -``` -tests/ref/ -├── examples/ -│ └── blog_post/ -│ └── epub/ -│ └── blog_post.metadata.json -└── cases/ - └── epub_explicit_config/ - └── epub/ - └── epub_explicit_config.metadata.json -``` - -Example metadata file: -```json -{ - "filetype": "epub", - "file_size": 12453, - "title": "Blog Post", - "language": "en", - "spine_files": ["portable_epubs.xhtml"], - "has_nav": true -} -``` - -#### Running EPUB Tests - -```bash -# Run only EPUB tests -RUN_EPUB_TESTS=1 cargo test --test harness - -# Run specific EPUB test -cargo test run_test_case_examples_slashblog_post -- --nocapture - -# Update EPUB references -UPDATE_REFERENCES=1 RUN_EPUB_TESTS=1 cargo test --test harness - -# Run all formats (HTML, PDF, EPUB) -cargo test --test harness -``` - -#### When to Update References - -Update EPUB references when: -- Changing EPUB title inference logic -- Changing spine ordering logic -- Changing EPUB compilation configuration -- Adding new EPUB test cases - -DO NOT update for: -- Minor formatting changes (within 10% file size tolerance) -- Timestamp/UUID changes (not validated) - -#### Troubleshooting - -**File size mismatch beyond 10% tolerance:** -- 
Indicates significant structural change -- Review EPUB configuration or spine changes -- Update reference if intentional change - -**Spine order mismatch:** -- Check rheo.toml spine configuration -- Verify file naming for default lexicographic sorting -- Update reference if intentional change - -**Title/language mismatch:** -- Check rheo.toml [epub] configuration -- Verify document language metadata -- Update reference if intentional change - -## Adding New Tests - -### Add a new project test - -1. Create a new project directory in `examples/` -2. Add a `rheo.toml` config file -3. Add `.typ` source files -4. Run `UPDATE_REFERENCES=1 cargo test` to generate references -5. Commit the reference files to git - -### Test exclusions automatically - -PDF and HTML exclusion patterns are tested automatically via reference validation: - -- **PDF**: `validate_pdf_assets()` ensures actual PDFs match reference metadata files exactly -- **HTML**: `validate_html_assets()` ensures actual HTML files match reference files exactly - -When you change exclusion patterns in `rheo.toml`: -1. Clean and compile: `cargo run -- clean examples/project && cargo run -- compile examples/project` -2. Update references: `UPDATE_REFERENCES=1 cargo test` -3. Tests will now fail if exclusions aren't respected - -## Troubleshooting - -### Test fails with "reference not found" - -Run `UPDATE_REFERENCES=1 cargo test` to generate missing references. - -### HTML content mismatch - -The test will show an improved unified diff with: -- **Statistics**: Insertion/deletion counts -- **Diff preview**: First N characters (configurable via `RHEO_TEST_DIFF_LIMIT`) -- **Update command**: Test-specific command to update references -- **Full diff option**: Command to see complete diff if truncated - -Example error: -``` -HTML content mismatch for tests/ref/examples/blog_site/html/index.html - -Diff: 12 insertions(+), 8 deletions(-), 145 lines unchanged - -
--

Old Title

-+

New Title

-+

Additional paragraph

-
- -... (showing first 2000 chars of 5000 bytes total) - -To update this reference, run: - UPDATE_REFERENCES=1 cargo test run_test_case_examples_slashblog_site -- --nocapture - -Or to see full diff: - RHEO_TEST_DIFF_LIMIT=10000 cargo test run_test_case_examples_slashblog_site -- --nocapture -``` - -Common causes: -- Typst version changed (update references) -- Intentional output change (update references) -- Unintentional regression (fix the code) - -### PDF metadata mismatch - -Enhanced error messages now show: -- **Page count changes**: Shows exact difference (e.g., "3 pages added") -- **File size changes**: Shows percentage difference -- **Context**: Suggests this may indicate content or formatting changes - -Example error: -``` -PDF metadata mismatch: - - Page count: expected 16, got 15 (1 pages removed) - - File size: expected 24560 bytes, got 27200 bytes (11% diff, beyond 10% tolerance) - -This may indicate a change in content or formatting. -``` - -Common causes: -- Typst version changed rendering (update references if expected) -- Page count changed (verify this is intentional) -- File size changed significantly (check for regression) - -## Reference File Management - -- **HTML references**: Full HTML files and assets committed to git -- **PDF references**: Metadata JSON only (page count, file size) - - PDFs themselves are NOT committed (too large, binary) - - Metadata provides sufficient validation for most cases -- **Update policy**: Update references when making intentional changes to output format diff --git a/crates/tests/cases/code_blocks_with_links/code_examples.typ b/crates/tests/cases/code_blocks_with_links/code_examples.typ deleted file mode 100644 index 1b7df437..00000000 --- a/crates/tests/cases/code_blocks_with_links/code_examples.typ +++ /dev/null @@ -1,27 +0,0 @@ -= Code Blocks with Links Test - -This document tests that link transformation correctly handles code blocks. 
- -== Real Links - -Real links should be transformed: #link("./other.typ")[see other page]. - -Multiple links: #link("./intro.typ")[intro] and #link("./conclusion.typ")[conclusion]. - -== Code Examples - -Inline code should preserve links: `#link("./file.typ")[example]`. - -Code block example: -``` -// This link should be preserved: -#link("./other.typ")[other page] -``` - -== Mixed Content - -Real link: #link("./chapter1.typ")[Chapter 1] - -Then code: `#link("./code.typ")[code link]` - -And another real link: #link("./chapter2.typ")[Chapter 2] diff --git a/crates/tests/cases/code_blocks_with_links/rheo.toml b/crates/tests/cases/code_blocks_with_links/rheo.toml deleted file mode 100644 index 788c7bb2..00000000 --- a/crates/tests/cases/code_blocks_with_links/rheo.toml +++ /dev/null @@ -1,3 +0,0 @@ - -# Test PDF transformation -formats = ["pdf"] diff --git a/crates/tests/cases/cross_directory_links/appendix/notes.typ b/crates/tests/cases/cross_directory_links/appendix/notes.typ deleted file mode 100644 index 9457824f..00000000 --- a/crates/tests/cases/cross_directory_links/appendix/notes.typ +++ /dev/null @@ -1,13 +0,0 @@ -= Appendix: Notes - -Additional notes and references. - -== Cross References - -Back to #link("../chapters/ch1.typ")[Chapter 1]. - -Return to #link("../intro.typ")[the introduction]. - -== Details - -Testing links from a different subdirectory. diff --git a/crates/tests/cases/cross_directory_links/chapters/ch1.typ b/crates/tests/cases/cross_directory_links/chapters/ch1.typ deleted file mode 100644 index b62d95bd..00000000 --- a/crates/tests/cases/cross_directory_links/chapters/ch1.typ +++ /dev/null @@ -1,11 +0,0 @@ -= Chapter 1 - -This is the first chapter. - -== References - -Go back to #link("../intro.typ")[the introduction]. - -Continue to #link("ch2.typ")[Chapter 2] (sibling). - -See #link("../appendix/notes.typ")[the appendix notes] for additional info. 
diff --git a/crates/tests/cases/cross_directory_links/chapters/ch2.typ b/crates/tests/cases/cross_directory_links/chapters/ch2.typ deleted file mode 100644 index 0bd81874..00000000 --- a/crates/tests/cases/cross_directory_links/chapters/ch2.typ +++ /dev/null @@ -1,13 +0,0 @@ -= Chapter 2 - -This is the second chapter. - -== Navigation - -Previous: #link("ch1.typ")[Chapter 1] - -Root: #link("../intro.typ")[Introduction] - -== Content - -Testing cross-directory navigation patterns. diff --git a/crates/tests/cases/cross_directory_links/intro.typ b/crates/tests/cases/cross_directory_links/intro.typ deleted file mode 100644 index bdabdac8..00000000 --- a/crates/tests/cases/cross_directory_links/intro.typ +++ /dev/null @@ -1,9 +0,0 @@ -= Introduction - -Welcome to the cross-directory test. - -== Overview - -This document links to #link("chapters/ch1.typ")[Chapter 1]. - -See also #link("chapters/ch2.typ")[Chapter 2] for more details. diff --git a/crates/tests/cases/cross_directory_links/rheo.toml b/crates/tests/cases/cross_directory_links/rheo.toml deleted file mode 100644 index a4157746..00000000 --- a/crates/tests/cases/cross_directory_links/rheo.toml +++ /dev/null @@ -1,7 +0,0 @@ - -formats = ["html", "pdf"] - -[pdf.spine] -merge = true -title = "Cross Directory Test" -vertebrae = ["intro.typ", "chapters/ch1.typ", "chapters/ch2.typ", "appendix/notes.typ"] diff --git a/crates/tests/cases/epub_explicit_spine/chapter.typ b/crates/tests/cases/epub_explicit_spine/chapter.typ deleted file mode 100644 index 236b6df0..00000000 --- a/crates/tests/cases/epub_explicit_spine/chapter.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: [Main Chapter]) - -= Chapter 1 - -The main content. 
diff --git a/crates/tests/cases/epub_explicit_spine/intro.typ b/crates/tests/cases/epub_explicit_spine/intro.typ deleted file mode 100644 index e917334d..00000000 --- a/crates/tests/cases/epub_explicit_spine/intro.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: [Introduction]) - -= Introduction - -Welcome to the book. diff --git a/crates/tests/cases/epub_explicit_spine/rheo.toml b/crates/tests/cases/epub_explicit_spine/rheo.toml deleted file mode 100644 index bc56fb6b..00000000 --- a/crates/tests/cases/epub_explicit_spine/rheo.toml +++ /dev/null @@ -1,4 +0,0 @@ - -[epub.spine] -title = "My EPUB Book" -vertebrae = ["intro.typ", "chapter.typ"] diff --git a/crates/tests/cases/epub_inferred_spine/a.typ b/crates/tests/cases/epub_inferred_spine/a.typ deleted file mode 100644 index e01171ae..00000000 --- a/crates/tests/cases/epub_inferred_spine/a.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: "Part A") - -= Part A - -This is the first part of the document. diff --git a/crates/tests/cases/epub_inferred_spine/b.typ b/crates/tests/cases/epub_inferred_spine/b.typ deleted file mode 100644 index 7f0dea7e..00000000 --- a/crates/tests/cases/epub_inferred_spine/b.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: "Part B") - -= Part B - -This is the second part of the document. diff --git a/crates/tests/cases/epub_inferred_spine/c.typ b/crates/tests/cases/epub_inferred_spine/c.typ deleted file mode 100644 index 3941877f..00000000 --- a/crates/tests/cases/epub_inferred_spine/c.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: "Part C") - -= Part C - -This is the third part of the document. 
diff --git a/crates/tests/cases/error_formatting/array_index_error.typ b/crates/tests/cases/error_formatting/array_index_error.typ deleted file mode 100644 index cb276691..00000000 --- a/crates/tests/cases/error_formatting/array_index_error.typ +++ /dev/null @@ -1,15 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "array_index_error.typ", "│" -// @rheo:formats pdf -// Test file with array index error - -= Array Index Error Test - -// Create a small array -#let items = ("first", "second", "third") - -// Try to access an index that doesn't exist -#items.at(10) - -Some content diff --git a/crates/tests/cases/error_formatting/function_arg_error.typ b/crates/tests/cases/error_formatting/function_arg_error.typ deleted file mode 100644 index 396014b2..00000000 --- a/crates/tests/cases/error_formatting/function_arg_error.typ +++ /dev/null @@ -1,15 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "function_arg_error.typ", "│" -// @rheo:formats pdf -// Test file with function argument error - -= Function Argument Error Test - -// Define a function that requires two arguments -#let add_numbers(x, y) = x + y - -// Call it with only one argument (missing required argument) -#add_numbers(5) - -Some content diff --git a/crates/tests/cases/error_formatting/import_error.typ b/crates/tests/cases/error_formatting/import_error.typ deleted file mode 100644 index ca1820ce..00000000 --- a/crates/tests/cases/error_formatting/import_error.typ +++ /dev/null @@ -1,12 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "import_error.typ", "│" -// @rheo:formats pdf -// Test file with import error (missing file) - -= Import Error Test - -// Try to include a file that doesn't exist -#include "nonexistent_file.typ" - -Some content diff --git a/crates/tests/cases/error_formatting/invalid_field.typ b/crates/tests/cases/error_formatting/invalid_field.typ deleted file mode 100644 index 53a0410a..00000000 --- 
a/crates/tests/cases/error_formatting/invalid_field.typ +++ /dev/null @@ -1,18 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "invalid_field.typ", "│" -// @rheo:formats pdf -// Test file with invalid field access - -= Invalid Field Access Test - -// Create a dictionary and try to access non-existent field -#let person = ( - name: "Alice", - age: 30 -) - -// Try to access a field that doesn't exist -#person.nonexistent_field - -Some content diff --git a/crates/tests/cases/error_formatting/invalid_method.typ b/crates/tests/cases/error_formatting/invalid_method.typ deleted file mode 100644 index 9b634d30..00000000 --- a/crates/tests/cases/error_formatting/invalid_method.typ +++ /dev/null @@ -1,13 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "invalid_method.typ", "│" -// @rheo:formats pdf -// Test file with invalid method call - -= Invalid Method Test - -// Try to call a method that doesn't exist on strings -#let text = "hello" -#let result = text.nonexistent_method() - -Some content diff --git a/crates/tests/cases/error_formatting/multiple_errors.typ b/crates/tests/cases/error_formatting/multiple_errors.typ deleted file mode 100644 index 63b4c709..00000000 --- a/crates/tests/cases/error_formatting/multiple_errors.typ +++ /dev/null @@ -1,16 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "multiple_errors.typ", "│" -// @rheo:formats pdf -// Test file with multiple errors - -= Multiple Errors Test - -// First error: undefined variable -#let x = undefined_var_one - -// Second error: type mismatch -#let y = 5 + "string" - -// Third error: undefined variable again -The value is: #undefined_var_two diff --git a/crates/tests/cases/error_formatting/rheo.toml b/crates/tests/cases/error_formatting/rheo.toml deleted file mode 100644 index 385f755b..00000000 --- a/crates/tests/cases/error_formatting/rheo.toml +++ /dev/null @@ -1,6 +0,0 @@ - -# Test project for error formatting 
validation -formats = ["pdf"] - -[pdf] -# Don't merge - test single file errors diff --git a/crates/tests/cases/error_formatting/syntax_error.typ b/crates/tests/cases/error_formatting/syntax_error.typ deleted file mode 100644 index 4dd311ea..00000000 --- a/crates/tests/cases/error_formatting/syntax_error.typ +++ /dev/null @@ -1,15 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "syntax_error.typ", "│" -// @rheo:formats pdf -// Test file with syntax error (unclosed delimiter) - -= Syntax Error Test - -#let items = [ - Item 1, - Item 2, - Item 3 -// Missing closing bracket ] - -Content follows diff --git a/crates/tests/cases/error_formatting/type_error.typ b/crates/tests/cases/error_formatting/type_error.typ deleted file mode 100644 index 01aa6cda..00000000 --- a/crates/tests/cases/error_formatting/type_error.typ +++ /dev/null @@ -1,16 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "type_error.typ", "│" -// @rheo:formats pdf -// Test file with type error -// This should trigger a Typst compilation error - -= Type Error Test - -#let x = 5 -#let y = "hello" - -// This will cause a type error: can't add number and string -#let result = x + y - -Content: #result diff --git a/crates/tests/cases/error_formatting/undefined_var.typ b/crates/tests/cases/error_formatting/undefined_var.typ deleted file mode 100644 index e73bbaef..00000000 --- a/crates/tests/cases/error_formatting/undefined_var.typ +++ /dev/null @@ -1,10 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "undefined_var.typ", "undefined_variable", "│" -// @rheo:formats pdf -// Test file with undefined variable error - -= Undefined Variable Test - -// This will cause an error: undefined_variable doesn't exist -The value is: #undefined_variable diff --git a/crates/tests/cases/error_formatting/unknown_function.typ b/crates/tests/cases/error_formatting/unknown_function.typ deleted file mode 100644 index 0d3e2879..00000000 
--- a/crates/tests/cases/error_formatting/unknown_function.typ +++ /dev/null @@ -1,12 +0,0 @@ -// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "unknown_function.typ", "│" -// @rheo:formats pdf -// Test file with unknown function call - -= Unknown Function Test - -// Call a function that doesn't exist -#nonexistent_function("arg1", "arg2") - -Some content here diff --git a/crates/tests/cases/html_spine/about.typ b/crates/tests/cases/html_spine/about.typ deleted file mode 100644 index 1e9f1c4d..00000000 --- a/crates/tests/cases/html_spine/about.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: [About]) - -= About - -Information about the site. diff --git a/crates/tests/cases/html_spine/index.typ b/crates/tests/cases/html_spine/index.typ deleted file mode 100644 index f0414685..00000000 --- a/crates/tests/cases/html_spine/index.typ +++ /dev/null @@ -1,7 +0,0 @@ -#set document(title: [Home]) - -= Welcome - -This is the home page. - -See also: @about diff --git a/crates/tests/cases/html_spine/rheo.toml b/crates/tests/cases/html_spine/rheo.toml deleted file mode 100644 index f163c7bb..00000000 --- a/crates/tests/cases/html_spine/rheo.toml +++ /dev/null @@ -1,4 +0,0 @@ - -[html.spine] -title = "My Website" -vertebrae = ["index.typ", "about.typ"] diff --git a/crates/tests/cases/link_path_edge_cases/chapter-01.typ b/crates/tests/cases/link_path_edge_cases/chapter-01.typ deleted file mode 100644 index 90d0f99e..00000000 --- a/crates/tests/cases/link_path_edge_cases/chapter-01.typ +++ /dev/null @@ -1,5 +0,0 @@ -= Chapter 01 - -This filename contains numbers. - -Back to #link("main.typ")[main]. diff --git a/crates/tests/cases/link_path_edge_cases/file-name.typ b/crates/tests/cases/link_path_edge_cases/file-name.typ deleted file mode 100644 index 3fd21397..00000000 --- a/crates/tests/cases/link_path_edge_cases/file-name.typ +++ /dev/null @@ -1,5 +0,0 @@ -= File with Hyphen - -This filename contains a hyphen. - -Back to #link("main.typ")[main]. 
diff --git a/crates/tests/cases/link_path_edge_cases/file_name.typ b/crates/tests/cases/link_path_edge_cases/file_name.typ deleted file mode 100644 index b786beba..00000000 --- a/crates/tests/cases/link_path_edge_cases/file_name.typ +++ /dev/null @@ -1,5 +0,0 @@ -= File with Underscore - -This filename contains an underscore. - -Back to #link("main.typ")[main]. diff --git a/crates/tests/cases/link_path_edge_cases/main.typ b/crates/tests/cases/link_path_edge_cases/main.typ deleted file mode 100644 index b557e5fc..00000000 --- a/crates/tests/cases/link_path_edge_cases/main.typ +++ /dev/null @@ -1,17 +0,0 @@ -= Path Edge Cases Test - -This tests unusual but valid filename patterns. - -== Links to Edge Case Files - -Hyphen: #link("file-name.typ")[file with hyphen] - -Underscore: #link("file_name.typ")[file with underscore] - -Dot in name: #link("version-1.0.typ")[file with dot] - -Number: #link("chapter-01.typ")[file with number] - -== Content - -All these edge cases should transform correctly. diff --git a/crates/tests/cases/link_path_edge_cases/rheo.toml b/crates/tests/cases/link_path_edge_cases/rheo.toml deleted file mode 100644 index aa46594d..00000000 --- a/crates/tests/cases/link_path_edge_cases/rheo.toml +++ /dev/null @@ -1,7 +0,0 @@ - -formats = ["html", "pdf"] - -[pdf.spine] -merge = true -title = "Path Edge Cases Test" -vertebrae = ["main.typ", "file-name.typ", "file_name.typ", "version-1.0.typ", "chapter-01.typ"] diff --git a/crates/tests/cases/link_path_edge_cases/version-1.0.typ b/crates/tests/cases/link_path_edge_cases/version-1.0.typ deleted file mode 100644 index 7fc25564..00000000 --- a/crates/tests/cases/link_path_edge_cases/version-1.0.typ +++ /dev/null @@ -1,5 +0,0 @@ -= Version 1.0 - -This filename contains a dot in the name (not just the extension). - -Back to #link("main.typ")[main]. 
diff --git a/crates/tests/cases/link_transformation/doc1.typ b/crates/tests/cases/link_transformation/doc1.typ deleted file mode 100644 index 69c2f08d..00000000 --- a/crates/tests/cases/link_transformation/doc1.typ +++ /dev/null @@ -1,12 +0,0 @@ -// @test-formats: pdf,html,epub -// @test-description: Verify AST-based .typ link transformation - -= Document 1 - -This is the first document. - -You can navigate to #link("./doc2.typ")[See Doc 2] for more information. - -== Section in Doc 1 - -More content here. diff --git a/crates/tests/cases/link_transformation/doc2.typ b/crates/tests/cases/link_transformation/doc2.typ deleted file mode 100644 index 59251024..00000000 --- a/crates/tests/cases/link_transformation/doc2.typ +++ /dev/null @@ -1,9 +0,0 @@ -= Document 2 - -This is the second document. - -Go #link("./doc1.typ")[Back to Doc 1] to see the first document. - -== Another Section - -Additional content in document 2. diff --git a/crates/tests/cases/link_transformation/rheo.toml b/crates/tests/cases/link_transformation/rheo.toml deleted file mode 100644 index 3e4e50f0..00000000 --- a/crates/tests/cases/link_transformation/rheo.toml +++ /dev/null @@ -1,9 +0,0 @@ - -[pdf.spine] -merge = true -title = "Link Transformation Test" -vertebrae = ["doc1.typ", "doc2.typ"] - -[epub.spine] -title = "Link Transformation Test" -vertebrae = ["doc1.typ", "doc2.typ"] diff --git a/crates/tests/cases/links_with_fragments/page1.typ b/crates/tests/cases/links_with_fragments/page1.typ deleted file mode 100644 index 9164d4d2..00000000 --- a/crates/tests/cases/links_with_fragments/page1.typ +++ /dev/null @@ -1,11 +0,0 @@ -= Page 1 - -This is the first page. - -See the #link("./page2.typ#intro")[introduction in Page 2] for details. - -Also check #link("./page2.typ#conclusion")[the conclusion]. - -== Section in Page 1 - -More content here. 
diff --git a/crates/tests/cases/links_with_fragments/page2.typ b/crates/tests/cases/links_with_fragments/page2.typ deleted file mode 100644 index 82d874de..00000000 --- a/crates/tests/cases/links_with_fragments/page2.typ +++ /dev/null @@ -1,19 +0,0 @@ -= Page 2 - -This is the second page. - -== Introduction - -This is the introduction section. - -It has some content that the first page links to. - -== Middle Section - -Some middle content. - -== Conclusion - -This is the conclusion section. - -Referenced from page 1. diff --git a/crates/tests/cases/links_with_fragments/rheo.toml b/crates/tests/cases/links_with_fragments/rheo.toml deleted file mode 100644 index b9fb6d4d..00000000 --- a/crates/tests/cases/links_with_fragments/rheo.toml +++ /dev/null @@ -1,11 +0,0 @@ - -formats = ["html", "pdf", "epub"] - -[pdf.spine] -merge = true -title = "Links with Fragments Test" -vertebrae = ["page1.typ", "page2.typ"] - -[epub.spine] -title = "Links with Fragments Test" -vertebrae = ["page1.typ", "page2.typ"] diff --git a/crates/tests/cases/merged_subdir_imports/content/author/author.typ b/crates/tests/cases/merged_subdir_imports/content/author/author.typ deleted file mode 100644 index d10f7a60..00000000 --- a/crates/tests/cases/merged_subdir_imports/content/author/author.typ +++ /dev/null @@ -1,7 +0,0 @@ -#import "../template.typ": article - -#show: article - -= Author Page - -Written by the author. diff --git a/crates/tests/cases/merged_subdir_imports/content/index.typ b/crates/tests/cases/merged_subdir_imports/content/index.typ deleted file mode 100644 index 14b46fbc..00000000 --- a/crates/tests/cases/merged_subdir_imports/content/index.typ +++ /dev/null @@ -1,7 +0,0 @@ -#import "template.typ": article - -#show: article - -= Merged Subdir Imports - -This is the index page. 
diff --git a/crates/tests/cases/merged_subdir_imports/content/template.typ b/crates/tests/cases/merged_subdir_imports/content/template.typ deleted file mode 100644 index 99b78b2e..00000000 --- a/crates/tests/cases/merged_subdir_imports/content/template.typ +++ /dev/null @@ -1,4 +0,0 @@ -#let article(body) = { - set document(title: "Merged Subdir Imports") - body -} diff --git a/crates/tests/cases/merged_subdir_imports/rheo.toml b/crates/tests/cases/merged_subdir_imports/rheo.toml deleted file mode 100644 index f593600f..00000000 --- a/crates/tests/cases/merged_subdir_imports/rheo.toml +++ /dev/null @@ -1,12 +0,0 @@ -version = "0.2.1" - -formats = ["pdf", "epub"] - -[pdf.spine] -title = "Merged Subdir Imports" -vertebrae = ["content/**/*.typ"] -merge = true - -[epub.spine] -title = "Merged Subdir Imports" -vertebrae = ["content/**/*.typ"] diff --git a/crates/tests/cases/multiple_links_inline.typ b/crates/tests/cases/multiple_links_inline.typ deleted file mode 100644 index cf1189a6..00000000 --- a/crates/tests/cases/multiple_links_inline.typ +++ /dev/null @@ -1,21 +0,0 @@ -// @rheo:test -// @rheo:formats html,pdf -// @rheo:description Multiple links per line test - -= Multiple Links Test - -== Adjacent Links with Text - -See #link("file1.typ")[File 1] and #link("file2.typ")[File 2] for details. - -== Multiple References in List - -References: #link("a.typ")[A], #link("b.typ")[B], #link("c.typ")[C]. - -== Minimal Separation - -Adjacent links: #link("x.typ")[X]#link("y.typ")[Y] - -== Multiple Links in Sentence - -Check #link("intro.typ")[the introduction], then #link("chapter1.typ")[chapter 1], and finally #link("conclusion.typ")[the conclusion]. diff --git a/crates/tests/cases/pdf_individual/chapter1.typ b/crates/tests/cases/pdf_individual/chapter1.typ deleted file mode 100644 index 7b27ae27..00000000 --- a/crates/tests/cases/pdf_individual/chapter1.typ +++ /dev/null @@ -1,7 +0,0 @@ -#set document(title: "Chapter 1") - -= Chapter 1 - -This is the first chapter. 
- -See also: #link("./chapter2.typ")[Chapter 2] for more information. diff --git a/crates/tests/cases/pdf_individual/chapter2.typ b/crates/tests/cases/pdf_individual/chapter2.typ deleted file mode 100644 index a41be40d..00000000 --- a/crates/tests/cases/pdf_individual/chapter2.typ +++ /dev/null @@ -1,7 +0,0 @@ -#set document(title: "Chapter 2") - -= Chapter 2 - -This is the second chapter. - -Refer back to #link("./chapter1.typ")[Chapter 1] if needed. diff --git a/crates/tests/cases/pdf_individual/rheo.toml b/crates/tests/cases/pdf_individual/rheo.toml deleted file mode 100644 index c8a53544..00000000 --- a/crates/tests/cases/pdf_individual/rheo.toml +++ /dev/null @@ -1,2 +0,0 @@ - -formats = ["pdf"] diff --git a/crates/tests/cases/pdf_merge/chapter1.typ b/crates/tests/cases/pdf_merge/chapter1.typ deleted file mode 100644 index 02410790..00000000 --- a/crates/tests/cases/pdf_merge/chapter1.typ +++ /dev/null @@ -1,5 +0,0 @@ -= Chapter 1 - -First chapter content. - -See also #link("./chapter2.typ")[Chapter 2]. diff --git a/crates/tests/cases/pdf_merge/chapter2.typ b/crates/tests/cases/pdf_merge/chapter2.typ deleted file mode 100644 index 0cb7bf5c..00000000 --- a/crates/tests/cases/pdf_merge/chapter2.typ +++ /dev/null @@ -1,7 +0,0 @@ -= Chapter 2 - -Second chapter content. - -Refer back to #link("./chapter1.typ")[Chapter 1]. - -Jump to #link("./conclusion.typ")[Conclusion]. diff --git a/crates/tests/cases/pdf_merge/conclusion.typ b/crates/tests/cases/pdf_merge/conclusion.typ deleted file mode 100644 index 28e2cf20..00000000 --- a/crates/tests/cases/pdf_merge/conclusion.typ +++ /dev/null @@ -1,3 +0,0 @@ -= Conclusion - -Final thoughts. diff --git a/crates/tests/cases/pdf_merge/intro.typ b/crates/tests/cases/pdf_merge/intro.typ deleted file mode 100644 index f21fd347..00000000 --- a/crates/tests/cases/pdf_merge/intro.typ +++ /dev/null @@ -1,5 +0,0 @@ -= Introduction - -This is the introduction. - -Continue to #link("./chapter1.typ")[Chapter 1]. 
diff --git a/crates/tests/cases/pdf_merge/rheo.toml b/crates/tests/cases/pdf_merge/rheo.toml deleted file mode 100644 index b017c4da..00000000 --- a/crates/tests/cases/pdf_merge/rheo.toml +++ /dev/null @@ -1,5 +0,0 @@ - -[pdf.spine] -merge = true -vertebrae = ["intro.typ", "chapter*.typ", "conclusion.typ"] -title = "Test Merged Document" diff --git a/crates/tests/cases/pdf_merge_false/a.typ b/crates/tests/cases/pdf_merge_false/a.typ deleted file mode 100644 index 371d2ea1..00000000 --- a/crates/tests/cases/pdf_merge_false/a.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: "doc1") - -= A - -The first doc. diff --git a/crates/tests/cases/pdf_merge_false/b.typ b/crates/tests/cases/pdf_merge_false/b.typ deleted file mode 100644 index 05b0ba0f..00000000 --- a/crates/tests/cases/pdf_merge_false/b.typ +++ /dev/null @@ -1,6 +0,0 @@ -#set document(title: "B") - -= B - -THIS IS A DRAFT, DO NOT RENDER. - diff --git a/crates/tests/cases/pdf_merge_false/c.typ b/crates/tests/cases/pdf_merge_false/c.typ deleted file mode 100644 index f8931d9e..00000000 --- a/crates/tests/cases/pdf_merge_false/c.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: "doc2") - -= C - -The second doc. diff --git a/crates/tests/cases/pdf_merge_false/rheo.toml b/crates/tests/cases/pdf_merge_false/rheo.toml deleted file mode 100644 index d384e935..00000000 --- a/crates/tests/cases/pdf_merge_false/rheo.toml +++ /dev/null @@ -1,15 +0,0 @@ - -formats = ["pdf", "html"] - -[pdf.spine] -vertebrae = [ - "a.typ", - "c.typ" -] -merge = false - -[html.spine] -vertebrae = [ - "a.typ", - "c.typ" -] diff --git a/crates/tests/cases/pdf_spine_merge_false/file1.typ b/crates/tests/cases/pdf_spine_merge_false/file1.typ deleted file mode 100644 index 6e6e5e5e..00000000 --- a/crates/tests/cases/pdf_spine_merge_false/file1.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: [File 1]) - -= Chapter 1 - -This is the first file. 
diff --git a/crates/tests/cases/pdf_spine_merge_false/file2.typ b/crates/tests/cases/pdf_spine_merge_false/file2.typ deleted file mode 100644 index 4cec706b..00000000 --- a/crates/tests/cases/pdf_spine_merge_false/file2.typ +++ /dev/null @@ -1,5 +0,0 @@ -#set document(title: [File 2]) - -= Chapter 2 - -This is the second file. diff --git a/crates/tests/cases/pdf_spine_merge_false/rheo.toml b/crates/tests/cases/pdf_spine_merge_false/rheo.toml deleted file mode 100644 index 88e65786..00000000 --- a/crates/tests/cases/pdf_spine_merge_false/rheo.toml +++ /dev/null @@ -1,5 +0,0 @@ - -[pdf.spine] -title = "Individual PDFs" -vertebrae = ["file*.typ"] -merge = false diff --git a/crates/tests/cases/relative_path_links/rheo.toml b/crates/tests/cases/relative_path_links/rheo.toml deleted file mode 100644 index 16bf9466..00000000 --- a/crates/tests/cases/relative_path_links/rheo.toml +++ /dev/null @@ -1,7 +0,0 @@ - -formats = ["html", "pdf"] - -[pdf.spine] -merge = true -title = "Relative Path Test" -vertebrae = ["root.typ", "subdir/child.typ", "subdir/sibling.typ"] diff --git a/crates/tests/cases/relative_path_links/root.typ b/crates/tests/cases/relative_path_links/root.typ deleted file mode 100644 index 4ad9a58f..00000000 --- a/crates/tests/cases/relative_path_links/root.typ +++ /dev/null @@ -1,13 +0,0 @@ -= Root Document - -This is the root of the test project. - -== Links to Subdirectory - -See #link("subdir/child.typ")[the child document] in the subdir. - -Also check out #link("subdir/sibling.typ")[the sibling]. - -== More Content - -This tests that subdirectory paths transform correctly. diff --git a/crates/tests/cases/relative_path_links/subdir/child.typ b/crates/tests/cases/relative_path_links/subdir/child.typ deleted file mode 100644 index ed0437ad..00000000 --- a/crates/tests/cases/relative_path_links/subdir/child.typ +++ /dev/null @@ -1,15 +0,0 @@ -= Child Document - -This document is in a subdirectory. 
- -== Link to Parent Directory - -Go back to #link("../root.typ")[the root document]. - -== Link to Sibling (Explicit Same Dir) - -See #link("./sibling.typ")[the sibling] in the same directory. - -== Link to Sibling (Implicit Same Dir) - -Also see #link("sibling.typ")[the sibling again] with implicit path. diff --git a/crates/tests/cases/relative_path_links/subdir/sibling.typ b/crates/tests/cases/relative_path_links/subdir/sibling.typ deleted file mode 100644 index b1ad7a18..00000000 --- a/crates/tests/cases/relative_path_links/subdir/sibling.typ +++ /dev/null @@ -1,15 +0,0 @@ -= Sibling Document - -This is the sibling document in the subdirectory. - -== Link to Sibling - -Go to #link("child.typ")[the child document]. - -== Link to Parent Directory - -Return to #link("../root.typ")[root]. - -== Content - -Testing various relative path patterns. diff --git a/crates/tests/cases/script_injection/content/index.typ b/crates/tests/cases/script_injection/content/index.typ deleted file mode 100644 index d2fc3151..00000000 --- a/crates/tests/cases/script_injection/content/index.typ +++ /dev/null @@ -1,3 +0,0 @@ -= Test - -Hello world. 
diff --git a/crates/tests/cases/script_injection/index.js b/crates/tests/cases/script_injection/index.js deleted file mode 100644 index 044794b2..00000000 --- a/crates/tests/cases/script_injection/index.js +++ /dev/null @@ -1 +0,0 @@ -console.log("loaded"); diff --git a/crates/tests/cases/script_injection/rheo.toml b/crates/tests/cases/script_injection/rheo.toml deleted file mode 100644 index 8b863bf7..00000000 --- a/crates/tests/cases/script_injection/rheo.toml +++ /dev/null @@ -1,2 +0,0 @@ -version = "0.2.1" -formats = ["html"] diff --git a/crates/tests/cases/script_injection/style.css b/crates/tests/cases/script_injection/style.css deleted file mode 100644 index bc406f05..00000000 --- a/crates/tests/cases/script_injection/style.css +++ /dev/null @@ -1 +0,0 @@ -body { margin: 0; } diff --git a/crates/tests/cases/script_injection_no_css/content/index.typ b/crates/tests/cases/script_injection_no_css/content/index.typ deleted file mode 100644 index d2fc3151..00000000 --- a/crates/tests/cases/script_injection_no_css/content/index.typ +++ /dev/null @@ -1,3 +0,0 @@ -= Test - -Hello world. 
diff --git a/crates/tests/cases/script_injection_no_css/index.js b/crates/tests/cases/script_injection_no_css/index.js deleted file mode 100644 index 044794b2..00000000 --- a/crates/tests/cases/script_injection_no_css/index.js +++ /dev/null @@ -1 +0,0 @@ -console.log("loaded"); diff --git a/crates/tests/cases/script_injection_no_css/rheo.toml b/crates/tests/cases/script_injection_no_css/rheo.toml deleted file mode 100644 index 8b863bf7..00000000 --- a/crates/tests/cases/script_injection_no_css/rheo.toml +++ /dev/null @@ -1,2 +0,0 @@ -version = "0.2.1" -formats = ["html"] diff --git a/crates/tests/cases/target_function/main.typ b/crates/tests/cases/target_function/main.typ deleted file mode 100644 index 2781d5ee..00000000 --- a/crates/tests/cases/target_function/main.typ +++ /dev/null @@ -1,24 +0,0 @@ -// @rheo:test -// @rheo:formats html,pdf,epub -// @rheo:description Verifies target() function returns correct format string - -= Target Function Test - -This test verifies that the `target()` function returns format-specific values. 
- -#context { - let format = target() - [Current format: *#format*] -} - -== Conditional Content - -#context if target() == "html" { - [HTML-specific content: This appears only in HTML output] -} else if target() == "pdf" { - [PDF-specific content: This appears only in PDF output] -} else if target() == "epub" { - [EPUB-specific content: This appears only in EPUB output] -} else { - [Unknown format detected] -} diff --git a/crates/tests/cases/target_function/rheo.toml b/crates/tests/cases/target_function/rheo.toml deleted file mode 100644 index 5a3f09fd..00000000 --- a/crates/tests/cases/target_function/rheo.toml +++ /dev/null @@ -1,2 +0,0 @@ - -formats = ["html", "pdf", "epub"] diff --git a/crates/tests/cases/target_function_in_module/lib/format_helper.typ b/crates/tests/cases/target_function_in_module/lib/format_helper.typ deleted file mode 100644 index 7d6916cd..00000000 --- a/crates/tests/cases/target_function_in_module/lib/format_helper.typ +++ /dev/null @@ -1,19 +0,0 @@ -// Module that uses target() function -// Tests whether target() polyfill propagates to imported files - -#let get_format() = { - target() -} - -#let format_specific_content() = context { - let fmt = target() - if fmt == "epub" { - [Module: EPUB] - } else if fmt == "html" { - [Module: HTML] - } else if fmt == "pdf" { - [Module: PDF] - } else { - [Module: Unknown (#fmt)] - } -} diff --git a/crates/tests/cases/target_function_in_module/main.typ b/crates/tests/cases/target_function_in_module/main.typ deleted file mode 100644 index 11ab2600..00000000 --- a/crates/tests/cases/target_function_in_module/main.typ +++ /dev/null @@ -1,16 +0,0 @@ -// @rheo:test -// @rheo:formats html,pdf,epub -// @rheo:description Verifies target() works in imported modules - -#import "lib/format_helper.typ": get_format, format_specific_content - -= Target Function in Module - -== Main File -#context [Main: *#target()*] - -== Imported Module -#context [Module returns: *#get_format()*] - -== Module Conditional 
-#format_specific_content() diff --git a/crates/tests/cases/target_function_in_module/rheo.toml b/crates/tests/cases/target_function_in_module/rheo.toml deleted file mode 100644 index cf23ed7c..00000000 --- a/crates/tests/cases/target_function_in_module/rheo.toml +++ /dev/null @@ -1,5 +0,0 @@ -formats = ["html", "pdf", "epub"] - -[epub.spine] -title = "Target Function in Module" -vertebrae = ["main.typ"] diff --git a/crates/tests/cases/target_function_in_package/main.typ b/crates/tests/cases/target_function_in_package/main.typ deleted file mode 100644 index 1901929e..00000000 --- a/crates/tests/cases/target_function_in_package/main.typ +++ /dev/null @@ -1,35 +0,0 @@ -// @rheo:test -// @rheo:formats html,epub -// @rheo:description Tests target() polyfill vs packages using std.target() -// -// This test demonstrates rheo's target() polyfill: -// -// - User code using target() sees "epub" for EPUB output (via polyfill) -// - Universe packages that call std.target() see "html" (the underlying compile target) -// -// Why packages see "html": -// - EPUB compilation uses Typst's HTML export internally -// - Packages like bullseye explicitly call std.target() to get the "real" target -// - This is expected behavior - std.target() returns the underlying format -// -// For package authors: -// - Packages can adopt rheo's pattern to detect rheo output format -// - The pattern: `if "rheo-target" in sys.inputs { sys.inputs.rheo-target } else { target() }` -// - This provides graceful degradation when compiled outside rheo - -#import "@preview/bullseye:0.1.0": on-target - -= Target Function in Package - -== Using bullseye package - -// Expected: "html" in both HTML and EPUB modes (bullseye calls std.target()) -#context on-target( - html: [Package sees: *html*], - paged: [Package sees: *paged*], -) - -== Using target() - -// Expected: "html" for HTML, "epub" for EPUB (uses polyfill) -Main file target: #context [*#target()*] diff --git 
a/crates/tests/cases/target_function_in_package/rheo.toml b/crates/tests/cases/target_function_in_package/rheo.toml deleted file mode 100644 index 640c641d..00000000 --- a/crates/tests/cases/target_function_in_package/rheo.toml +++ /dev/null @@ -1,5 +0,0 @@ -formats = ["html", "epub"] - -[epub.spine] -title = "Target Function in Package" -vertebrae = ["main.typ"] diff --git a/crates/tests/ref/cases/pdf_merge/html/chapter1.html b/crates/tests/ref/cases/pdf_merge/html/chapter1.html deleted file mode 100644 index 70ec72c8..00000000 --- a/crates/tests/ref/cases/pdf_merge/html/chapter1.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - -

Chapter 1

-

First chapter content.

- - - \ No newline at end of file diff --git a/crates/tests/ref/cases/pdf_merge/html/chapter2.html b/crates/tests/ref/cases/pdf_merge/html/chapter2.html deleted file mode 100644 index 0a48b3e6..00000000 --- a/crates/tests/ref/cases/pdf_merge/html/chapter2.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - -

Chapter 2

-

Second chapter content.

- - - \ No newline at end of file diff --git a/crates/tests/ref/cases/pdf_merge/html/conclusion.html b/crates/tests/ref/cases/pdf_merge/html/conclusion.html deleted file mode 100644 index 9b59cecd..00000000 --- a/crates/tests/ref/cases/pdf_merge/html/conclusion.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - -

Conclusion

-

Final thoughts.

- - - \ No newline at end of file diff --git a/crates/tests/ref/cases/pdf_merge/html/intro.html b/crates/tests/ref/cases/pdf_merge/html/intro.html deleted file mode 100644 index d6b3f920..00000000 --- a/crates/tests/ref/cases/pdf_merge/html/intro.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - -

Introduction

-

This is the introduction.

- - - \ No newline at end of file diff --git a/crates/tests/ref/cases/pdf_merge/pdf/pdf_merge.metadata.json b/crates/tests/ref/cases/pdf_merge/pdf/pdf_merge.metadata.json deleted file mode 100644 index c2cdc11c..00000000 --- a/crates/tests/ref/cases/pdf_merge/pdf/pdf_merge.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 15556, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_post/epub/blog_post.metadata.json b/crates/tests/ref/examples/blog_post/epub/blog_post.metadata.json deleted file mode 100644 index dcbd846c..00000000 --- a/crates/tests/ref/examples/blog_post/epub/blog_post.metadata.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "filetype": "epub", - "file_size": 6720194, - "title": "Blog Post", - "language": "en", - "spine_files": [ - "portable_epubs.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_post/epub/xhtml/portable_epubs.xhtml b/crates/tests/ref/examples/blog_post/epub/xhtml/portable_epubs.xhtml deleted file mode 100644 index 936eda04..00000000 --- a/crates/tests/ref/examples/blog_post/epub/xhtml/portable_epubs.xhtml +++ /dev/null @@ -1,222 +0,0 @@ - - - - - - Portable EPUBs - -
-
-

Portable EPUBs

- Will CrichtonBrown UniversityJanuary 25, 2024Despite decades of advances in document rendering technology, most of the world’s documents are stuck in the 1990s due to the limitations of PDF. Yet, modern document formats like HTML have yet to provide a competitive alternative to PDF. This post explores what prevents HTML documents from being portable, and I propose a way forward based on the EPUB format. To demonstrate my ideas, this post is presented using a prototype EPUB reading system. -
-

The Good and Bad of PDF

-

PDF is the de facto file format for reading and sharing digital documents like papers, textbooks, and flyers. People use the PDF format for several reasons:

-
    -
  • -

    PDFs are self-contained. A PDF is a single file that contains all the images, fonts, and other data needed to render it. It’s easy to pass around a PDF. A PDF is unlikely to be missing some critical dependency on your computer.

    -
  • -
  • -

    PDFs are rendered consistently. A PDF specifies precisely how it should be rendered, so a PDF author can be confident that a reader will see the same document under any conditions.

    -
  • -
  • -

    PDFs are stable over time. PDFs from decades ago still render the same today. PDFs have a relatively stable standard. PDFs cannot be easily edited.

    -
  • -
-

Yet, in the 32 years since the initial release of PDF, a lot has changed. People print out documents less and less. People use phones, tablets, and e-readers to read digital documents. The internet happened; web browsers now provide a platform for rendering rich documents. These changes have laid bare the limitations of PDF:

-
    -
  • -

    PDFs cannot easily adapt to different screen sizes. Most PDFs are designed to mimic 8.5x11″ paper (or worse, 145,161 km2). These PDFs are readable on a computer monitor, but they are less readable on a tablet, and far less readable on a phone.

    -
  • -
  • -

    PDFs cannot be easily understood by programs. A plain PDF is just a scattered sequence of lines and characters. For accessibility, screen readers may not know which order to read through the text. For data extraction, scraping tables out of a PDF is an open area of research.

    -
  • -
  • -

    PDFs cannot easily express interaction. PDFs were primarily designed as static documents that cannot react to user input beyond filling in forms.

    -
  • -
-

These pros and cons can be traced back to one key fact: the PDF representation of a document is fundamentally unstructured. A PDF consists of commands like:

-
-
Move the cursor to the right by 0.5 inches.
Set the current font color to black.
Draw the text "Hello World" at the current position.
-
-

PDF commands are unstructured because a document’s organization is only clear to a person looking at the rendered document, and not clear from the commands themselves. Reflowing, accessibility, data extraction, and interaction all rely on programmatically understanding the structure of a document. Hence, these aspects are not easy to integrate with PDFs.

-

This raises the question: how can we design digital documents with the benefits of PDFs but without the limitations?

-

Can’t We Just Fix PDF?

-

A simple answer is to improve the PDF format. After all, we already have billions of PDFs — why reinvent the wheel?

-

The designers of PDF are well aware of its limitations. I carefully hedged each bullet with “easily”, because PDF does make it possible to overcome each limitation, at least partially. PDFs can be annotated with their logical structure to create a tagged PDF. Most PDF exporters will not add tags automatically — the simplest option is to use Adobe’s subscription-only Acrobat Pro, which provides an “Automatically tag PDF” action. For example, here is a recent paper of mine with added tags:

-
- -
Figure 1: A LaTeX-generated paper with automatically added tags.
-
-

If you squint, you can see that the logical structure closely resembles the HTML document model. The document has sections, headings, paragraphs, and links. Adobe characterizes the logical structure as an accessibility feature, but it has other benefits. You may be surprised to know that Adobe Acrobat allows you to reflow tagged PDFs at different screen sizes. You may be unsurprised to know that reflowing does not always work well. For example:

-
-
- -
Figure 3: A section of the paper in its default fixed layout. Note that the second paragraph is wrapped around the code snippet.
-
-
- -
Figure 4: The same section of the paper after reflowing to a smaller width. Note that the code is now interleaved with the second paragraph.
-
-
-

In theory, these issues could be fixed. If the world’s PDF exporters could be modified to include logical structure. If Adobe’s reflowing algorithm could be improved to fix its edge cases. If the reflowing algorithm could be specified, and if Adobe were willing to release it publicly, and if it were implemented in each PDF viewer. And that doesn’t even cover interaction! So in practice, I don’t think we can just fix the PDF format, at least within a reasonable time frame.

-

The Good and Bad of HTML

-

In the meantime, we already have a structured document format which can be flexibly and interactively rendered: HTML (and CSS and Javascript, but here just collectively referred to as HTML). The HTML format provides almost exactly the inverse advantages and disadvantages of PDF.

-
    -
  • HTML can more easily adapt to different screen sizes. Over the last 20 years, web developers and browser vendors have created a wide array of techniques for responsive design.
  • -
  • HTML can be more easily understood by a program. HTML provides both an inherent structure plus additional attributes to support accessibility tools.
  • -
  • HTML can more easily express interaction. People have used HTML to produce amazing interactive documents that would be impossible in PDF. Think: Distill.pub, Explorable Explanations, Bartosz Ciechanowski, and Bret Victor, just to name a few.
  • -
-

Again, these advantages are hedged with “more easily”. One can easily produce a convoluted or inaccessible HTML document. But on balance, these aspects are more true than not compared to PDF. However, HTML is lacking where PDF shines:

-
    -
  • HTML is not self-contained. HTML files may contain URL references to external files that may be hosted on a server. One can rarely download an HTML file and have it render correctly without an internet connection.
  • -
  • HTML is not always rendered consistently. HTML’s dynamic layout means that an author may not see the same document as a reader. Moreover, HTML layout is not fully specified, so browsers may differ in their implementation.
  • -
  • HTML is not fully stable over time. Browsers try to maintain backwards compatibility (come on and slam!), but the HTML format is still evolving. The HTML standard is a “living standard” due to the rapidly changing needs and feature sets of modern browsers.
  • -
-

So I’ve been thinking: how can we design HTML documents to gain the benefits of PDFs without losing the key strengths of HTML? The rest of this document will present some early prototypes and tentative proposals in this direction.

-

Self-Contained HTML with EPUB

-

First, how can we make HTML documents self-contained? This is an old problem with many potential solutions. WARC, webarchive, and MHTML are all file formats designed to contain all the resources needed to render a web page. But these formats are more designed for snapshotting an existing website, rather than serving as a single source of truth for a web document. From my research, the most sensible format for this purpose is EPUB.

-

EPUB is a “distribution and interchange format for digital publications and documents”, per the EPUB 3 Overview. Reductively, an EPUB is a ZIP archive of web files: HTML, CSS, JS, and assets like images and fonts. On a technical level, what distinguishes EPUB from archival formats is that EPUB includes well-specified files that describe metadata about a document. On a social level, EPUB appears to be the HTML publication format with the most adoption and momentum in 2024, compared to moribund formats like Mobi.

-

The EPUB spec has all the gory details, but to give you a rough sense, a sample EPUB might have the following file structure:

-
-
sample.epub
├── META-INF
│ └── container.xml
└── EPUB
├── package.opf
├── nav.xhtml
├── chapter1.xhtml
├── chapter2.xhtml
└── img
└── sample.jpg
-
-

An EPUB contains content documents (like chapter1.xhtml and chapter2.xhtml) which contain the core HTML content. Content documents can contain relative links to assets in the EPUB, like img/sample.jpg. The navigation document (nav.xhtml) provides a table of contents, and the package document (package.opf) provides metadata about the document. These files collectively define one “rendition” of the whole document, and the container file (container.xml) points to each rendition contained in the EPUB.

-

The EPUB format optimizes for machine-readable content and metadata. HTML content is required to be in XML format (hence, XHTML). Document metadata like the title and author is provided in structured form in the package document. The navigation document has a carefully prescribed tag structure so the TOC can be consistently extracted.

-

Overall, EPUB’s structured format makes it a solid candidate for a single-file HTML document container. However, EPUB is not a silver bullet. EPUB is quite permissive in what kinds of content can be put into a content document.

-

For example, a major issue for self-containment is that EPUB content can embed external assets. A content document can legally include an image or font file whose src is a URL to a hosted server. This is not hypothetical, either; as of the time of writing, Google Doc’s EPUB exporter will emit CSS that will @include external Google Fonts files. The problem is that such an EPUB will not render correctly without an internet connection, nor will it render correctly if Google changes the URLs of its font files.

-

Hence, I will propose a new format which I call a portable EPUB, which is an EPUB with additional requirements and recommendations to improve PDF-like portability. The first requirement is:

-
Local asset requirement: All assets (like images, scripts, and fonts) embedded in a content document of a portable EPUB must refer to local files included in the EPUB. Hyperlinks to external files are permissible.
-

Consistency vs. Flexibility in Rendering

-

There is a fundamental tension between consistency and flexibility in document rendering. A PDF is consistent because it is designed to render in one way: one layout, one choice of fonts, one choice of colors, one pagination, and so on. Consistency is desirable because an author can be confident that their document will look good for a reader (or at least, not look bad). Consistency has subtler benefits — because a PDF is chunked into a consistent set of pages, a passage can be cited by referring to the page containing the passage.

-

On the other hand, flexibility is desirable because people want to read documents under different conditions. Device conditions include screen size (from phone to monitor) and screen capabilities (E-ink vs. LCD). Some readers may prefer larger fonts or higher contrasts for visibility, alternative color schemes for color blindness, or alternative font faces for dyslexia. Sufficiently flexible documents can even permit readers to select a level of detail appropriate for their background (here’s an example).

-

Finding a balance between consistency and flexibility is arguably the most fundamental design challenge in attempting to replace PDF with EPUB. To navigate this trade-off, we first need to talk about EPUB reading systems, or the tools that render an EPUB for human consumption. To get a sense of variation between reading systems, I tried rendering this post as an EPUB (without any styling, just HTML) on four systems: Calibre, Adobe Digital Editions, Apple Books, and Amazon Kindle. This is how the first page looks on each system (omitting Calibre because it looked the same as Adobe Digital Editions):

-
-
- -
Figure 6: Adobe Digital Editions
-
-
- -
Figure 7: Apple Books
-
-
- -
Figure 8: Amazon Kindle
-
-
-

Calibre and Adobe Digital Editions both render the document in a plain web view, as if you opened the HTML file directly in the browser. Apple Books applies some styling, using the New York font by default and changing link decorations. Amazon Kindle increases the line height and also uses my Kindle’s globally-configured default font, Bookerly.

-

As you can see, an EPUB may look quite different on different reading systems. The variation displayed above seems reasonable to me. But how different is too different? For instance, I was recently reading A History of Writing on my Kindle. Here’s an example of how a figure in the book renders on the Kindle:

-
- -
Figure 9: A figure in the EPUB version of A History of Writing on my Kindle
-
-

When I read this page, I thought, “wow, this looks like crap.” The figure is way too small (although you can long-press the image and zoom), and the position of the figure seems nonsensical. I found a PDF version online, and indeed the PDF’s figure has a proper size in the right location:

-
- -
Figure 10: A figure in the PDF version of A History of Writing on my Mac
-
-

This is not a fully fair comparison, but it nonetheless exemplifies an author’s reasonable concern today with EPUB: what if it makes my document looks like crap?

-

Principles for Consistent EPUB Rendering

-

I think the core solution for consistently rendering EPUBs comes down to this:

-
    -
  1. The document format (i.e., portable EPUB) needs to establish a subset of HTML (call it “portable HTML”) which could represent most, but not all, documents.
  2. -
  3. Reading systems need to guarantee that a document within the subset will always look reasonable under all reading conditions.
  4. -
  5. If a document uses features outside this subset, then the document author is responsible for ensuring the readability of the document.
  6. -
-

If someone wants to write a document such as this post, then that person need not be a frontend web developer to feel confident that their document will render reasonably. Conversely, if someone wants to stuff the entire Facebook interface into an EPUB, then fine, but it’s on them to ensure the document is responsive.

-

For instance, one simple version of portable HTML could be described by this grammar:

-
-
Document ::= <article> Block* </article>
Block ::= <p> Inline* </p> | <figure> Block* </figure>
Inline ::= text | <strong> Inline* </strong>
-
-

The EPUB spec already defines a comparable subset for navigation documents. I am essentially proposing to extend this idea for content documents, but as a soft constraint rather than a hard constraint. Finding the right subset of HTML will take some experimentation, so I can only gesture toward the broad solution here.

-
Portable HTML rendering requirement: if a document only uses features in the portable HTML subset, then a portable EPUB reading system must guarantee that the document will render reasonably.
-
Portable HTML generation principle: when possible, systems that generate portable EPUB should output portable HTML.
-

A related challenge is to define when a particular rendering is “good” or “reasonable”, so one could evaluate either a document or a reading system on its conformance to spec. For instance, if document content is accidentally rendered in an inaccesible location off-screen, then that would be a bad rendering. A more aggressive definition might say that any rendering which violates accessibility guidelines is a bad rendering. Again, finding the right standard for rendering quality will take some experimentation.

-

If an author is particularly concerned about providing a single “canonical” rendering of their document, one fallback option is to provide a fixed-layout rendition. The EPUB format permits a rendition to specify that it should be rendered in fixed viewport size and optionally a fixed pagination. A fixed-layout rendition could then manually position all content on the page, similar to a PDF. Of course, this loses the flexibility of a reflowable rendition. But an EPUB could in theory provide multiple renditions, offering users the choice of whichever best suits their reading conditions and aesthetic preferences.

-
Fixed-layout fallback principle: systems that generate portable EPUB can consider providing both a reflowable and fixed-layout rendition of a document.
-

It’s possible that the reading system, the document author, and the reader can each express preferences about how a document should render. If these preferences are conflicting, then the renderer should generally prioritize the reader over the author, and the author over the reading system. This is an ideal use case for the “cascading” aspect of CSS:

-
Cascading styles principle: both documents and reading systems should express stylistic preferences (such as font face, font size, and document width) as CSS styles which can be overriden (e.g., do not use !important). The reading system should load the CSS rules such that the priority order is reading system styles < document styles < reader styles.
-

A Lighter EPUB Reading System

-

The act of working with PDFs is relatively fluid. I can download a PDF, quickly open it in a PDF reading system like Preview, and keep or discard the PDF as needed. But EPUB reading systems feel comparatively clunky. Loading an EPUB into Apple Books or Calibre will import the EPUB into the application’s library, which both copies and potentially decompresses the file. Loading an EPUB on a Kindle requires waiting several minutes for the Send to Kindle service to complete.

-

Worse, EPUB reading systems often don’t give you appropriate control over rendering an EPUB. For example, to emulate the experience of reading a book, most reading systems will chunk an EPUB into pages. A reader cannot scroll the document but rather “turn” the page, meaning textually-adjacent content can be split up between pages. Whether a document is paginated or scrolled should be a reader’s choice, but 3/4 reading systems I tested would only permit pagination (Calibre being the exception).

-

Therefore I decided to build a lighter EPUB reading system, Bene. You’re using it right now. This document is an EPUB — you can download it by clicking the button in the top-right corner. The styling and icons are mostly borrowed from pdf.js. Bene is implemented in Tauri, so it can work as both a desktop app and a browser app. Please appreciate this picture of Bene running as a desktop app:

-
- -
Figure 11: The Bene reading system running as a desktop app. Wow! It works!
-
-

Bene is designed to make opening and reading an EPUB feel fast and non-committal. The app is much quicker to open on my Macbook (<1sec) than other desktop apps. It decompresses files on-the-fly so no additional disk space is used. The backend is implemented in Rust and compiled to Wasm for the browser version.

-

The general design goal of Bene is to embody my ideals for a portable EPUB reader. That is, a utilitarian interface into an EPUB that satisfies my additional requirements for portability. Bene allows you to configure document rendering by changing the font size (try the +/- buttons in the top bar) and the viewer width (if you’re on desktop, move your mouse over the right edge of the document, and drag the handle). Long-term, I want Bene to also provide richer document interactions than a standard EPUB reader, which means we must discuss scripting.

-

Defensively Scripting EPUBs

-

To some people, the idea of code in their documents is unappealing. Last time one of my document-related projects was posted to Hacker News, the top comment was complaining about dynamic documents. The sentiment is understandable — concerns include:

-
    -
  • Bad code: your document shouldn’t crash or glitch due to a failure in a script.
  • -
  • Bad browsers: your document shouldn’t fail to render when a browser updates.
  • -
  • Bad actors: a malicious document shouldn’t be able to pwn your computer.
  • -
  • Bad interfaces: a script shouldn’t cause your document to become unreadable.
  • -
-

Yet, document scripting provides many opportunities for improving how we communicate information. For one example, if you haven’t yet, try hovering your mouse over any instance of the term portable EPUB (or long press it on a touch screen). You should see a tooltip appear with the term’s definition. The goal of these tooltips is to simplify reading a document that contains a lot of specialized notation or terminology. If you forget a definition, you can quickly look it up without having to jump around.

-

The key design challenge is how to permit useful scripting behaviors while limiting the downsides of scripting. One strategy is as follows:

-
Structure over scripts principle: documents should prefer structural annotations over scripts where possible. Documents should rely on reading systems to utilize structure where possible.
-

As an example of this principle, consider how the portable EPUB definition and references are expressed in this document:

-
-
-
<p><dfn-container>Hence, I will propose a new format which I call a <dfn id="portable-epub">portable EPUB</dfn>, which is an EPUB with additional requirements and recommendations to improve PDF-like portability.</dfn-container> The first requirement is:</p>
-
Listing 5: Creating a definition
-
-
-
For one example, if you haven't yet, try hovering your mouse over any instance of the term <a href="#portable-epub" data-target="dfn">portable EPUB</a> (or long press it on a touch screen).
-
Listing 6: Referencing a definition
-
-
-

The definition uses the <dfn> element wrapped in a custom <dfn-container> element to indicate the scope of the definition. The reference to the definition uses a standard anchor with an addition data-target attribute to emphasize that a definition is being linked. The document itself does not provide a script. The Bene reading system automatically detects these annotations and provides the tooltip interaction.

-

Encapsulating Scripts with Web Components

-

But what if a document wants to provide an interactive component that isn’t natively supported by the reading system? For instance, I have recently been working with The Rust Programming Language, a textbook that explains the different features of Rust. It contains a lot of passages like this one:

-
-
let x = 5;
let x = x + 1;
{
let x = x * 2;
println!("The value of x in the inner scope is: {x}");
}
println!("The value of x is: {x}");
}
-

This program first binds x to a value of 5. Then it creates a new variable x by repeating let x =, taking the original value and adding 1 so the value of x is then 6. Then, within an inner scope created with the curly brackets, the third let statement also shadows x and creates a new variable, multiplying the previous value by 2 to give x a value of 12. When that scope is over, the inner shadowing ends and x returns to being 6. When we run this program, it will output the following:

-
-

A challenge in reading this passage is finding the correspondences between the prose and the code. An interactive code reading component can help you track those correspondences, like this (try mousing-over or clicking-on each sentence):

-
-
fn main() { 
-  let x = 5
-  let x = x + 1
-  { 
-    let x = x * 2
-    println!(“The value of x in the inner scope is: {x}”);
-  }
-  println!(“The value of x is: {x}”);
-}
-

This program first binds x to a value of 5. Then it creates a new variable x by repeating let x =, taking the original value and adding 1 so the value of x is then 6. Then, within an inner scope created with the curly brackets, the third let statement also shadows x and creates a new variable, multiplying the previous value by 2 to give x a value of 12. When that scope is over, the inner shadowing ends and x returns to being 6.

-
-

The interactive code description component is used as follows:

-
-
<code-description>
<pre><code>fn main() {
let <span id="code-1">x</span> = <span id="code-2">5</span>;
<!-- rest of the code... -->
}</code></pre>
<p>
<code-step>This program first binds <a href="#code-1"><code>x</code></a> to a value of <a href="#code-2"><code>5</code></a>.</code-step>
<!-- rest of the prose... -->
</p>
</code-description>
-
-

Again, the document content contains no actual script. It contains a custom element <code-description>, and it contains a series of annotations as spans and anchors. The <code-description> element is implemented as a web component.

-

Web components are a programming model for writing encapsulated interactive fragments of HTML, CSS, and Javascript. Web components are one of many ways to write componentized HTML, such as React, Solid, Svelte, and Angular. I see web components as the most suitable as a framework for portable EPUBs because:

-
    -
  • Web components are a standardized technology. Its key features like custom elements (for specifying the behavior of novel elements) and shadow trees (for encapsulating a custom element from the rest of the document) are part of the official HTML and DOM specifications. This improves the likelihood that future browsers will maintain backwards compatibility with web components written today.
  • -
  • Web components are designed for tight encapsulation. The shadow tree mechanism ensures that styling applied within a custom component cannot accidentally affect other components on the page.
  • -
  • Web components have a decent ecosystem to leverage. As far as I can tell, web components are primarily used by Google, which has created notable frameworks like Lit.
  • -
  • Web components provide a clear fallback mechanism. If a renderer does not support Javascript, or if a renderer loses the ability to render web components, then an HTML renderer will simply ignore custom tags and render their contents.
  • -
-

Thus, I propose one principle and one requirement:

-
Encapsulated scripts principle: interactive components should be implemented as web components when possible, or otherwise be carefully designed to avoid conflicting with the base document or other components.
-
Components fallback requirement: interactive components must provide a fallback mechanism for rendering a reasonable substitute if Javascript is disabled.
-

Where To Go From Here?

-

Every time I have told someone “I want to replace PDF”, the statement has been met with extreme skepticism. Hopefully this document has convinced you that HTML-via-EPUB could potentially be a viable and desirable document format for the future.

-

My short-term goal is to implement a few more documents in the portable EPUB format, such as my PLDI paper. That will challenge both the file format and the reading system to be flexible enough to support each document type. In particular, each document should look good under a range of reading conditions (screen sizes, font sizes and faces, etc.).

-

My long-term goal is to design a document language that makes it easy to generate portable EPUBs. Writing XHTML by hand is not reasonable. I designed Nota before I was thinking about EPUBs, so its next iteration will be targeted at this new format.

-

If you have any thoughts about how to make this work or why I’m wrong, let me know by email or Twitter or Mastodon or wherever this gets posted. If you would like to help out, please reach out! This is just a passion project in my free time (for now…), so any programming or document authoring assistance could provide a lot of momentum to the project.

-

But What About…

-

A brief postscript for a few things I haven’t touched on.

-

…security? You might dislike the idea that document authors can run arbitrary Javascript on your personal computer. But then again, you presumably use both a PDF reader and a web browser on the daily, and those both run Javascript. What I’m proposing is not really any less secure than our current state of affairs. If anything, I’d hope that browsers are more battle-hardened than PDF viewers regarding code execution. Certainly the designers of EPUB reading systems should be careful to not give documents any additional capabilities beyond those already provided by the browser.

-

…privacy? Modern web sites use many kinds of telemetry and cookies to track user behavior. I strongly believe that EPUBs should not follow this trend. Telemetry must at least require the explicit consent of the user, and even that may be too generous. Companies will inevitably do things like offer discounts in exchange for requiring your consent to telemetry, similar to Amazon’s Kindle ads policy. Perhaps it is better to preempt this behavior by banning all tracking.

-

…aesthetics? People often intuit that LaTeX-generated PDFs look prettier than HTML documents, or even prettier than PDFs created by other software. This is because Donald Knuth took his job very seriously. In particular, the Knuth-Plass line-breaking algorithm tends to produce better-looking justified text than whatever algorithm is used by browsers.

-

There are two ways to make progress here. One is for browsers to provide more typography tools. Allegedly, text-wrap: pretty is supposed to help, but in my brief testing it doesn’t seem to improve line-break quality. The other way is to pre-calculate line breaks, which would only work for fixed-layout renditions.

-

…page citations? I think we just have to give up on citing content by pages. Instead, we should mandate a consistent numbering scheme for block elements within a document, and have people cite using that scheme. (Allison Morrell points out this is already the standard in the Canadian legal system.) For example, Bene will auto-number all blocks. If you’re on a desktop, try hovering your mouse in the left column next to the top-right of any paragraph.

-

…annotations? Ideally it should be as easy to mark up an EPUB as a PDF. The Web Annotations specification seems to be a good starting point for annotating EPUBs. Web Annotations seem designed for annotations on “targetable” objects, like a labeled element or a range of text. It’s not yet clear how to deal with free-hand annotations, especially on reflowable documents.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/blog_post/html/portable_epubs.html b/crates/tests/ref/examples/blog_post/html/portable_epubs.html deleted file mode 100644 index f3fec726..00000000 --- a/crates/tests/ref/examples/blog_post/html/portable_epubs.html +++ /dev/null @@ -1,220 +0,0 @@ - - - - Portable EPUBs - - -
-

Portable EPUBs

- Will CrichtonBrown UniversityJanuary 25, 2024Despite decades of advances in document rendering technology, most of the world’s documents are stuck in the 1990s due to the limitations of PDF. Yet, modern document formats like HTML have yet to provide a competitive alternative to PDF. This post explores what prevents HTML documents from being portable, and I propose a way forward based on the EPUB format. To demonstrate my ideas, this post is presented using a prototype EPUB reading system. -
-

The Good and Bad of PDF

-

PDF is the de facto file format for reading and sharing digital documents like papers, textbooks, and flyers. People use the PDF format for several reasons:

-
    -
  • -

    PDFs are self-contained. A PDF is a single file that contains all the images, fonts, and other data needed to render it. It’s easy to pass around a PDF. A PDF is unlikely to be missing some critical dependency on your computer.

    -
  • -
  • -

    PDFs are rendered consistently. A PDF specifies precisely how it should be rendered, so a PDF author can be confident that a reader will see the same document under any conditions.

    -
  • -
  • -

    PDFs are stable over time. PDFs from decades ago still render the same today. PDFs have a relatively stable standard. PDFs cannot be easily edited.

    -
  • -
-

Yet, in the 32 years since the initial release of PDF, a lot has changed. People print out documents less and less. People use phones, tablets, and e-readers to read digital documents. The internet happened; web browsers now provide a platform for rendering rich documents. These changes have laid bare the limitations of PDF:

-
    -
  • -

    PDFs cannot easily adapt to different screen sizes. Most PDFs are designed to mimic 8.5x11″ paper (or worse, 145,161 km2). These PDFs are readable on a computer monitor, but they are less readable on a tablet, and far less readable on a phone.

    -
  • -
  • -

    PDFs cannot be easily understood by programs. A plain PDF is just a scattered sequence of lines and characters. For accessibility, screen readers may not know which order to read through the text. For data extraction, scraping tables out of a PDF is an open area of research.

    -
  • -
  • -

    PDFs cannot easily express interaction. PDFs were primarily designed as static documents that cannot react to user input beyond filling in forms.

    -
  • -
-

These pros and cons can be traced back to one key fact: the PDF representation of a document is fundamentally unstructured. A PDF consists of commands like:

-
-
Move the cursor to the right by 0.5 inches.
Set the current font color to black.
Draw the text "Hello World" at the current position.
-
-

PDF commands are unstructured because a document’s organization is only clear to a person looking at the rendered document, and not clear from the commands themselves. Reflowing, accessibility, data extraction, and interaction all rely on programmatically understanding the structure of a document. Hence, these aspects are not easy to integrate with PDFs.

-

This raises the question: how can we design digital documents with the benefits of PDFs but without the limitations?

-

Can’t We Just Fix PDF?

-

A simple answer is to improve the PDF format. After all, we already have billions of PDFs — why reinvent the wheel?

-

The designers of PDF are well aware of its limitations. I carefully hedged each bullet with “easily”, because PDF does make it possible to overcome each limitation, at least partially. PDFs can be annotated with their logical structure to create a tagged PDF. Most PDF exporters will not add tags automatically — the simplest option is to use Adobe’s subscription-only Acrobat Pro, which provides an “Automatically tag PDF” action. For example, here is a recent paper of mine with added tags:

-
- -
Figure 1: A LaTeX-generated paper with automatically added tags.
-
-

If you squint, you can see that the logical structure closely resembles the HTML document model. The document has sections, headings, paragraphs, and links. Adobe characterizes the logical structure as an accessibility feature, but it has other benefits. You may be surprised to know that Adobe Acrobat allows you to reflow tagged PDFs at different screen sizes. You may be unsurprised to know that reflowing does not always work well. For example:

-
-
- -
Figure 3: A section of the paper in its default fixed layout. Note that the second paragraph is wrapped around the code snippet.
-
-
- -
Figure 4: The same section of the paper after reflowing to a smaller width. Note that the code is now interleaved with the second paragraph.
-
-
-

In theory, these issues could be fixed. If the world’s PDF exporters could be modified to include logical structure. If Adobe’s reflowing algorithm could be improved to fix its edge cases. If the reflowing algorithm could be specified, and if Adobe were willing to release it publicly, and if it were implemented in each PDF viewer. And that doesn’t even cover interaction! So in practice, I don’t think we can just fix the PDF format, at least within a reasonable time frame.

-

The Good and Bad of HTML

-

In the meantime, we already have a structured document format which can be flexibly and interactively rendered: HTML (and CSS and Javascript, but here just collectively referred to as HTML). The HTML format provides almost exactly the inverse advantages and disadvantages of PDF.

-
    -
  • HTML can more easily adapt to different screen sizes. Over the last 20 years, web developers and browser vendors have created a wide array of techniques for responsive design.
  • -
  • HTML can be more easily understood by a program. HTML provides both an inherent structure plus additional attributes to support accessibility tools.
  • -
  • HTML can more easily express interaction. People have used HTML to produce amazing interactive documents that would be impossible in PDF. Think: Distill.pub, Explorable Explanations, Bartosz Ciechanowski, and Bret Victor, just to name a few.
  • -
-

Again, these advantages are hedged with “more easily”. One can easily produce a convoluted or inaccessible HTML document. But on balance, these aspects are more true than not compared to PDF. However, HTML is lacking where PDF shines:

-
    -
  • HTML is not self-contained. HTML files may contain URL references to external files that may be hosted on a server. One can rarely download an HTML file and have it render correctly without an internet connection.
  • -
  • HTML is not always rendered consistently. HTML’s dynamic layout means that an author may not see the same document as a reader. Moreover, HTML layout is not fully specified, so browsers may differ in their implementation.
  • -
  • HTML is not fully stable over time. Browsers try to maintain backwards compatibility (come on and slam!), but the HTML format is still evolving. The HTML standard is a “living standard” due to the rapidly changing needs and feature sets of modern browsers.
  • -
-

So I’ve been thinking: how can we design HTML documents to gain the benefits of PDFs without losing the key strengths of HTML? The rest of this document will present some early prototypes and tentative proposals in this direction.

-

Self-Contained HTML with EPUB

-

First, how can we make HTML documents self-contained? This is an old problem with many potential solutions. WARC, webarchive, and MHTML are all file formats designed to contain all the resources needed to render a web page. But these formats are more designed for snapshotting an existing website, rather than serving as a single source of truth for a web document. From my research, the most sensible format for this purpose is EPUB.

-

EPUB is a “distribution and interchange format for digital publications and documents”, per the EPUB 3 Overview. Reductively, an EPUB is a ZIP archive of web files: HTML, CSS, JS, and assets like images and fonts. On a technical level, what distinguishes EPUB from archival formats is that EPUB includes well-specified files that describe metadata about a document. On a social level, EPUB appears to be the HTML publication format with the most adoption and momentum in 2024, compared to moribund formats like Mobi.

-

The EPUB spec has all the gory details, but to give you a rough sense, a sample EPUB might have the following file structure:

-
-
sample.epub
├── META-INF
│ └── container.xml
└── EPUB
├── package.opf
├── nav.xhtml
├── chapter1.xhtml
├── chapter2.xhtml
└── img
└── sample.jpg
-
-

An EPUB contains content documents (like chapter1.xhtml and chapter2.xhtml) which contain the core HTML content. Content documents can contain relative links to assets in the EPUB, like img/sample.jpg. The navigation document (nav.xhtml) provides a table of contents, and the package document (package.opf) provides metadata about the document. These files collectively define one “rendition” of the whole document, and the container file (container.xml) points to each rendition contained in the EPUB.

-

The EPUB format optimizes for machine-readable content and metadata. HTML content is required to be in XML format (hence, XHTML). Document metadata like the title and author is provided in structured form in the package document. The navigation document has a carefully prescribed tag structure so the TOC can be consistently extracted.

-

Overall, EPUB’s structured format makes it a solid candidate for a single-file HTML document container. However, EPUB is not a silver bullet. EPUB is quite permissive in what kinds of content can be put into a content document.

-

For example, a major issue for self-containment is that EPUB content can embed external assets. A content document can legally include an image or font file whose src is a URL to a hosted server. This is not hypothetical, either; as of the time of writing, Google Doc’s EPUB exporter will emit CSS that will @include external Google Fonts files. The problem is that such an EPUB will not render correctly without an internet connection, nor will it render correctly if Google changes the URLs of its font files.

-

Hence, I will propose a new format which I call a portable EPUB, which is an EPUB with additional requirements and recommendations to improve PDF-like portability. The first requirement is:

-
Local asset requirement: All assets (like images, scripts, and fonts) embedded in a content document of a portable EPUB must refer to local files included in the EPUB. Hyperlinks to external files are permissible.
-

Consistency vs. Flexibility in Rendering

-

There is a fundamental tension between consistency and flexibility in document rendering. A PDF is consistent because it is designed to render in one way: one layout, one choice of fonts, one choice of colors, one pagination, and so on. Consistency is desirable because an author can be confident that their document will look good for a reader (or at least, not look bad). Consistency has subtler benefits — because a PDF is chunked into a consistent set of pages, a passage can be cited by referring to the page containing the passage.

-

On the other hand, flexibility is desirable because people want to read documents under different conditions. Device conditions include screen size (from phone to monitor) and screen capabilities (E-ink vs. LCD). Some readers may prefer larger fonts or higher contrasts for visibility, alternative color schemes for color blindness, or alternative font faces for dyslexia. Sufficiently flexible documents can even permit readers to select a level of detail appropriate for their background (here’s an example).

-

Finding a balance between consistency and flexibility is arguably the most fundamental design challenge in attempting to replace PDF with EPUB. To navigate this trade-off, we first need to talk about EPUB reading systems, or the tools that render an EPUB for human consumption. To get a sense of variation between reading systems, I tried rendering this post as an EPUB (without any styling, just HTML) on four systems: Calibre, Adobe Digital Editions, Apple Books, and Amazon Kindle. This is how the first page looks on each system (omitting Calibre because it looked the same as Adobe Digital Editions):

-
-
- -
Figure 6: Adobe Digital Editions
-
-
- -
Figure 7: Apple Books
-
-
- -
Figure 8: Amazon Kindle
-
-
-

Calibre and Adobe Digital Editions both render the document in a plain web view, as if you opened the HTML file directly in the browser. Apple Books applies some styling, using the New York font by default and changing link decorations. Amazon Kindle increases the line height and also uses my Kindle’s globally-configured default font, Bookerly.

-

As you can see, an EPUB may look quite different on different reading systems. The variation displayed above seems reasonable to me. But how different is too different? For instance, I was recently reading A History of Writing on my Kindle. Here’s an example of how a figure in the book renders on the Kindle:

-
- -
Figure 9: A figure in the EPUB version of A History of Writing on my Kindle
-
-

When I read this page, I thought, “wow, this looks like crap.” The figure is way too small (although you can long-press the image and zoom), and the position of the figure seems nonsensical. I found a PDF version online, and indeed the PDF’s figure has a proper size in the right location:

-
- -
Figure 10: A figure in the PDF version of A History of Writing on my Mac
-
-

This is not a fully fair comparison, but it nonetheless exemplifies an author’s reasonable concern today with EPUB: what if it makes my document look like crap?

-

Principles for Consistent EPUB Rendering

-

I think the core solution for consistently rendering EPUBs comes down to this:

-
    -
  1. The document format (i.e., portable EPUB) needs to establish a subset of HTML (call it “portable HTML”) which could represent most, but not all, documents.
  2. -
  3. Reading systems need to guarantee that a document within the subset will always look reasonable under all reading conditions.
  4. -
  5. If a document uses features outside this subset, then the document author is responsible for ensuring the readability of the document.
  6. -
-

If someone wants to write a document such as this post, then that person need not be a frontend web developer to feel confident that their document will render reasonably. Conversely, if someone wants to stuff the entire Facebook interface into an EPUB, then fine, but it’s on them to ensure the document is responsive.

-

For instance, one simple version of portable HTML could be described by this grammar:

-
-
Document ::= <article> Block* </article>
Block ::= <p> Inline* </p> | <figure> Block* </figure>
Inline ::= text | <strong> Inline* </strong>
-
-

The EPUB spec already defines a comparable subset for navigation documents. I am essentially proposing to extend this idea for content documents, but as a soft constraint rather than a hard constraint. Finding the right subset of HTML will take some experimentation, so I can only gesture toward the broad solution here.

-
Portable HTML rendering requirement: if a document only uses features in the portable HTML subset, then a portable EPUB reading system must guarantee that the document will render reasonably.
-
Portable HTML generation principle: when possible, systems that generate portable EPUB should output portable HTML.
-

A related challenge is to define when a particular rendering is “good” or “reasonable”, so one could evaluate either a document or a reading system on its conformance to spec. For instance, if document content is accidentally rendered in an inaccessible location off-screen, then that would be a bad rendering. A more aggressive definition might say that any rendering which violates accessibility guidelines is a bad rendering. Again, finding the right standard for rendering quality will take some experimentation.

-

If an author is particularly concerned about providing a single “canonical” rendering of their document, one fallback option is to provide a fixed-layout rendition. The EPUB format permits a rendition to specify that it should be rendered in fixed viewport size and optionally a fixed pagination. A fixed-layout rendition could then manually position all content on the page, similar to a PDF. Of course, this loses the flexibility of a reflowable rendition. But an EPUB could in theory provide multiple renditions, offering users the choice of whichever best suits their reading conditions and aesthetic preferences.

-
Fixed-layout fallback principle: systems that generate portable EPUB can consider providing both a reflowable and fixed-layout rendition of a document.
-

It’s possible that the reading system, the document author, and the reader can each express preferences about how a document should render. If these preferences are conflicting, then the renderer should generally prioritize the reader over the author, and the author over the reading system. This is an ideal use case for the “cascading” aspect of CSS:

-
Cascading styles principle: both documents and reading systems should express stylistic preferences (such as font face, font size, and document width) as CSS styles which can be overridden (e.g., do not use !important). The reading system should load the CSS rules such that the priority order is reading system styles < document styles < reader styles.
-

A Lighter EPUB Reading System

-

The act of working with PDFs is relatively fluid. I can download a PDF, quickly open it in a PDF reading system like Preview, and keep or discard the PDF as needed. But EPUB reading systems feel comparatively clunky. Loading an EPUB into Apple Books or Calibre will import the EPUB into the application’s library, which both copies and potentially decompresses the file. Loading an EPUB on a Kindle requires waiting several minutes for the Send to Kindle service to complete.

-

Worse, EPUB reading systems often don’t give you appropriate control over rendering an EPUB. For example, to emulate the experience of reading a book, most reading systems will chunk an EPUB into pages. A reader cannot scroll the document but rather “turn” the page, meaning textually-adjacent content can be split up between pages. Whether a document is paginated or scrolled should be a reader’s choice, but 3/4 reading systems I tested would only permit pagination (Calibre being the exception).

-

Therefore I decided to build a lighter EPUB reading system, Bene. You’re using it right now. This document is an EPUB — you can download it by clicking the button in the top-right corner. The styling and icons are mostly borrowed from pdf.js. Bene is implemented in Tauri, so it can work as both a desktop app and a browser app. Please appreciate this picture of Bene running as a desktop app:

-
- -
Figure 11: The Bene reading system running as a desktop app. Wow! It works!
-
-

Bene is designed to make opening and reading an EPUB feel fast and non-committal. The app is much quicker to open on my Macbook (<1sec) than other desktop apps. It decompresses files on-the-fly so no additional disk space is used. The backend is implemented in Rust and compiled to Wasm for the browser version.

-

The general design goal of Bene is to embody my ideals for a portable EPUB reader. That is, a utilitarian interface into an EPUB that satisfies my additional requirements for portability. Bene allows you to configure document rendering by changing the font size (try the +/- buttons in the top bar) and the viewer width (if you’re on desktop, move your mouse over the right edge of the document, and drag the handle). Long-term, I want Bene to also provide richer document interactions than a standard EPUB reader, which means we must discuss scripting.

-

Defensively Scripting EPUBs

-

To some people, the idea of code in their documents is unappealing. Last time one of my document-related projects was posted to Hacker News, the top comment was complaining about dynamic documents. The sentiment is understandable — concerns include:

-
    -
  • Bad code: your document shouldn’t crash or glitch due to a failure in a script.
  • -
  • Bad browsers: your document shouldn’t fail to render when a browser updates.
  • -
  • Bad actors: a malicious document shouldn’t be able to pwn your computer.
  • -
  • Bad interfaces: a script shouldn’t cause your document to become unreadable.
  • -
-

Yet, document scripting provides many opportunities for improving how we communicate information. For one example, if you haven’t yet, try hovering your mouse over any instance of the term portable EPUB (or long press it on a touch screen). You should see a tooltip appear with the term’s definition. The goal of these tooltips is to simplify reading a document that contains a lot of specialized notation or terminology. If you forget a definition, you can quickly look it up without having to jump around.

-

The key design challenge is how to permit useful scripting behaviors while limiting the downsides of scripting. One strategy is as follows:

-
Structure over scripts principle: documents should prefer structural annotations over scripts where possible. Documents should rely on reading systems to utilize structure where possible.
-

As an example of this principle, consider how the portable EPUB definition and references are expressed in this document:

-
-
-
<p><dfn-container>Hence, I will propose a new format which I call a <dfn id="portable-epub">portable EPUB</dfn>, which is an EPUB with additional requirements and recommendations to improve PDF-like portability.</dfn-container> The first requirement is:</p>
-
Listing 5: Creating a definition
-
-
-
For one example, if you haven't yet, try hovering your mouse over any instance of the term <a href="#portable-epub" data-target="dfn">portable EPUB</a> (or long press it on a touch screen).
-
Listing 6: Referencing a definition
-
-
-

The definition uses the <dfn> element wrapped in a custom <dfn-container> element to indicate the scope of the definition. The reference to the definition uses a standard anchor with an additional data-target attribute to emphasize that a definition is being linked. The document itself does not provide a script. The Bene reading system automatically detects these annotations and provides the tooltip interaction.

-

Encapsulating Scripts with Web Components

-

But what if a document wants to provide an interactive component that isn’t natively supported by the reading system? For instance, I have recently been working with The Rust Programming Language, a textbook that explains the different features of Rust. It contains a lot of passages like this one:

-
-
let x = 5;
let x = x + 1;
{
let x = x * 2;
println!("The value of x in the inner scope is: {x}");
}
println!("The value of x is: {x}");
}
-

This program first binds x to a value of 5. Then it creates a new variable x by repeating let x =, taking the original value and adding 1 so the value of x is then 6. Then, within an inner scope created with the curly brackets, the third let statement also shadows x and creates a new variable, multiplying the previous value by 2 to give x a value of 12. When that scope is over, the inner shadowing ends and x returns to being 6. When we run this program, it will output the following:

-
-

A challenge in reading this passage is finding the correspondences between the prose and the code. An interactive code reading component can help you track those correspondences, like this (try mousing-over or clicking-on each sentence):

-
-
fn main() { 
-  let x = 5
-  let x = x + 1
-  { 
-    let x = x * 2
-    println!(“The value of x in the inner scope is: {x}”);
-  }
-  println!(“The value of x is: {x}”);
-}
-

This program first binds x to a value of 5. Then it creates a new variable x by repeating let x =, taking the original value and adding 1 so the value of x is then 6. Then, within an inner scope created with the curly brackets, the third let statement also shadows x and creates a new variable, multiplying the previous value by 2 to give x a value of 12. When that scope is over, the inner shadowing ends and x returns to being 6.

-
-

The interactive code description component is used as follows:

-
-
<code-description>
<pre><code>fn main() {
let <span id="code-1">x</span> = <span id="code-2">5</span>;
<!-- rest of the code... -->
}</code></pre>
<p>
<code-step>This program first binds <a href="#code-1"><code>x</code></a> to a value of <a href="#code-2"><code>5</code></a>.</code-step>
<!-- rest of the prose... -->
</p>
</code-description>
-
-

Again, the document content contains no actual script. It contains a custom element <code-description>, and it contains a series of annotations as spans and anchors. The <code-description> element is implemented as a web component.

-

Web components are a programming model for writing encapsulated interactive fragments of HTML, CSS, and Javascript. Web components are one of many ways to write componentized HTML, such as React, Solid, Svelte, and Angular. I see web components as the most suitable as a framework for portable EPUBs because:

-
    -
  • Web components are a standardized technology. Its key features like custom elements (for specifying the behavior of novel elements) and shadow trees (for encapsulating a custom element from the rest of the document) are part of the official HTML and DOM specifications. This improves the likelihood that future browsers will maintain backwards compatibility with web components written today.
  • -
  • Web components are designed for tight encapsulation. The shadow tree mechanism ensures that styling applied within a custom component cannot accidentally affect other components on the page.
  • -
  • Web components have a decent ecosystem to leverage. As far as I can tell, web components are primarily used by Google, which has created notable frameworks like Lit.
  • -
  • Web components provide a clear fallback mechanism. If a renderer does not support Javascript, or if a renderer loses the ability to render web components, then an HTML renderer will simply ignore custom tags and render their contents.
  • -
-

Thus, I propose one principle and one requirement:

-
Encapsulated scripts principle: interactive components should be implemented as web components when possible, or otherwise be carefully designed to avoid conflicting with the base document or other components.
-
Components fallback requirement: interactive components must provide a fallback mechanism for rendering a reasonable substitute if Javascript is disabled.
-

Where To Go From Here?

-

Every time I have told someone “I want to replace PDF”, the statement has been met with extreme skepticism. Hopefully this document has convinced you that HTML-via-EPUB could potentially be a viable and desirable document format for the future.

-

My short-term goal is to implement a few more documents in the portable EPUB format, such as my PLDI paper. That will challenge both the file format and the reading system to be flexible enough to support each document type. In particular, each document should look good under a range of reading conditions (screen sizes, font sizes and faces, etc.).

-

My long-term goal is to design a document language that makes it easy to generate portable EPUBs. Writing XHTML by hand is not reasonable. I designed Nota before I was thinking about EPUBs, so its next iteration will be targeted at this new format.

-

If you have any thoughts about how to make this work or why I’m wrong, let me know by email or Twitter or Mastodon or wherever this gets posted. If you would like to help out, please reach out! This is just a passion project in my free time (for now…), so any programming or document authoring assistance could provide a lot of momentum to the project.

-

But What About…

-

A brief postscript for a few things I haven’t touched on.

-

…security? You might dislike the idea that document authors can run arbitrary Javascript on your personal computer. But then again, you presumably use both a PDF reader and a web browser on the daily, and those both run Javascript. What I’m proposing is not really any less secure than our current state of affairs. If anything, I’d hope that browsers are more battle-hardened than PDF viewers regarding code execution. Certainly the designers of EPUB reading systems should be careful to not give documents any additional capabilities beyond those already provided by the browser.

-

…privacy? Modern web sites use many kinds of telemetry and cookies to track user behavior. I strongly believe that EPUBs should not follow this trend. Telemetry must at least require the explicit consent of the user, and even that may be too generous. Companies will inevitably do things like offer discounts in exchange for requiring your consent to telemetry, similar to Amazon’s Kindle ads policy. Perhaps it is better to preempt this behavior by banning all tracking.

-

…aesthetics? People often intuit that LaTeX-generated PDFs look prettier than HTML documents, or even prettier than PDFs created by other software. This is because Donald Knuth took his job very seriously. In particular, the Knuth-Plass line-breaking algorithm tends to produce better-looking justified text than whatever algorithm is used by browsers.

-

There are two ways to make progress here. One is for browsers to provide more typography tools. Allegedly, text-wrap: pretty is supposed to help, but in my brief testing it doesn’t seem to improve line-break quality. The other way is to pre-calculate line breaks, which would only work for fixed-layout renditions.

-

…page citations? I think we just have to give up on citing content by pages. Instead, we should mandate a consistent numbering scheme for block elements within a document, and have people cite using that scheme. (Allison Morrell points out this is already the standard in the Canadian legal system.) For example, Bene will auto-number all blocks. If you’re on a desktop, try hovering your mouse in the left column next to the top-right of any paragraph.

-

…annotations? Ideally it should be as easy to mark up an EPUB as a PDF. The Web Annotations specification seems to be a good starting point for annotating EPUBs. Web Annotations seem designed for annotations on “targetable” objects, like a labeled element or a range of text. It’s not yet clear how to deal with free-hand annotations, especially on reflowable documents.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_post/pdf/portable_epubs.metadata.json b/crates/tests/ref/examples/blog_post/pdf/portable_epubs.metadata.json deleted file mode 100644 index 50e46c09..00000000 --- a/crates/tests/ref/examples/blog_post/pdf/portable_epubs.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 5063041, - "page_count": 10 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/epub/blog_site.metadata.json b/crates/tests/ref/examples/blog_site/epub/blog_site.metadata.json deleted file mode 100644 index 898d3371..00000000 --- a/crates/tests/ref/examples/blog_site/epub/blog_site.metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "filetype": "epub", - "file_size": 4665620, - "title": "Screening the Subject | Severance", - "language": "en", - "spine_files": [ - "severance-ep-1.xhtml", - "severance-ep-2.xhtml", - "severance-ep-3.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-1.xhtml b/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-1.xhtml deleted file mode 100644 index 99d576b4..00000000 --- a/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-1.xhtml +++ /dev/null @@ -1,45 +0,0 @@ - - - - - - Good news about hell - Severance [s1/e1] - -
-

Good news about hell - Severance [s1/e1]

- -

The first thing to notice is the colour palette. She is dressed in blue, but her hair is chestnut red. It spills out from the frame of her figure into the table around it, blockaded at its border by chairs and a carpet clad in green, yellow, then green again; then gray. The establishing shot is a bird’s eye view of an unknown woman who is soon revealed to have been put in the board room by someone else’s design, who learns about her predicament only by a man’s voice that emanates from the little device that rests on the table along with the woman, arranged so that it aims directly at her head.

-

This opening image is a graph of the subject’s predicament on the severed floor at Lumon. Blue is the company colour. Employees are almost invariably dressed in shades of it– navy, midnight, Prussian, Oxford, cobalt– and more reliably so as we work our way up the hierarchy. Red is unruly passion, the tone of tempers that itch to tear off the straitjacket directives, to disregulate the business-as-usual in which there is no obvious place for illicit activities. Green is the accent of Macro Data Refinement, the division of Lumon in which the show’s protagonists are employed. The device directs a man’s voice at a woman’s body in an attempt to keep her tempers in check, to ensure her firecraft does not smoke out the staid edifice of personality management, to order her “perceptual chronologies” accordingly. (Later in the episode, we learn that she almost manages to “break in” on the control room during that opening sequence: the solidity of its enclosure is threatened from the very first.)

-

It is instructive to attempt to articulate the dynamics that this graph indexes before we start talking about other scenes in the show. Graphs are not at one with what they represent, for in the decision to render ‘data’ in the very act of a representation, we both lose and gain distinction of the dynamics in question. The voice that opens Helly R up to the world of Lumon’s severed floor begins: “Who are you?” This question is a mistake. We retroactively learn, in a later scene, that Mark S was in fact supposed to begin with a less interrogative, more perfunctory: “Hi there, you on the table. I wonder if you’d mind taking a brief survey.” As Irving puts it: “You [Mark S] skipped the preamble”. Helly R is thrust, by this accident, immediately into questioning not only herself, but also the self-assurance of the voice that interrogates her. Does this voice in my head [she could be thinking] really know what it is doing? Or is it just a role of similarly confused actors struggling to stick to a badly written script?

-

This episode-length recap of the first episode names this graph ‘the Helly incident’, a poorly executed orientation of Helly’s newfound subjectivity that can be blamed at one level on Mark S (for starting with the wrong part of the manual), at another on Mr. Milchick (for misguiding Mark while he was distracted setting up the visual feed), on Ms. Cobel (for giving Mark Petey K’s old manual without redacting his obscurely scribbled notes and paper bookmarks), or even on Irving (for neglecting to intervene and clarify how Mark should begin being the more senior refiner in the situation: “Irving will be there to shadow. Just stick to the flowchart and escalate properly depending on dialectics.”). Wherever to place blame, there is doubtless a misconfiguration that takes place. Helly’s instinctual reaction seems to be to try to kill the voice pointed at her head, rather than to befriend it as Mark states he did (where Petey was Mark). (Helly will eventually have sex with the source of the voice, rather than murdering or fraternizing with it.) In this episode, however, Mark (the voice’s source) is physically assaulted by Helly, dented in his temple by the same vocalization device that mediated their first communication.

- -

So this is the Macro Data refiner’s situation. On the one hand, she is affronted with a voice that compels her to abide by the rules and permits her to enjoy some small reliefs (egress from a locked room) if she concedes to it. On the other, she is always teeming and thus flirting with red, considering escape routes that involve drawing blood, setting off alarms, or removing clothes.

-

This unruly red is what Macro Data Refinement’s greening procedures are supposed to contain to produce a completely controlled and scripted blot of blue. Perhaps this is why the glimpse of the vacant desks planned for the severed floor’s expansion are draped in purple, for that shade of subjectivity would better incorporate the contrasting contours into a unified and taskable tone. The red that threatens Lumon’s corporate, calm, and collected blue (the Lumon logo is a water droplet that suspiciously resembles a camera) is splattered across scenes in the episode. It is, for example, the envelope that Petey slips Mark at the company-owned restaurant Pip’s with the suggestion that he should read it if he wants to know “what’s going on down there”. It is the sweater Mark wears to his sister’s dinnerless dinner party, punctuated by red place mats (“what a lot of people overlook, I think, is that life is not food”), where the ontological substance of his innie is called into question, and where we learn about the passions he has lost– the history of World War II, educating, whiskey– the last of which seems to have given way to an indiscriminate consumption of beer, wine, anything that will drown out the clarity of sober consciousness. It is the general hue of his sister’s house, which consistently wants him to question that placid blue of his company-subsidized housing at Baird Creek Manor.

-

This dinner tells us something more about the subject in question in Severance. Just as Helly’s outie had alerted us to the basic principle in the video her innie was shown in curiously lo-fi resolution to conclude her innie’s orientation– “perceptual chronologies… surgically split”– Mark’s predicament is comparably explained to him by another more or less ignorant (we can’t help but imagine) third party: “One’s memories are bifurcated, so when you’re at work, you have no recollection of what it is you do there.” As pretentious as they are, the dinner’s guests do seem to be attuned to an important dimension of the meaning of life, which is that it can’t only be about satiating biological needs such as food. What each individual ‘needs’ is a disharmonious melange of needs and demands, openings of desire that emerge not only through a graph of bare necessities– food, water, shelter– but also through capricious carapaces that emerge from more ambiguous pinings in the social sphere– company, care, love. The real question of Lumon’s smooth functioning is whether it will be able to effectively plug up these pinings, the incidental moments at work where one wonders what one is really doing with one’s life, whether the company can really manage its employees’ unsanctioned thoughts and the way in which those illicit ideas seep into the daily practice of their workerhood. More on the plasticity of our needs and drives to satisfy them in later posts.

- -

Ms. Cobel, in contrast to Helly’s and Mark’s doubtful and doubting red, is a stormy and icy blue. (We must wait until season two to uncover the historical and psychological depth of this colour for Harmony Cobel.) She is the figure with a body that seems to be the most in charge, of those we meet in this episode. Though Ms. Cobel is not a master in herself, it seems, for she too is subjected to a disembodied voice-via-device, ‘the board’, albeit which only appears evidently as an ear so far (“The board won’t be contributing to this meeting vocally”). Cobel is responsible for keeping the severed floor’s uncertainty in check, the ‘head’ that sits atop the variegated limbs of its disobedient body.

-

When Cobel reprimands Mark for his derailing of Helly’s orientation, she recalls an obscure and theological aspect of her parentage:

-
You know, my mother was an atheist. She used to say that there was good news and bad news about hell. The good news is, hell is just the product of a morbid human imagination. The bad news is, whatever humans can imagine, they can usually create.
-

At the close of the episode, just before Mark’s senile neighbor Mrs. Selvig (who we have only heard about through Mark’s voice thus far, when he is on the phone with her) visually reveals herself to be the same woman as Ms. Cobel, she gives a slightly different account of her heritage:

-
You know, my mother was a Catholic. She used to say it takes the saints eight hours to bless a sleeping child. I hope you aren’t rushing the saints.
-

It’s unclear at this point whether Cobel is a severed worker like Mark, or whether there is some other reason for her (strange, almost senseless) duplicity. Why lie about the religious leanings of one’s mother? Or maybe ‘mother’ is actually a name for something else, a kind of interim authority that gives synthetic weight to some hearsay, rumor, or idle phrase. (The other cameo of an ambiguously defined mother in this episode is in question five of Helly’s orientation survey: “To the best of your memory, what is or was the color of your mother’s eyes?”) Perhaps it is that, severed or not, atheist or Catholic, Cobel’s subjectivity is structured by a comparable split in her perceptual chronologies, whereby some memories (of her mother) get more airtime in her conscious experience of herself than others.

-

Severance flirts with this idea extensively, that the innie/outie dyad is analogous to the unconscious/conscious experience that we, as subjects, have of ourselves. Mark’s sister Devon hints at the psycho-logical reading of the severed condition in her diagnosis of Mark’s morose (outie) predicament as a state of failed therapy in response to mourning for his late wife: “I just feel like forgetting about her for eight hours a day isn’t the same thing as healing.” As with not-mothers and the plasticity of the drive, we will address the psychoanalytic implications here in later posts; but to finish I want to bring our attention to the imaging of time at work in just this first episode.

-

The fascinating details of failed synchronisation between all the watchfaces we see are enumerated in this Reddit thread. Many of the watch hands appear to be stalled, and the crossover from each to the next– as when Mark Scout switches his wrist watch in preparation for his elevator descent into the workday of innie Mark S– doesn’t match with our experience of the actors on screen. One of the few things we do know about the severance procedure is that it ‘alters perceptual chronologies’, and that this messing with a subject’s sense of time is thought to

-
    -
  1. make them more adequate or productive in a certain kind of work (for why else would Lumon go to the necessary lengths to sever some employees)
  2. -
  3. section off innie memories and experience from outie memories and experience
  4. -
-

So the subject’s subjectivity is marked by its sense of time, and Lumon’s success (profitability?) hinges in some way on altering their employees’ stable sense of it while in the space of the severed floor.

-

Mark S’s temporal predicament here has been explained by a man whose last name we get by speeding up the saying of his own, Karl Marx (Mar-k-S). Logically speaking, Marx argues, there is an amount of time that goes missing in the worker’s employment by way of a wage, when he advances some portion of his time to the capitalist in exchange for a pay-check one or more weeks later. I refer the reader interested in the details to chapter 20 of Capital Vol. I: but the essential point here is that it is through an obfuscation of the real value of a worker’s time that the capitalist manages to produce surplus-value. The production of this kind of time-distorted surplus-value is the engine of capitalism as a social relation that appears, on the surface, to be equally fair to capitalist and worker alike. So the project of controlling ‘perceptual chronologies’ with which Lumon seems to be so concerned is perhaps not as esoteric and inessential as it might at first seem. Perhaps it is an embodiment of the core ingredient of the company’s success as a company, of its incorporation as an entity that ought to be sustained even at the expense of its members’ happiness, their health, and their livelihoods.

-
-


-
- -
- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-2.xhtml b/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-2.xhtml deleted file mode 100644 index ea528789..00000000 --- a/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-2.xhtml +++ /dev/null @@ -1,50 +0,0 @@ - - - - - - Half Loop - Severance [s1/e2] - -
-

Half Loop - Severance [s1/e2]

-

In the first episode, we were introduced to the two-sided subject at Lumon. On the one hand, there is Mark S, the innie, who is screened for the first and major part of the episode. On the other, Mark Scout, the outie, to whose predicament we are introduced in the concluding scenes. S1E2 opens with a rewind on how innie Helly R came to be: how Milchick handed her flowers at the end of her first day (which we glimpsed in S1E1 when Mark almost ran her over), a glimpse of her confidence gliding into the operating room on a higher floor of the same Lumon complex we saw Mark leave, a stereoscopic view of the implant procedure by which she becomes an android whose existence is “spatially dictated” by Lumon’s mysterious machinations.

-

Lumon Industries

- -

Lumon is a corporate pastiche, and not only of technology companies. Lumon seems to have its hands in surgical hardware (the operating room equipment), digital technology (‘Macrodata Refinement’), and medicines and topical salves (as discussed at the dinner party in S1E1 - “What don't they make?”). It is a quintessentially American jack of all trades, a global power in its own right cohered by a family dynasty—the Eagans—recalling the Du Ponts or the Rockefellers.

-

The more obvious comparison to make, however, is between Lumon and Apple, perhaps in part because the show screens on Apple TV Plus. The style of the computers on the severed floor recalls the dawn of the era of personal computing in the 1970s and 80s, an aesthetic imaginary in which Apple plays an important role.

-

Indeed, the aura of Lumon as a futuristic computing corporation from the late 70s is reinforced by the fact that its headquarters are shot at Bell Labs in New Jersey, a building that has now been renovated as a mixed-use office for high-tech startup companies as Bell Works. Bell Labs is the quasi-mythological source in the contemporary corporate technology culture (Silicon Valley) of the idea that a certain kind of research freedom characterized by open-ended product delivery timelines and serendipitous encounters in open office plans can cultivate ground-breaking technology. (Mark Zuckerberg recommended a book on Bell Labs as one of his “important books” of 2015.) The irony of this setting, of course, both in Severance and in the technology companies it parodies in the American landscape in 2025, is that the workplace has never been more saturated with surveillance and micro-management. The overhead shot of Helly R that opened the series is indicative here again, as is the complementary overhead of MDR’s desks we get in this episode: there is always something watching from above, it seems, even if what it captures of the actual activity is a flattened and at times misrepresentative image.

-

There are also evocations of Microsoft and IBM in Lumon, such as the Clippy-like guide on the manual handed to Helly in the episode, or the apparent requirement of suits on the severed floor echoing IBM’s infamous strict dress code. Lumon is a melange of imaginary pasts, presents, and futures in American innovation. It is futuristic in the framing of its bio-technological project of perceptual management—and in the “data smuggling” detectors that are installed in the elevators to the severed floor, about which more soon—but retrofitted in its aesthetic, in its management style, and in its outdated repertoire of daily devices. Recall, for example, Milchick’s handheld camcorder, and the tube-activated (vacuum-tube?) camera he uses to snap the official photo of the new group of refiners.

- -

The overhead of Lumon Industries itself depicts a sketchy graph of a brain, one can’t help but think. Its upper floors all operate above board with normally conscious workers, whereas underground there is something sensitive enough happening so as to require extra precautions. In S1E1’s analysis, we introduced the idea that Lumon’s interest in severing workers has to do with the mechanics of capital, in that surplus value can only ever be produced (in Marx’s account) through the structural theft of time from its laborers.1 Lumon’s spatial layout suggests that there might also be a psychoanalytic metaphor at stake in severance as an operation, where the happenings that occur in the business brain’s basement are essential to what it really is, why it does what it does.

-

Though Freud’s theory has been popularized as a topographical notion, wherein the unconscious is the submerged part of the mind’s iceberg of which we only see the tip, there is good reason to believe that this spatial description misrepresents how the unconscious should be properly understood. Lacan thus preferred topological descriptors to suggest that, if the unconscious is a ‘place’ or ‘site’, it contradicts any over-simplistic understanding of spaces that are distinctly separable. The relationship between the conscious and the unconscious in a psychoanalytic theory of the subject, I would suggest, is better understood through the figure of a coin with two inseparable sides. The meaning of any one side (‘heads’) derives from the meaning of its opposite (‘tails’); and it is thus insensible to imagine separating one part from the other without repressing something fundamental about the structure of the subject as a whole.

-

Lumon, though, seems to want desperately to keep innies from being in contact with their outies. Indeed, the very project of severance seems to have something fundamental to do with managing repression effectively, with renovating the worker into a perfectly divided self that cannot complain about the conditions of her labor through the fact of not knowing anything about them. (When Mark is given a dinner coupon on account of his head injury in S1E1, the real cause of the scar—Helly R’s riotous attempt to escape the orientation room—is not revealed to outie Mark.) The subject in Severance is split and maintained as such. The ‘unconscious’ of one’s home life should not affect one’s ‘conscious’ ability to perform at work.

-

The vice-versa is also true. Outies cannot suffer the ‘unconscious’ of their innies, either. Mark Scout’s decision to sever himself seems to be an attempt to repress the devastating effect of his experience of his wife’s death for some part of the day, given that he admits he was unable to continue his job as a history teacher due to alcoholism. At Lumon, however, Mark’s alcoholism is brutally functional; as his innie must suffer what (lack of) energy he is given by outie Mark’s actions the night before (“I find it helps to focus on the effects of sleep since we don’t actually get to experience it”).

-

The intellectual impoverishment of Lumon’s severed workers is further exposed in this episode as Dylan tries to convey to Helly the substance of what there is to live for as a severed innie: his “embarrassment of wealth” that consists of finger traps, a caricature portrait, and the hope that there might be a “waffle party” on the horizon. The sad satisfactions that severed workers aspire to reinvigorate the sense of the phrase “wage slavery”, an important formulation that in fact has solid footing in Marx’s analysis of capital. For Marx, it is worth comparing the wage worker’s predicament to the slave’s; for both must labor not for themselves, for their own ends and aspirations, but for an external master that appropriates their efforts. The important distinction is that, while in actual slavery the slave’s enthrallment to the master is explicit and explicitly enforced by means of force, in wage slavery the figure of the master is more diffuse, and hierarchical distinctions are ‘justified’ in the discursive suggestion of their being fairly and freely established. The proletariat (wage laborer) is free to choose her own master on the market, selling her labor power to whomever she chooses. But she is not free to refuse to sell her labor as labor-power; as this “wage slavery” is the generalized means of her reproduction and ability to go on living. So the proletariat is enslaved to a structure, not a person, and that structure is characterized by the reduction of labor in its multifarious forms to labor-power, a measurement of labor in time that thus becomes exchangeable on the market. In capitalism, in other words, freedom is structurally reduced to the freedom to choose to whom one sells one’s labor-power: which is not the same thing as freedom tout court. Thus is the wage laborer unfree in a way that is comparable, though not equivalent, to the slave.

-

Death at Lumon

-

The death culture at Lumon should also be doubly refracted through Marx’s analysis of how capital reduces its workers to shadows of themselves on the one hand, and a psychoanalytic understanding of the subject on the other. When Mark gets emotional about Petey’s disappearance during the game of office introductions (which tellingly involves passing around a bright red ball), Milchick reprimands him with the following explanation:

-
I think this is a good time to remind ourselves that things like deaths happen outside of here. Not here. A life at Lumon is protected from such things. And I think a great potential response to that from all of you is gratitude.
-

Severed workers are insulated from death because the very structure of their subjectivity distances the meaning of its concept. Innies symbolically ‘die’ when their outies do not come back to work, but this event does not necessarily coincide with their physical death, which as Milchick suggests should only be imagined to take place in the world of their outies. There is a contradiction here, though, as a physical accident at work would propagate through to an innie’s outie. So Milchick’s repression of the notion of death must be recognized as just that: a repression of a certain moment in or dimension of logic (a moment that is too dangerous or frightening to imagine saying out loud), and not as an explication of the necessary consequences of a thorough logic of life.

-

Milchick’s philosophizing also points to something more sinister in the structure of the severed subject. The severed worker is protected from death, perhaps, because there is a sense in which he is already undead. Doomed to exist in the artificial enclosure of Lumon’s basement and placated only by the pathetic enjoyments of finger traps, company coffee, ideological art, and the odd waffle party, what is there, really, to live for at Lumon? The motto briefly shown on the implant hardware in Helly’s operating room scene at the episode’s opening has a morbid resonance here: “Don’t live to work. Work to live.”

-

There is a stronger psychoanalytic sense in which we might make sense of Milchick’s discourse on death that is worth mentioning here, too. Lacan articulates a distinction between two kinds of death in his theory of the subject, a first death that is biological and a second death that is symbolic. I will explicate this theory later in S1, when Milchick’s foreshadowing of death’s importance in the show bubbles clearly to the surface in a later episode.

-

Capturing and controlling the symbolic

-

Let’s talk about the “symbol detectors” in the elevators, which are introduced in this episode. These are the real basis of how Lumon separates innies from outies, as they supposedly ensure that no notes, no language, is passed between the two kinds of self. In S1E1, we saw outie Mark put the tissue he had been crying into in his car in his pocket; and we then saw innie Mark confidently strolling out of the elevator on the severed floor, quizzically discovering the tissue in his pocket, and tossing it into a bin on his walk down the hall to MDR. So the suggestion has already been planted in our (the viewers’) mind that it is possible to traffic objects across the boundary. The other clear evidence of this is offered here in S1E2, where Irving similarly, quizzically, observes the black sooty substance underneath his fingernails during the distraction of the melon party.

-

Yet Helly’s note to herself triggers the alarms, resulting in the elevator doors refusing to close and a screen washed out with red alert. So they do seem to have some power to detect ‘symbols’. But what marks the boundary between a symbol and a non-symbol for this technology? It is not only explicit language in the form of written or spoken words that make meaning for us as human animals. We are affected by a frightening range of other things; colors, tactile memories, qualities of our past selves that seep into our present (such as too much alcohol drunk the night before). So it is hard to imagine, knowing the complexities of our selves as we all do, that Lumon could really effectively police the boundary between innies and outies, even with its back-to-the-future technological prowess.

-

Indeed, the audio recording that innie/outie Petey shows outie Mark in his hideout at the greenhouse reveals the insecurity of symbol detection at Lumon. In order to get a recording of what he was subjected to in the Break Room, he must have been able to get that retro handheld device back up into the ‘real’ world. So either the elevators weren’t able to pick it up, or there is some other way for innies to move between the supposedly demarcated spaces. Either way, the symbol policing at the innie/outie border seems to have some shortcomings.

-

A brief note on Petey’s dishevelled greenhouse to conclude, as this episode is where we are first introduced to much of the geography that will become important in the series: the break room, wellness, MDR, optics and design, Mark’s basement, the company restaurant (where Mark has his insufferably awkward date), the elevator, the MDR kitchen, the operating room, the Lumon foyer. Petey’s greenhouse, like many of the spaces in Severance, is a graph that both embodies and reflects a psycho-social moment of the show. Green like Macrodata Refinement, but much less put-together, the greenhouse reveals the underside of Lumon’s apparent gleam, the unconscious damage that its project of perfection wreaks on its workers psychologically and physically. Petey shows us that the worker, like so many words and things in the show, is not simply what it seems, but consists also of an excess signification that inevitably creeps into its conspicuous comportment. Mark is a depressed drunkard on the outside, and Irving (it seems) has his fingers in some hellish kind of black pie, a color that takes over his desk as he dozes off when he lets the distinction between his waking and unconscious self slip, we might say, when the reality of sleep threatens the security of being awake. There is, as the imagery in the poster of the ‘Whole Mind Collective’ that motivates Mark to bunk off and follow up on Petey’s enigmatic red letter suggests, a real revolution of sorts brewing beneath the surface of a fantasy of symbolic control.

-
-


-
- -
-
-
    -
  1. 1Boštjan Nedoh has evocatively called this operation “theft without a thief”.
  2. -
-
- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-3.xhtml b/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-3.xhtml deleted file mode 100644 index 94ab97b6..00000000 --- a/crates/tests/ref/examples/blog_site/epub/xhtml/severance-ep-3.xhtml +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - In Perpetuity - Severance [s1/e3] - -
-

In Perpetuity - Severance [s1/e3]

- -

We need to talk about Ms. Cobel

-

As we noted in analysis of S1E1, she typically storms the screen with an icy blue, a temper (the significance of this word we shall unpack shortly) that seeks to quell the fiery red that flickers in and out of the consciousness of workers on the severed floor. The ominous ending to that first episode intimated that, while her wintery business has its office underground, it also warrants her prying into Mark’s outie’s personal life in Baird Creek’s subsidized Lumon housing. Indeed, it seems that Miss Cobel lives in Mark’s housing complex, too. From the state of her fridge, though, which we see in the foreground of a shot that implies surreptitious surveillance at work in her intimate space– a sense that has already been produced in Mark’s home with objects littered in the frame’s foreground– it doesn’t appear that she spends very much time making a home there. (Not too unlike Mark, perhaps.)

-

Ms. Cobel is a kaleidoscopic vector of strange femininity in the show. She is at once the old widow next door, a girl-boss superior on the severed floor, and a little girl prone to tantrums. As Mrs. Selvig, the hare-brained widow next door, she offers Mark unwanted company and cruddy cookies. Yet we know by now that this is apparently a ruse, a senile disguise through which the conniving Harmony Cobel can keep an eye on her employee, Mark, beyond the bounds of his time at work. At Baird Creek, she is a middle-aged executive in the clothing of an older and less cognitively composed character.

-

But even if Ms. Cobel is the ‘real’ Mrs. Selvig, there is still something anile about her character. She can be both commandeering and childish, as we see in her encounter with (innie) Mark S when he arrives unannounced at her office to request a kind of permission to take Helly to the perpetuity wing in S1E3. Commandeering, because she accosts Mark with bureaucratic demands in her role as his boss (“And have you filled out a common-reservation slip?”). Childish, because she literally throws a mug at him out of a petty frustration that is unbefitting of a mature manager. Cobel rationalizes her childish temper as follows:

-
What I just did was something I knew that you could handle and grow from. It was very painful for me. I hope that you’ll let it help you.
-

This outburst locates something undecided within Ms. Cobel, a moment in relation to Mark where she lets her personal anger supersede her role as his manager. This mug-throwing episode demonstrates that Cobel, too, is capable of breaking character as head of the severed floor and allowing some other aspect of her self to seep in, despite the pretense of a calm composure. The thrown-mug, in other words, is the wish fulfillment heralded by Cobel’s stunningly funny, inappropriate remark to Helly during her orientation in S1E1; “I’ve wanted to pummel Mark myself, but I am his employer.” Even Cobel, who is supposed to be more in charge of herself than the MDR employees who are her inferiors– her breaking into Mark’s house while he isn’t there implies that she is unsevered, and thus more ‘responsible’– harbours desires that exceed and contradict the prescribed role she is supposed to play.

-

The image of Cobel above confirms her as childish in some respects. Notice that here, at ‘home’, she wears her hair in pigtails rather than loosely around her shoulders. But it also paints her as a scopophilic and overbearing mother. Whatever she is doing creating excuses to talk to Mark’s outie as Mrs. Selvig, it becomes clear in this episode that there is a convoluted kind of care at stake in her creepy and overcurious work. Peering at him as he wanders up from the basement (Cobel doesn’t seem to know that Petey is also down there at this point, though her break-in later in the episode suggests that she suspects something is awry), she murmurs to herself, “Oh, Mark. Are you all right?”

-

This is a strange exhibition of affection, coming from the same woman who will throw a mug at Mark for his failure to “get MDR to its numbers” as department chief, who knowingly subjects him to the break room– which we observe on screen for the first time later in the episode– and who steals the book left by his brother-in-law as a gift at his doorstep. Despite these mistreatments, Cobel does still seem to hold some perverted penchant for and attachment to Mark. As HaxDogma notes in his review of this episode, it is hard to see Mark’s promotion to department chief after Petey disappears as anything other than a nepotistic appointment, given that Irving is clearly the more experienced refiner in a number of respects (orientation procedure, group photo protocol, number of years spent on the severed floor, to name a few). Cobel’s overinquisitive manner on display in this episode is perhaps best described as motherly, even as she is certainly not a paradigmatically good mother.

-

There is also something undoubtedly sexual about Cobel’s relationship to Mark. Her lingering at the door in S1E2 waiting to be invited in, her awkward and suggestive mention of her late husband’s building an apartment in the back of their abode in heaven “in case I found a new man before I got there”, her creating an excuse to talk to him by pretending to de-ice her stoop; and, naturally, her peeping at him through the window. She is either a stalker by-the-book, or (more charitably) a lonely woman who is searching for some missing satisfaction. Most likely, she is an inextricable concoction of the two. Cobel wants to have Mark’s cake and eat it too; to be at once his mother, his corporate superior, and (we can’t help but suspect) his lover. Like many put in positions of power, she has trouble setting her more inappropriate desires aside so as to simply ‘do her job’.

-

Primal father figures

-

Cobel’s mother energy is arguably muted and mixed up in her Sphinxesque triplicity. But the father energy on display in this episode is, by contrast, loudly and proudly pronounced in at least three different figures: Petey, Irving, and, of course, Kier Eagan.1 Before tackling these fathers one by one, it is instructive to straightforwardly and schematically lay out the Oedipus complex, an ‘absolute fiction’ that nonetheless, Freud claims, depicts something foundational about the graph of the speaking subject, the graph in which we took interest in our analysis of S1E1.

-

The Oedipus complex is so-named because it takes its architecture from the figure of Oedipus as he appears in the ancient Greek playwright Sophocles’ trilogy, which consists of the plays Oedipus Rex, Oedipus at Colonus, and Antigone. (Oedipus’ tragic tale is drawn from a mythology that predates these plays, but the story is nonetheless usually traced to its Sophoclean production.) Oedipus is well-known to students of psychoanalysis because of Freud’s making him into a complex, which is generally (mis)understood as ‘every person wants to kill their father and fuck their mother’. Famously, Oedipus killed his father– at a crossroads, thinking he was simply a threatening stranger at the time– and married his mother– not understanding that relation in the moment of the act, either.

-

Jacques Lacan rendered the Oedipus complex more philosophically significant than this overblown and crude Freudian telling. For Lacan, the Oedipus complex designates an abstract account of how desire is produced by the speaking subject in relation to the formative figures with which it is in relation. As he notes in his 1938 text, The Family Complexes:

-
our criticism since Freud presents this psychological entity [the Oedipus complex] as the specific form of the human family and subordinates all social variations of the family to it. (Lacan 2002, p.35)
-

The Oedipus complex is not so much a diagnosis of a particular perversion that is presumed universal, in the sense that everyone consciously suffers by repressing these secret dual desires to kill (my father) and to fuck (my mother). It is rather an important part of how he architects a philosophy of the subject’s relation to itself (and others) by way of a “triangular conflict” (Lacan 2002, p.41) between three figures: one’s self, the Mother, and the Father.

-

The Mother is the subject’s first known object that is seen as separable from one’s sense of self. We can imagine this through the process of weaning, of a mother teaching her baby that sustenance ought to be sought in solid foods rather than directly from her teat. Originally, a baby does not have a firm enough sense of itself to recognize that the Mother’s teat is separated from its own body. When it wants nourishment, it cries, and a breast brimming with milk appears (assuming a good mother, here). The breast seems almost part and parcel of the baby, from its perspective, as what reason does it have to think otherwise? (We are assuming here that the separation between a baby’s sense of its own body and the world is not ingrained at birth, but rather learned, acculturated.) It is only when the baby’s crying stops precipitating a breast that it should start to doubt this part of itself, to think that perhaps my Mother’s breast is not part of me as subject but rather its own kind of thing, a separate object. Thus the Mother is, in this developmental sense, the subject’s first proper object. The Mother (and her breast), the baby subject thinks, is both mine and not mine, as though there is some relation that my Mother has to me, she is not (quite) the same as me.

-

The Father, on the other hand, incorporates (into) the baby subject’s sense of self differently. It is not considered, as the Mother is, a part of the subject that was at some point taken away, but rather represents the source of that action of taking away. If the Mother ought (in the terms of the baby subject’s nascent ethics) to be a part of me, the Father is the force and figure responsible for taking her away. This stature of the Father is better understood, perhaps, with reference to the myth of the Primal Father, which Lacan reinterprets from its presentation in Freud as originally depicted in the fourth and final chapter of Totem and Taboo (Freud 1919). Like the Oedipus complex, the myth of the Primal Father is a narrativization that helps to understand the structure of the subject. Suppose a primal horde, Freud offers, at the helm of which exists a Primal Father who monopolizes all women. All women in the horde, in other words, are sexually subject to this single male; no other male gets to enjoy anything of them. A band of brothers, resentful of the Father’s monopoly on enjoyment, conspire to escape the ban on sexual enjoyment through a plot to murder him.2 They do so through what could be called an original jealousy, a feeling that the Father is enjoying in a way that is prohibited (by virtue of the Father’s taboo) for each of them.

-

Freud offers this as an “historic explanation… [of] the origin of incest” (Freud 1919, p.207), as the Primal Father’s taboo on enjoyment is what, Freud suggests, drives exogamy, wherein each of the band of brothers leaves that original tribe to start their own in which they can (finally) enjoy the women for themselves. That this is an historic explanation does not mean that Freud believes that it represents an actual state of affairs in some distant past. Indeed, he states the opposite, that “primal state of society has nowhere been observed.” (Freud 1919, p.233) The parable of the Primal Father is historic rather in the sense that it narrates to us an important aspect of the structure of the subject, much like Oedipus’ tragedy.

-

Daddy issues at work

-

Okay: we now return from this Freudian digression to the stuff of Severance. What bearing do the Oedipus complex and the myth of the Primal Father have on the structure of the subject on display in the show? Let’s go now to the scene in S1E3 at the crossroads, where MDR runs into two employees in Optics and Design (O&D).

- -

The composition of this shot puts the reflective axis down the center, and the encounter is suggestively Oedipean in its structure (at a crossroads, unknowing of the Other at play). Note that Irving is compositionally mirrored by Burt, played by Christopher Walken, and we will explore this suggestive symmetry in detail in later episodes. The two departments (MDR and O&D) know of each other, we surmise from the dialogue that follows. But Irving isn’t supposed to know Burt by name, as he accidentally happened upon him in S1E2 on the way to a Wellness session. (Burt was coming from his Wellness session.)

-

While Irving greets Burt on the back of this previous encounter with gentle and flirtatious warmth, Dylan’s hostility towards O&D is clear. In place of the camaraderie that one might have hoped for between the two factions given their shared plight as severed workers, there appears to be an enmity built on a mythology (what Irving calls an “absolute fiction”) of otherness:

-
Kier sorted the departments by virtue. Macrodats are clever and true, while O&D’s more cruelty-centered…. O&D tried a violent coup on the others decades ago, and that’s why they reduced them down to two. And that’s why they keep us all so far apart now.
-

Kier is evidently the Primal Father of the severed floor, responsible for instituting the symbolic system of rules, regulations, and affects in the various ‘bands of brothers’ which reside there. The tour of the perfect replica of Kier’s house later in the episode reinforces his architectural status as Primal Father. Irving chides Mark for his lack of reverence in deigning to turn the tour of the Perpetuity Wing into Eagan Bingo, and is aghast when he almost happens to “bed sit” on the facsimile in his duplicate chambers. (Thou shall not lie in Kier Eagan’s bed.) Kier and the lineage of Eagans more generally constitute the law of the father, the signifier of authority that keeps the severed floor’s social order intact, the symbolic source from which both rules and the forbidden temptations of their being broken, taboos, sprout. Irving fosters this authority during the tour, standing in for the absent caregivers, existential (Kier, the Eagans) and material (Cobel and Milchick as superintendents who seem to be letting the kids take care of themselves for a short period).

-

Another paternal authority whose absence has haunted and structured Mark since the show’s opening is Petey, the man whose shoes he stepped into as MDR’s department chief. As per his exchange with Cobel in the mug-throwing scene, Mark lionizes Petey as a tone-setter, often acting through an ethics refracted by the subordinate conjunctive, ‘if Petey were here’, or the preface ‘Petey used to say’. Mark’s innie is steered more by an imagined sense of what Petey would do, rather than what Kier would.

-

Thus while it is Cobel who is explicitly in charge, the spectral presence of these father figures– Kier, Petey, Irving– correlatively structures the subject on the severed floor. There is, in other words, an Oedipal triangular conflict at work in relation to the ethical imperative of a severed worker. The four members of MDR, as orientations to the structure of this subject, suffer different relationships to the positions of Mother and Father. Mark S is a momma’s boy, sired more by Petey’s radical rejection of company policy than by Kier. Dylan, though impertinent to the minutiae in the structure of Law at times, is ultimately his Father’s son, acquiring satisfaction by accumulating accolades, and apparently driven by the impending idea of another finger trap or a waffle party. Irving seems at this point the most mature of the children, looking reverentially to Kier. Yet recall that he has been chided by Milchick already for falling asleep on the job, so not all is perfect in paradise. Helly has no time for Cobel’s authority, yet we will see in due course that her relationship with a Father is a deep lineament in her personality, too.

-

Taming tempers

-

The count of four in the members of MDR mirrors the exact amount of tempers that we learn about from Kier Eagan’s wax simulacrum speaking during the tour of the Perpetuity Wing. These tempers are crucial as coordinates of the Eaganic attempt to coherently quantify the subject, and Kier’s pronouncement is deeply significant for our investigation of the subject’s distorted structure on the severed floor:

-
I know that death is near upon me, because people have begun to ask what I see as my life’s great achievement. They wish to know how they should remember me as I rot. In my life, I have identified four components, which I call tempers, from which are derived every human soul. Woe. Frolic. Dread. Malice. Each man’s character is defined by the precise ratio that resides in him. I walked into the cave of my own mind, and there I tamed them. Should you tame the tempers as I did mine, then the world shall become but your appendage. It is this great and consecrated power that I hope to pass on to all of you, my children.
-

If there was any doubt that Kier Eagan embodies the Freudian Primal Father, the foundational component of absolute fiction on which the edifice of Law (the rules and taboos by which a subject is bound to abide) is constructed, the quotation above should put it to bed. Kier’s ‘philosophy’ seeks to conquer death by quantifying life, sorting its myriadic nature into a “precise ratio” of character that can be counted (completely, it seems) in four distinct tempers. Indeed, we saw the pictorial representation of this taming in s1e2, in the scene where Irving meets Burt:

- -

In the post-Platonic cave of his own mind, Kier is the master of his passions. He admits no unconscious contours that sneak up on him unbeknownst in Freudian slips of the tongue or unwanted symptoms. Indeed, the Eaganesque fantasy of the subject is one in which the necessary excess of language that psychoanalysis discovered does not exist. Words are detected (via sensors in the elevator, say), controlled, managed. Any psychoanalytic excess is, in Kier’s project of a precisely rationalized subject, beaten out of language. Excess meaning is ‘tamed’ as if it were a wild animal by a clear-headed, upstanding, divinely radiant visionary. (As we will see, the position of primal power that Kier occupies here is sexually overbearing, too, as we might suspect from the Freudian analogy.)

-

This episode ends with two scenes depicting the dark and bloody underside of Kier’s waxen vision of the precisely quantified human subject. The first is Helly’s harrowing experience in the break room, a space where the unruly distance between words as they are uttered and the meaning they convey is thought to be stamped out, suffocated by the drudgery of debilitating repetition. A subject will not exceed its authorized symbolization, the break room seems to want to claim. The worker’s unconscious will be tamed and ultimately made beholden to a regime of conscious rationality. The second, and the closing scene of the episode, is Petey’s psychotic demise at the convenience store, where he yells at wit’s end: “I need tokens so I can eat!” Ravaged by the failure of his complete quantification inside Lumon, Petey seems no longer to have a firm footing in either his innie’s or outie’s reality. Mark looks on from a distance as he collapses outside the store, escorted by police, attempting (it seems) to account for his disintegration.

-
-

Bibliography

-
    -
  • Freud, Sigmund. 1919. “Totem and Taboo: Resemblances Between the Psychic Lives of Savages and Neurotics.” Translated by A.A Brill. Moffat, Yard and Company 50 (1): 94–95.
  • -
  • Lacan, Jacques. 2002. “Family Complexes in the Formation of the Individual.” Antony Rowe London,.
  • -
  • McGowan, Todd. 2021. “The Distribution of Enjoyment.” European Journal of Psychoanalysis 8 (1).
  • -
-
-
-


-
- -
-
-
    -
  1. 1 There is foreshadowing, too, of a fourth father figure in Rickon, Mark’s brother-in-law. While reading his confiscated book, Milchick quietly remarks to himself a thought that will become an important refrain for many other characters with respect to Rickon later in the season: “This is… Jesus.”
  2. -
  3. 2 There has been much written on Freud’s mythos of the Primal Father. For a relatively recent use of the concept that serves as a reasonable introduction to Lacan’s reading of Totem and Taboo, see (McGowan 2021).
  4. -
-
- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/html/index.html b/crates/tests/ref/examples/blog_site/html/index.html deleted file mode 100644 index 0f598725..00000000 --- a/crates/tests/ref/examples/blog_site/html/index.html +++ /dev/null @@ -1,24 +0,0 @@ - - - - - -

Screening the subject

-

Screening the subject is a blog that analyses content on both the big and small screen in reasonable detail, i.e. episode-by-episode or scene-by-scene. Contact us at info@ohrg.org for enquiries.

- -
-


-
- -
- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/html/severance-ep-1.html b/crates/tests/ref/examples/blog_site/html/severance-ep-1.html deleted file mode 100644 index c10fc107..00000000 --- a/crates/tests/ref/examples/blog_site/html/severance-ep-1.html +++ /dev/null @@ -1,43 +0,0 @@ - - - - Good news about hell - Severance [s1/e1] - - -

Good news about hell - Severance [s1/e1]

- -

The first thing to notice is the colour palette. She is dressed in blue, but her hair is chestnut red. It spills out from the frame of her figure into the table around it, blockaded at its border by chairs and a carpet clad in green, yellow, then green again; then gray. The establishing shot is a bird’s eye view of an unknown woman who is soon revealed to have been put in the board room by someone else’s design, who learns about her predicament only by a man’s voice that emanates from the little device that rests on the table along with the woman, arranged so that it aims directly at her head.

-

This opening image is a graph of the subject’s predicament on the severed floor at Lumon. Blue is the company colour. Employees are almost invariably dressed in shades of it– navy, midnight, Prussian, Oxford, cobalt– and more reliably so as we work our way up the hierarchy. Red is unruly passion, the tone of tempers that itch to tear off the straitjacket directives, to disregulate the business-as-usual in which there is no obvious place for illicit activities. Green is the accent of Macro Data Refinement, the division of Lumon in which the show’s protagonists are employed. The device directs a man’s voice at a woman’s body in an attempt to keep her tempers in check, to ensure her firecraft does not smoke out the staid edifice of personality management, to order her “perceptual chronologies” accordingly. (Later in the episode, we learn that she almost manages to “break in” on the control room during that opening sequence: the solidity of its enclosure is threatened from the very first.)

-

It is instructive to attempt to articulate the dynamics that this graph indexes before we start talking about other scenes in the show. Graphs are not at one with what they represent, for in the decision to render ‘data’ in the very act of a representation, we both lose and gain distinction of the dynamics in question. The voice that opens Helly R up to the world of Lumon’s severed floor begins: “Who are you?” This question is a mistake. We retroactively learn, in a later scene, that Mark S was in fact supposed to begin with a less interrogative, more perfunctory: “Hi there, you on the table. I wonder if you’d mind taking a brief survey.” As Irving puts it: “You [Mark S] skipped the preamble”. Helly R is thrust, by this accident, immediately into questioning not only herself, but also the self-assurance of the voice that interrogates her. Does this voice in my head [she could be thinking] really know what it is doing? Or is it just a role of similarly confused actors struggling to stick to a badly written script?

-

This episode-length recap of the first episode names this graph ‘the Helly incident’, a poorly executed orientation of Helly’s newfound subjectivity that can be blamed at one level on Mark S (for starting with the wrong part of the manual), at another on Mr. Milchick (for misguiding Mark while he was distracted setting up the visual feed), on Ms. Cobel (for giving Mark Petey K’s old manual without redacting his obscurely scribbled notes and paper bookmarks), or even on Irving (for neglecting to intervene and clarify how Mark should begin being the more senior refiner in the situation: “Irving will be there to shadow. Just stick to the flowchart and escalate properly depending on dialectics.”). Wherever to place blame, there is doubtless a misconfiguration that takes place. Helly’s instinctual reaction seems to be to try to kill the voice pointed at her head, rather than to befriend it as Mark states he did (where Petey was Mark). (Helly will eventually have sex with the source of the voice, rather than murdering or fraternizing with it.) In this episode, however, Mark (the voice’s source) is physically assaulted by Helly, dented in his temple by the same vocalization device that mediated their first communication.

- -

So this is the Macro Data refiner’s situation. On the one hand, she is affronted with a voice that compels her to abide by the rules and permits her to enjoy some small reliefs (egress from a locked room) if she concedes to it. On the other, she is always teeming and thus flirting with red, considering escape routes that involve drawing blood, setting off alarms, or removing clothes.

-

This unruly red is what Macro Data Refinement’s greening procedures are supposed to contain to produce a completely controlled and scripted blot of blue. Perhaps this is why the glimpse of the vacant desks planned for the severed floor’s expansion are draped in purple, for that shade of subjectivity would better incorporate the contrasting contours into a unified and taskable tone. The red that threatens Lumon’s corporate, calm, and collected blue (the Lumon logo is a water droplet that suspiciously resembles a camera) is splattered across scenes in the episode. It is, for example, the envelope that Petey slips Mark at the company-owned restaurant Pip’s with the suggestion that he should read it if he wants to know “what’s going on down there”. It is the sweater Mark wears to his sister’s dinnerless dinner party, punctuated by red place mats (“what a lot of people overlook, I think, is that life is not food”), where the ontological substance of his innie is called into question, and where we learn about the passions he has lost– the history of World War II, educating, whiskey– the last of which seems to have given way to an indiscriminate consumption of beer, wine, anything that will drown out the clarity of sober consciousness. It is the general hue of his sister’s house, which consistently wants him to question that placid blue of his company-subsidized housing at Baird Creek Manor.

-

This dinner tells us something more about the subject in question in Severance. Just as Helly’s outie had alerted us to the basic principle in the video her innie was shown in curiously lo-fi resolution to conclude her innie’s orientation– “perceptual chronologies… surgically split”– Mark’s predicament is comparably explained to him by another more or less ignorant (we can’t help but imagine) third party: “One’s memories are bifurcated, so when you’re at work, you have no recollection of what it is you do there.” As pretentious as they are, the dinner’s guests do seem to be attuned to an important dimension of the meaning of life, which is that it can’t only be about satiating biological needs such as food. What each individual ‘needs’ is a disharmonious melange of needs and demands, openings of desire that emerge not only through a graph of bare necessities– food, water, shelter– but also through capricious carapaces that emerge from more ambiguous pinings in the social sphere– company, care, love. The real question of Lumon’s smooth functioning is whether it will be able to effectively plug up these pinings, the incidental moments at work where one wonders what one is really doing with one’s life, whether the company can really manage its employees’ unsanctioned thoughts and the way in which those illicit ideas seep into the daily practice of their workerhood. More on the plasticity of our needs and drives to satisfy them in later posts.

- -

Ms. Cobel, in contrast to Helly’s and Mark’s doubtful and doubting red, is a stormy and icy blue. (We must wait until season two to uncover the historical and psychological depth of this colour for Harmony Cobel.) She is the figure with a body that seems to be the most in charge, of those we meet in this episode. Though Ms. Cobel is not a master in herself, it seems, for she too is subjected to a disembodied voice-via-device, ‘the board’, albeit which only appears evidently as an ear so far (“The board won’t be contributing to this meeting vocally”). Cobel is responsible for keeping the severed floor’s uncertainty in check, the ‘head’ that sits atop the variegated limbs of its disobedient body.

-

When Cobel reprimands Mark for his derailing of Helly’s orientation, she recalls an obscure and theological aspect of her parentage:

-
You know, my mother was an atheist. She used to say that there was good news and bad news about hell. The good news is, hell is just the product of a morbid human imagination. The bad news is, whatever humans can imagine, they can usually create.
-

At the close of the episode, just before Mark’s senile neighbor Mrs. Selvig (who we have only heard about through Mark’s voice thus far, when he is on the phone with her) visually reveals herself to be the same woman as Ms. Cobel, she gives a slightly different account of her heritage:

-
You know, my mother was a Catholic. She used to say it takes the saints eight hours to bless a sleeping child. I hope you aren’t rushing the saints.
-

It’s unclear at this point whether Cobel is a severed worker like Mark, or whether there is some other reason for her (strange, almost senseless) duplicity. Why lie about the religious leanings of one’s mother? Or maybe ‘mother’ is actually a name for something else, a kind of interim authority that gives synthetic weight to some hearsay, rumor, or idle phrase. (The other cameo of an ambiguously defined mother in this episode is in question five of Helly’s orientation survey: “To the best of your memory, what is or was the color of your mother’s eyes?”) Perhaps it is that, severed or not, atheist or Catholic, Cobel’s subjectivity is structured by a comparable split in her perceptual chronologies, whereby some memories (of her mother) get more airtime in her conscious experience of herself than others.

-

Severance flirts with this idea extensively, that the innie/outie dyad is analogous to the unconscious/conscious experience that we, as subjects, have of ourselves. Mark’s sister Devon hints at the psycho-logical reading of the severed condition in her diagnosis of Mark’s morose (outie) predicament as a state of failed therapy in response to mourning for his late wife: “I just feel like forgetting about her for eight hours a day isn’t the same thing as healing.” As with not-mothers and the plasticity of the drive, we will address the psychoanalytic implications here in later posts; but to finish I want to bring our attention to the imaging of time at work in just this first episode.

-

The fascinating details of failed synchronisation between all the watchfaces we see are enumerated in this Reddit thread. Many of the watch hands appear to be stalled, and the crossover from each to the next– as when Mark Scout switches his wrist watch in preparation for his elevator descent into the workday of innie Mark S– doesn’t match with our experience of the actors on screen. One of the few things we do know about the severance procedure is that it ‘alters perceptual chronologies’, and that this messing with a subject’s sense of time is thought to

-
    -
  1. make them more adequate or productive in a certain kind of work (for why else would Lumon go to the necessary lengths to sever some employees)
  2. -
  3. supposedly section off innie memories and experience from outie memories and experience
  4. -
-

So the subject’s subjectivity is marked by its sense of time, and Lumon’s success (profitability?) hinges in some way on altering their employees’ stable sense of it while in the space of the severed floor.

-

Mark S’s temporal predicament here has been explained by a man whose last name we get by speeding up the saying of his own, Karl Marx (Mar-k-S). Logically speaking, Marx argues, there is an amount of time that goes missing in the worker’s employment by way of a wage, when he advances some portion of his time to the capitalist in exchange for a pay-check one or more weeks later. I refer the reader interested in the details to chapter 20 of Capital Vol. I: but the essential point here is that it is through an obfuscation of the real value of a worker’s time that the capitalist manages to produce surplus-value. The production of this kind of time-distorted surplus-value is the engine of capitalism as a social relation that appears, on the surface, to be equally fair to capitalist and worker alike. So the project of controlling ‘perceptual chronologies’ with which Lumon seems to be so concerned is perhaps not as esoteric and inessential as it might at first seem. Perhaps it is an embodiment of the core ingredient of the company’s success as a company, of its incorporation as an entity that ought to be sustained even at the expense of its members’ happiness, their health, and their livelihoods.

-
-


-
- -
- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/html/severance-ep-2.html b/crates/tests/ref/examples/blog_site/html/severance-ep-2.html deleted file mode 100644 index 86f60aed..00000000 --- a/crates/tests/ref/examples/blog_site/html/severance-ep-2.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - Half Loop - Severance [s1/e2] - - -

Half Loop - Severance [s1/e2]

-

In the first episode, we were introduced to the two-sided subject at Lumon. On the one hand, there is Mark S, the innie, who is screened for the first and major part of the episode. On the other, Mark Scout, the outie, to whose predicament we are introduced in the concluding scenes. S1E2 opens with a rewind on how innie Helly R came to be: how Milchick handed her flowers at the end of her first day (which we glimpsed in S1E1 when Mark almost ran her over), a glimpse of her confidence gliding into the operating room on a higher floor of the same Lumon complex we saw Mark leave, a stereoscopic view of the implant procedure by which she becomes an android whose existence is “spatially dictated” by Lumon’s mysterious machinations.

-

Lumon Industries

- -

Lumon is a corporate pastiche, and not only of technology companies. Lumon seems to have its hands in surgical hardware (the operating room equipment), digital technology (‘Macrodata Refinement’), and medicines and topical salves (as discussed at the dinner party in S1E1 - “What don't they make?”). It is a quintessentially American jack of all trades, a global power in its own right cohered by a family dynasty—the Eagans—recalling the Du Ponts or the Rockefellers.

-

The more obvious comparison to make, however, is between Lumon and Apple, perhaps in part because the show screens on Apple TV Plus. The style of the computers on the severed floor recalls the dawn of the era of personal computing in the 1970s and 80s, an aesthetic imaginary in which Apple plays an important role.

-

Indeed, the aura of Lumon as a futuristic computing corporation from the late 70s is reinforced by the fact that its headquarters are shot at Bell Labs in New Jersey, a building that has now been renovated as a mixed-use office for high-tech startup companies as Bell Works. Bell Labs is the quasi-mythological source in the contemporary corporate technology culture (Silicon Valley) of the idea that a certain kind of research freedom characterized by open-ended product delivery timelines and serendipitous encounters in open office plans can cultivate ground-breaking technology. (Mark Zuckerberg recommended a book on Bell Labs as one of his “important books” of 2015.) The irony of this setting, of course, both in Severance and in the technology companies it parodies in the American landscape in 2025, is that the workplace has never been more saturated with surveillance and micro-management. The overhead shot of Helly R that opened the series is indicative here again, as is the complementary overhead of MDR’s desks we get in this episode: there is always something watching from above, it seems, even if what it captures of the actual activity is a flattened and at times misrepresentative image.

-

There are also evocations of Microsoft and IBM in Lumon, such as the Clippy-like guide on the manual handed to Helly in the episode, or the apparent requirement of suits on the severed floor echoing IBM’s infamous strict dress code. Lumon is a melange of imaginary pasts, presents, and futures in American innovation. It is futuristic in the framing of its bio-technological project of perceptual management—and in the “data smuggling” detectors that are installed in the elevators to the severed floor, about which more soon—but retrofitted in its aesthetic, in its management style, and in its outdated repertoire of daily devices. Recall, for example, Milchick’s handheld camcorder, and the tube-activated (vacuum-tube?) camera he uses to snap the official photo of the new group of refiners.

- -

The overhead of Lumon Industries itself depicts a sketchy graph of a brain, one can’t help but think. Its upper floors all operate above board with normally conscious workers, whereas underground there is something sensitive enough happening so as to require extra precautions. In S1E1’s analysis, we introduced the idea that Lumon’s interest in severing workers has to do with the mechanics of capital, in that surplus value can only ever be produced (in Marx’s account) through the structural theft of time from its laborers.1 Lumon’s spatial layout suggests that there might also be a psychoanalytic metaphor at stake in severance as an operation, where the happenings that occur in the business brain’s basement are essential to what it really is, why it does what it does.

-

Though Freud’s theory has been popularized as a topographical notion, wherein the unconscious is the submerged part of the mind’s iceberg of which we only see the tip, there is good reason to believe that this spatial description misrepresents how the unconscious should be properly understood. Lacan thus preferred topological descriptors to suggest that, if the unconscious is a ‘place’ or ‘site’, it contradicts any over-simplistic understanding of spaces that are distinctly separable. The relationship between the conscious and the unconscious in a psychoanalytic theory of the subject, I would suggest, is better understood through the figure of a coin with two inseparable sides. The meaning of any one side (‘heads’) derives from the meaning of its opposite (‘tails’); and it is thus insensible to imagine separating one part from the other without repressing something fundamental about the structure of the subject as a whole.

-

Lumon, though, seems to want desperately to keep innies from being in contact with their outies. Indeed, the very project of severance seems to have something fundamental to do with managing repression effectively, with renovating the worker into a perfectly divided self that cannot complain about the conditions of her labor through the fact of not knowing anything about them. (When Mark is given a dinner coupon on account of his head injury in S1E1, the real cause of the scar—Helly R’s riotous attempt to escape the orientation room—is not revealed to outie Mark.) The subject in Severance is split and maintained as such. The ‘unconscious’ of one’s home life should not affect one’s ‘conscious’ ability to perform at work.

-

The vice-versa is also true. Outies cannot suffer the ‘unconscious’ of their innies, either. Mark Scout’s decision to sever himself seems to be an attempt to repress the devastating effect of his experience of his wife’s death for some part of the day, given that he admits he was unable to continue his job as a history teacher due to alcoholism. At Lumon, however, Mark’s alcoholism is brutally functional; as his innie must suffer what (lack of) energy he is given by outie Mark’s actions the night before (“I find it helps to focus on the effects of sleep since we don’t actually get to experience it”).

-

The intellectual impoverishment of Lumon’s severed workers is further exposed in this episode as Dylan tries to convey to Helly the substance of what there is to live for as a severed innie: his “embarrassment of wealth” that consists of finger traps, a caricature portrait, and the hope that there might be a “waffle party” on the horizon. The sad satisfactions that severed workers aspire to reinvigorate the sense of the phrase “wage slavery”, an important formulation that in fact has solid footing in Marx’s analysis of capital. For Marx, it is worth comparing the wage worker’s predicament to the slave’s; for both must labor not for themselves, for their own ends and aspirations, but for an external master that appropriates their efforts. The important distinction is that, while in actual slavery the slave’s enthrallment to the master is explicit and explicitly enforced by means of force, in wage slavery the figure of the master is more diffuse, and hierarchical distinctions are ‘justified’ in the discursive suggestion of their being fairly and freely established. The proletariat (wage laborer) is free to choose her own master on the market, selling her labor power to whomever she chooses. But she is not free to refuse to sell her labor as labor-power; as this “wage slavery” is the generalized means of her reproduction and ability to go on living. So the proletariat is enslaved to a structure, not a person, and that structure is characterized by the reduction of labor in its multifarious forms to labor-power, a measurement of labor in time that thus becomes exchangeable on the market. In capitalism, in other words, freedom is structurally reduced to the freedom to choose to whom one sells one’s labor-power: which is not the same thing as freedom tout court. Thus is the wage laborer unfree in a way that is comparable, though not equivalent, to the slave.

-

Death at Lumon

-

The death culture at Lumon should also be doubly refracted through Marx’s analysis of how capital reduces its workers to shadows of themselves on the one hand, and a psychoanalytic understanding of the subject on the other. When Mark gets emotional about Petey’s disappearance during the game of office introductions (which tellingly involves passing around a bright red ball), Milchick reprimands him with the following explanation:

-
I think this is a good time to remind ourselves that things like deaths happen outside of here. Not here. A life at Lumon is protected from such things. And I think a great potential response to that from all of you is gratitude.
-

Severed workers are insulated from death because the very structure of their subjectivity distances the meaning of its concept. Innies symbolically ‘die’ when their outies do not come back to work, but this event does not necessarily coincide with their physical death, which as Milchick suggests should only be imagined to take place in the world of their outies. There is a contradiction here, though, as a physical accident at work would propagate through to an innie’s outie. So Milchick’s repression of the notion of death must be recognized as just that: a repression of a certain moment in or dimension of logic (a moment that is too dangerous or frightening to imagine saying out loud), and not as an explication of the necessary consequences of a thorough logic of life.

-

Milchick’s philosophizing also points to something more sinister in the structure of the severed subject. The severed worker is protected from death, perhaps, because there is a sense in which he is already undead. Doomed to exist in the artificial enclosure of Lumon’s basement and placated only by the pathetic enjoyments of finger traps, company coffee, ideological art, and the odd waffle party, what is there, really, to live for at Lumon? The motto briefly shown on the implant hardware in Helly’s operating room scene at the episode’s opening has a morbid resonance here: “Don’t live to work. Work to live.”

-

There is a stronger psychoanalytic sense in which we might make sense of Milchick’s discourse on death that is worth mentioning here, too. Lacan articulates a distinction between two kinds of death in his theory of the subject, a first death that is biological and a second death that is symbolic. I will explicate this theory later in S1, when Milchick’s foreshadowing of death’s importance in the show bubbles clearly to the surface in a later episode.

-

Capturing and controlling the symbolic

-

Let’s talk about the “symbol detectors” in the elevators, which are introduced in this episode. These are the real basis of how Lumon separates innies from outies, as they supposedly ensure that no notes, no language, is passed between the two kinds of self. In S1E1, we saw outie Mark put the tissue he had been crying into in his car in his pocket; and we then saw innie Mark confidently strolling out of the elevator on the severed floor, quizzically discovering the tissue in his pocket, and tossing it into a bin on his walk down the hall to MDR. So the suggestion has already been planted in our (the viewers’) mind that it is possible to traffic objects across the boundary. The other clear evidence of this is offered here in S1E2, where Irving similarly, quizzically, observes the black sooty substance underneath his fingernails during the distraction of the melon party.

-

Yet Helly’s note to herself triggers the alarms, resulting in the elevator doors refusing to close and a screen washed out with red alert. So they do seem to have some power to detect ‘symbols’. But what marks the boundary between a symbol and a non-symbol for this technology? It is not only explicit language in the form of written or spoken words that make meaning for us as human animals. We are affected by a frightening range of other things; colors, tactile memories, qualities of our past selves that seep into our present (such as too much alcohol drunk the night before). So it is hard to imagine, knowing the complexities of our selves as we all do, that Lumon could really effectively police the boundary between innies and outies, even with its back-to-the-future technological prowess.

-

Indeed, the audio recording that innie/outie Petey shows outie Mark in his hideout at the greenhouse reveals the insecurity of symbol detection at Lumon. In order to get a recording of what he was subjected to in the Break Room, he must have been able to get that retro handheld device back up into the ‘real’ world. So either the elevators weren’t able to pick it up, or there is some other way for innies to move between the supposedly demarcated spaces. Either way, the symbol policing at the innie/outie border seems to have some shortcomings.

-

A brief note on Petey’s dishevelled greenhouse to conclude, as this episode is where we are first introduced to much of the geography that will become important in the series: the break room, wellness, MDR, optics and design, Mark’s basement, the company restaurant (where Mark has his insufferably awkward date), the elevator, the MDR kitchen, the operating room, the Lumon foyer. Petey’s greenhouse, like many of the spaces in Severance, is a graph that both embodies and reflects a psycho-social moment of the show. Green like Macrodata Refinement, but much less put-together, the greenhouse reveals the underside of Lumon’s apparent gleam, the unconscious damage that its project of perfection wreaks on its workers psychologically and physically. Petey shows us that the worker, like so many words and things in the show, is not simply what it seems, but consists also of an excess signification that inevitably creeps into its conspicuous comportment. Mark is a depressed drunkard on the outside, and Irving (it seems) has his fingers in some hellish kind of black pie, a color that takes over his desk as he dozes off when he lets the distinction between his waking and unconscious self slip, we might say, when the reality of sleep threatens the security of being awake. There is, as the imagery in the poster of the ‘Whole Mind Collective’ that motivates Mark to bunk off and follow up on Petey’s enigmatic red letter suggests, a real revolution of sorts brewing beneath the surface of a fantasy of symbolic control.

-
-


-
- -
-
-
    -
  1. 1Boštjan Nedoh has evocatively called this operation “theft without a thief”.
  2. -
-
- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/html/severance-ep-3.html b/crates/tests/ref/examples/blog_site/html/severance-ep-3.html deleted file mode 100644 index 50eb6561..00000000 --- a/crates/tests/ref/examples/blog_site/html/severance-ep-3.html +++ /dev/null @@ -1,68 +0,0 @@ - - - - In Perpetuity - Severance [s1/e3] - - -

In Perpetuity - Severance [s1/e3]

- -

We need to talk about Ms. Cobel

-

As we noted in our analysis of S1E1, she typically storms the screen with an icy blue, a temper (the significance of this word we shall unpack shortly) that seeks to quell the fiery red that flickers in and out of the consciousness of workers on the severed floor. The ominous ending to that first episode intimated that, while her wintery business has its office underground, it also warrants her prying into Mark’s outie’s personal life in Baird Creek’s subsidized Lumon housing. Indeed, it seems that Ms. Cobel lives in Mark’s housing complex, too. From the state of her fridge, though, which we see in the foreground of a shot that implies surreptitious surveillance at work in her intimate space– a sense that has already been produced in Mark’s home with objects littered in the frame’s foreground– it doesn’t appear that she spends very much time making a home there. (Not too unlike Mark, perhaps.)

-

Ms. Cobel is a kaleidoscopic vector of strange femininity in the show. She is at once an old widow next door, a girl-boss superior on the severed floor, and a little girl prone to tantrums. As Mrs. Selvig, the hare-brained widow next door, she offers Mark unwanted company and cruddy cookies. Yet we know by now that this is apparently a ruse, a senile disguise through which the conniving Harmony Cobel can keep an eye on her employee, Mark, beyond the bounds of his time at work. At Baird Creek, she is a middle-aged executive in the clothing of an older and less cognitively composed character.

-

But even if Ms. Cobel is the ‘real’ Mrs. Selvig, there is still something anile about her character. She can be both commandeering and childish, as we see in her encounter with (innie) Mark S when he arrives unannounced at her office to request a kind of permission to take Helly to the perpetuity wing in S1E3. Commandeering, because she accosts Mark with bureaucratic demands in her role as his boss (“And have you filled out a common-reservation slip?”). Childish, because she literally throws a mug at him out of a petty frustration that is unbefitting of a mature manager. Cobel rationalizes her childish temper as follows:

-
What I just did was something I knew that you could handle and grow from. It was very painful for me. I hope that you’ll let it help you.
-

This outburst locates something undecided within Ms. Cobel, a moment in relation to Mark where she lets her personal anger supersede her role as his manager. This mug-throwing episode demonstrates that Cobel, too, is capable of breaking character as head of the severed floor and allowing some other aspect of her self to seep in, despite the pretense of a calm composure. The thrown-mug, in other words, is the wish fulfillment heralded by Cobel’s stunningly funny, inappropriate remark to Helly during her orientation in S1E1; “I’ve wanted to pummel Mark myself, but I am his employer.” Even Cobel, who is supposed to be more in charge of herself than the MDR employees who are her inferiors– her breaking into Mark’s house while he isn’t there implies that she is unsevered, and thus more ‘responsible’– harbours desires that exceed and contradict the prescribed role she is supposed to play.

-

The image of Cobel above confirms her as childish in some respects. Notice that here, at ‘home’, she wears her hair in pigtails rather than loosely around her shoulders. But it also paints her as a scopophilic and overbearing mother. Whatever she is doing creating excuses to talk to Mark’s outie as Mrs. Selvig, it becomes clear in this episode that there is a convoluted kind of care at stake in her creepy and overcurious work. Peering at him as he wanders up from the basement (Cobel doesn’t seem to know that Petey is also down there at this point, though her break-in later in the episode suggests that she suspects something is awry), she murmurs to herself, “Oh, Mark. Are you all right?”

-

This is a strange exhibition of affection, coming from the same woman who will throw a mug at Mark for his failure to “get MDR to its numbers” as department chief, who knowingly subjects him to the break room– which we observe on screen for the first time later in the episode– and who steals the book left by his brother-in-law as a gift at his doorstep. Despite these mistreatments, Cobel does still seem to hold some perverted penchant for and attachment to Mark. As HaxDogma notes in his review of this episode, it is hard to see Mark’s promotion to department chief after Petey disappears as anything other than a nepotistic appointment, given that Irving is clearly the more experienced refiner in a number of respects (orientation procedure, group photo protocol, number of years spent on the severed floor, to name a few). Cobel’s overinquisitive manner on display in this episode is perhaps best described as motherly, even as she is certainly not a paradigmatically good mother.

-

There is also something undoubtedly sexual about Cobel’s relationship to Mark. Her lingering at the door in S1E2 waiting to be invited in, her awkward and suggestive mention of her late husband’s building an apartment in the back of their abode in heaven “in case I found a new man before I got there”, her creating an excuse to talk to him by pretending to de-ice her stoop; and, naturally, her peeping at him through the window. She is either a stalker by-the-book, or (more charitably) a lonely woman who is searching for some missing satisfaction. Most likely, she is an inextricable concoction of the two. Cobel wants to have Mark’s cake and eat it too; to be at once his mother, his corporate superior, and (we can’t help but suspect) his lover. Like many put in positions of power, she has trouble setting her more inappropriate desires aside so as to simply ‘do her job’.

-

Primal father figures

-

Cobel’s mother energy is arguably muted and mixed up in her Sphinxesque triplicity. But the father energy on display in this episode is, by contrast, loudly and proudly pronounced in at least three different figures: Petey, Irving, and, of course, Kier Eagan.1 Before tackling these fathers one by one, it is instructive to straightforwardly and schematically lay out the Oedipus complex, an ‘absolute fiction’ that nonetheless, Freud claims, depicts something foundational about the graph of the speaking subject, the graph in which we took interest in our analysis of S1E1.

-

The Oedipus complex is so-named because it takes its architecture from the figure of Oedipus as he appears in the ancient Greek playwright Sophocles’ trilogy, which consists of the plays Oedipus Rex, Oedipus at Colonus, and Antigone. (Oedipus’ tragic tale is drawn from a mythology that predates these plays, but the story is nonetheless usually traced to its Sophoclean production.) Oedipus is well-known to students of psychoanalysis because of Freud’s making him into a complex, which is generally (mis)understood as ‘every person wants to kill their father and fuck their mother’. Famously, Oedipus killed his father– at a crossroads, thinking he was simply a threatening stranger at the time– and married his mother– not understanding that relation in the moment of the act, either.

-

Jacques Lacan rendered the Oedipus complex more philosophically significant than this overblown and crude Freudian telling. For Lacan, the Oedipus complex designates an abstract account of how desire is produced by the speaking subject in relation to the formative figures with which it is in relation. As he notes in his 1938 text, The Family Complexes:

-
our criticism since Freud presents this psychological entity [the Oedipus complex] as the specific form of the human family and subordinates all social variations of the family to it. (Lacan 2002, p.35)
-

The Oedipus complex is not so much a diagnosis of a particular perversion that is presumed universal, in the sense that everyone consciously suffers by repressing these secret dual desires to kill (my father) and to fuck (my mother). It is rather an important part of how he architects a philosophy of the subject’s relation to itself (and others) by way of a “triangular conflict” (Lacan 2002, p.41) between three figures: one’s self, the Mother, and the Father.

-

The Mother is the subject’s first known object that is seen as separable from one’s sense of self. We can imagine this through the process of weaning, of a mother teaching her baby that sustenance ought to be sought in solid foods rather than directly from her teat. Originally, a baby does not have a firm enough sense of itself to recognize that the Mother’s teat is separated from its own body. When it wants nourishment, it cries, and a breast brimming with milk appears (assuming a good mother, here). The breast seems almost part and parcel of the baby, from its perspective, as what reason does it have to think otherwise? (We are assuming here that the separation between a baby’s sense of its own body and the world is not ingrained at birth, but rather learned, acculturated.) It is only when the baby’s crying stops precipitating a breast that it should start to doubt this part of itself, to think that perhaps my Mother’s breast is not part of me as subject but rather its own kind of thing, a separate object. Thus the Mother is, in this developmental sense, the subject’s first proper object. The Mother (and her breast), the baby subject thinks, is both mine and not mine, as though there is some relation that my Mother has to me, she is not (quite) the same as me.

-

The Father, on the other hand, incorporates (into) the baby subject’s sense of self differently. It is not considered, as the Mother is, a part of the subject that was at some point taken away, but rather represents the source of that action of taking away. If the Mother ought (in the terms of the baby subject’s nascent ethics) to be a part of me, the Father is the force and figure responsible for taking her away. This stature of the Father is better understood, perhaps, with reference to the myth of the Primal Father, which Lacan reinterprets from its presentation in Freud as originally depicted in the fourth and final chapter of Totem and Taboo (Freud 1919). Like the Oedipus complex, the myth of the Primal Father is a narrativization that helps to understand the structure of the subject. Suppose a primal horde, Freud offers, at the helm of which exists a Primal Father who monopolizes all women. All women in the horde, in other words, are sexually subject to this single male; no other male gets to enjoy anything of them. A band of brothers, resentful of the Father’s monopoly on enjoyment, conspire to escape the ban on sexual enjoyment through a plot to murder him.2 They do so through what could be called an original jealousy, a feeling that the Father is enjoying in a way that is prohibited (by virtue of the Father’s taboo) for each of them.

-

Freud offers this as an “historic explanation… [of] the origin of incest” (Freud 1919, p.207), as the Primal Father’s taboo on enjoyment is what, Freud suggests, drives exogamy, wherein each of the band of brothers leaves that original tribe to start their own in which they can (finally) enjoy the women for themselves. That this is an historic explanation does not mean that Freud believes that it represents an actual state of affairs in some distant past. Indeed, he states the opposite, that the “primal state of society has nowhere been observed.” (Freud 1919, p.233) The parable of the Primal Father is historic rather in the sense that it narrates to us an important aspect of the structure of the subject, much like Oedipus’ tragedy.

-

Daddy issues at work

-

Okay: we now return from this Freudian digression to the stuff of Severance. What bearing do the Oedipus complex and the myth of the Primal Father have on the structure of the subject on display in the show? Let’s go now to the scene in S1E3 at the crossroads, where MDR runs into two employees in Optics and Design (O&D).

- -

The composition of this shot puts the reflective axis down the center, and the encounter is suggestively Oedipean in its structure (at a crossroads, unknowing of the Other at play). Note that Irving is compositionally mirrored by Burt, played by Christopher Walken, and we will explore this suggestive symmetry in detail in later episodes. The two departments (MDR and O&D) know of each other, we surmise from the dialogue that follows. But Irving isn’t supposed to know Burt by name, as he accidentally happened upon him in S1E2 on the way to a Wellness session. (Burt was coming from his Wellness session.)

-

While Irving greets Burt on the back of this previous encounter with gentle and flirtatious warmth, Dylan’s hostility towards O&D is clear. In place of the camaraderie that one might have hoped for between the two factions given their shared plight as severed workers, there appears to be an enmity built on a mythology (what Irving calls an “absolute fiction”) of otherness:

-
Kier sorted the departments by virtue. Macrodats are clever and true, while O&D’s more cruelty-centered…. O&D tried a violent coup on the others decades ago, and that’s why they reduced them down to two. And that’s why they keep us all so far apart now.
-

Kier is evidently the Primal Father of the severed floor, responsible for instituting the symbolic system of rules, regulations, and affects in the various ‘bands of brothers’ which reside there. The tour of the perfect replica of Kier’s house later in the episode reinforces his architectural status as Primal Father. Irving chides Mark for his lack of reverence in deigning to turn the tour of the Perpetuity Wing into Eagan Bingo, and is aghast when he almost happens to “bed sit” on the facsimile in his duplicate chambers. (Thou shall not lie in Kier Eagan’s bed.) Kier and the lineage of Eagans more generally constitute the law of the father, the signifier of authority that keeps the severed floor’s social order intact, the symbolic source from which both rules and the forbidden temptations of their being broken, taboos, sprout. Irving fosters this authority during the tour, standing in for the absent caregivers, existential (Kier, the Eagans) and material (Cobel and Milchick as superintendents who seem to be letting the kids take care of themselves for a short period).

-

Another paternal authority whose absence has haunted and structured Mark since the show’s opening is Petey, the man whose shoes he stepped into as MDR’s department chief. As per his exchange with Cobel in the mug-throwing scene, Mark lionizes Petey as a tone-setter, often acting through an ethics refracted by the subordinate conjunctive, ‘if Petey were here’, or the preface ‘Petey used to say’. Mark’s innie is steered more by an imagined sense of what Petey would do, rather than what Kier would.

-

Thus while it is Cobel who is explicitly in charge, the spectral presence of these father figures– Kier, Petey, Irving– correlatively structures the subject on the severed floor. There is, in other words, an Oedipal triangular conflict at work in relation to the ethical imperative of a severed worker. The four members of MDR, as orientations to the structure of this subject, suffer different relationships to the positions of Mother and Father. Mark S is a momma’s boy, sired more by Petey’s radical rejection of company policy than by Kier. Dylan, though impertinent to the minutiae in the structure of Law at times, is ultimately his Father’s son, acquiring satisfaction by accumulating accolades, and apparently driven by the impending idea of another finger trap or a waffle party. Irving seems at this point the most mature of the children, looking reverentially to Kier. Yet recall that he has been chided by Milchick already for falling asleep on the job, so not all is perfect in paradise. Helly has no time for Cobel’s authority, yet we will see in due course that her relationship with a Father is a deep lineament in her personality, too.

-

Taming tempers

-

The count of four in the members of MDR mirrors the exact amount of tempers that we learn about from Kier Eagan’s wax simulacrum speaking during the tour of the Perpetuity Wing. These tempers are crucial as coordinates of the Eaganic attempt to coherently quantify the subject, and Kier’s pronouncement is deeply significant for our investigation of the subject’s distorted structure on the severed floor:

-
I know that death is near upon me, because people have begun to ask what I see as my life’s great achievement. They wish to know how they should remember me as I rot. In my life, I have identified four components, which I call tempers, from which are derived every human soul. Woe. Frolic. Dread. Malice. Each man’s character is defined by the precise ratio that resides in him. I walked into the cave of my own mind, and there I tamed them. Should you tame the tempers as I did mine, then the world shall become but your appendage. It is this great and consecrated power that I hope to pass on to all of you, my children.
-

If there was any doubt that Kier Eagan embodies the Freudian Primal Father, the foundational component of absolute fiction on which the edifice of Law (the rules and taboos by which a subject is bound to abide) is constructed, the quotation above should put it to bed. Kier’s ‘philosophy’ seeks to conquer death by quantifying life, sorting its myriadic nature into a “precise ratio” of character that can be counted (completely, it seems) in four distinct tempers. Indeed, we saw the pictorial representation of this taming in S1E2, in the scene where Irving meets Burt:

- -

In the post-Platonic cave of his own mind, Kier is the master of his passions. He admits no unconscious contours that sneak up on him unbeknownst in Freudian slips of the tongue or unwanted symptoms. Indeed, the Eaganesque fantasy of the subject is one in which the necessary excess of language that psychoanalysis discovered does not exist. Words are detected (via sensors in the elevator, say), controlled, managed. Any psychoanalytic excess is, in Kier’s project of a precisely rationalized subject, beaten out of language. Excess meaning is ‘tamed’ as if it were a wild animal by a clear-headed, upstanding, divinely radiant visionary. (As we will see, the position of primal power that Kier occupies here is sexually overbearing, too, as we might suspect from the Freudian analogy.)

-

This episode ends with two scenes depicting the dark and bloody underside of Kier’s waxen vision of the precisely quantified human subject. The first is Helly’s harrowing experience in the break room, a space where the unruly distance between words as they are uttered and the meaning they convey is thought to be stamped out, suffocated by the drudgery of debilitating repetition. A subject will not exceed its authorized symbolization, the break room seems to want to claim. The worker’s unconscious will be tamed and ultimately made beholden to a regime of conscious rationality. The second, and the closing scene of the episode, is Petey’s psychotic demise at the convenience store, where he yells at wit’s end: “I need tokens so I can eat!” Ravaged by the failure of his complete quantification inside Lumon, Petey seems no longer to have a firm footing in either his innie’s or outie’s reality. Mark looks on from a distance as he collapses outside the store, escorted by police, attempting (it seems) to account for his disintegration.

-
-

Bibliography

-
    -
  • Freud, Sigmund. 1919. “Totem and Taboo: Resemblances Between the Psychic Lives of Savages and Neurotics.” Translated by A.A Brill. Moffat, Yard and Company 50 (1): 94–95.
  • -
  • Lacan, Jacques. 2002. “Family Complexes in the Formation of the Individual.” Antony Rowe London,.
  • -
  • McGowan, Todd. 2021. “The Distribution of Enjoyment.” European Journal of Psychoanalysis 8 (1).
  • -
-
-
-


-
- -
-
-
    -
  1. 1 There is foreshadowing, too, of a fourth father figure in Rickon, Mark’s brother-in-law. While reading his confiscated book, Milchick quietly remarks to himself a thought that will become an important refrain for many other characters with respect to Rickon later in the season: “This is… Jesus.”
  2. -
  3. 2 There has been much written on Freud’s mythos of the Primal Father. For a relatively recent use of the concept that serves as a reasonable introduction to Lacan’s reading of Totem and Taboo, see (McGowan 2021).
  4. -
-
- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/html/writing-in-typst.html b/crates/tests/ref/examples/blog_site/html/writing-in-typst.html deleted file mode 100644 index 3cf7d346..00000000 --- a/crates/tests/ref/examples/blog_site/html/writing-in-typst.html +++ /dev/null @@ -1,56 +0,0 @@ - - - - - -

Writing in Typst | Hacking on Neovim with Claude

-

What is a ‘good’ writing system?

-

I have been incrementally hacking on my writing environment for some time now, since at least 2013 when I started seriously using computers in undergrad. A couple of years ago, I migrated to Orgmode as the best markup syntax for my needs, and wrote a post about how Emacs and Orgmode serviced my writing needs.

-

Here’s a summary of that post and the core tenets of what I consider an acceptable writing environment, parsed out over the five or so years I’ve been experimenting with one through grad school:

-
    -
  1. -

    Flexible, powerful and distraction-free. In short, this means that the environment needs to be an extension to a modal editor in the terminal. I started using a modal text editor around 2018, and use a range of ergonomic keyboards in funky ways that make using a mouse undesirable in most cases. (The web browser is the one environment where I still get some mileage out of a mouse. I do a lot with keyboard shortcuts via Vimium, but there are still some contexts where it’s just quicker or more comfortable to use a mouse.) One of the main reasons that I settled on Orgmode rather than, say, Markdown at the time was because of its more standardized bibliographic management.

    -
  2. -
  3. -

    Non-proprietary and sane markup format. Microsoft Word documents and Google Docs are great for a lot of things, but I refuse to rely on either of them as a primary format for all of the writing I do, as their formats are too hard to parse (to write custom software for) and bound to Microsoft’s and Google’s ecosystems respectively. The ability to run Unix-style commands on a simple markup format from a terminal to search and replace, for example, is an essential. Writing documents in a plain-text markup language also gives me the safety of knowing that, if it really came down to it, I could write my own parser and compilers. My writing archive shouldn’t strictly rely on some company’s infrastructure to host, search, or otherwise make use of the thought it contains. Using such a format also means that cross-platform editing is made simpler and possible. (I run linux mostly, but still regrettably use Android as my phone’s operating system.)

    -
  4. -
  5. -

    Multi-format export. Most of the world’s documents are still PDF. There’s no getting away from needing to export writing as PDF in many cases– for e-readers like the reMarkable that I use, or for submission to conferences. But we increasingly read writing on a web page of some sort, and so I also need a workflow to export fully functional documents to HTML and CSS, too. Other formats that are interesting if not essential include some kind of presentation file (PowerPoint, or better: just a website that has slideshow-like interactions), Markdown for rich formatting to copy somewhere, and plain text.

    -
  6. -
-

I have up until very recently used Orgmode as my markup language of choice, exported them to PDF with latex, and exported them to HTML with pandoc. But I am very attached to the Neovim ecosystem for my code editing and writing, and so it was clunky to open up an Emacs installation (that I barely understood) exclusively to edit Orgmode. So I switched to editing Orgmode in Neovim along with everything else, using plugins and custom functions to get towards the writing experience that I wanted.

-

This has actually worked surprisingly well, but it has some sharp edges. One of the more significant ones is that any time I want to produce anything more complicated than basic, formatted text with citations and footnotes– for all of which pandoc transformations produce reasonable output in both HTML and PDF– I need to start embedding LaTeX into Orgmode, and deal with the LaTeX toolchain / dependency management in order to compile a PDF. Similarly, if I want to produce an interactive HTML document, I need to embed the source code directly in Orgmode and ensure that the export process handles dependencies and the like appropriately.

-

Some of this is unavoidable. If I want to run custom Javascript in a website that is well beyond the expressive capacities of a markup language, at some point I just want to be able to write Javascript. But what I found frustrating about my Orgmode / LaTeX / HTML workflow is that there wasn’t any reasonable way to work towards extending the markup language in some ways, unless I was willing to start developing my own bespoke flavor of Orgmode plus plus. I also don’t particularly like wrestling with the LaTeX ecosystem, because– and this is hardly controversial to say– LaTeX has a lot of bloat. What I wanted was a more extensible system which had saner defaults.

-

Enter: Typst

-

A few months ago, I started seriously considering typst as a potential replacement for LaTeX. At the very least, I thought, it would be more fun to wrestle with a modern ecosystem when struggling to produce some custom table or figure in my output PDF, as typst has a layout system that uses terms that are a lot more intuitive to me than the black magic of laying out LaTeX documents.

-

It just so happened, however, that I started to follow typst development more closely at a time when the final touches to the basic foundations of HTML export, such as footnotes and bibliography, were just about to be added to the upstream. So I made a few contributions to spirit it along, and started more serious experimentation using typst as a unified way to produce both PDF and HTML in my writing environment. Pandoc can convert to and from typst, so I originally intended to keep writing documents in Orgmode and then transiently convert them to typst in order to produce PDF and HTML both. But I quickly found that the typst syntax natively accommodates all of the features that I make use of regularly in Orgmode such as citations, footnotes, headings, links and text decoration– and then some.

-

So why not write my blogs, papers, and documents directly in typst? I considered the critical features of my Neovim / Orgmode writing environment that I didn’t want to abandon:

-
    -
  1. Shortcuts for markup. The nvim-orgmode plugin makes writing Orgmode in Neovim pleasurable, providing shortcuts to insert a link and basic text decoration while composing.
  2. -
  3. Citation and link picking. Though I’ve gone without it for a few months for reasons that are immaterial here, I used to have a shortcut to bring up a fuzzy finder for all of my bibliography entries to easily insert a citation. The same fuzzy finder would make it easy to link to local files (in a website, for example, to link to other posts).
  4. -
  5. Document folding. The ability to fold away all of the text beneath a heading is very useful when navigating larger documents, as it helps me to compartmentalize writing tasks and organize longer documents such as a dissertation chapter.
  6. -
  7. Export shortcuts. I have customized my Neovim editor so that I can easily export the active Orgmode document (through the pandoc and LaTeX processes described above). Personally, I don’t feel that I need a real-time live preview of the document as I type, as I generally just want to check that it looks reasonable at certain junctures in the writing process, rather than continuously.
  8. -
-

The one other feature of Orgmode that I have come to rely on heavily is its TODO functionality. I typically only use this in notes related to projects or tasks more generally, however, and not in documents that are intended for publication such as a paper or blog post.

-

Enter: Claude Code

-

At this point in the past of a new writing technology’s prospecting, I would go searching for a Neovim plugin for typst and hope that it provides features that satisfy a majority of these requirements. I’ve spent a fair bit of time tinkering with my init.lua, the entrypoint for customizing Neovim, but I’ve never had the time nor interest to sit down and write a plugin from scratch.

-

LLMs, of course, are at time of writing taking the coding world by storm. I have started moderately relying on Claude Code when writing some– though certainly not all– kinds of code. As is well-known by now, Claude is especially good at scaffolding hacky scripts or modules from scratch, when no large codebase or domain-specific knowledge needs to be kept in context. A Neovim plugin, I realized this morning, is a pretty ideal domain for LLM-assisted coding. The ‘codebase’ is often just a single configuration file, and the domain-specific knowledge is the Neovim editor itself, a well-documented and expansively customized software for which there are many examples on Reddit.1

-

So I fired up Claude Code earlier this afternoon, and– fast-forward an hour or two– I have a fully functional writing environment for Typst that essentially has feature-parity with my Orgmode environment. Moreover, my Neovim config is now much more comprehensibly modularized; and I have a tried-and-tested method for extending it without needing to spend days learning the ins-and-outs of Neovim’s API; and some bugbears in my NixOS config were eliminated while I was at it. (If that last bit means nothing to you, count yourself lucky!)

-

My new writing environment

-

I use treesitter for syntax highlighting, and Typst already looks pretty good with it. I get function completion by integrating an LSP for the format, for which I’m using tinymist.

-

As I noted above, I haven’t had dynamic link or citation insertion for some time. It was one of the features that got lost in my move from writing Orgmode in Emacs to writing it in Neovim. I use telescope.nvim for general search and file-picking when coding in Neovim, and I figured that I could use a customized pop-up to dynamically pick available citations from the relevant BibTeX file, too. After a few minutes of vibe-engineering, I have the following:

- -

When I am writing in Typst, and I want to bring in a reference, I can open a panel. Note that the search is full-text, not just using the reference ID. I also have a shortcut to specify which bib file to use through the #bibliography function in Typst. I can insert links in the same way as citations, both references to files relative to the current one (blog posts on the same site) and external links. Both the citation and link insertion work either by highlighting text and annotating it, or by inserting new links/citations. I also have a similar shortcut to add footnotes.

-

This is pretty functional now for generic writing!

-

Future work

-

Typst isn’t ideal for producing fully-featured websites currently, as HTML export is experimental. Even when it becomes better supported, the project is– understandably, given its priority supporting PDF– taking a relatively conservative approach to HTML generation. Anything that doesn’t have a robust analog in a PDF document, such as videos and hover panels, will have to be ‘embedded’ in Typst with HTML/CSS/JS, rather than being written in Typst syntax. The current experience isn’t much worse than Orgmode with Pandoc, though, and the Typst roadmap promises that it will become much better in the relatively short-term future.

-

There is a longstanding issue that I’ve had with links in Orgmode that I haven’t yet tackled with Typst. When I’m writing, I like hyperlinked text to appear as it will in the final document, i.e. without the underlying URL on display. When editing any particular line, though, it’s better that all of the links are ‘expanded’ to their full source syntax (#link("...")[...]) so that it’s feasible to edit the markup without requiring any fancy shortcuts. The effective shortening of lines that occurs when hiding these URLs results in different Neovim line-wrapping requirements, with which the Orgmode plugin I have been using does a bad job, giving ugly linebreaks in documents with long links. This link presentation will likely be the next feature I add to my Neovim Typst plugin.

-

I’ll add to the capabilities in my Neovim config files, and might eventually release a separate plugin if the features become significant/mature enough.

-
-
    -
  1. 1It’s impossible to mention LLM coding at this time without adding some sort of disclaimer that, no, I don’t think AGI is around the corner, and yes, I do expect both programming languages and language writ large to remain ‘a thing’ in the foreseeable future. LLMs are an incredibly powerful tool to write and analyze code and text, but the purpose of code and text– as a medium of symbolic communication amongst social beings– has not been rendered valueless since ChatGPT became publicly available. If anything, the value of adeptly and adroitly handling written language has taken deeper root. For my preliminary thoughts on why we are so keen to imagine that computers will supplant the usefulness of the human, I refer the reader to this talk I gave in 2024.
  2. -
-
- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/blog_site/pdf/blog_site.metadata.json b/crates/tests/ref/examples/blog_site/pdf/blog_site.metadata.json deleted file mode 100644 index 964c8d0a..00000000 --- a/crates/tests/ref/examples/blog_site/pdf/blog_site.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 5232600, - "page_count": 6 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/code_blocks_with_links/pdf/code_examples.metadata.json b/crates/tests/ref/examples/code_blocks_with_links/pdf/code_examples.metadata.json deleted file mode 100644 index 705d9d1a..00000000 --- a/crates/tests/ref/examples/code_blocks_with_links/pdf/code_examples.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 28288, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/cover_minusletter/pdf/cover-letter.metadata.json b/crates/tests/ref/examples/cover_minusletter/pdf/cover-letter.metadata.json deleted file mode 100644 index bd8f3cf3..00000000 --- a/crates/tests/ref/examples/cover_minusletter/pdf/cover-letter.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 1239761, - "page_count": 2 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/cross_directory_links/html/ch1.html b/crates/tests/ref/examples/cross_directory_links/html/ch1.html deleted file mode 100644 index c1c5e4de..00000000 --- a/crates/tests/ref/examples/cross_directory_links/html/ch1.html +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - - -

Chapter 1

-

This is the first chapter.

-

References

-

Go back to the introduction.

-

Continue to Chapter 2 (sibling).

-

See the appendix notes for additional info.

- - diff --git a/crates/tests/ref/examples/cross_directory_links/html/ch2.html b/crates/tests/ref/examples/cross_directory_links/html/ch2.html deleted file mode 100644 index d0568c4e..00000000 --- a/crates/tests/ref/examples/cross_directory_links/html/ch2.html +++ /dev/null @@ -1,144 +0,0 @@ - - - - - - - -

Chapter 2

-

This is the second chapter.

-

Navigation

-

Previous: Chapter 1

-

Root: Introduction

-

Content

-

Testing cross-directory navigation patterns.

- - diff --git a/crates/tests/ref/examples/cross_directory_links/html/intro.html b/crates/tests/ref/examples/cross_directory_links/html/intro.html deleted file mode 100644 index 0c919f25..00000000 --- a/crates/tests/ref/examples/cross_directory_links/html/intro.html +++ /dev/null @@ -1,142 +0,0 @@ - - - - - - - -

Introduction

-

Welcome to the cross-directory test.

-

Overview

-

This document links to Chapter 1.

-

See also Chapter 2 for more details.

- - diff --git a/crates/tests/ref/examples/cross_directory_links/html/notes.html b/crates/tests/ref/examples/cross_directory_links/html/notes.html deleted file mode 100644 index 41cef651..00000000 --- a/crates/tests/ref/examples/cross_directory_links/html/notes.html +++ /dev/null @@ -1,144 +0,0 @@ - - - - - - - -

Appendix: Notes

-

Additional notes and references.

-

Cross References

-

Back to Chapter 1.

-

Return to the introduction.

-

Details

-

Testing links from a different subdirectory.

- - diff --git a/crates/tests/ref/examples/cross_directory_links/pdf/cross_directory_links.metadata.json b/crates/tests/ref/examples/cross_directory_links/pdf/cross_directory_links.metadata.json deleted file mode 100644 index d0490193..00000000 --- a/crates/tests/ref/examples/cross_directory_links/pdf/cross_directory_links.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 26778, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/epub/epub_inferred_spine.metadata.json b/crates/tests/ref/examples/epub_inferred_spine/epub/epub_inferred_spine.metadata.json deleted file mode 100644 index 8609744b..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/epub/epub_inferred_spine.metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "filetype": "epub", - "file_size": 4076, - "title": "Epub Inferred Spine", - "language": "en", - "spine_files": [ - "a.xhtml", - "b.xhtml", - "c.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/a.xhtml b/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/a.xhtml deleted file mode 100644 index 31705815..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/a.xhtml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - Part A - -
-

Part A

-

This is the first part of the document.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/b.xhtml b/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/b.xhtml deleted file mode 100644 index beffed35..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/b.xhtml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - Part B - -
-

Part B

-

This is the second part of the document.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/c.xhtml b/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/c.xhtml deleted file mode 100644 index 9e6cd5b7..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/epub/xhtml/c.xhtml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - Part C - -
-

Part C

-

This is the third part of the document.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/html/a.html b/crates/tests/ref/examples/epub_inferred_spine/html/a.html deleted file mode 100644 index f9d9a1b2..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/html/a.html +++ /dev/null @@ -1,11 +0,0 @@ - - - - Part A - - -

Part A

-

This is the first part of the document.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/html/b.html b/crates/tests/ref/examples/epub_inferred_spine/html/b.html deleted file mode 100644 index 1f09d152..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/html/b.html +++ /dev/null @@ -1,11 +0,0 @@ - - - - Part B - - -

Part B

-

This is the second part of the document.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/html/c.html b/crates/tests/ref/examples/epub_inferred_spine/html/c.html deleted file mode 100644 index 03487a24..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/html/c.html +++ /dev/null @@ -1,11 +0,0 @@ - - - - Part C - - -

Part C

-

This is the third part of the document.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/pdf/a.metadata.json b/crates/tests/ref/examples/epub_inferred_spine/pdf/a.metadata.json deleted file mode 100644 index 8ec853d3..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/pdf/a.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 2619, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/pdf/b.metadata.json b/crates/tests/ref/examples/epub_inferred_spine/pdf/b.metadata.json deleted file mode 100644 index 8ec853d3..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/pdf/b.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 2619, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/epub_inferred_spine/pdf/c.metadata.json b/crates/tests/ref/examples/epub_inferred_spine/pdf/c.metadata.json deleted file mode 100644 index 8ec853d3..00000000 --- a/crates/tests/ref/examples/epub_inferred_spine/pdf/c.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 2619, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/index/html/index.html b/crates/tests/ref/examples/index/html/index.html deleted file mode 100644 index e3190a8a..00000000 --- a/crates/tests/ref/examples/index/html/index.html +++ /dev/null @@ -1,153 +0,0 @@ - - - - - - - -

Screening the subject

-

Screening the subject is a blog that analyses content on both the big and small screen in reasonable detail, i.e. episode-by-episode or scene-by-scene. Contact us at info@ohrg.org for enquiries.

- -
-


-
- -
- - diff --git a/crates/tests/ref/examples/index/pdf/index.metadata.json b/crates/tests/ref/examples/index/pdf/index.metadata.json deleted file mode 100644 index 8dc1fc4f..00000000 --- a/crates/tests/ref/examples/index/pdf/index.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 20019, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/link_path_edge_cases/html/chapter-01.html b/crates/tests/ref/examples/link_path_edge_cases/html/chapter-01.html deleted file mode 100644 index 367b4a4a..00000000 --- a/crates/tests/ref/examples/link_path_edge_cases/html/chapter-01.html +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - -

Chapter 01

-

This filename contains numbers.

-

Back to main.

- - diff --git a/crates/tests/ref/examples/link_path_edge_cases/html/file-name.html b/crates/tests/ref/examples/link_path_edge_cases/html/file-name.html deleted file mode 100644 index e78e622b..00000000 --- a/crates/tests/ref/examples/link_path_edge_cases/html/file-name.html +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - -

File with Hyphen

-

This filename contains a hyphen.

-

Back to main.

- - diff --git a/crates/tests/ref/examples/link_path_edge_cases/html/file_name.html b/crates/tests/ref/examples/link_path_edge_cases/html/file_name.html deleted file mode 100644 index fb643be9..00000000 --- a/crates/tests/ref/examples/link_path_edge_cases/html/file_name.html +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - -

File with Underscore

-

This filename contains an underscore.

-

Back to main.

- - diff --git a/crates/tests/ref/examples/link_path_edge_cases/html/main.html b/crates/tests/ref/examples/link_path_edge_cases/html/main.html deleted file mode 100644 index 863e0314..00000000 --- a/crates/tests/ref/examples/link_path_edge_cases/html/main.html +++ /dev/null @@ -1,146 +0,0 @@ - - - - - - - -

Path Edge Cases Test

-

This tests unusual but valid filename patterns.

-

Links to Edge Case Files

-

Hyphen: file with hyphen

-

Underscore: file with underscore

-

Dot in name: file with dot

-

Number: file with number

-

Content

-

All these edge cases should transform correctly.

- - diff --git a/crates/tests/ref/examples/link_path_edge_cases/html/version-1.html b/crates/tests/ref/examples/link_path_edge_cases/html/version-1.html deleted file mode 100644 index cf476ca5..00000000 --- a/crates/tests/ref/examples/link_path_edge_cases/html/version-1.html +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - -

Version 1.0

-

This filename contains a dot in the name (not just the extension).

-

Back to main.

- - diff --git a/crates/tests/ref/examples/link_path_edge_cases/pdf/link_path_edge_cases.metadata.json b/crates/tests/ref/examples/link_path_edge_cases/pdf/link_path_edge_cases.metadata.json deleted file mode 100644 index 93f87970..00000000 --- a/crates/tests/ref/examples/link_path_edge_cases/pdf/link_path_edge_cases.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 24245, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/link_transformation/epub/link_transformation.metadata.json b/crates/tests/ref/examples/link_transformation/epub/link_transformation.metadata.json deleted file mode 100644 index 81a4864b..00000000 --- a/crates/tests/ref/examples/link_transformation/epub/link_transformation.metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "filetype": "epub", - "file_size": 4018, - "title": "Link Transformation Test", - "language": "en", - "spine_files": [ - "doc1.xhtml", - "doc2.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/link_transformation/epub/xhtml/doc1.xhtml b/crates/tests/ref/examples/link_transformation/epub/xhtml/doc1.xhtml deleted file mode 100644 index 2f43d9f2..00000000 --- a/crates/tests/ref/examples/link_transformation/epub/xhtml/doc1.xhtml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - -
-

Document 1

-

This is the first document.

-

You can navigate to See Doc 2 for more information.

-

Section in Doc 1

-

More content here.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/link_transformation/epub/xhtml/doc2.xhtml b/crates/tests/ref/examples/link_transformation/epub/xhtml/doc2.xhtml deleted file mode 100644 index ed92cb52..00000000 --- a/crates/tests/ref/examples/link_transformation/epub/xhtml/doc2.xhtml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - -
-

Document 2

-

This is the second document.

-

Go Back to Doc 1 to see the first document.

-

Another Section

-

Additional content in document 2.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/link_transformation/html/doc1.html b/crates/tests/ref/examples/link_transformation/html/doc1.html deleted file mode 100644 index a8bf2bf7..00000000 --- a/crates/tests/ref/examples/link_transformation/html/doc1.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - -

Document 1

-

This is the first document.

-

You can navigate to See Doc 2 for more information.

-

Section in Doc 1

-

More content here.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/link_transformation/html/doc2.html b/crates/tests/ref/examples/link_transformation/html/doc2.html deleted file mode 100644 index 28122773..00000000 --- a/crates/tests/ref/examples/link_transformation/html/doc2.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - -

Document 2

-

This is the second document.

-

Go Back to Doc 1 to see the first document.

-

Another Section

-

Additional content in document 2.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/link_transformation/pdf/link_transformation.metadata.json b/crates/tests/ref/examples/link_transformation/pdf/link_transformation.metadata.json deleted file mode 100644 index 9bf2aa64..00000000 --- a/crates/tests/ref/examples/link_transformation/pdf/link_transformation.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 4527, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/links_with_fragments/epub/links_with_fragments.metadata.json b/crates/tests/ref/examples/links_with_fragments/epub/links_with_fragments.metadata.json deleted file mode 100644 index 6519a0db..00000000 --- a/crates/tests/ref/examples/links_with_fragments/epub/links_with_fragments.metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "filetype": "epub", - "file_size": 4388, - "title": "Links with Fragments Test", - "language": "en", - "spine_files": [ - "page1.xhtml", - "page2.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/links_with_fragments/epub/xhtml/page1.xhtml b/crates/tests/ref/examples/links_with_fragments/epub/xhtml/page1.xhtml deleted file mode 100644 index 446615e4..00000000 --- a/crates/tests/ref/examples/links_with_fragments/epub/xhtml/page1.xhtml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/crates/tests/ref/examples/links_with_fragments/epub/xhtml/page2.xhtml b/crates/tests/ref/examples/links_with_fragments/epub/xhtml/page2.xhtml deleted file mode 100644 index d687b032..00000000 --- a/crates/tests/ref/examples/links_with_fragments/epub/xhtml/page2.xhtml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - -
-

Page 2

-

This is the second page.

-

Introduction

-

This is the introduction section.

-

It has some content that the first page links to.

-

Middle Section

-

Some middle content.

-

Conclusion

-

This is the conclusion section.

-

Referenced from page 1.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/links_with_fragments/html/page1.html b/crates/tests/ref/examples/links_with_fragments/html/page1.html deleted file mode 100644 index b841dc8d..00000000 --- a/crates/tests/ref/examples/links_with_fragments/html/page1.html +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - - -

Page 1

-

This is the first page.

-

See the introduction in Page 2 for details.

-

Also check the conclusion.

-

Section in Page 1

-

More content here.

- - diff --git a/crates/tests/ref/examples/links_with_fragments/html/page2.html b/crates/tests/ref/examples/links_with_fragments/html/page2.html deleted file mode 100644 index 2c84162b..00000000 --- a/crates/tests/ref/examples/links_with_fragments/html/page2.html +++ /dev/null @@ -1,147 +0,0 @@ - - - - - - - -

Page 2

-

This is the second page.

-

Introduction

-

This is the introduction section.

-

It has some content that the first page links to.

-

Middle Section

-

Some middle content.

-

Conclusion

-

This is the conclusion section.

-

Referenced from page 1.

- - diff --git a/crates/tests/ref/examples/links_with_fragments/pdf/links_with_fragments.metadata.json b/crates/tests/ref/examples/links_with_fragments/pdf/links_with_fragments.metadata.json deleted file mode 100644 index 554b1037..00000000 --- a/crates/tests/ref/examples/links_with_fragments/pdf/links_with_fragments.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 18897, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/merged-imports/pdf/merged-imports.metadata.json b/crates/tests/ref/examples/merged-imports/pdf/merged-imports.metadata.json deleted file mode 100644 index 49828eb6..00000000 --- a/crates/tests/ref/examples/merged-imports/pdf/merged-imports.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 12726, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/merged_subdir_imports/epub/merged_subdir_imports.metadata.json b/crates/tests/ref/examples/merged_subdir_imports/epub/merged_subdir_imports.metadata.json deleted file mode 100644 index c1fdad08..00000000 --- a/crates/tests/ref/examples/merged_subdir_imports/epub/merged_subdir_imports.metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "filetype": "epub", - "file_size": 4083, - "title": "Merged Subdir Imports", - "language": "en", - "spine_files": [ - "author.xhtml", - "index.xhtml", - "template.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/author.xhtml b/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/author.xhtml deleted file mode 100644 index 57c282db..00000000 --- a/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/author.xhtml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - Merged Subdir Imports - -
-

Author Page

-

Written by the author.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/index.xhtml b/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/index.xhtml deleted file mode 100644 index 0d691caa..00000000 --- a/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/index.xhtml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - Merged Subdir Imports - -
-

Merged Subdir Imports

-

This is the index page.

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/template.xhtml b/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/template.xhtml deleted file mode 100644 index ca9cef2c..00000000 --- a/crates/tests/ref/examples/merged_subdir_imports/epub/xhtml/template.xhtml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - -
- -
\ No newline at end of file diff --git a/crates/tests/ref/examples/merged_subdir_imports/pdf/merged_subdir_imports.metadata.json b/crates/tests/ref/examples/merged_subdir_imports/pdf/merged_subdir_imports.metadata.json deleted file mode 100644 index b0edcba6..00000000 --- a/crates/tests/ref/examples/merged_subdir_imports/pdf/merged_subdir_imports.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 14175, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/multiple_links_inline/html/multiple_links_inline.html b/crates/tests/ref/examples/multiple_links_inline/html/multiple_links_inline.html deleted file mode 100644 index d627cc5c..00000000 --- a/crates/tests/ref/examples/multiple_links_inline/html/multiple_links_inline.html +++ /dev/null @@ -1,146 +0,0 @@ - - - - - - - -

Multiple Links Test

-

Adjacent Links with Text

-

See File 1 and File 2 for details.

-

Multiple References in List

-

References: A, B, C.

-

Minimal Separation

-

Adjacent links: XY

-

Multiple Links in Sentence

-

Check the introduction, then chapter 1, and finally the conclusion.

- - diff --git a/crates/tests/ref/examples/multiple_links_inline/pdf/multiple_links_inline.metadata.json b/crates/tests/ref/examples/multiple_links_inline/pdf/multiple_links_inline.metadata.json deleted file mode 100644 index cac56e45..00000000 --- a/crates/tests/ref/examples/multiple_links_inline/pdf/multiple_links_inline.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 17849, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/pdf_individual/pdf/chapter1.metadata.json b/crates/tests/ref/examples/pdf_individual/pdf/chapter1.metadata.json deleted file mode 100644 index 8696e980..00000000 --- a/crates/tests/ref/examples/pdf_individual/pdf/chapter1.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 11864, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/pdf_individual/pdf/chapter2.metadata.json b/crates/tests/ref/examples/pdf_individual/pdf/chapter2.metadata.json deleted file mode 100644 index b3115878..00000000 --- a/crates/tests/ref/examples/pdf_individual/pdf/chapter2.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 11890, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/pdf_merge_false/html/a.html b/crates/tests/ref/examples/pdf_merge_false/html/a.html deleted file mode 100644 index 41229167..00000000 --- a/crates/tests/ref/examples/pdf_merge_false/html/a.html +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - doc1 - - -

A

-

The first doc.

- - diff --git a/crates/tests/ref/examples/pdf_merge_false/html/c.html b/crates/tests/ref/examples/pdf_merge_false/html/c.html deleted file mode 100644 index 408d0031..00000000 --- a/crates/tests/ref/examples/pdf_merge_false/html/c.html +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - doc2 - - -

C

-

The second doc.

- - diff --git a/crates/tests/ref/examples/pdf_merge_false/pdf/a.metadata.json b/crates/tests/ref/examples/pdf_merge_false/pdf/a.metadata.json deleted file mode 100644 index c41b4781..00000000 --- a/crates/tests/ref/examples/pdf_merge_false/pdf/a.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 9362, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/pdf_merge_false/pdf/c.metadata.json b/crates/tests/ref/examples/pdf_merge_false/pdf/c.metadata.json deleted file mode 100644 index 4f9ae42b..00000000 --- a/crates/tests/ref/examples/pdf_merge_false/pdf/c.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 8981, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/portable_epubs/html/portable_epubs.html b/crates/tests/ref/examples/portable_epubs/html/portable_epubs.html deleted file mode 100644 index e3b18f12..00000000 --- a/crates/tests/ref/examples/portable_epubs/html/portable_epubs.html +++ /dev/null @@ -1,346 +0,0 @@ - - - - - - Portable EPUBs - - -
-

Portable EPUBs

- Will Crichton, Brown University, January 25, 2024. Despite decades of advances in document rendering technology, most of the world’s documents are stuck in the 1990s due to the limitations of PDF. Yet, modern document formats like HTML have yet to provide a competitive alternative to PDF. This post explores what prevents HTML documents from being portable, and I propose a way forward based on the EPUB format. To demonstrate my ideas, this post is presented using a prototype EPUB reading system. -
-

The Good and Bad of PDF

-

PDF is the de facto file format for reading and sharing digital documents like papers, textbooks, and flyers. People use the PDF format for several reasons:

-
    -
  • -

    PDFs are self-contained. A PDF is a single file that contains all the images, fonts, and other data needed to render it. It’s easy to pass around a PDF. A PDF is unlikely to be missing some critical dependency on your computer.

    -
  • -
  • -

    PDFs are rendered consistently. A PDF specifies precisely how it should be rendered, so a PDF author can be confident that a reader will see the same document under any conditions.

    -
  • -
  • -

    PDFs are stable over time. PDFs from decades ago still render the same today. PDFs have a relatively stable standard. PDFs cannot be easily edited.

    -
  • -
-

Yet, in the 32 years since the initial release of PDF, a lot has changed. People print out documents less and less. People use phones, tablets, and e-readers to read digital documents. The internet happened; web browsers now provide a platform for rendering rich documents. These changes have laid bare the limitations of PDF:

-
    -
  • -

    PDFs cannot easily adapt to different screen sizes. Most PDFs are designed to mimic 8.5x11″ paper (or worse, 145,161 km²). These PDFs are readable on a computer monitor, but they are less readable on a tablet, and far less readable on a phone.

    -
  • -
  • -

    PDFs cannot be easily understood by programs. A plain PDF is just a scattered sequence of lines and characters. For accessibility, screen readers may not know which order to read through the text. For data extraction, scraping tables out of a PDF is an open area of research.

    -
  • -
  • -

    PDFs cannot easily express interaction. PDFs were primarily designed as static documents that cannot react to user input beyond filling in forms.

    -
  • -
-

These pros and cons can be traced back to one key fact: the PDF representation of a document is fundamentally unstructured. A PDF consists of commands like:

-
-
Move the cursor to the right by 0.5 inches.
Set the current font color to black.
Draw the text "Hello World" at the current position.
-
-

PDF commands are unstructured because a document’s organization is only clear to a person looking at the rendered document, and not clear from the commands themselves. Reflowing, accessibility, data extraction, and interaction all rely on programmatically understanding the structure of a document. Hence, these aspects are not easy to integrate with PDFs.

-

This raises the question: how can we design digital documents with the benefits of PDFs but without the limitations?

-

Can’t We Just Fix PDF?

-

A simple answer is to improve the PDF format. After all, we already have billions of PDFs — why reinvent the wheel?

-

The designers of PDF are well aware of its limitations. I carefully hedged each bullet with “easily”, because PDF does make it possible to overcome each limitation, at least partially. PDFs can be annotated with their logical structure to create a tagged PDF. Most PDF exporters will not add tags automatically — the simplest option is to use Adobe’s subscription-only Acrobat Pro, which provides an “Automatically tag PDF” action. For example, here is a recent paper of mine with added tags:

-
- -
Figure 1: A LaTeX-generated paper with automatically added tags.
-
-

If you squint, you can see that the logical structure closely resembles the HTML document model. The document has sections, headings, paragraphs, and links. Adobe characterizes the logical structure as an accessibility feature, but it has other benefits. You may be surprised to know that Adobe Acrobat allows you to reflow tagged PDFs at different screen sizes. You may be unsurprised to know that reflowing does not always work well. For example:

-
-
- -
Figure 3: A section of the paper in its default fixed layout. Note that the second paragraph is wrapped around the code snippet.
-
-
- -
Figure 4: The same section of the paper after reflowing to a smaller width. Note that the code is now interleaved with the second paragraph.
-
-
-

In theory, these issues could be fixed. If the world’s PDF exporters could be modified to include logical structure. If Adobe’s reflowing algorithm could be improved to fix its edge cases. If the reflowing algorithm could be specified, and if Adobe were willing to release it publicly, and if it were implemented in each PDF viewer. And that doesn’t even cover interaction! So in practice, I don’t think we can just fix the PDF format, at least within a reasonable time frame.

-

The Good and Bad of HTML

-

In the meantime, we already have a structured document format which can be flexibly and interactively rendered: HTML (and CSS and Javascript, but here just collectively referred to as HTML). The HTML format provides almost exactly the inverse advantages and disadvantages of PDF.

-
    -
  • HTML can more easily adapt to different screen sizes. Over the last 20 years, web developers and browser vendors have created a wide array of techniques for responsive design.
  • -
  • HTML can be more easily understood by a program. HTML provides both an inherent structure plus additional attributes to support accessibility tools.
  • -
  • HTML can more easily express interaction. People have used HTML to produce amazing interactive documents that would be impossible in PDF. Think: Distill.pub, Explorable Explanations, Bartosz Ciechanowski, and Bret Victor, just to name a few.
  • -
-

Again, these advantages are hedged with “more easily”. One can easily produce a convoluted or inaccessible HTML document. But on balance, these aspects are more true than not compared to PDF. However, HTML is lacking where PDF shines:

-
    -
  • HTML is not self-contained. HTML files may contain URL references to external files that may be hosted on a server. One can rarely download an HTML file and have it render correctly without an internet connection.
  • -
  • HTML is not always rendered consistently. HTML’s dynamic layout means that an author may not see the same document as a reader. Moreover, HTML layout is not fully specified, so browsers may differ in their implementation.
  • -
  • HTML is not fully stable over time. Browsers try to maintain backwards compatibility (come on and slam!), but the HTML format is still evolving. The HTML standard is a “living standard” due to the rapidly changing needs and feature sets of modern browsers.
  • -
-

So I’ve been thinking: how can we design HTML documents to gain the benefits of PDFs without losing the key strengths of HTML? The rest of this document will present some early prototypes and tentative proposals in this direction.

-

Self-Contained HTML with EPUB

-

First, how can we make HTML documents self-contained? This is an old problem with many potential solutions. WARC, webarchive, and MHTML are all file formats designed to contain all the resources needed to render a web page. But these formats are more designed for snapshotting an existing website, rather than serving as a single source of truth for a web document. From my research, the most sensible format for this purpose is EPUB.

-

EPUB is a “distribution and interchange format for digital publications and documents”, per the EPUB 3 Overview. Reductively, an EPUB is a ZIP archive of web files: HTML, CSS, JS, and assets like images and fonts. On a technical level, what distinguishes EPUB from archival formats is that EPUB includes well-specified files that describe metadata about a document. On a social level, EPUB appears to be the HTML publication format with the most adoption and momentum in 2024, compared to moribund formats like Mobi.

-

The EPUB spec has all the gory details, but to give you a rough sense, a sample EPUB might have the following file structure:

-
-
sample.epub
├── META-INF
│ └── container.xml
└── EPUB
├── package.opf
├── nav.xhtml
├── chapter1.xhtml
├── chapter2.xhtml
└── img
└── sample.jpg
-
-

An EPUB contains content documents (like chapter1.xhtml and chapter2.xhtml) which contain the core HTML content. Content documents can contain relative links to assets in the EPUB, like img/sample.jpg. The navigation document (nav.xhtml) provides a table of contents, and the package document (package.opf) provides metadata about the document. These files collectively define one “rendition” of the whole document, and the container file (container.xml) points to each rendition contained in the EPUB.

-

The EPUB format optimizes for machine-readable content and metadata. HTML content is required to be in XML format (hence, XHTML). Document metadata like the title and author is provided in structured form in the package document. The navigation document has a carefully prescribed tag structure so the TOC can be consistently extracted.

-

Overall, EPUB’s structured format makes it a solid candidate for a single-file HTML document container. However, EPUB is not a silver bullet. EPUB is quite permissive in what kinds of content can be put into a content document.

-

For example, a major issue for self-containment is that EPUB content can embed external assets. A content document can legally include an image or font file whose src is a URL to a hosted server. This is not hypothetical, either; as of the time of writing, Google Docs’ EPUB exporter will emit CSS that will @import external Google Fonts files. The problem is that such an EPUB will not render correctly without an internet connection, nor will it render correctly if Google changes the URLs of its font files.

-

Hence, I will propose a new format which I call a portable EPUB, which is an EPUB with additional requirements and recommendations to improve PDF-like portability. The first requirement is:

-
Local asset requirement: All assets (like images, scripts, and fonts) embedded in a content document of a portable EPUB must refer to local files included in the EPUB. Hyperlinks to external files are permissible.
-

Consistency vs. Flexibility in Rendering

-

There is a fundamental tension between consistency and flexibility in document rendering. A PDF is consistent because it is designed to render in one way: one layout, one choice of fonts, one choice of colors, one pagination, and so on. Consistency is desirable because an author can be confident that their document will look good for a reader (or at least, not look bad). Consistency has subtler benefits — because a PDF is chunked into a consistent set of pages, a passage can be cited by referring to the page containing the passage.

-

On the other hand, flexibility is desirable because people want to read documents under different conditions. Device conditions include screen size (from phone to monitor) and screen capabilities (E-ink vs. LCD). Some readers may prefer larger fonts or higher contrasts for visibility, alternative color schemes for color blindness, or alternative font faces for dyslexia. Sufficiently flexible documents can even permit readers to select a level of detail appropriate for their background (here’s an example).

-

Finding a balance between consistency and flexibility is arguably the most fundamental design challenge in attempting to replace PDF with EPUB. To navigate this trade-off, we first need to talk about EPUB reading systems, or the tools that render an EPUB for human consumption. To get a sense of variation between reading systems, I tried rendering this post as an EPUB (without any styling, just HTML) on four systems: Calibre, Adobe Digital Editions, Apple Books, and Amazon Kindle. This is how the first page looks on each system (omitting Calibre because it looked the same as Adobe Digital Editions):

-
-
- -
Figure 6: Adobe Digital Editions
-
-
- -
Figure 7: Apple Books
-
-
- -
Figure 8: Amazon Kindle
-
-
-

Calibre and Adobe Digital Editions both render the document in a plain web view, as if you opened the HTML file directly in the browser. Apple Books applies some styling, using the New York font by default and changing link decorations. Amazon Kindle increases the line height and also uses my Kindle’s globally-configured default font, Bookerly.

-

As you can see, an EPUB may look quite different on different reading systems. The variation displayed above seems reasonable to me. But how different is too different? For instance, I was recently reading A History of Writing on my Kindle. Here’s an example of how a figure in the book renders on the Kindle:

-
- -
Figure 9: A figure in the EPUB version of A History of Writing on my Kindle
-
-

When I read this page, I thought, “wow, this looks like crap.” The figure is way too small (although you can long-press the image and zoom), and the position of the figure seems nonsensical. I found a PDF version online, and indeed the PDF’s figure has a proper size in the right location:

-
- -
Figure 10: A figure in the PDF version of A History of Writing on my Mac
-
-

This is not a fully fair comparison, but it nonetheless exemplifies an author’s reasonable concern today with EPUB: what if it makes my document look like crap?

-

Principles for Consistent EPUB Rendering

-

I think the core solution for consistently rendering EPUBs comes down to this:

-
    -
  1. The document format (i.e., portable EPUB) needs to establish a subset of HTML (call it “portable HTML”) which could represent most, but not all, documents.
  2. -
  3. Reading systems need to guarantee that a document within the subset will always look reasonable under all reading conditions.
  4. -
  5. If a document uses features outside this subset, then the document author is responsible for ensuring the readability of the document.
  6. -
-

If someone wants to write a document such as this post, then that person need not be a frontend web developer to feel confident that their document will render reasonably. Conversely, if someone wants to stuff the entire Facebook interface into an EPUB, then fine, but it’s on them to ensure the document is responsive.

-

For instance, one simple version of portable HTML could be described by this grammar:

-
-
Document ::= <article> Block* </article>
Block ::= <p> Inline* </p> | <figure> Block* </figure>
Inline ::= text | <strong> Inline* </strong>
-
-

The EPUB spec already defines a comparable subset for navigation documents. I am essentially proposing to extend this idea for content documents, but as a soft constraint rather than a hard constraint. Finding the right subset of HTML will take some experimentation, so I can only gesture toward the broad solution here.

-
Portable HTML rendering requirement: if a document only uses features in the portable HTML subset, then a portable EPUB reading system must guarantee that the document will render reasonably.
-
Portable HTML generation principle: when possible, systems that generate portable EPUB should output portable HTML.
-

A related challenge is to define when a particular rendering is “good” or “reasonable”, so one could evaluate either a document or a reading system on its conformance to spec. For instance, if document content is accidentally rendered in an inaccessible location off-screen, then that would be a bad rendering. A more aggressive definition might say that any rendering which violates accessibility guidelines is a bad rendering. Again, finding the right standard for rendering quality will take some experimentation.

-

If an author is particularly concerned about providing a single “canonical” rendering of their document, one fallback option is to provide a fixed-layout rendition. The EPUB format permits a rendition to specify that it should be rendered in fixed viewport size and optionally a fixed pagination. A fixed-layout rendition could then manually position all content on the page, similar to a PDF. Of course, this loses the flexibility of a reflowable rendition. But an EPUB could in theory provide multiple renditions, offering users the choice of whichever best suits their reading conditions and aesthetic preferences.

-
Fixed-layout fallback principle: systems that generate portable EPUB can consider providing both a reflowable and fixed-layout rendition of a document.
-

It’s possible that the reading system, the document author, and the reader can each express preferences about how a document should render. If these preferences are conflicting, then the renderer should generally prioritize the reader over the author, and the author over the reading system. This is an ideal use case for the “cascading” aspect of CSS:

-
Cascading styles principle: both documents and reading systems should express stylistic preferences (such as font face, font size, and document width) as CSS styles which can be overridden (e.g., do not use !important). The reading system should load the CSS rules such that the priority order is reading system styles < document styles < reader styles.
-

A Lighter EPUB Reading System

-

The act of working with PDFs is relatively fluid. I can download a PDF, quickly open it in a PDF reading system like Preview, and keep or discard the PDF as needed. But EPUB reading systems feel comparatively clunky. Loading an EPUB into Apple Books or Calibre will import the EPUB into the application’s library, which both copies and potentially decompresses the file. Loading an EPUB on a Kindle requires waiting several minutes for the Send to Kindle service to complete.

-

Worse, EPUB reading systems often don’t give you appropriate control over rendering an EPUB. For example, to emulate the experience of reading a book, most reading systems will chunk an EPUB into pages. A reader cannot scroll the document but rather “turn” the page, meaning textually-adjacent content can be split up between pages. Whether a document is paginated or scrolled should be a reader’s choice, but 3/4 reading systems I tested would only permit pagination (Calibre being the exception).

-

Therefore I decided to build a lighter EPUB reading system, Bene. You’re using it right now. This document is an EPUB — you can download it by clicking the button in the top-right corner. The styling and icons are mostly borrowed from pdf.js. Bene is implemented in Tauri, so it can work as both a desktop app and a browser app. Please appreciate this picture of Bene running as a desktop app:

-
- -
Figure 11: The Bene reading system running as a desktop app. Wow! It works!
-
-

Bene is designed to make opening and reading an EPUB feel fast and non-committal. The app is much quicker to open on my Macbook (<1sec) than other desktop apps. It decompresses files on-the-fly so no additional disk space is used. The backend is implemented in Rust and compiled to Wasm for the browser version.

-

The general design goal of Bene is to embody my ideals for a portable EPUB reader. That is, a utilitarian interface into an EPUB that satisfies my additional requirements for portability. Bene allows you to configure document rendering by changing the font size (try the +/- buttons in the top bar) and the viewer width (if you’re on desktop, move your mouse over the right edge of the document, and drag the handle). Long-term, I want Bene to also provide richer document interactions than a standard EPUB reader, which means we must discuss scripting.

-

Defensively Scripting EPUBs

-

To some people, the idea of code in their documents is unappealing. Last time one of my document-related projects was posted to Hacker News, the top comment was complaining about dynamic documents. The sentiment is understandable — concerns include:

-
    -
  • Bad code: your document shouldn’t crash or glitch due to a failure in a script.
  • -
  • Bad browsers: your document shouldn’t fail to render when a browser updates.
  • -
  • Bad actors: a malicious document shouldn’t be able to pwn your computer.
  • -
  • Bad interfaces: a script shouldn’t cause your document to become unreadable.
  • -
-

Yet, document scripting provides many opportunities for improving how we communicate information. For one example, if you haven’t yet, try hovering your mouse over any instance of the term portable EPUB (or long press it on a touch screen). You should see a tooltip appear with the term’s definition. The goal of these tooltips is to simplify reading a document that contains a lot of specialized notation or terminology. If you forget a definition, you can quickly look it up without having to jump around.

-

The key design challenge is how to permit useful scripting behaviors while limiting the downsides of scripting. One strategy is as follows:

-
Structure over scripts principle: documents should prefer structural annotations over scripts where possible. Documents should rely on reading systems to utilize structure where possible.
-

As an example of this principle, consider how the portable EPUB definition and references are expressed in this document:

-
-
-
<p><dfn-container>Hence, I will propose a new format which I call a <dfn id="portable-epub">portable EPUB</dfn>, which is an EPUB with additional requirements and recommendations to improve PDF-like portability.</dfn-container> The first requirement is:</p>
-
Listing 5: Creating a definition
-
-
-
For one example, if you haven't yet, try hovering your mouse over any instance of the term <a href="#portable-epub" data-target="dfn">portable EPUB</a> (or long press it on a touch screen).
-
Listing 6: Referencing a definition
-
-
-

The definition uses the <dfn> element wrapped in a custom <dfn-container> element to indicate the scope of the definition. The reference to the definition uses a standard anchor with an additional data-target attribute to emphasize that a definition is being linked. The document itself does not provide a script. The Bene reading system automatically detects these annotations and provides the tooltip interaction.

-

Encapsulating Scripts with Web Components

-

But what if a document wants to provide an interactive component that isn’t natively supported by the reading system? For instance, I have recently been working with The Rust Programming Language, a textbook that explains the different features of Rust. It contains a lot of passages like this one:

-
-
let x = 5;
let x = x + 1;
{
let x = x * 2;
println!("The value of x in the inner scope is: {x}");
}
println!("The value of x is: {x}");
}
-

This program first binds x to a value of 5. Then it creates a new variable x by repeating let x =, taking the original value and adding 1 so the value of x is then 6. Then, within an inner scope created with the curly brackets, the third let statement also shadows x and creates a new variable, multiplying the previous value by 2 to give x a value of 12. When that scope is over, the inner shadowing ends and x returns to being 6. When we run this program, it will output the following:

-
-

A challenge in reading this passage is finding the correspondences between the prose and the code. An interactive code reading component can help you track those correspondences, like this (try mousing-over or clicking-on each sentence):

-
fn main() { 
-  let x = 5
-  let x = x + 1
-  { 
-    let x = x * 2
-    println!(“The value of x in the inner scope is: {x}”);
-  }
-  println!(“The value of x is: {x}”);
-}

This program first binds x to a value of 5. Then it creates a new variable x by repeating let x =, taking the original value and adding 1 so the value of x is then 6. Then, within an inner scope created with the curly brackets, the third let statement also shadows x and creates a new variable, multiplying the previous value by 2 to give x a value of 12. When that scope is over, the inner shadowing ends and x returns to being 6.

-

The interactive code description component is used as follows:

-
-
<code-description>
<pre><code>fn main() {
let <span id="code-1">x</span> = <span id="code-2">5</span>;
<!-- rest of the code... -->
}</code></pre>
<p>
<code-step>This program first binds <a href="#code-1"><code>x</code></a> to a value of <a href="#code-2"><code>5</code></a>.</code-step>
<!-- rest of the prose... -->
</p>
</code-description>
-
-

Again, the document content contains no actual script. It contains a custom element <code-description>, and it contains a series of annotations as spans and anchors. The <code-description> element is implemented as a web component.

-

Web components are a programming model for writing encapsulated interactive fragments of HTML, CSS, and Javascript. Web components are one of many ways to write componentized HTML, such as React, Solid, Svelte, and Angular. I see web components as the most suitable as a framework for portable EPUBs because:

-
    -
  • Web components are a standardized technology. Its key features like custom elements (for specifying the behavior of novel elements) and shadow trees (for encapsulating a custom element from the rest of the document) are part of the official HTML and DOM specifications. This improves the likelihood that future browsers will maintain backwards compatibility with web components written today.
  • -
  • Web components are designed for tight encapsulation. The shadow tree mechanism ensures that styling applied within a custom component cannot accidentally affect other components on the page.
  • -
  • Web components have a decent ecosystem to leverage. As far as I can tell, web components are primarily used by Google, which has created notable frameworks like Lit.
  • -
  • Web components provide a clear fallback mechanism. If a renderer does not support Javascript, or if a renderer loses the ability to render web components, then an HTML renderer will simply ignore custom tags and render their contents.
  • -
-

Thus, I propose one principle and one requirement:

-
Encapsulated scripts principle: interactive components should be implemented as web components when possible, or otherwise be carefully designed to avoid conflicting with the base document or other components.
-
Components fallback requirement: interactive components must provide a fallback mechanism for rendering a reasonable substitute if Javascript is disabled.
-

Where To Go From Here?

-

Every time I have told someone “I want to replace PDF”, the statement has been met with extreme skepticism. Hopefully this document has convinced you that HTML-via-EPUB could potentially be a viable and desirable document format for the future.

-

My short-term goal is to implement a few more documents in the portable EPUB format, such as my PLDI paper. That will challenge both the file format and the reading system to be flexible enough to support each document type. In particular, each document should look good under a range of reading conditions (screen sizes, font sizes and faces, etc.).

-

My long-term goal is to design a document language that makes it easy to generate portable EPUBs. Writing XHTML by hand is not reasonable. I designed Nota before I was thinking about EPUBs, so its next iteration will be targeted at this new format.

-

If you have any thoughts about how to make this work or why I’m wrong, let me know by email or Twitter or Mastodon or wherever this gets posted. If you would like to help out, please reach out! This is just a passion project in my free time (for now…), so any programming or document authoring assistance could provide a lot of momentum to the project.

-

But What About…

-

A brief postscript for a few things I haven’t touched on.

-

…security? You might dislike the idea that document authors can run arbitrary Javascript on your personal computer. But then again, you presumably use both a PDF reader and a web browser on the daily, and those both run Javascript. What I’m proposing is not really any less secure than our current state of affairs. If anything, I’d hope that browsers are more battle-hardened than PDF viewers regarding code execution. Certainly the designers of EPUB reading systems should be careful to not give documents any additional capabilities beyond those already provided by the browser.

-

…privacy? Modern web sites use many kinds of telemetry and cookies to track user behavior. I strongly believe that EPUBs should not follow this trend. Telemetry must at least require the explicit consent of the user, and even that may be too generous. Companies will inevitably do things like offer discounts in exchange for requiring your consent to telemetry, similar to Amazon’s Kindle ads policy. Perhaps it is better to preempt this behavior by banning all tracking.

-

…aesthetics? People often intuit that LaTeX-generated PDFs look prettier than HTML documents, or even prettier than PDFs created by other software. This is because Donald Knuth took his job very seriously. In particular, the Knuth-Plass line-breaking algorithm tends to produce better-looking justified text than whatever algorithm is used by browsers.

-

There are two ways to make progress here. One is for browsers to provide more typography tools. Allegedly, text-wrap: pretty is supposed to help, but in my brief testing it doesn’t seem to improve line-break quality. The other way is to pre-calculate line breaks, which would only work for fixed-layout renditions.

-

…page citations? I think we just have to give up on citing content by pages. Instead, we should mandate a consistent numbering scheme for block elements within a document, and have people cite using that scheme. (Allison Morrell points out this is already the standard in the Canadian legal system.) For example, Bene will auto-number all blocks. If you’re on a desktop, try hovering your mouse in the left column next to the top-right of any paragraph.

-

…annotations? Ideally it should be as easy to mark up an EPUB as a PDF. The Web Annotations specification seems to be a good starting point for annotating EPUBs. Web Annotations seem designed for annotations on “targetable” objects, like a labeled element or a range of text. It’s not yet clear how to deal with free-hand annotations, especially on reflowable documents.

- - diff --git a/crates/tests/ref/examples/portable_epubs/pdf/portable_epubs.metadata.json b/crates/tests/ref/examples/portable_epubs/pdf/portable_epubs.metadata.json deleted file mode 100644 index 182b0d6e..00000000 --- a/crates/tests/ref/examples/portable_epubs/pdf/portable_epubs.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 5179715, - "page_count": 17 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/relative_path_links/html/child.html b/crates/tests/ref/examples/relative_path_links/html/child.html deleted file mode 100644 index 48e19aab..00000000 --- a/crates/tests/ref/examples/relative_path_links/html/child.html +++ /dev/null @@ -1,145 +0,0 @@ - - - - - - - -

Child Document

-

This document is in a subdirectory.

-

Link to Parent Directory

-

Go back to the root document.

-

Link to Sibling (Explicit Same Dir)

-

See the sibling in the same directory.

-

Link to Sibling (Implicit Same Dir)

-

Also see the sibling again with implicit path.

- - diff --git a/crates/tests/ref/examples/relative_path_links/html/root.html b/crates/tests/ref/examples/relative_path_links/html/root.html deleted file mode 100644 index 3fea1e14..00000000 --- a/crates/tests/ref/examples/relative_path_links/html/root.html +++ /dev/null @@ -1,144 +0,0 @@ - - - - - - - -

Root Document

-

This is the root of the test project.

-

Links to Subdirectory

-

See the child document in the subdir.

-

Also check out the sibling.

-

More Content

-

This tests that subdirectory paths transform correctly.

- - diff --git a/crates/tests/ref/examples/relative_path_links/html/sibling.html b/crates/tests/ref/examples/relative_path_links/html/sibling.html deleted file mode 100644 index e7948149..00000000 --- a/crates/tests/ref/examples/relative_path_links/html/sibling.html +++ /dev/null @@ -1,145 +0,0 @@ - - - - - - - -

Sibling Document

-

This is the sibling document in the subdirectory.

-

Link to Sibling

-

Go to the child document.

-

Link to Parent Directory

-

Return to root.

-

Content

-

Testing various relative path patterns.

- - diff --git a/crates/tests/ref/examples/relative_path_links/pdf/relative_path_links.metadata.json b/crates/tests/ref/examples/relative_path_links/pdf/relative_path_links.metadata.json deleted file mode 100644 index b9fc5cdb..00000000 --- a/crates/tests/ref/examples/relative_path_links/pdf/relative_path_links.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 24877, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/script_injection/html/index.html b/crates/tests/ref/examples/script_injection/html/index.html deleted file mode 100644 index 52c72c74..00000000 --- a/crates/tests/ref/examples/script_injection/html/index.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - -

Test

-

Hello world.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/script_injection/html/index.js b/crates/tests/ref/examples/script_injection/html/index.js deleted file mode 100644 index 044794b2..00000000 --- a/crates/tests/ref/examples/script_injection/html/index.js +++ /dev/null @@ -1 +0,0 @@ -console.log("loaded"); diff --git a/crates/tests/ref/examples/script_injection/html/style.metadata.json b/crates/tests/ref/examples/script_injection/html/style.metadata.json deleted file mode 100644 index 1fc2cb0a..00000000 --- a/crates/tests/ref/examples/script_injection/html/style.metadata.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "filetype": "css", - "file_size": 20, - "path": "style.css", - "hash": "eac0e790573fb6424e6008c9f3a1bdf262add6bb2460a001bb89549fb1ddf482" -} \ No newline at end of file diff --git a/crates/tests/ref/examples/script_injection_no_css/html/index.html b/crates/tests/ref/examples/script_injection_no_css/html/index.html deleted file mode 100644 index 0085aa5b..00000000 --- a/crates/tests/ref/examples/script_injection_no_css/html/index.html +++ /dev/null @@ -1,138 +0,0 @@ - - - - - -

Test

-

Hello world.

- - - \ No newline at end of file diff --git a/crates/tests/ref/examples/script_injection_no_css/html/index.js b/crates/tests/ref/examples/script_injection_no_css/html/index.js deleted file mode 100644 index 044794b2..00000000 --- a/crates/tests/ref/examples/script_injection_no_css/html/index.js +++ /dev/null @@ -1 +0,0 @@ -console.log("loaded"); diff --git a/crates/tests/ref/examples/severance_minusep_minus1/html/severance-ep-1.html b/crates/tests/ref/examples/severance_minusep_minus1/html/severance-ep-1.html deleted file mode 100644 index 84b90805..00000000 --- a/crates/tests/ref/examples/severance_minusep_minus1/html/severance-ep-1.html +++ /dev/null @@ -1,172 +0,0 @@ - - - - - - Good news about hell - Severance [s1/e1] - - -

Good news about hell - Severance [s1/e1]

- -

The first thing to notice is the colour palette. She is dressed in blue, but her hair is chestnut red. It spills out for the frame of her figure into the table around it, blockaded at its border by chairs and a carpet clad in green, yellow, then green again; then gray. The establishing shot is a bird’s eye view of an unknown woman who is soon revealed to have been put in the board room by someone else’s design, who learns about her predicament only by a man’s voice that emanates from the little device that rests on the table along with the woman, arranged so that it aims directly at her head.

-

This opening image is a graph of the subject’s predicament on the severed floor at Lumon. Blue is the company colour. Employees are almost invariably dressed in shades of it– navy, midnight, Prussian, Oxford, cobalt– and more reliably so as we work our way up the hierarchy. Red is unruly passion, the tone of tempers that itch to tear off the straitjacket directives, to disregulate the business-as-usual in which there is no obvious place for illicit activities. Green is the accent of Macro Data Refinement, the division of Lumon in which the show’s protagonists are employed. The device directs a man’s voice at a woman’s body in an attempt to keep her tempers in check, to ensure her firecraft does not smoke out the staid edifice of personality management, to order her “perceptual chronologies” accordingly. (Later in the episode, we learn that she almost manages to “break in” on the control room during that opening sequence: the solidity of its enclosure is threatened from the very first.)

-

It is instructive to attempt to articulate the dynamics that this graph indexes before we start talking about other scenes in the show. Graphs are not at one with what they represent, for in the decision to render ‘data’ in the very act of a representation, we both lose and gain distinction of the dynamics in question. The voice that opens Helly R up to the world of Lumon’s severed floor begins: “Who are you?” This question is a mistake. We retroactively learn, in a later scene, that Mark S was in fact supposed to begin with a less interrogative, more perfunctory: “Hi there, you on the table. I wonder if you’d mind taking a brief survey.” As Irving puts it: “You [Mark S] skipped the preamble”. Helly R is thrust, by this accident, immediately into questioning not only herself, but also the self-assurance of the voice that interrogates her. Does this voice in my head [she could be thinking] really know what it is doing? Or is it just a role of similarly confused actors struggling to stick to a badly written script?

-

This episode-length recap of the first episode names this graph ‘the Helly incident’, a poorly executed orientation of Helly’s newfound subjectivity that can be blamed at one level on Mark S (for starting with the wrong part of the manual), at another on Mr. Milchick (for misguiding Mark while he was distracted setting up the visual feed), on Ms. Cobel (for giving Mark Petey K’s old manual without redacting his obscurely scribbled notes and paper bookmarks), or even on Irving (for neglecting to intervene and clarify how Mark should begin being the more senior refiner in the situation: “Irving will be there to shadow. Just stick to the flowchart and escalate properly depending on dialectics.”). Wherever to place blame, there is doubtless a misconfiguration that takes place. Helly’s instinctual reaction seems to be to try to kill the voice pointed at her head, rather than to befriend it as Mark states he did (where Petey was Mark). (Helly will eventually have sex with the source of the voice, rather than murdering or fraternizing with it.) In this episode, however, Mark (the voice’s source) is physically assaulted by Helly, dented in his temple by the same vocalization device that mediated their first communication.

- -

So this is the Macro Data refiner’s situation. On the one hand, she is affronted with a voice that compels her to abide by the rules and permits her to enjoy some small reliefs (egress from a locked room) if she concedes to it. On the other, she is always teeming and thus flirting with red, considering escape routes that involve drawing blood, setting off alarms, or removing clothes.

-

This unruly red is what Macro Data Refinement’s greening procedures are supposed to contain to produce a completely controlled and scripted blot of blue. Perhaps this is why the glimpse of the vacant desks planned for the severed floor’s expansion are draped in purple, for that shade of subjectivity would better incorporate the contrasting contours into a unified and taskable tone. The red that threatens Lumon’s corporate, calm, and collected blue (the Lumon logo is a water droplet that suspiciously resembles a camera) is splattered across scenes in the episode. It is, for example, the envelope that Petey slips Mark at the company-owned restaurant Pip’s with the suggestion that he should read it if he wants to know “what’s going on down there”. It is the sweater Mark wears to his sister’s dinnerless dinner party, punctuated by red place mats (“what a lot of people overlook, I think, is that life is not food”), where the ontological substance of his innie is called into question, and where we learn about the passions he has lost– the history of World War II, educating, whiskey– the last of which seems to have given way to an indiscriminate consumption of beer, wine, anything that will drown out the clarity of sober consciousness. It is the general hue of his sister’s house, which consistently wants him to question that placid blue of his company-subsidized housing at Baird Creek Manor.

-

This dinner tells us something more about the subject in question in Severance. Just as Helly’s outie had alerted us to the basic principle in the video her innie was shown in curiously lo-fi resolution to conclude her innie’s orientation– “perceptual chronologies… surgically split”– Mark’s predicament is comparably explained to him by another more or less ignorant (we can’t help but imagine) third party: “One’s memories are bifurcated, so when you’re at work, you have no recollection of what it is you do there.” As pretentious as they are, the dinner’s guests do seem to be attuned to an important dimension of the meaning of life, which is that it can’t only be about satiating biological needs such as food. What each individual ‘needs’ is a disharmonious melange of needs and demands, openings of desire that emerge not only through a graph of bare necessities– food, water, shelter– but also through capricious carapaces that emerge from more ambiguous pinings in the social sphere– company, care, love. The real question of Lumon’s smooth functioning is whether it will be able to effectively plug up these pinings, the incidental moments at work where one wonders what one is really doing with one’s life, whether the company can really manage its employees’ unsanctioned thoughts and the way in which those illicit ideas seep into the daily practice of their workerhood. More on the plasticity of our needs and drives to satisfy them in later posts.

- -

Ms. Cobel, in contrast to Helly’s and Mark’s doubtful and doubting red, is a stormy and icy blue. (We must wait until season two to uncover the historical and psychological depth of this colour for Harmony Cobel.) She is the figure with a body that seems to be the most in charge, of those we meet in this episode. Though Ms. Cobel is not a master in herself, it seems, for she too is subjected to a disembodied voice-via-device, ‘the board’, albeit which only appears evidently as an ear so far (“The board won’t be contributing to this meeting vocally”). Cobel is responsible for keeping the severed floor’s uncertainty in check, the ‘head’ that sits atop the variegated limbs of its disobedient body.

-

When Cobel reprimands Mark for his derailing of Helly’s orientation, she recalls an obscure and theological aspect of her parentage:

-
You know, my mother was an atheist. She used to say that there was good news and bad news about hell. The good news is, hell is just the product of a morbid human imagination. The bad news is, whatever humans can imagine, they can usually create.
-

At the close of the episode, just before Mark’s senile neighbor Mrs. Selvig (who we have only heard about through Mark’s voice thus far, when he is on the phone with her) visually reveals herself to be the same woman as Ms. Cobel, she gives a slightly different account of her heritage:

-
You know, my mother was a Catholic. She used to say it takes the saints eight hours to bless a sleeping child. I hope you aren’t rushing the saints.
-

It’s unclear at this point whether Cobel is a severed worker like Mark, or whether there is some other reason for her (strange, almost senseless) duplicity. Why lie about the religious leanings of one’s mother? Or maybe ‘mother’ is actually a name for something else, a kind of interim authority that gives synthetic weight to some hearsay, rumor, or idle phrase. (The other cameo of an ambiguously defined mother in this episode is in question five of Helly’s orientation survey: “To the best of your memory, what is or was the color of your mother’s eyes?”) Perhaps it is that, severed or not, atheist or Catholic, Cobel’s subjectivity is structured by a comparable split in her perceptual chronologies, whereby some memories (of her mother) get more airtime in her conscious experience of herself than others.

-

Severance flirts with this idea extensively, that the innie/outie dyad is analogous to the unconscious/conscious experience that we, as subjects, have of ourselves. Mark’s sister Devon hints at the psycho-logical reading of the severed condition in her diagnosis of Mark’s morose (outie) predicament as a state of failed therapy in response to mourning for his late wife: “I just feel like forgetting about her for eight hours a day isn’t the same thing as healing.” As with not-mothers and the plasticity of the drive, we will address the psychoanalytic implications here in later posts; but to finish I want to bring our attention to the imaging of time at work in just this first episode.

-

The fascinating details of failed synchronisation between all the watchfaces we see are enumerated in this Reddit thread. Many of the watch hands appear to be stalled, and the crossover from each to the next– as when Mark Scout switches his wrist watch in preparation for his elevator descent into the workday of innie Mark S– doesn’t match with our experience of the actors on screen. One of the few things we do know about the severance procedure is that it ‘alters perceptual chronologies’, and that this messing with a subject’s sense of time is thought to

-
    -
  1. make them more adequate or productive in a certain kind of work (for why else would Lumon go to the necessary lengths to sever some employees)
  2. -
  3. section off innie memories and experience from outie memories and experience
  4. -
-

So the subject’s subjectivity is marked by its sense of time, and Lumon’s success (profitability?) hinges in some way on altering their employees’ stable sense of it while in the space of the severed floor.

-

Mark S’s temporal predicament here has been explained by a man whose last name we get by speeding up the saying of his own, Karl Marx (Mar-k-S). Logically speaking, Marx argues, there is an amount of time that goes missing in the worker’s employment by way of a wage, when he advances some portion of his time to the capitalist in exchange for a pay-check one or more weeks later. I refer the reader interested in the details to chapter 20 of Capital Vol. I: but the essential point here is that it is through an obfuscation of the real value of a worker’s time that the capitalist manages to produce surplus-value. The production of this kind of time-distorted surplus-value is the engine of capitalism as a social relation that appears, on the surface, to be equally fair to capitalist and worker alike. So the project of controlling ‘perceptual chronologies’ with which Lumon seems to be so concerned is perhaps not as esoteric and inessential as it might at first seem. Perhaps it is an embodiment of the core ingredient of the company’s success as a company, of its incorporation as an entity that ought to be sustained even at the expense of its members’ happiness, their health, and their livelihoods.

-
-


-
- -
- - diff --git a/crates/tests/ref/examples/severance_minusep_minus1/pdf/severance-ep-1.metadata.json b/crates/tests/ref/examples/severance_minusep_minus1/pdf/severance-ep-1.metadata.json deleted file mode 100644 index 4fb48c54..00000000 --- a/crates/tests/ref/examples/severance_minusep_minus1/pdf/severance-ep-1.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 942321, - "page_count": 5 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/target_function/epub/target_function.metadata.json b/crates/tests/ref/examples/target_function/epub/target_function.metadata.json deleted file mode 100644 index 4bdc0c60..00000000 --- a/crates/tests/ref/examples/target_function/epub/target_function.metadata.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "filetype": "epub", - "file_size": 2911, - "title": "Target Function", - "language": "en", - "spine_files": [ - "main.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/target_function/epub/xhtml/main.xhtml b/crates/tests/ref/examples/target_function/epub/xhtml/main.xhtml deleted file mode 100644 index 99b3578d..00000000 --- a/crates/tests/ref/examples/target_function/epub/xhtml/main.xhtml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - -
-

Target Function Test

-

This test verifies that the target() function returns format-specific values.

-

Current format: epub

-

Conditional Content

-

EPUB-specific content: This appears only in EPUB output

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/target_function/html/main.html b/crates/tests/ref/examples/target_function/html/main.html deleted file mode 100644 index 15fc8bdc..00000000 --- a/crates/tests/ref/examples/target_function/html/main.html +++ /dev/null @@ -1,142 +0,0 @@ - - - - - - - -

Target Function Test

-

This test verifies that the target() function returns format-specific values.

-

Current format: html

-

Conditional Content

-

HTML-specific content: This appears only in HTML output

- - diff --git a/crates/tests/ref/examples/target_function/pdf/main.metadata.json b/crates/tests/ref/examples/target_function/pdf/main.metadata.json deleted file mode 100644 index 6dd3f58a..00000000 --- a/crates/tests/ref/examples/target_function/pdf/main.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 21381, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/target_function_in_module/epub/target_function_in_module.metadata.json b/crates/tests/ref/examples/target_function_in_module/epub/target_function_in_module.metadata.json deleted file mode 100644 index bb56d55c..00000000 --- a/crates/tests/ref/examples/target_function_in_module/epub/target_function_in_module.metadata.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "filetype": "epub", - "file_size": 3071, - "title": "Target Function in Module", - "language": "en", - "spine_files": [ - "main.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/target_function_in_module/epub/xhtml/main.xhtml b/crates/tests/ref/examples/target_function_in_module/epub/xhtml/main.xhtml deleted file mode 100644 index 54deaa30..00000000 --- a/crates/tests/ref/examples/target_function_in_module/epub/xhtml/main.xhtml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - -
-

Target Function in Module

-

Main File

-

Main: epub

-

Imported Module

-

Module returns: epub

-

Module Conditional

-

Module: EPUB

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/target_function_in_module/html/format_helper.html b/crates/tests/ref/examples/target_function_in_module/html/format_helper.html deleted file mode 100644 index a6c80d5d..00000000 --- a/crates/tests/ref/examples/target_function_in_module/html/format_helper.html +++ /dev/null @@ -1,136 +0,0 @@ - - - - - - - - diff --git a/crates/tests/ref/examples/target_function_in_module/html/main.html b/crates/tests/ref/examples/target_function_in_module/html/main.html deleted file mode 100644 index 75a1d524..00000000 --- a/crates/tests/ref/examples/target_function_in_module/html/main.html +++ /dev/null @@ -1,144 +0,0 @@ - - - - - - - -

Target Function in Module

-

Main File

-

Main: html

-

Imported Module

-

Module returns: html

-

Module Conditional

-

Module: HTML

- - diff --git a/crates/tests/ref/examples/target_function_in_module/pdf/format_helper.metadata.json b/crates/tests/ref/examples/target_function_in_module/pdf/format_helper.metadata.json deleted file mode 100644 index d4deaae2..00000000 --- a/crates/tests/ref/examples/target_function_in_module/pdf/format_helper.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 2123, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/target_function_in_module/pdf/main.metadata.json b/crates/tests/ref/examples/target_function_in_module/pdf/main.metadata.json deleted file mode 100644 index 48b18d63..00000000 --- a/crates/tests/ref/examples/target_function_in_module/pdf/main.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 14387, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/examples/target_function_in_package/epub/target_function_in_package.metadata.json b/crates/tests/ref/examples/target_function_in_package/epub/target_function_in_package.metadata.json deleted file mode 100644 index b8322ad3..00000000 --- a/crates/tests/ref/examples/target_function_in_package/epub/target_function_in_package.metadata.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "filetype": "epub", - "file_size": 2966, - "title": "Target Function in Package", - "language": "en", - "spine_files": [ - "main.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/examples/target_function_in_package/epub/xhtml/main.xhtml b/crates/tests/ref/examples/target_function_in_package/epub/xhtml/main.xhtml deleted file mode 100644 index 4fef17e5..00000000 --- a/crates/tests/ref/examples/target_function_in_package/epub/xhtml/main.xhtml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - -
-

Target Function in Package

-

Using bullseye package

-

Package sees: html

-

Using target()

-

Main file target: epub

- - -
\ No newline at end of file diff --git a/crates/tests/ref/examples/target_function_in_package/html/main.html b/crates/tests/ref/examples/target_function_in_package/html/main.html deleted file mode 100644 index a00e9c77..00000000 --- a/crates/tests/ref/examples/target_function_in_package/html/main.html +++ /dev/null @@ -1,142 +0,0 @@ - - - - - - - -

Target Function in Package

-

Using bullseye package

-

Package sees: html

-

Using target()

-

Main file target: html

- - diff --git a/crates/tests/ref/files/1d75da8a42d8f937/portable_epubs/html/portable_epubs.html b/crates/tests/ref/files/1d75da8a42d8f937/portable_epubs/html/portable_epubs.html deleted file mode 100644 index f3fec726..00000000 --- a/crates/tests/ref/files/1d75da8a42d8f937/portable_epubs/html/portable_epubs.html +++ /dev/null @@ -1,220 +0,0 @@ - - - - Portable EPUBs - - -
-

Portable EPUBs

- Will CrichtonBrown UniversityJanuary 25, 2024Despite decades of advances in document rendering technology, most of the world’s documents are stuck in the 1990s due to the limitations of PDF. Yet, modern document formats like HTML have yet to provide a competitive alternative to PDF. This post explores what prevents HTML documents from being portable, and I propose a way forward based on the EPUB format. To demonstrate my ideas, this post is presented using a prototype EPUB reading system. -
-

The Good and Bad of PDF

-

PDF is the de facto file format for reading and sharing digital documents like papers, textbooks, and flyers. People use the PDF format for several reasons:

-
    -
  • -

    PDFs are self-contained. A PDF is a single file that contains all the images, fonts, and other data needed to render it. It’s easy to pass around a PDF. A PDF is unlikely to be missing some critical dependency on your computer.

    -
  • -
  • -

    PDFs are rendered consistently. A PDF specifies precisely how it should be rendered, so a PDF author can be confident that a reader will see the same document under any conditions.

    -
  • -
  • -

    PDFs are stable over time. PDFs from decades ago still render the same today. PDFs have a relatively stable standard. PDFs cannot be easily edited.

    -
  • -
-

Yet, in the 32 years since the initial release of PDF, a lot has changed. People print out documents less and less. People use phones, tablets, and e-readers to read digital documents. The internet happened; web browsers now provide a platform for rendering rich documents. These changes have laid bare the limitations of PDF:

-
    -
  • -

    PDFs cannot easily adapt to different screen sizes. Most PDFs are designed to mimic 8.5x11″ paper (or worse, 145,161 km2). These PDFs are readable on a computer monitor, but they are less readable on a tablet, and far less readable on a phone.

    -
  • -
  • -

    PDFs cannot be easily understood by programs. A plain PDF is just a scattered sequence of lines and characters. For accessibility, screen readers may not know which order to read through the text. For data extraction, scraping tables out of a PDF is an open area of research.

    -
  • -
  • -

    PDFs cannot easily express interaction. PDFs were primarily designed as static documents that cannot react to user input beyond filling in forms.

    -
  • -
-

These pros and cons can be traced back to one key fact: the PDF representation of a document is fundamentally unstructured. A PDF consists of commands like:

-
-
Move the cursor to the right by 0.5 inches.
Set the current font color to black.
Draw the text "Hello World" at the current position.
-
-

PDF commands are unstructured because a document’s organization is only clear to a person looking at the rendered document, and not clear from the commands themselves. Reflowing, accessibility, data extraction, and interaction all rely on programmatically understanding the structure of a document. Hence, these aspects are not easy to integrate with PDFs.

-

This raises the question: how can we design digital documents with the benefits of PDFs but without the limitations?

-

Can’t We Just Fix PDF?

-

A simple answer is to improve the PDF format. After all, we already have billions of PDFs — why reinvent the wheel?

-

The designers of PDF are well aware of its limitations. I carefully hedged each bullet with “easily”, because PDF does make it possible to overcome each limitation, at least partially. PDFs can be annotated with their logical structure to create a tagged PDF. Most PDF exporters will not add tags automatically — the simplest option is to use Adobe’s subscription-only Acrobat Pro, which provides an “Automatically tag PDF” action. For example, here is a recent paper of mine with added tags:

-
- -
Figure 1: A LaTeX-generated paper with automatically added tags.
-
-

If you squint, you can see that the logical structure closely resembles the HTML document model. The document has sections, headings, paragraphs, and links. Adobe characterizes the logical structure as an accessibility feature, but it has other benefits. You may be surprised to know that Adobe Acrobat allows you to reflow tagged PDFs at different screen sizes. You may be unsurprised to know that reflowing does not always work well. For example:

-
-
- -
Figure 3: A section of the paper in its default fixed layout. Note that the second paragraph is wrapped around the code snippet.
-
-
- -
Figure 4: The same section of the paper after reflowing to a smaller width. Note that the code is now interleaved with the second paragraph.
-
-
-

In theory, these issues could be fixed. If the world’s PDF exporters could be modified to include logical structure. If Adobe’s reflowing algorithm could be improved to fix its edge cases. If the reflowing algorithm could be specified, and if Adobe were willing to release it publicly, and if it were implemented in each PDF viewer. And that doesn’t even cover interaction! So in practice, I don’t think we can just fix the PDF format, at least within a reasonable time frame.

-

The Good and Bad of HTML

-

In the meantime, we already have a structured document format which can be flexibly and interactively rendered: HTML (and CSS and Javascript, but here just collectively referred to as HTML). The HTML format provides almost exactly the inverse advantages and disadvantages of PDF.

-
    -
  • HTML can more easily adapt to different screen sizes. Over the last 20 years, web developers and browser vendors have created a wide array of techniques for responsive design.
  • -
  • HTML can be more easily understood by a program. HTML provides both an inherent structure plus additional attributes to support accessibility tools.
  • -
  • HTML can more easily express interaction. People have used HTML to produce amazing interactive documents that would be impossible in PDF. Think: Distill.pub, Explorable Explanations, Bartosz Ciechanowski, and Bret Victor, just to name a few.
  • -
-

Again, these advantages are hedged with “more easily”. One can easily produce a convoluted or inaccessible HTML document. But on balance, these aspects are more true than not compared to PDF. However, HTML is lacking where PDF shines:

-
    -
  • HTML is not self-contained. HTML files may contain URL references to external files that may be hosted on a server. One can rarely download an HTML file and have it render correctly without an internet connection.
  • -
  • HTML is not always rendered consistently. HTML’s dynamic layout means that an author may not see the same document as a reader. Moreover, HTML layout is not fully specified, so browsers may differ in their implementation.
  • -
  • HTML is not fully stable over time. Browsers try to maintain backwards compatibility (come on and slam!), but the HTML format is still evolving. The HTML standard is a “living standard” due to the rapidly changing needs and feature sets of modern browsers.
  • -
-

So I’ve been thinking: how can we design HTML documents to gain the benefits of PDFs without losing the key strengths of HTML? The rest of this document will present some early prototypes and tentative proposals in this direction.

-

Self-Contained HTML with EPUB

-

First, how can we make HTML documents self-contained? This is an old problem with many potential solutions. WARC, webarchive, and MHTML are all file formats designed to contain all the resources needed to render a web page. But these formats are more designed for snapshotting an existing website, rather than serving as a single source of truth for a web document. From my research, the most sensible format for this purpose is EPUB.

-

EPUB is a “distribution and interchange format for digital publications and documents”, per the EPUB 3 Overview. Reductively, an EPUB is a ZIP archive of web files: HTML, CSS, JS, and assets like images and fonts. On a technical level, what distinguishes EPUB from archival formats is that EPUB includes well-specified files that describe metadata about a document. On a social level, EPUB appears to be the HTML publication format with the most adoption and momentum in 2024, compared to moribund formats like Mobi.

-

The EPUB spec has all the gory details, but to give you a rough sense, a sample EPUB might have the following file structure:

-
-
sample.epub
├── META-INF
│ └── container.xml
└── EPUB
├── package.opf
├── nav.xhtml
├── chapter1.xhtml
├── chapter2.xhtml
└── img
└── sample.jpg
-
-

An EPUB contains content documents (like chapter1.xhtml and chapter2.xhtml) which contain the core HTML content. Content documents can contain relative links to assets in the EPUB, like img/sample.jpg. The navigation document (nav.xhtml) provides a table of contents, and the package document (package.opf) provides metadata about the document. These files collectively define one “rendition” of the whole document, and the container file (container.xml) points to each rendition contained in the EPUB.

-

The EPUB format optimizes for machine-readable content and metadata. HTML content is required to be in XML format (hence, XHTML). Document metadata like the title and author is provided in structured form in the package document. The navigation document has a carefully prescribed tag structure so the TOC can be consistently extracted.

-

Overall, EPUB’s structured format makes it a solid candidate for a single-file HTML document container. However, EPUB is not a silver bullet. EPUB is quite permissive in what kinds of content can be put into a content document.

-

For example, a major issue for self-containment is that EPUB content can embed external assets. A content document can legally include an image or font file whose src is a URL to a hosted server. This is not hypothetical, either; as of the time of writing, Google Docs’ EPUB exporter will emit CSS that will @import external Google Fonts files. The problem is that such an EPUB will not render correctly without an internet connection, nor will it render correctly if Google changes the URLs of its font files.

-

Hence, I will propose a new format which I call a portable EPUB, which is an EPUB with additional requirements and recommendations to improve PDF-like portability. The first requirement is:

-
Local asset requirement: All assets (like images, scripts, and fonts) embedded in a content document of a portable EPUB must refer to local files included in the EPUB. Hyperlinks to external files are permissible.
-

Consistency vs. Flexibility in Rendering

-

There is a fundamental tension between consistency and flexibility in document rendering. A PDF is consistent because it is designed to render in one way: one layout, one choice of fonts, one choice of colors, one pagination, and so on. Consistency is desirable because an author can be confident that their document will look good for a reader (or at least, not look bad). Consistency has subtler benefits — because a PDF is chunked into a consistent set of pages, a passage can be cited by referring to the page containing the passage.

-

On the other hand, flexibility is desirable because people want to read documents under different conditions. Device conditions include screen size (from phone to monitor) and screen capabilities (E-ink vs. LCD). Some readers may prefer larger fonts or higher contrasts for visibility, alternative color schemes for color blindness, or alternative font faces for dyslexia. Sufficiently flexible documents can even permit readers to select a level of detail appropriate for their background (here’s an example).

-

Finding a balance between consistency and flexibility is arguably the most fundamental design challenge in attempting to replace PDF with EPUB. To navigate this trade-off, we first need to talk about EPUB reading systems, or the tools that render an EPUB for human consumption. To get a sense of variation between reading systems, I tried rendering this post as an EPUB (without any styling, just HTML) on four systems: Calibre, Adobe Digital Editions, Apple Books, and Amazon Kindle. This is how the first page looks on each system (omitting Calibre because it looked the same as Adobe Digital Editions):

-
-
- -
Figure 6: Adobe Digital Editions
-
-
- -
Figure 7: Apple Books
-
-
- -
Figure 8: Amazon Kindle
-
-
-

Calibre and Adobe Digital Editions both render the document in a plain web view, as if you opened the HTML file directly in the browser. Apple Books applies some styling, using the New York font by default and changing link decorations. Amazon Kindle increases the line height and also uses my Kindle’s globally-configured default font, Bookerly.

-

As you can see, an EPUB may look quite different on different reading systems. The variation displayed above seems reasonable to me. But how different is too different? For instance, I was recently reading A History of Writing on my Kindle. Here’s an example of how a figure in the book renders on the Kindle:

-
- -
Figure 9: A figure in the EPUB version of A History of Writing on my Kindle
-
-

When I read this page, I thought, “wow, this looks like crap.” The figure is way too small (although you can long-press the image and zoom), and the position of the figure seems nonsensical. I found a PDF version online, and indeed the PDF’s figure has a proper size in the right location:

-
- -
Figure 10: A figure in the PDF version of A History of Writing on my Mac
-
-

This is not a fully fair comparison, but it nonetheless exemplifies an author’s reasonable concern today with EPUB: what if it makes my document look like crap?

-

Principles for Consistent EPUB Rendering

-

I think the core solution for consistently rendering EPUBs comes down to this:

-
    -
  1. The document format (i.e., portable EPUB) needs to establish a subset of HTML (call it “portable HTML”) which could represent most, but not all, documents.
  2. -
  3. Reading systems need to guarantee that a document within the subset will always look reasonable under all reading conditions.
  4. -
  5. If a document uses features outside this subset, then the document author is responsible for ensuring the readability of the document.
  6. -
-

If someone wants to write a document such as this post, then that person need not be a frontend web developer to feel confident that their document will render reasonably. Conversely, if someone wants to stuff the entire Facebook interface into an EPUB, then fine, but it’s on them to ensure the document is responsive.

-

For instance, one simple version of portable HTML could be described by this grammar:

-
-
Document ::= <article> Block* </article>
Block ::= <p> Inline* </p> | <figure> Block* </figure>
Inline ::= text | <strong> Inline* </strong>
-
-

The EPUB spec already defines a comparable subset for navigation documents. I am essentially proposing to extend this idea for content documents, but as a soft constraint rather than a hard constraint. Finding the right subset of HTML will take some experimentation, so I can only gesture toward the broad solution here.

-
Portable HTML rendering requirement: if a document only uses features in the portable HTML subset, then a portable EPUB reading system must guarantee that the document will render reasonably.
-
Portable HTML generation principle: when possible, systems that generate portable EPUB should output portable HTML.
-

A related challenge is to define when a particular rendering is “good” or “reasonable”, so one could evaluate either a document or a reading system on its conformance to spec. For instance, if document content is accidentally rendered in an inaccessible location off-screen, then that would be a bad rendering. A more aggressive definition might say that any rendering which violates accessibility guidelines is a bad rendering. Again, finding the right standard for rendering quality will take some experimentation.

-

If an author is particularly concerned about providing a single “canonical” rendering of their document, one fallback option is to provide a fixed-layout rendition. The EPUB format permits a rendition to specify that it should be rendered in fixed viewport size and optionally a fixed pagination. A fixed-layout rendition could then manually position all content on the page, similar to a PDF. Of course, this loses the flexibility of a reflowable rendition. But an EPUB could in theory provide multiple renditions, offering users the choice of whichever best suits their reading conditions and aesthetic preferences.

-
Fixed-layout fallback principle: systems that generate portable EPUB can consider providing both a reflowable and fixed-layout rendition of a document.
-

It’s possible that the reading system, the document author, and the reader can each express preferences about how a document should render. If these preferences are conflicting, then the renderer should generally prioritize the reader over the author, and the author over the reading system. This is an ideal use case for the “cascading” aspect of CSS:

-
Cascading styles principle: both documents and reading systems should express stylistic preferences (such as font face, font size, and document width) as CSS styles which can be overridden (e.g., do not use !important). The reading system should load the CSS rules such that the priority order is reading system styles < document styles < reader styles.
-

A Lighter EPUB Reading System

-

The act of working with PDFs is relatively fluid. I can download a PDF, quickly open it in a PDF reading system like Preview, and keep or discard the PDF as needed. But EPUB reading systems feel comparatively clunky. Loading an EPUB into Apple Books or Calibre will import the EPUB into the application’s library, which both copies and potentially decompresses the file. Loading an EPUB on a Kindle requires waiting several minutes for the Send to Kindle service to complete.

-

Worse, EPUB reading systems often don’t give you appropriate control over rendering an EPUB. For example, to emulate the experience of reading a book, most reading systems will chunk an EPUB into pages. A reader cannot scroll the document but rather “turn” the page, meaning textually-adjacent content can be split up between pages. Whether a document is paginated or scrolled should be a reader’s choice, but 3/4 reading systems I tested would only permit pagination (Calibre being the exception).

-

Therefore I decided to build a lighter EPUB reading system, Bene. You’re using it right now. This document is an EPUB — you can download it by clicking the button in the top-right corner. The styling and icons are mostly borrowed from pdf.js. Bene is implemented in Tauri, so it can work as both a desktop app and a browser app. Please appreciate this picture of Bene running as a desktop app:

-
- -
Figure 11: The Bene reading system running as a desktop app. Wow! It works!
-
-

Bene is designed to make opening and reading an EPUB feel fast and non-committal. The app is much quicker to open on my Macbook (<1sec) than other desktop apps. It decompresses files on-the-fly so no additional disk space is used. The backend is implemented in Rust and compiled to Wasm for the browser version.

-

The general design goal of Bene is to embody my ideals for a portable EPUB reader. That is, a utilitarian interface into an EPUB that satisfies my additional requirements for portability. Bene allows you to configure document rendering by changing the font size (try the +/- buttons in the top bar) and the viewer width (if you’re on desktop, move your mouse over the right edge of the document, and drag the handle). Long-term, I want Bene to also provide richer document interactions than a standard EPUB reader, which means we must discuss scripting.

-

Defensively Scripting EPUBs

-

To some people, the idea of code in their documents is unappealing. Last time one of my document-related projects was posted to Hacker News, the top comment was complaining about dynamic documents. The sentiment is understandable — concerns include:

-
    -
  • Bad code: your document shouldn’t crash or glitch due to a failure in a script.
  • -
  • Bad browsers: your document shouldn’t fail to render when a browser updates.
  • -
  • Bad actors: a malicious document shouldn’t be able to pwn your computer.
  • -
  • Bad interfaces: a script shouldn’t cause your document to become unreadable.
  • -
-

Yet, document scripting provides many opportunities for improving how we communicate information. For one example, if you haven’t yet, try hovering your mouse over any instance of the term portable EPUB (or long press it on a touch screen). You should see a tooltip appear with the term’s definition. The goal of these tooltips is to simplify reading a document that contains a lot of specialized notation or terminology. If you forget a definition, you can quickly look it up without having to jump around.

-

The key design challenge is how to permit useful scripting behaviors while limiting the downsides of scripting. One strategy is as follows:

-
Structure over scripts principle: documents should prefer structural annotations over scripts where possible. Documents should rely on reading systems to utilize structure where possible.
-

As an example of this principle, consider how the portable EPUB definition and references are expressed in this document:

-
-
-
<p><dfn-container>Hence, I will propose a new format which I call a <dfn id="portable-epub">portable EPUB</dfn>, which is an EPUB with additional requirements and recommendations to improve PDF-like portability.</dfn-container> The first requirement is:</p>
-
Listing 5: Creating a definition
-
-
-
For one example, if you haven't yet, try hovering your mouse over any instance of the term <a href="#portable-epub" data-target="dfn">portable EPUB</a> (or long press it on a touch screen).
-
Listing 6: Referencing a definition
-
-
-

The definition uses the <dfn> element wrapped in a custom <dfn-container> element to indicate the scope of the definition. The reference to the definition uses a standard anchor with an additional data-target attribute to emphasize that a definition is being linked. The document itself does not provide a script. The Bene reading system automatically detects these annotations and provides the tooltip interaction.

-

Encapsulating Scripts with Web Components

-

But what if a document wants to provide an interactive component that isn’t natively supported by the reading system? For instance, I have recently been working with The Rust Programming Language, a textbook that explains the different features of Rust. It contains a lot of passages like this one:

-
-
let x = 5;
let x = x + 1;
{
let x = x * 2;
println!("The value of x in the inner scope is: {x}");
}
println!("The value of x is: {x}");
}
-

This program first binds x to a value of 5. Then it creates a new variable x by repeating let x =, taking the original value and adding 1 so the value of x is then 6. Then, within an inner scope created with the curly brackets, the third let statement also shadows x and creates a new variable, multiplying the previous value by 2 to give x a value of 12. When that scope is over, the inner shadowing ends and x returns to being 6. When we run this program, it will output the following:

-
-

A challenge in reading this passage is finding the correspondences between the prose and the code. An interactive code reading component can help you track those correspondences, like this (try mousing-over or clicking-on each sentence):

-
-
fn main() { 
-  let x = 5
-  let x = x + 1
-  { 
-    let x = x * 2
-    println!(“The value of x in the inner scope is: {x}”);
-  }
-  println!(“The value of x is: {x}”);
-}
-

This program first binds x to a value of 5.Then it creates a new variable x by repeating let x =,taking the original value and adding 1 so the value of x is then 6.Then, within an inner scope created with the curly brackets,the third let statement also shadows x and creates a new variable,multiplying the previous value by 2 to give x a value of 12.When that scope is over, the inner shadowing ends and x returns to being 6.

-
-

The interactive code description component is used as follows:

-
-
<code-description>
<pre><code>fn main() {
let <span id="code-1">x</span> = <span id="code-2">5</span>;
<!-- rest of the code... -->
}</code></pre>
<p>
<code-step>This program first binds <a href="#code-1"><code>x</code></a> to a value of <a href="#code-2"><code>5</code></a>.</code-step>
<!-- rest of the prose... -->
</p>
</code-description>
-
-

Again, the document content contains no actual script. It contains a custom element <code-description>, and it contains a series of annotations as spans and anchors. The <code-description> element is implemented as a web component.

-

Web components are a programming model for writing encapsulated interactive fragments of HTML, CSS, and Javascript. Web components are one of many ways to write componentized HTML, such as React, Solid, Svelte, and Angular. I see web components as the most suitable as a framework for portable EPUBs because:

-
    -
  • Web components are a standardized technology. Its key features like custom elements (for specifying the behavior of novel elements) and shadow trees (for encapsulating a custom element from the rest of the document) are part of the official HTML and DOM specifications. This improves the likelihood that future browsers will maintain backwards compatibility with web components written today.
  • -
  • Web components are designed for tight encapsulation. The shadow tree mechanism ensures that styling applied within a custom component cannot accidentally affect other components on the page.
  • -
  • Web components have a decent ecosystem to leverage. As far as I can tell, web components are primarily used by Google, which has created notable frameworks like Lit.
  • -
  • Web components provide a clear fallback mechanism. If a renderer does not support Javascript, or if a renderer loses the ability to render web components, then an HTML renderer will simply ignore custom tags and render their contents.
  • -
-

Thus, I propose one principle and one requirement:

-
Encapsulated scripts principle: interactive components should be implemented as web components when possible, or otherwise be carefully designed to avoid conflicting with the base document or other components.
-
Components fallback requirement: interactive components must provide a fallback mechanism for rendering a reasonable substitute if Javascript is disabled.
-

Where To Go From Here?

-

Every time I have told someone “I want to replace PDF”, the statement has been met with extreme skepticism. Hopefully this document has convinced you that HTML-via-EPUB could potentially be a viable and desirable document format for the future.

-

My short-term goal is to implement a few more documents in the portable EPUB format, such as my PLDI paper. That will challenge both the file format and the reading system to be flexible enough to support each document type. In particular, each document should look good under a range of reading conditions (screen sizes, font sizes and faces, etc.).

-

My long-term goal is to design a document language that makes it easy to generate portable EPUBs. Writing XHTML by hand is not reasonable. I designed Nota before I was thinking about EPUBs, so its next iteration will be targeted at this new format.

-

If you have any thoughts about how to make this work or why I’m wrong, let me know by email or Twitter or Mastodon or wherever this gets posted. If you would like to help out, please reach out! This is just a passion project in my free time (for now…), so any programming or document authoring assistance could provide a lot of momentum to the project.

-

But What About…

-

A brief postscript for a few things I haven’t touched on.

-

…security? You might dislike the idea that document authors can run arbitrary Javascript on your personal computer. But then again, you presumably use both a PDF reader and a web browser on the daily, and those both run Javascript. What I’m proposing is not really any less secure than our current state of affairs. If anything, I’d hope that browsers are more battle-hardened than PDF viewers regarding code execution. Certainly the designers of EPUB reading systems should be careful to not give documents any additional capabilities beyond those already provided by the browser.

-

…privacy? Modern web sites use many kinds of telemetry and cookies to track user behavior. I strongly believe that EPUBs should not follow this trend. Telemetry must at least require the explicit consent of the user, and even that may be too generous. Companies will inevitably do things like offer discounts in exchange for requiring your consent to telemetry, similar to Amazon’s Kindle ads policy. Perhaps it is better to preempt this behavior by banning all tracking.

-

…aesthetics? People often intuit that LaTeX-generated PDFs look prettier than HTML documents, or even prettier than PDFs created by other software. This is because Donald Knuth took his job very seriously. In particular, the Knuth-Plass line-breaking algorithm tends to produce better-looking justified text than whatever algorithm is used by browsers.

-

There’s two ways to make progress here. One is for browsers to provide more typography tools. Allegedly, text-wrap: pretty is supposed to help, but in my brief testing it doesn’t seem to improve line-break quality. The other way is to pre-calculate line breaks, which would only work for fixed-layout renditions.

-

…page citations? I think we just have to give up on citing content by pages. Instead, we should mandate a consistent numbering scheme for block elements within a document, and have people cite using that scheme. (Allison Morrell points out this is already the standard in the Canadian legal system.) For example, Bene will auto-number all blocks. If you’re on a desktop, try hovering your mouse in the left column next to the top-right of any paragraph.

-

…annotations? Ideally it should be as easy to mark up an EPUB as a PDF. The Web Annotations specification seems to be a good starting point for annotating EPUBs. Web Annotations seem designed for annotations on “targetable” objects, like a labeled element or a range of text. It’s not yet clear how to deal with free-hand annotations, especially on reflowable documents.

- - - \ No newline at end of file diff --git a/crates/tests/ref/files/1d75da8a42d8f937/portable_epubs/pdf/portable_epubs.metadata.json b/crates/tests/ref/files/1d75da8a42d8f937/portable_epubs/pdf/portable_epubs.metadata.json deleted file mode 100644 index 50e46c09..00000000 --- a/crates/tests/ref/files/1d75da8a42d8f937/portable_epubs/pdf/portable_epubs.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 5063041, - "page_count": 10 -} \ No newline at end of file diff --git a/crates/tests/ref/files/47c9eeba6b52a15f/cover-letter/pdf/cover-letter.metadata.json b/crates/tests/ref/files/47c9eeba6b52a15f/cover-letter/pdf/cover-letter.metadata.json deleted file mode 100644 index 4f7061e2..00000000 --- a/crates/tests/ref/files/47c9eeba6b52a15f/cover-letter/pdf/cover-letter.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 1212760, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/files/5b4554f7aaa1292c/test/epub/test.metadata.json b/crates/tests/ref/files/5b4554f7aaa1292c/test/epub/test.metadata.json deleted file mode 100644 index 1c2d5922..00000000 --- a/crates/tests/ref/files/5b4554f7aaa1292c/test/epub/test.metadata.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "filetype": "epub", - "file_size": 2971, - "title": "Test", - "language": "en", - "spine_files": [ - "test.xhtml" - ], - "has_nav": true -} \ No newline at end of file diff --git a/crates/tests/ref/files/6fdadcdcac7454ad/index/html/index.html b/crates/tests/ref/files/6fdadcdcac7454ad/index/html/index.html deleted file mode 100644 index 0f598725..00000000 --- a/crates/tests/ref/files/6fdadcdcac7454ad/index/html/index.html +++ /dev/null @@ -1,24 +0,0 @@ - - - - - -

Screening the subject

-

Screening the subject is a blog that analyses content on both the big and small screen in reasonable detail, i.e. episode-by-episode or scene-by-scene. Contact us at info@ohrg.org for enquiries.

- -
-


-
- -
- - - \ No newline at end of file diff --git a/crates/tests/ref/files/6fdadcdcac7454ad/index/pdf/index.metadata.json b/crates/tests/ref/files/6fdadcdcac7454ad/index/pdf/index.metadata.json deleted file mode 100644 index 23d3525c..00000000 --- a/crates/tests/ref/files/6fdadcdcac7454ad/index/pdf/index.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 4215, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/ref/files/9a129f9c736a7947/severance-ep-1/html/severance-ep-1.html b/crates/tests/ref/files/9a129f9c736a7947/severance-ep-1/html/severance-ep-1.html deleted file mode 100644 index c10fc107..00000000 --- a/crates/tests/ref/files/9a129f9c736a7947/severance-ep-1/html/severance-ep-1.html +++ /dev/null @@ -1,43 +0,0 @@ - - - - Good news about hell - Severance [s1/e1] - - -

Good news about hell - Severance [s1/e1]

- -

The first thing to notice is the colour palette. She is dressed in blue, but her hair is chestnut red. It spills out from the frame of her figure into the table around it, blockaded at its border by chairs and a carpet clad in green, yellow, then green again; then gray. The establishing shot is a bird’s eye view of an unknown woman who is soon revealed to have been put in the board room by someone else’s design, who learns about her predicament only by a man’s voice that emanates from the little device that rests on the table along with the woman, arranged so that it aims directly at her head.

-

This opening image is a graph of the subject’s predicament on the severed floor at Lumon. Blue is the company colour. Employees are almost invariably dressed in shades of it– navy, midnight, Prussian, Oxford, cobalt– and more reliably so as we work our way up the hierarchy. Red is unruly passion, the tone of tempers that itch to tear off the straitjacket directives, to disregulate the business-as-usual in which there is no obvious place for illicit activities. Green is the accent of Macro Data Refinement, the division of Lumon in which the show’s protagonists are employed. The device directs a man’s voice at a woman’s body in an attempt to keep her tempers in check, to ensure her firecraft does not smoke out the staid edifice of personality management, to order her “perceptual chronologies” accordingly. (Later in the episode, we learn that she almost manages to “break in” on the control room during that opening sequence: the solidity of its enclosure is threatened from the very first.)

-

It is instructive to attempt to articulate the dynamics that this graph indexes before we start talking about other scenes in the show. Graphs are not at one with what they represent, for in the decision to render ‘data’ in the very act of a representation, we both lose and gain distinction of the dynamics in question. The voice that opens Helly R up to the world of Lumon’s severed floor begins: “Who are you?” This question is a mistake. We retroactively learn, in a later scene, that Mark S was in fact supposed to begin with a less interrogative, more perfunctory: “Hi there, you on the table. I wonder if you’d mind taking a brief survey.” As Irving puts it: “You [Mark S] skipped the preamble”. Helly R is thrust, by this accident, immediately into questioning not only herself, but also the self-assurance of the voice that interrogates her. Does this voice in my head [she could be thinking] really know what it is doing? Or is it just a role of similarly confused actors struggling to stick to a badly written script?

-

This episode-length recap of the first episode names this graph ‘the Helly incident’, a poorly executed orientation of Helly’s newfound subjectivity that can be blamed at one level on Mark S (for starting with the wrong part of the manual), at another on Mr. Milchick (for misguiding Mark while he was distracted setting up the visual feed), on Ms. Cobel (for giving Mark Petey K’s old manual without redacting his obscurely scribbled notes and paper bookmarks), or even on Irving (for neglecting to intervene and clarify how Mark should begin being the more senior refiner in the situation: “Irving will be there to shadow. Just stick to the flowchart and escalate properly depending on dialectics.”). Wherever to place blame, there is doubtless a misconfiguration that takes place. Helly’s instinctual reaction seems to be to try to kill the voice pointed at her head, rather than to befriend it as Mark states he did (where Petey was Mark). (Helly will eventually have sex with the source of the voice, rather than murdering or fraternizing with it.) In this episode, however, Mark (the voice’s source) is physically assaulted by Helly, dented in his temple by the same vocalization device that mediated their first communication.

- -

So this is the Macro Data refiner’s situation. On the one hand, she is affronted with a voice that compels her to abide by the rules and permits her to enjoy some small reliefs (egress from a locked room) if she concedes to it. On the other, she is always teeming and thus flirting with red, considering escape routes that involve drawing blood, setting off alarms, or removing clothes.

-

This unruly red is what Macro Data Refinement’s greening procedures are supposed to contain to produce a completely controlled and scripted blot of blue. Perhaps this is why the glimpse of the vacant desks planned for the severed floor’s expansion are draped in purple, for that shade of subjectivity would better incorporate the contrasting contours into a unified and taskable tone. The red that threatens Lumon’s corporate, calm, and collected blue (the Lumon logo is a water droplet that suspiciously resembles a camera) is splattered across scenes in the episode. It is, for example, the envelope that Petey slips Mark at the company-owned restaurant Pip’s with the suggestion that he should read it if he wants to know “what’s going on down there”. It is the sweater Mark wears to his sister’s dinnerless dinner party, punctuated by red place mats (“what a lot of people overlook, I think, is that life is not food”), where the ontological substance of his innie is called into question, and where we learn about the passions he has lost– the history of World War II, educating, whiskey– the last of which seems to have given way to an indiscriminate consumption of beer, wine, anything that will drown out the clarity of sober consciousness. It is the general hue of his sister’s house, which consistently wants him to question that placid blue of his company-subsidized housing at Baird Creek Manor.

-

This dinner tells us something more about the subject in question in Severance. Just as Helly’s outie had alerted us to the basic principle in the video her innie was shown in curiously lo-fi resolution to conclude her innie’s orientation– “perceptual chronologies… surgically split”– Mark’s predicament is comparably explained to him by another more or less ignorant (we can’t help but imagine) third party: “One’s memories are bifurcated, so when you’re at work, you have no recollection of what it is you do there.” As pretentious as they are, the dinner’s guests do seem to be attuned to an important dimension of the meaning of life, which is that it can’t only be about satiating biological needs such as food. What each individual ‘needs’ is a disharmonious melange of needs and demands, openings of desire that emerge not only through a graph of bare necessities– food, water, shelter– but also through capricious carapaces that emerge from more ambiguous pinings in the social sphere– company, care, love. The real question of Lumon’s smooth functioning is whether it will be able to effectively plug up these pinings, the incidental moments at work where one wonders what one is really doing with one’s life, whether the company can really manage its employees’ unsanctioned thoughts and the way in which those illicit ideas seep into the daily practice of their workerhood. More on the plasticity of our needs and drives to satisfy them in later posts.

- -

Ms. Cobel, in contrast to Helly’s and Mark’s doubtful and doubting red, is a stormy and icy blue. (We must wait until season two to uncover the historical and psychological depth of this colour for Harmony Cobel.) She is the figure with a body that seems to be the most in charge, of those we meet in this episode. Though Ms. Cobel is not a master in herself, it seems, for she too is subjected to a disembodied voice-via-device, ‘the board’, albeit which only appears evidently as an ear so far (“The board won’t be contributing to this meeting vocally”). Cobel is responsible for keeping the severed floor’s uncertainty in check, the ‘head’ that sits atop the variegated limbs of its disobedient body.

-

When Cobel reprimands Mark for his derailing of Helly’s orientation, she recalls an obscure and theological aspect of her parentage:

-
You know, my mother was an atheist. She used to say that there was good news and bad news about hell. The good news is, hell is just the product of a morbid human imagination. The bad news is, whatever humans can imagine, they can usually create.
-

At the close of the episode, just before Mark’s senile neighbor Mrs. Selvig (who we have only heard about through Mark’s voice thus far, when he is on the phone with her) visually reveals herself to be the same woman as Ms. Cobel, she gives a slightly different account of her heritage:

-
You know, my mother was a Catholic. She used to say it takes the saints eight hours to bless a sleeping child. I hope you aren’t rushing the saints.
-

It’s unclear at this point whether Cobel is a severed worker like Mark, or whether there is some other reason for her (strange, almost senseless) duplicity. Why lie about the religious leanings of one’s mother? Or maybe ‘mother’ is actually a name for something else, a kind of interim authority that gives synthetic weight to some hearsay, rumor, or idle phrase. (The other cameo of an ambiguously defined mother in this episode is in question five of Helly’s orientation survey: “To the best of your memory, what is or was the color of your mother’s eyes?”) Perhaps it is that, severed or not, atheist or Catholic, Cobel’s subjectivity is structured by a comparable split in her perceptual chronologies, whereby some memories (of her mother) get more airtime in her conscious experience of herself than others.

-

Severance flirts with this idea extensively, that the innie/outie dyad is analogous to the unconscious/conscious experience that we, as subjects, have of ourselves. Mark’s sister Devon hints at the psycho-logical reading of the severed condition in her diagnosis of Mark’s morose (outie) predicament as a state of failed therapy in response to mourning for his late wife: “I just feel like forgetting about her for eight hours a day isn’t the same thing as healing.” As with not-mothers and the plasticity of the drive, we will address the psychoanalytic implications here in later posts; but to finish I want to bring our attention to the imaging of time at work in just this first episode.

-

The fascinating details of failed synchronisation between all the watchfaces we see are enumerated in this Reddit thread. Many of the watch hands appear to be stalled, and the crossover from each to the next– as when Mark Scout switches his wrist watch in preparation for his elevator descent into the workday of innie Mark S– doesn’t match with our experience of the actors on screen. One of the few things we do know about the severance procedure is that it ‘alters perceptual chronologies’, and that this messing with a subject’s sense of time is thought to

-
    -
  1. make them more adequate or productive in a certain kind of work (for why else would Lumon go to the necessary lengths to sever some employees)
  2. -
  3. section off innie memories and experience from outie memories and experience
  4. -
-

So the subject’s subjectivity is marked by its sense of time, and Lumon’s success (profitability?) hinges in some way on altering their employees’ stable sense of it while in the space of the severed floor.

-

Mark S’s temporal predicament here has been explained by a man whose last name we get by speeding up the saying of his own, Karl Marx (Mar-k-S). Logically speaking, Marx argues, there is an amount of time that goes missing in the worker’s employment by way of a wage, when he advances some portion of his time to the capitalist in exchange for a pay-check one or more weeks later. I refer the reader interested in the details to chapter 20 of Capital Vol. I: but the essential point here is that it is through an obfuscation of the real value of a worker’s time that the capitalist manages to produce surplus-value. The production of this kind of time-distorted surplus-value is the engine of capitalism as a social relation that appears, on the surface, to be equally fair to capitalist and worker alike. So the project of controlling ‘perceptual chronologies’ with which Lumon seems to be so concerned is perhaps not as esoteric and inessential as it might at first seem. Perhaps it is an embodiment of the core ingredient of the company’s success as a company, of its incorporation as an entity that ought to be sustained even at the expense of its members’ happiness, their health, and their livelihoods.

-
-


-
- -
- - - \ No newline at end of file diff --git a/crates/tests/ref/files/9a129f9c736a7947/severance-ep-1/pdf/severance-ep-1.metadata.json b/crates/tests/ref/files/9a129f9c736a7947/severance-ep-1/pdf/severance-ep-1.metadata.json deleted file mode 100644 index f0089a1e..00000000 --- a/crates/tests/ref/files/9a129f9c736a7947/severance-ep-1/pdf/severance-ep-1.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 898313, - "page_count": 2 -} \ No newline at end of file diff --git a/crates/tests/ref/files/f0b104671a707ab2/multiple_links_inline/html/multiple_links_inline.html b/crates/tests/ref/files/f0b104671a707ab2/multiple_links_inline/html/multiple_links_inline.html deleted file mode 100644 index e4a7c8e2..00000000 --- a/crates/tests/ref/files/f0b104671a707ab2/multiple_links_inline/html/multiple_links_inline.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - - -

Multiple Links Test

-

Adjacent Links with Text

-

See File 1 and File 2 for details.

-

Multiple References in List

-

References: A, B, C.

-

Minimal Separation

-

Adjacent links: XY

-

Multiple Links in Sentence

-

Check the introduction, then chapter 1, and finally the conclusion.

- - - \ No newline at end of file diff --git a/crates/tests/ref/files/f0b104671a707ab2/multiple_links_inline/pdf/multiple_links_inline.metadata.json b/crates/tests/ref/files/f0b104671a707ab2/multiple_links_inline/pdf/multiple_links_inline.metadata.json deleted file mode 100644 index b3203d95..00000000 --- a/crates/tests/ref/files/f0b104671a707ab2/multiple_links_inline/pdf/multiple_links_inline.metadata.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "filetype": "pdf", - "file_size": 3918, - "page_count": 1 -} \ No newline at end of file diff --git a/crates/tests/src/helpers/cli.rs b/crates/tests/src/helpers/cli.rs deleted file mode 100644 index e76c0828..00000000 --- a/crates/tests/src/helpers/cli.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::process::Command; - -/// Build a `Command` that invokes the rheo CLI. -/// -/// - If `RHEO_BIN` is set: returns `Command::new()` — caller appends -/// subcommand args directly (no `cargo run` prefix). -/// - Else: returns `Command::new("cargo")` with `run` + optional -/// `--manifest-path ` + `-p rheo --`, reproducing the -/// current monorepo `cargo run -p rheo --` behaviour. 
-pub fn rheo_cli_command() -> Command { - if let Ok(bin) = std::env::var("RHEO_BIN") { - Command::new(bin) - } else { - let mut cmd = Command::new("cargo"); - cmd.arg("run"); - if let Ok(manifest) = std::env::var("RHEO_MANIFEST") { - cmd.args(["--manifest-path", &manifest]); - } - cmd.args(["-p", "rheo", "--"]); - cmd - } -} diff --git a/crates/tests/src/helpers/comparison.rs b/crates/tests/src/helpers/comparison.rs deleted file mode 100644 index ab3c9398..00000000 --- a/crates/tests/src/helpers/comparison.rs +++ /dev/null @@ -1,925 +0,0 @@ -use similar::{ChangeTag, TextDiff}; -use std::collections::HashMap; -use std::collections::hash_map::DefaultHasher; -use std::env; -use std::fs; -use std::hash::{Hash, Hasher}; -use std::path::{Path, PathBuf}; -use walkdir::WalkDir; - -use super::is_single_file_test; - -pub fn verify_html_output(test_name: &str, actual_dir: &Path) { - let ref_dir = get_reference_dir(actual_dir, test_name, "html"); - ensure_reference_exists(&ref_dir, test_name, "HTML"); - - validate_html_assets(&ref_dir, actual_dir).expect("HTML asset validation failed"); - - for_each_file_with_ext(&ref_dir, "html", |entry| { - let rel_path = entry.path().strip_prefix(&ref_dir).unwrap(); - let actual_file = actual_dir.join(rel_path); - compare_html_content(entry.path(), &actual_file, test_name).expect("HTML content mismatch"); - }); -} - -pub fn verify_pdf_output(test_name: &str, actual_dir: &Path) { - let ref_dir = get_reference_dir(actual_dir, test_name, "pdf"); - ensure_reference_exists(&ref_dir, test_name, "PDF"); - - validate_pdf_assets(&ref_dir, actual_dir).expect("PDF asset validation failed"); - - for_each_file_with_ext(actual_dir, "pdf", |entry| { - let rel_path = entry.path().strip_prefix(actual_dir).unwrap(); - let metadata_file = ref_dir.join(format!( - "{}.metadata.json", - rel_path.file_stem().unwrap().to_string_lossy() - )); - - if !metadata_file.exists() { - panic!( - "PDF metadata reference not found: {}. 
Run with UPDATE_REFERENCES=1", - metadata_file.display() - ); - } - - let ref_metadata_json = - fs::read_to_string(&metadata_file).expect("Failed to read reference metadata"); - let ref_metadata = - serde_json::from_str(&ref_metadata_json).expect("Failed to parse reference metadata"); - let actual_metadata = - extract_pdf_metadata(entry.path()).expect("Failed to extract PDF metadata"); - - compare_pdf_metadata(&ref_metadata, &actual_metadata).expect("PDF metadata mismatch"); - }); -} - -/// Compute a short hash of a file path for reference directory naming -fn compute_file_hash(path: &Path) -> String { - let mut hasher = DefaultHasher::new(); - path.to_string_lossy().hash(&mut hasher); - format!("{:08x}", hasher.finish()) -} - -fn get_reference_dir(actual_dir: &Path, test_name: &str, output_type: &str) -> PathBuf { - // Check if this is a single-file test (test name contains file path components) - if test_name.contains("_slash") - && (test_name.contains("_full_stop") || test_name.ends_with("typ")) - { - // This is likely a single-file test - // Extract the original file path from the sanitized test name - let file_path_guess = test_name - .replace("_slash", "/") - .replace("_full_stop", ".") - .replace("_colon", ":") - .replace("_minus", "-"); - - // Check if this is a single .typ file test - if is_single_file_test(&file_path_guess) { - let file_path = Path::new(&file_path_guess); - let hash = compute_file_hash(file_path); - let filename = file_path - .file_stem() - .unwrap_or(file_path.as_os_str()) - .to_string_lossy(); - - return PathBuf::from("ref/files") - .join(&hash) - .join(filename.as_ref()) - .join(output_type); - } - } - - // Default: project-based references - let ref_base = if actual_dir.starts_with("examples/") { - PathBuf::from("ref/examples") - } else if actual_dir.starts_with("tests/cases/") { - PathBuf::from("ref/cases") - } else { - PathBuf::from("ref/examples") - }; - ref_base.join(test_name).join(output_type) -} - -fn 
ensure_reference_exists(ref_dir: &Path, test_name: &str, output_type: &str) { - if !ref_dir.exists() { - panic!( - "{} reference not found for {}. Run with UPDATE_REFERENCES=1 to generate.", - output_type, test_name - ); - } -} - -fn for_each_file_with_ext(dir: &Path, extension: &str, mut callback: F) -where - F: FnMut(&walkdir::DirEntry), -{ - for entry in WalkDir::new(dir).into_iter().filter_map(Result::ok) { - if entry.file_type().is_file() - && entry.path().extension().and_then(|s| s.to_str()) == Some(extension) - { - callback(&entry); - } - } -} - -fn extract_build_relative_path(repo_relative_path: &str) -> PathBuf { - let path = PathBuf::from(repo_relative_path); - let components: Vec<_> = path.components().collect(); - // If path has fewer than 3 components, it's already a build-relative path (e.g. "style.css") - if components.len() <= 2 { - return path; - } - let after_project = components.into_iter().skip(2).collect::(); - after_project - .strip_prefix("content") - .unwrap_or(&after_project) - .to_path_buf() -} - -fn compare_html_content(reference: &Path, actual: &Path, test_name: &str) -> Result<(), String> { - let ref_content = - fs::read_to_string(reference).map_err(|e| format!("Failed to read reference: {}", e))?; - let actual_content = - fs::read_to_string(actual).map_err(|e| format!("Failed to read actual: {}", e))?; - - if ref_content == actual_content { - Ok(()) - } else { - let diff = compute_html_diff(&ref_content, &actual_content); - - // Generate test-specific update command - let test_name_sanitized = test_name - .replace('/', "_slash") - .replace('.', "_full_stop") - .replace(':', "_colon") - .replace('-', "_minus"); - let update_cmd = format!( - "UPDATE_REFERENCES=1 cargo test run_test_case_{} -- --nocapture", - test_name_sanitized - ); - - // Check diff limit for additional suggestion - let diff_limit = env::var("RHEO_TEST_DIFF_LIMIT") - .ok() - .and_then(|v| v.parse::().ok()) - .unwrap_or(2000); - - let mut error_msg = format!( - "HTML 
content mismatch for {}\n\n{}\n\nTo update this reference, run:\n {}", - reference.display(), - diff, - update_cmd - ); - - // If diff was truncated, suggest increasing the limit - if diff.contains("showing first") { - error_msg.push_str(&format!( - "\n\nOr to see full diff:\n RHEO_TEST_DIFF_LIMIT={} cargo test run_test_case_{} -- --nocapture", - diff_limit * 5, - test_name_sanitized - )); - } - - Err(error_msg) - } -} - -fn compute_html_diff(reference: &str, actual: &str) -> String { - let diff = TextDiff::from_lines(reference, actual); - - // Collect statistics - let mut insertions = 0; - let mut deletions = 0; - let mut unchanged = 0; - - let mut diff_output = String::new(); - for change in diff.iter_all_changes() { - let sign = match change.tag() { - ChangeTag::Delete => { - deletions += 1; - "-" - } - ChangeTag::Insert => { - insertions += 1; - "+" - } - ChangeTag::Equal => { - unchanged += 1; - " " - } - }; - diff_output.push_str(&format!("{}{}", sign, change)); - } - - // Get diff limit from environment variable or use default - let diff_limit = env::var("RHEO_TEST_DIFF_LIMIT") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or(2000); - - // Build output with statistics - let mut output = String::new(); - output.push_str(&format!( - "Diff: {} insertions(+), {} deletions(-), {} lines unchanged\n\n", - insertions, deletions, unchanged - )); - - if diff_output.len() > diff_limit { - output.push_str(&diff_output[..diff_limit]); - output.push_str(&format!( - "\n\n... 
(showing first {} chars of {} bytes total)", - diff_limit, - diff_output.len() - )); - } else { - output.push_str(&diff_output); - } - - output -} - -fn collect_files_by_predicate(dir: &Path, predicate: F) -> Vec -where - F: Fn(&walkdir::DirEntry) -> bool, -{ - WalkDir::new(dir) - .into_iter() - .filter_map(Result::ok) - .filter(|e| e.file_type().is_file() && predicate(e)) - .filter_map(|e| e.path().strip_prefix(dir).ok().map(|p| p.to_path_buf())) - .collect() -} - -fn validate_html_assets(reference_dir: &Path, actual_dir: &Path) -> Result<(), String> { - let mut errors = Vec::new(); - - let ref_files = collect_files_by_predicate(reference_dir, |e| { - !e.path().to_string_lossy().ends_with(".metadata.json") - }); - - let binary_refs = collect_files_by_predicate(reference_dir, |e| { - e.path().extension().and_then(|s| s.to_str()) == Some("json") - && e.path().to_string_lossy().ends_with(".metadata.json") - }); - - for ref_file in &ref_files { - if !actual_dir.join(ref_file).exists() { - errors.push(format!( - "Missing file: {}\n Expected in: {}\n Referenced from: {}", - ref_file.display(), - actual_dir.display(), - reference_dir.display() - )); - } - } - - for metadata_file in &binary_refs { - validate_binary_file_from_metadata(reference_dir, actual_dir, metadata_file, &mut errors); - } - - let actual_files = collect_files_by_predicate(actual_dir, |_| true); - let expected_files = build_expected_files_set(reference_dir, &ref_files, &binary_refs); - - for actual_file in &actual_files { - if !expected_files.contains(actual_file) { - errors.push(format!( - "Unexpected file: {}\n Found in: {}\n Not defined in references: {}\n This file may need to be added to reference or excluded in rheo.toml", - actual_file.display(), - actual_dir.join(actual_file).display(), - reference_dir.display() - )); - } - } - - if errors.is_empty() { - Ok(()) - } else { - let mut error_msg = String::from("HTML asset validation failed:\n\n"); - for (i, error) in errors.iter().enumerate() { - 
error_msg.push_str(&format!("{}. {}\n\n", i + 1, error)); - } - Err(error_msg) - } -} - -fn validate_binary_file_from_metadata( - reference_dir: &Path, - actual_dir: &Path, - metadata_file: &Path, - errors: &mut Vec, -) { - let metadata_path = reference_dir.join(metadata_file); - if let Ok(json_str) = fs::read_to_string(&metadata_path) - && let Ok(metadata) = serde_json::from_str::(&json_str) - { - let build_relative_path = metadata - .path - .as_ref() - .map(|p| extract_build_relative_path(p)) - .unwrap_or_else(|| { - let file_str = metadata_file.to_string_lossy(); - PathBuf::from(file_str.strip_suffix(".metadata.json").unwrap_or(&file_str)) - }); - - let actual_file_path = actual_dir.join(&build_relative_path); - - if !actual_file_path.exists() { - errors.push(format!( - "Missing binary file: {} (expected at {})", - metadata.path.as_deref().unwrap_or(""), - build_relative_path.display() - )); - return; - } - - // Validate CSS metadata - if metadata.filetype == "css" { - match extract_css_metadata(&actual_file_path) { - Ok(actual_metadata) => { - if let Err(e) = compare_css_metadata(&metadata, &actual_metadata) { - errors.push(format!( - "CSS validation failed for {}: {}", - build_relative_path.display(), - e - )); - } - } - Err(e) => { - errors.push(format!( - "Failed to extract CSS metadata for {}: {}", - build_relative_path.display(), - e - )); - } - } - } - } -} - -fn build_expected_files_set( - reference_dir: &Path, - ref_files: &[PathBuf], - binary_refs: &[PathBuf], -) -> std::collections::HashSet { - let mut expected_files = ref_files - .iter() - .cloned() - .collect::>(); - - for metadata_file in binary_refs { - let metadata_path = reference_dir.join(metadata_file); - if let Ok(json_str) = fs::read_to_string(&metadata_path) - && let Ok(metadata) = serde_json::from_str::(&json_str) - { - let build_relative_path = metadata - .path - .as_ref() - .map(|p| extract_build_relative_path(p)) - .unwrap_or_else(|| { - let file_str = metadata_file.to_string_lossy(); - 
PathBuf::from(file_str.strip_suffix(".metadata.json").unwrap_or(&file_str)) - }); - expected_files.insert(build_relative_path); - } - } - - expected_files -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct BinaryFileMetadata { - #[serde(default = "default_filetype")] - pub filetype: String, - pub file_size: u64, - #[serde(skip_serializing_if = "Option::is_none")] - pub path: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub page_count: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub hash: Option, -} - -fn default_filetype() -> String { - "pdf".to_string() -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct EpubMetadata { - pub filetype: String, - pub file_size: u64, - pub title: String, - pub language: String, - pub spine_files: Vec, - pub has_nav: bool, -} - -pub fn extract_pdf_metadata(pdf_path: &Path) -> Result { - use lopdf::Document; - - let file_size = fs::metadata(pdf_path) - .map_err(|e| format!("Failed to read PDF metadata: {}", e))? - .len(); - - let doc = Document::load(pdf_path).map_err(|e| format!("Failed to load PDF: {}", e))?; - let page_count = doc.get_pages().len() as u32; - - Ok(BinaryFileMetadata { - filetype: "pdf".to_string(), - file_size, - path: None, - page_count: Some(page_count), - hash: None, - }) -} - -pub fn extract_css_metadata(css_path: &Path) -> Result { - use sha2::{Digest, Sha256}; - - let file_size = fs::metadata(css_path) - .map_err(|e| format!("Failed to read CSS metadata: {}", e))? 
- .len(); - - let contents = fs::read(css_path).map_err(|e| format!("Failed to read CSS contents: {}", e))?; - - let hash_bytes = Sha256::digest(&contents); - let hash: String = hash_bytes - .iter() - .map(|byte| format!("{:02x}", byte)) - .collect(); - - Ok(BinaryFileMetadata { - filetype: "css".to_string(), - file_size, - path: None, - page_count: None, - hash: Some(hash), - }) -} - -pub fn extract_epub_metadata(epub_path: &Path) -> Result { - use rheo_epub::package::Package; - use std::io::Read; - use zip::ZipArchive; - - // Get file size - let file_size = fs::metadata(epub_path) - .map_err(|e| format!("Failed to read EPUB metadata: {}", e))? - .len(); - - // Open EPUB as ZIP archive - let file = fs::File::open(epub_path).map_err(|e| format!("Failed to open EPUB file: {}", e))?; - let mut archive = - ZipArchive::new(file).map_err(|e| format!("Failed to read EPUB archive: {}", e))?; - - // Read package.opf - let opf_contents = { - let mut opf_file = archive - .by_name("EPUB/package.opf") - .map_err(|e| format!("Failed to find package.opf: {}", e))?; - let mut contents = String::new(); - opf_file - .read_to_string(&mut contents) - .map_err(|e| format!("Failed to read package.opf: {}", e))?; - contents - }; - - // Parse package.opf XML - let package: Package = serde_xml_rs::from_str(&opf_contents) - .map_err(|e| format!("Failed to parse package.opf: {}", e))?; - - // Extract metadata - let title = package.metadata.title.to_string(); - let language = package.metadata.language.to_string(); - - // Extract spine files - map idrefs to hrefs via manifest - let spine_files: Vec = package - .spine - .itemref - .iter() - .filter_map(|itemref| { - package - .manifest - .items - .iter() - .find(|item| item.id == itemref.idref) - .map(|item| item.href.to_string()) - }) - .collect(); - - // Check if nav.xhtml exists (opf_file is dropped, so we can borrow archive again) - let has_nav = archive.by_name("EPUB/nav.xhtml").is_ok(); - - Ok(EpubMetadata { - filetype: 
"epub".to_string(), - file_size, - title, - language, - spine_files, - has_nav, - }) -} - -/// Extract XHTML content files from an EPUB archive -pub fn extract_epub_xhtml(epub_path: &Path) -> Result, String> { - use std::io::Read; - use zip::ZipArchive; - - let file = fs::File::open(epub_path).map_err(|e| format!("Failed to open EPUB file: {}", e))?; - let mut archive = - ZipArchive::new(file).map_err(|e| format!("Failed to read EPUB archive: {}", e))?; - - let mut xhtml_files = HashMap::new(); - - for i in 0..archive.len() { - let mut file = archive - .by_index(i) - .map_err(|e| format!("Failed to read archive entry: {}", e))?; - - let name = file.name().to_string(); - - // Extract only XHTML content files (not nav.xhtml) - if name.starts_with("EPUB/") && name.ends_with(".xhtml") && !name.ends_with("nav.xhtml") { - let mut contents = String::new(); - file.read_to_string(&mut contents) - .map_err(|e| format!("Failed to read XHTML content: {}", e))?; - - // Store with relative name (strip EPUB/ prefix) - let rel_name = name.strip_prefix("EPUB/").unwrap_or(&name); - xhtml_files.insert(rel_name.to_string(), contents); - } - } - - Ok(xhtml_files) -} - -/// Verify EPUB XHTML content against reference files -fn verify_epub_xhtml_content( - ref_dir: &Path, - actual_xhtml: &HashMap, - test_name: &str, -) -> Result<(), String> { - let xhtml_ref_dir = ref_dir.join("xhtml"); - - // If no xhtml reference directory exists, skip XHTML verification - if !xhtml_ref_dir.exists() { - return Ok(()); - } - - // Check all reference XHTML files exist in actual - for entry in WalkDir::new(&xhtml_ref_dir) - .into_iter() - .filter_map(Result::ok) - .filter(|e| { - e.file_type().is_file() - && e.path() - .extension() - .map(|ext| ext == "xhtml") - .unwrap_or(false) - }) - { - let rel_path = entry - .path() - .strip_prefix(&xhtml_ref_dir) - .map_err(|e| format!("Failed to get relative path: {}", e))?; - let filename = rel_path.to_string_lossy().to_string(); - - let ref_content = 
fs::read_to_string(entry.path()) - .map_err(|e| format!("Failed to read reference XHTML: {}", e))?; - - let actual_content = actual_xhtml - .get(&filename) - .ok_or_else(|| format!("Missing XHTML file in EPUB: {}", filename))?; - - if ref_content != *actual_content { - let diff = compute_html_diff(&ref_content, actual_content); - - let test_name_sanitized = test_name - .replace('/', "_slash") - .replace('.', "_full_stop") - .replace(':', "_colon") - .replace('-', "_minus"); - - return Err(format!( - "EPUB XHTML content mismatch for {}\n\n{}\n\nTo update, run: UPDATE_REFERENCES=1 cargo test run_test_case_{}", - filename, diff, test_name_sanitized - )); - } - } - - Ok(()) -} - -fn compare_pdf_metadata( - reference: &BinaryFileMetadata, - actual: &BinaryFileMetadata, -) -> Result<(), String> { - let mut errors = Vec::new(); - - if reference.filetype != actual.filetype { - errors.push(format!( - "Filetype mismatch: expected {}, got {}", - reference.filetype, actual.filetype - )); - } - - // Enhanced page count error with more context - if reference.page_count != actual.page_count { - let ref_count = reference.page_count.unwrap_or(0); - let actual_count = actual.page_count.unwrap_or(0); - let page_diff = (actual_count as i32 - ref_count as i32).abs(); - let change_type = if actual_count > ref_count { - "added" - } else { - "removed" - }; - - errors.push(format!( - "Page count: expected {}, got {} ({} pages {})", - ref_count, actual_count, page_diff, change_type - )); - } - - // Enhanced file size error with percentage - let size_diff = (actual.file_size as i64 - reference.file_size as i64).unsigned_abs(); - let tolerance = reference.file_size / 10; - - if size_diff > tolerance { - let size_percent_diff = - ((size_diff as f64 / reference.file_size as f64) * 100.0).round() as u32; - - errors.push(format!( - "File size: expected {} bytes, got {} bytes ({}% diff, beyond 10% tolerance)", - reference.file_size, actual.file_size, size_percent_diff - )); - } - - if 
errors.is_empty() { - Ok(()) - } else { - let mut error_msg = String::from("PDF metadata mismatch:\n"); - for error in &errors { - error_msg.push_str(&format!(" - {}\n", error)); - } - error_msg.push_str("\nThis may indicate a change in content or formatting."); - Err(error_msg) - } -} - -fn compare_css_metadata( - reference: &BinaryFileMetadata, - actual: &BinaryFileMetadata, -) -> Result<(), String> { - let mut errors = Vec::new(); - - if reference.filetype != actual.filetype { - errors.push(format!( - "Filetype mismatch: expected {}, got {}", - reference.filetype, actual.filetype - )); - } - - if reference.hash != actual.hash { - errors.push(format!( - "Hash mismatch: expected {:?}, got {:?}", - reference.hash, actual.hash - )); - } - - if reference.file_size != actual.file_size { - errors.push(format!( - "File size mismatch: expected {}, got {}", - reference.file_size, actual.file_size - )); - } - - if errors.is_empty() { - Ok(()) - } else { - Err(errors.join("\n")) - } -} - -fn compare_epub_metadata(reference: &EpubMetadata, actual: &EpubMetadata) -> Result<(), String> { - let mut errors = Vec::new(); - - if reference.filetype != actual.filetype { - errors.push(format!( - "Filetype mismatch: expected {}, got {}", - reference.filetype, actual.filetype - )); - } - - if reference.title != actual.title { - errors.push(format!( - "Title mismatch: expected '{}', got '{}'", - reference.title, actual.title - )); - } - - if reference.language != actual.language { - errors.push(format!( - "Language mismatch: expected '{}', got '{}'", - reference.language, actual.language - )); - } - - if reference.spine_files != actual.spine_files { - errors.push(format!( - "Spine order mismatch:\n Expected: [{}]\n Got: [{}]", - reference.spine_files.join(", "), - actual.spine_files.join(", ") - )); - } - - if reference.has_nav != actual.has_nav { - errors.push(format!( - "Navigation file: expected {}, got {}", - if reference.has_nav { - "present" - } else { - "missing" - }, - if 
actual.has_nav { "present" } else { "missing" } - )); - } - - // File size with 10% tolerance (like PDF) - let size_diff = (actual.file_size as i64 - reference.file_size as i64).unsigned_abs(); - let tolerance = reference.file_size / 10; - - if size_diff > tolerance { - let size_percent_diff = - ((size_diff as f64 / reference.file_size as f64) * 100.0).round() as u32; - - errors.push(format!( - "File size: expected {} bytes, got {} bytes ({}% diff, beyond 10% tolerance)", - reference.file_size, actual.file_size, size_percent_diff - )); - } - - if errors.is_empty() { - Ok(()) - } else { - let mut error_msg = String::from("EPUB metadata mismatch:\n"); - for error in &errors { - error_msg.push_str(&format!(" - {}\n", error)); - } - Err(error_msg) - } -} - -fn validate_pdf_assets(reference_dir: &Path, actual_dir: &Path) -> Result<(), String> { - let ref_files: Vec = collect_files_by_predicate(reference_dir, |e| { - e.path().extension().and_then(|s| s.to_str()) == Some("json") - }) - .into_iter() - .filter_map(|p| { - p.file_stem() - .and_then(|s| s.to_str()) - .and_then(|s| s.strip_suffix(".metadata")) - .map(|s| format!("{}.pdf", s)) - }) - .collect(); - - let actual_files: Vec = collect_files_by_predicate(actual_dir, |e| { - e.path().extension().and_then(|s| s.to_str()) == Some("pdf") - }) - .into_iter() - .map(|p| p.to_string_lossy().to_string()) - .collect(); - - let mut errors = Vec::new(); - - for ref_file in &ref_files { - if !actual_dir.join(ref_file).exists() { - errors.push(format!( - "Missing PDF file: {}\n Expected in: {}\n Metadata reference: {}", - ref_file, - actual_dir.display(), - reference_dir - .join(format!( - "{}.metadata.json", - ref_file.strip_suffix(".pdf").unwrap_or(ref_file) - )) - .display() - )); - } - } - - for actual_file in &actual_files { - if !ref_files.contains(actual_file) { - errors.push(format!( - "Unexpected PDF file: {}\n Found in: {}\n Not defined in references: {}\n Run UPDATE_REFERENCES=1 to add this file to references", - 
actual_file, - actual_dir.join(actual_file).display(), - reference_dir.display() - )); - } - } - - if errors.is_empty() { - Ok(()) - } else { - let mut error_msg = String::from("PDF asset validation failed:\n\n"); - for (i, error) in errors.iter().enumerate() { - error_msg.push_str(&format!("{}. {}\n\n", i + 1, error)); - } - Err(error_msg) - } -} - -fn validate_epub_assets(reference_dir: &Path, actual_dir: &Path) -> Result<(), String> { - let ref_files: Vec = collect_files_by_predicate(reference_dir, |e| { - e.path().extension().and_then(|s| s.to_str()) == Some("json") - }) - .into_iter() - .filter_map(|p| { - p.file_stem() - .and_then(|s| s.to_str()) - .and_then(|s| s.strip_suffix(".metadata")) - .map(|s| format!("{}.epub", s)) - }) - .collect(); - - let actual_files: Vec = collect_files_by_predicate(actual_dir, |e| { - e.path().extension().and_then(|s| s.to_str()) == Some("epub") - }) - .into_iter() - .map(|p| p.to_string_lossy().to_string()) - .collect(); - - let mut errors = Vec::new(); - - for ref_file in &ref_files { - if !actual_dir.join(ref_file).exists() { - errors.push(format!( - "Missing EPUB file: {}\n Expected in: {}\n Metadata reference: {}", - ref_file, - actual_dir.display(), - reference_dir - .join(format!( - "{}.metadata.json", - ref_file.strip_suffix(".epub").unwrap_or(ref_file) - )) - .display() - )); - } - } - - for actual_file in &actual_files { - if !ref_files.contains(actual_file) { - errors.push(format!( - "Unexpected EPUB file: {}\n Found in: {}\n Not defined in references: {}\n Run UPDATE_REFERENCES=1 to add this file to references", - actual_file, - actual_dir.join(actual_file).display(), - reference_dir.display() - )); - } - } - - if errors.is_empty() { - Ok(()) - } else { - let mut error_msg = String::from("EPUB asset validation failed:\n\n"); - for (i, error) in errors.iter().enumerate() { - error_msg.push_str(&format!("{}. 
{}\n\n", i + 1, error)); - } - Err(error_msg) - } -} - -pub fn verify_epub_output(test_name: &str, actual_dir: &Path) { - let ref_dir = get_reference_dir(actual_dir, test_name, "epub"); - ensure_reference_exists(&ref_dir, test_name, "EPUB"); - - validate_epub_assets(&ref_dir, actual_dir).expect("EPUB asset validation failed"); - - for_each_file_with_ext(actual_dir, "epub", |entry| { - let rel_path = entry.path().strip_prefix(actual_dir).unwrap(); - let metadata_file = ref_dir.join(format!( - "{}.metadata.json", - rel_path.file_stem().unwrap().to_string_lossy() - )); - - if !metadata_file.exists() { - panic!( - "EPUB metadata reference not found: {}. Run with UPDATE_REFERENCES=1", - metadata_file.display() - ); - } - - let ref_metadata_json = - fs::read_to_string(&metadata_file).expect("Failed to read reference metadata"); - let ref_metadata: EpubMetadata = - serde_json::from_str(&ref_metadata_json).expect("Failed to parse reference metadata"); - let actual_metadata = - extract_epub_metadata(entry.path()).expect("Failed to extract EPUB metadata"); - - compare_epub_metadata(&ref_metadata, &actual_metadata).expect("EPUB metadata mismatch"); - - // Verify XHTML content if reference files exist - let actual_xhtml = extract_epub_xhtml(entry.path()).expect("Failed to extract EPUB XHTML"); - verify_epub_xhtml_content(&ref_dir, &actual_xhtml, test_name) - .expect("EPUB XHTML content mismatch"); - }); -} diff --git a/crates/tests/src/helpers/fixtures.rs b/crates/tests/src/helpers/fixtures.rs deleted file mode 100644 index 03bdf913..00000000 --- a/crates/tests/src/helpers/fixtures.rs +++ /dev/null @@ -1,118 +0,0 @@ -use std::fs; -use std::path::{Path, PathBuf}; - -use super::{is_single_file_test, markers::read_test_metadata}; - -/// Test case variants for different compilation modes -#[derive(Debug, Clone)] -pub enum TestCase { - /// Test a directory-based project with rheo.toml - Directory { - /// Name of the test case - name: String, - /// Project path relative to rheo 
top-level. - project_path: PathBuf, - }, - /// Test a single .typ file - SingleFile { - name: String, - file_path: PathBuf, - formats: Vec, - metadata: Option, - }, -} - -impl TestCase { - pub fn new(raw_path: &str) -> Self { - // Check if the path is a .typ file - if is_single_file_test(raw_path) { - let file_path = Path::new(raw_path); - // Use just the file stem (filename without extension) for the test name - let name = file_path - .file_stem() - .unwrap() - .to_str() - .unwrap() - .replace('.', "_full_stop") - .replace(':', "_colon") - .replace('-', "_minus"); - - // Read test markers to determine formats and metadata - let metadata = read_test_metadata(file_path); - let formats = metadata - .as_ref() - .map(|m| m.formats.clone()) - .unwrap_or_else(|| vec!["html".to_string(), "epub".to_string(), "pdf".to_string()]); - - return Self::SingleFile { - name, - file_path: file_path.into(), - formats, - metadata, - }; - } - - // Otherwise, auto-detect based on filesystem metadata - let path = Path::new(raw_path); - let fs_metadata = fs::metadata(path).unwrap(); - let name = path.file_stem().unwrap().to_str().unwrap().to_string(); - if fs_metadata.is_file() { - let test_metadata = read_test_metadata(path); - let formats = test_metadata - .as_ref() - .map(|m| m.formats.clone()) - .unwrap_or_else(|| vec!["html".to_string(), "epub".to_string(), "pdf".to_string()]); - - Self::SingleFile { - name, - file_path: path.into(), - formats, - metadata: test_metadata, - } - } else if fs_metadata.is_dir() { - Self::Directory { - name, - project_path: path.into(), - } - } else { - panic!("test case should only be a file or a directory"); - } - } - - pub fn name(&self) -> &str { - match self { - TestCase::Directory { name, .. } => name, - TestCase::SingleFile { name, .. } => name, - } - } - - pub fn project_path(&self) -> &PathBuf { - match self { - TestCase::Directory { project_path, .. } => project_path, - TestCase::SingleFile { file_path, .. 
} => file_path, - } - } - - /// Returns the format names to test for this test case - pub fn formats(&self) -> Vec { - match self { - TestCase::Directory { .. } => { - vec!["html".to_string(), "epub".to_string(), "pdf".to_string()] - } - TestCase::SingleFile { formats, .. } => formats.clone(), - } - } - - /// Check if this test case is a single file test - pub fn is_single_file(&self) -> bool { - matches!(self, TestCase::SingleFile { .. }) - } - - /// Get test metadata for SingleFile tests, None for Directory tests - pub fn metadata(&self) -> Option<&super::markers::TestMetadata> { - match self { - TestCase::SingleFile { metadata, .. } => metadata.as_ref(), - TestCase::Directory { .. } => None, - } - } -} diff --git a/crates/tests/src/helpers/markers.rs b/crates/tests/src/helpers/markers.rs deleted file mode 100644 index 57bd6475..00000000 --- a/crates/tests/src/helpers/markers.rs +++ /dev/null @@ -1,252 +0,0 @@ -/// Test marker parser for .typ files -/// -/// Parses comment-based test markers that declare files as test cases -/// and provide metadata for test execution. 
-/// -/// Marker syntax: -/// ```typst -/// // @rheo:test -/// // @rheo:formats html,pdf -/// // @rheo:description Tests blog post rendering with footnotes -/// ``` -use std::path::Path; - -/// Metadata extracted from test markers in .typ files -#[derive(Debug, Clone, PartialEq)] -pub struct TestMetadata { - /// Output formats to test (html, pdf, epub) - pub formats: Vec, - /// Human-readable description of the test - pub description: Option, - /// Expected compilation outcome ("error" or "success", None defaults to success) - pub expect: Option, - /// Required error patterns to check in stderr (for error cases) - pub error_patterns: Vec, -} - -impl Default for TestMetadata { - fn default() -> Self { - Self { - formats: vec!["html".to_string(), "pdf".to_string()], - description: None, - expect: None, - error_patterns: vec![], - } - } -} - -/// Checks if a line contains the @rheo:test marker -pub fn is_test_marker(line: &str) -> bool { - let trimmed = line.trim(); - trimmed.starts_with("//") && trimmed.contains("@rheo:test") -} - -/// Parses test metadata from .typ file source -/// -/// Returns Some(TestMetadata) if the file contains // @rheo:test marker, -/// otherwise returns None. 
-pub fn parse_test_metadata(source: &str) -> Option { - let mut has_test_marker = false; - let mut metadata = TestMetadata::default(); - - for line in source.lines() { - let trimmed = line.trim(); - - // Must start with comment - if !trimmed.starts_with("//") { - continue; - } - - let comment = trimmed.trim_start_matches("//").trim(); - - // Check for @rheo:test marker - if comment == "@rheo:test" { - has_test_marker = true; - continue; - } - - // Parse @rheo:formats - if let Some(formats_str) = comment.strip_prefix("@rheo:formats") { - let formats_str = formats_str.trim(); - metadata.formats = formats_str - .split(',') - .map(|f| f.trim().to_string()) - .filter(|f| !f.is_empty()) - .collect(); - continue; - } - - // Parse @rheo:description - if let Some(desc) = comment.strip_prefix("@rheo:description") { - metadata.description = Some(desc.trim().to_string()); - continue; - } - - // Parse @rheo:expect - if let Some(expect_str) = comment.strip_prefix("@rheo:expect") { - let expect_value = expect_str.trim().to_string(); - if !expect_value.is_empty() { - metadata.expect = Some(expect_value); - } - continue; - } - - // Parse @rheo:error-patterns - if let Some(patterns_str) = comment.strip_prefix("@rheo:error-patterns") { - // Patterns are comma-separated quoted strings - // Example: @rheo:error-patterns "error", "cannot add", "│" - let patterns_str = patterns_str.trim(); - metadata.error_patterns = patterns_str - .split(',') - .map(|p| p.trim()) - .filter(|p| !p.is_empty()) - .map(|p| p.trim_matches('"').to_string()) // Remove quotes - .collect(); - continue; - } - } - - if has_test_marker { - Some(metadata) - } else { - None - } -} - -/// Reads test metadata from a .typ file -pub fn read_test_metadata(path: &Path) -> Option { - let source = std::fs::read_to_string(path).ok()?; - parse_test_metadata(&source) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_is_test_marker() { - assert!(is_test_marker("// @rheo:test")); - assert!(is_test_marker(" // 
@rheo:test ")); - assert!(!is_test_marker("@rheo:test")); - assert!(!is_test_marker("// some other comment")); - } - - #[test] - fn test_parse_test_metadata_minimal() { - let source = "// @rheo:test\n= Content"; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!(metadata.formats, vec!["html", "pdf"]); - assert_eq!(metadata.description, None); - } - - #[test] - fn test_parse_test_metadata_with_formats() { - let source = "// @rheo:test\n// @rheo:formats html\n= Content"; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!(metadata.formats, vec!["html"]); - } - - #[test] - fn test_parse_test_metadata_with_multiple_formats() { - let source = "// @rheo:test\n// @rheo:formats html,pdf,epub\n= Content"; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!(metadata.formats, vec!["html", "pdf", "epub"]); - } - - #[test] - fn test_parse_test_metadata_with_description() { - let source = "// @rheo:test\n// @rheo:description Tests blog post with images\n= Content"; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!( - metadata.description, - Some("Tests blog post with images".to_string()) - ); - } - - #[test] - fn test_parse_test_metadata_complete() { - let source = r#"// @rheo:test -// @rheo:formats html,pdf -// @rheo:description Main blog index page with post listings - -= Blog Index -"#; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!(metadata.formats, vec!["html", "pdf"]); - assert_eq!( - metadata.description, - Some("Main blog index page with post listings".to_string()) - ); - } - - #[test] - fn test_parse_test_metadata_no_marker() { - let source = "= Content without markers"; - assert!(parse_test_metadata(source).is_none()); - } - - #[test] - fn test_parse_test_metadata_ignores_non_comment_lines() { - let source = r#"// @rheo:test -@rheo:formats html,pdf -// @rheo:formats epub -= Content -"#; - let metadata = parse_test_metadata(source).unwrap(); - // Should only parse the comment line, 
not the non-comment @rheo:formats - assert_eq!(metadata.formats, vec!["epub"]); - } - - #[test] - fn test_read_test_metadata_from_file() { - // Test reading markers from an actual example file - let manifest_dir = option_env!("CARGO_MANIFEST_DIR").unwrap_or("."); - let path = Path::new(manifest_dir).join("../../examples/blog_site/content/index.typ"); - let metadata = read_test_metadata(&path).unwrap(); - assert_eq!(metadata.formats, vec!["html", "pdf"]); - assert_eq!( - metadata.description, - Some("Main blog index page with post listings".to_string()) - ); - } - - #[test] - fn test_read_test_metadata_pdf_only() { - // Test reading PDF-only markers - let manifest_dir = option_env!("CARGO_MANIFEST_DIR").unwrap_or("."); - let path = Path::new(manifest_dir).join("../../examples/cover-letter.typ"); - let metadata = read_test_metadata(&path).unwrap(); - assert_eq!(metadata.formats, vec!["pdf"]); - assert_eq!( - metadata.description, - Some("Job application cover letter with custom formatting".to_string()) - ); - } - - #[test] - fn test_parse_test_metadata_with_expect_error() { - let source = "// @rheo:test\n// @rheo:expect error\n= Content"; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!(metadata.expect, Some("error".to_string())); - } - - #[test] - fn test_parse_test_metadata_with_error_patterns() { - let source = r#"// @rheo:test -// @rheo:expect error -// @rheo:error-patterns "error", "cannot add", "│" -= Content"#; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!(metadata.expect, Some("error".to_string())); - assert_eq!(metadata.error_patterns, vec!["error", "cannot add", "│"]); - } - - #[test] - fn test_parse_test_metadata_error_patterns_with_spaces() { - let source = r#"// @rheo:test -// @rheo:error-patterns "pattern one", "pattern two" -= Content"#; - let metadata = parse_test_metadata(source).unwrap(); - assert_eq!(metadata.error_patterns, vec!["pattern one", "pattern two"]); - } -} diff --git 
a/crates/tests/src/helpers/mod.rs b/crates/tests/src/helpers/mod.rs deleted file mode 100644 index b97b9972..00000000 --- a/crates/tests/src/helpers/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -pub mod cli; -pub mod comparison; -pub mod fixtures; -pub mod markers; -pub mod reference; -pub mod remote; -pub mod test_store; - -/// Determines if a test path represents a single .typ file -pub fn is_single_file_test(path: &str) -> bool { - path.ends_with(".typ") -} diff --git a/crates/tests/src/helpers/reference.rs b/crates/tests/src/helpers/reference.rs deleted file mode 100644 index 9e578e36..00000000 --- a/crates/tests/src/helpers/reference.rs +++ /dev/null @@ -1,391 +0,0 @@ -use crate::helpers::comparison::{BinaryFileMetadata, extract_pdf_metadata}; -use crate::helpers::is_single_file_test; -use std::collections::hash_map::DefaultHasher; -use std::fs; -use std::hash::{Hash, Hasher}; -use std::path::{Path, PathBuf}; -use walkdir::WalkDir; - -/// Compute a short hash of a file path for reference directory naming -fn compute_file_hash(path: &Path) -> String { - let mut hasher = DefaultHasher::new(); - path.to_string_lossy().hash(&mut hasher); - format!("{:08x}", hasher.finish()) -} - -/// Update HTML reference files from test output -pub fn update_html_references( - test_name: &str, - actual_dir: &Path, - project_path: &Path, -) -> Result<(), String> { - // Determine if this is a single-file test - let ref_dir = if test_name.contains("_slash") - && (test_name.contains("_full_stop") || test_name.ends_with("typ")) - { - // Single-file test - use hash-based path - let file_path_guess = test_name - .replace("_slash", "/") - .replace("_full_stop", ".") - .replace("_colon", ":") - .replace("_minus", "-"); - - if is_single_file_test(&file_path_guess) { - let file_path = Path::new(&file_path_guess); - let hash = compute_file_hash(file_path); - let filename = file_path - .file_stem() - .unwrap_or(file_path.as_os_str()) - .to_string_lossy(); - - PathBuf::from("ref/files") - 
.join(&hash) - .join(filename.as_ref()) - .join("html") - } else { - // Fallback to project-based path - get_project_ref_dir(project_path, test_name, "html") - } - } else { - // Project-based test - get_project_ref_dir(project_path, test_name, "html") - }; - - // Remove existing references - if ref_dir.exists() { - fs::remove_dir_all(&ref_dir) - .map_err(|e| format!("Failed to remove old references: {}", e))?; - } - - // Copy all files from actual to reference, replacing binary files with .metadata.json - copy_directory_with_binary_refs(actual_dir, &ref_dir, project_path)?; - - println!( - "Updated HTML references for {} at {}", - test_name, - ref_dir.display() - ); - Ok(()) -} - -/// Get project-based reference directory -fn get_project_ref_dir(project_path: &Path, test_name: &str, output_type: &str) -> PathBuf { - let ref_base = if project_path.starts_with("examples/") { - PathBuf::from("ref/examples") - } else if project_path.starts_with("tests/cases/") { - PathBuf::from("ref/cases") - } else { - PathBuf::from("ref/examples") // fallback - }; - ref_base.join(test_name).join(output_type) -} - -/// Update PDF metadata references from test output -pub fn update_pdf_references(test_name: &str, actual_dir: &Path) -> Result<(), String> { - // Determine if this is a single-file test - let ref_dir = if test_name.contains("_slash") - && (test_name.contains("_full_stop") || test_name.ends_with("typ")) - { - // Single-file test - use hash-based path - let file_path_guess = test_name - .replace("_slash", "/") - .replace("_full_stop", ".") - .replace("_colon", ":") - .replace("_minus", "-"); - - if is_single_file_test(&file_path_guess) { - let file_path = Path::new(&file_path_guess); - let hash = compute_file_hash(file_path); - let filename = file_path - .file_stem() - .unwrap_or(file_path.as_os_str()) - .to_string_lossy(); - - PathBuf::from("ref/files") - .join(&hash) - .join(filename.as_ref()) - .join("pdf") - } else { - // Fallback to project-based path - let ref_base = 
if actual_dir.starts_with("examples/") { - PathBuf::from("ref/examples") - } else if actual_dir.starts_with("tests/cases/") { - PathBuf::from("ref/cases") - } else { - PathBuf::from("ref/examples") - }; - ref_base.join(test_name).join("pdf") - } - } else { - // Project-based test - let ref_base = if actual_dir.starts_with("examples/") { - PathBuf::from("ref/examples") - } else if actual_dir.starts_with("tests/cases/") { - PathBuf::from("ref/cases") - } else { - PathBuf::from("ref/examples") - }; - ref_base.join(test_name).join("pdf") - }; - - // Remove existing references - if ref_dir.exists() { - fs::remove_dir_all(&ref_dir) - .map_err(|e| format!("Failed to remove old references: {}", e))?; - } - - // Create reference directory - fs::create_dir_all(&ref_dir) - .map_err(|e| format!("Failed to create PDF reference directory: {}", e))?; - - // Find all PDF files in actual output - for entry in WalkDir::new(actual_dir) { - if let Ok(entry) = entry - && entry.file_type().is_file() - && let Some(ext) = entry.path().extension() - && ext == "pdf" - { - // Extract metadata - let metadata = extract_pdf_metadata(entry.path())?; - - // Get relative path - let rel_path = entry - .path() - .strip_prefix(actual_dir) - .map_err(|e| format!("Failed to get relative path: {}", e))?; - - // Save metadata JSON - let metadata_file = ref_dir.join(format!( - "{}.metadata.json", - rel_path.file_stem().unwrap().to_string_lossy() - )); - - let json = serde_json::to_string_pretty(&metadata) - .map_err(|e| format!("Failed to serialize metadata: {}", e))?; - - fs::write(&metadata_file, json) - .map_err(|e| format!("Failed to write metadata: {}", e))?; - - println!("Updated PDF metadata for {}", rel_path.display()); - } - } - - Ok(()) -} - -/// Update EPUB metadata references from test output -pub fn update_epub_references(test_name: &str, actual_dir: &Path) -> Result<(), String> { - use crate::helpers::comparison::{extract_epub_metadata, extract_epub_xhtml}; - - // Determine if this is a 
single-file test - let ref_dir = if test_name.contains("_slash") - && (test_name.contains("_full_stop") || test_name.ends_with("typ")) - { - // Single-file test - use hash-based path - let file_path_guess = test_name - .replace("_slash", "/") - .replace("_full_stop", ".") - .replace("_colon", ":") - .replace("_minus", "-"); - - if is_single_file_test(&file_path_guess) { - let file_path = Path::new(&file_path_guess); - let hash = compute_file_hash(file_path); - let filename = file_path - .file_stem() - .unwrap_or(file_path.as_os_str()) - .to_string_lossy(); - - PathBuf::from("ref/files") - .join(&hash) - .join(filename.as_ref()) - .join("epub") - } else { - // Fallback to project-based path - let ref_base = if actual_dir.starts_with("examples/") { - PathBuf::from("ref/examples") - } else if actual_dir.starts_with("tests/cases/") { - PathBuf::from("ref/cases") - } else { - PathBuf::from("ref/examples") - }; - ref_base.join(test_name).join("epub") - } - } else { - // Project-based test - let ref_base = if actual_dir.starts_with("examples/") { - PathBuf::from("ref/examples") - } else if actual_dir.starts_with("tests/cases/") { - PathBuf::from("ref/cases") - } else { - PathBuf::from("ref/examples") - }; - ref_base.join(test_name).join("epub") - }; - - // Remove existing references - if ref_dir.exists() { - fs::remove_dir_all(&ref_dir) - .map_err(|e| format!("Failed to remove old references: {}", e))?; - } - - // Create reference directory - fs::create_dir_all(&ref_dir) - .map_err(|e| format!("Failed to create EPUB reference directory: {}", e))?; - - // Find all EPUB files in actual output - for entry in WalkDir::new(actual_dir) { - if let Ok(entry) = entry - && entry.file_type().is_file() - && let Some(ext) = entry.path().extension() - && ext == "epub" - { - // Extract metadata - let metadata = extract_epub_metadata(entry.path())?; - - // Get relative path - let rel_path = entry - .path() - .strip_prefix(actual_dir) - .map_err(|e| format!("Failed to get relative path: 
{}", e))?; - - // Save metadata JSON - let metadata_file = ref_dir.join(format!( - "{}.metadata.json", - rel_path.file_stem().unwrap().to_string_lossy() - )); - - let json = serde_json::to_string_pretty(&metadata) - .map_err(|e| format!("Failed to serialize metadata: {}", e))?; - - fs::write(&metadata_file, json) - .map_err(|e| format!("Failed to write metadata: {}", e))?; - - println!("Updated EPUB metadata for {}", rel_path.display()); - - // Extract and save XHTML content files - let xhtml_content = extract_epub_xhtml(entry.path())?; - let xhtml_dir = ref_dir.join("xhtml"); - fs::create_dir_all(&xhtml_dir) - .map_err(|e| format!("Failed to create XHTML directory: {}", e))?; - - for (filename, content) in xhtml_content { - let xhtml_path = xhtml_dir.join(&filename); - // Create parent dirs if nested (e.g., chapters/ch1.xhtml) - if let Some(parent) = xhtml_path.parent() { - fs::create_dir_all(parent).ok(); - } - fs::write(&xhtml_path, &content) - .map_err(|e| format!("Failed to write XHTML: {}", e))?; - println!("Updated EPUB XHTML: {}", filename); - } - } - } - - println!( - "Updated EPUB references for {} at {}", - test_name, - ref_dir.display() - ); - Ok(()) -} - -/// Copy directory recursively, replacing binary files with .metadata.json references -pub fn copy_directory_with_binary_refs( - src: &Path, - dst: &Path, - project_path: &Path, -) -> Result<(), String> { - fs::create_dir_all(dst).map_err(|e| format!("Failed to create directory: {}", e))?; - - for entry in WalkDir::new(src) { - let entry = entry.map_err(|e| format!("Failed to read entry: {}", e))?; - let rel_path = entry - .path() - .strip_prefix(src) - .map_err(|e| format!("Failed to get relative path: {}", e))?; - let dst_path = dst.join(rel_path); - - if entry.file_type().is_dir() { - fs::create_dir_all(&dst_path) - .map_err(|e| format!("Failed to create directory: {}", e))?; - } else if is_binary_file(entry.path()) { - // For binary files, create .metadata.json instead of copying - let metadata = 
create_binary_metadata(entry.path(), rel_path, project_path)?; - - let metadata_path = dst_path.with_extension("metadata.json"); - let json = serde_json::to_string_pretty(&metadata) - .map_err(|e| format!("Failed to serialize metadata: {}", e))?; - - fs::write(&metadata_path, json) - .map_err(|e| format!("Failed to write metadata: {}", e))?; - } else { - // Copy text files normally - fs::copy(entry.path(), &dst_path).map_err(|e| format!("Failed to copy file: {}", e))?; - } - } - - Ok(()) -} - -/// Check if a file is a binary file based on extension -fn is_binary_file(path: &Path) -> bool { - if let Some(ext) = path.extension() { - let ext_str = ext.to_string_lossy().to_lowercase(); - matches!( - ext_str.as_str(), - "png" | "jpg" | "jpeg" | "gif" | "webp" | "mp4" | "webm" | "pdf" | "css" - ) - } else { - false - } -} - -/// Create metadata for a binary file -fn create_binary_metadata( - file_path: &Path, - rel_path: &Path, - project_path: &Path, -) -> Result { - let file_size = fs::metadata(file_path) - .map_err(|e| format!("Failed to read file metadata: {}", e))? 
- .len(); - - let filetype = file_path - .extension() - .and_then(|e| e.to_str()) - .unwrap_or("unknown") - .to_lowercase(); - - // Compute hash for CSS files - let hash = if filetype == "css" { - use sha2::{Digest, Sha256}; - let contents = - fs::read(file_path).map_err(|e| format!("Failed to read file contents: {}", e))?; - let digest = Sha256::digest(&contents); - Some(digest.iter().map(|byte| format!("{:02x}", byte)).collect()) - } else { - None - }; - - // Detect source file location to create repo-relative path - // For CSS files, store the build-relative path directly (CSS is at the html root) - let repo_relative_path = if filetype == "css" { - PathBuf::from(rel_path) - } else if project_path.join("content").join(rel_path).exists() { - project_path.join("content").join(rel_path) - } else if project_path.join(rel_path).exists() { - project_path.join(rel_path) - } else { - // Fallback: just prepend project path to rel_path - project_path.join(rel_path) - }; - - Ok(BinaryFileMetadata { - filetype, - file_size, - path: Some(repo_relative_path.to_string_lossy().to_string()), - page_count: None, - hash, - }) -} diff --git a/crates/tests/src/helpers/remote.rs b/crates/tests/src/helpers/remote.rs deleted file mode 100644 index 35906ef6..00000000 --- a/crates/tests/src/helpers/remote.rs +++ /dev/null @@ -1,117 +0,0 @@ -use std::fs; -use std::path::Path; -use std::path::PathBuf; -use std::process::Command; - -use super::cli::rheo_cli_command; - -/// Clone a public GitHub repo using `git clone --depth 1`. -/// -/// Destination: `crates/tests/store/compat//`. -/// If the destination already exists, skip cloning (fast local re-runs). -/// -/// Returns the path to the cloned directory. 
-pub fn clone_repo(url: &str, name: &str) -> PathBuf { - let manifest_dir = env!("CARGO_MANIFEST_DIR"); - let dest = PathBuf::from(manifest_dir).join("store/compat").join(name); - - if dest.exists() { - return dest; - } - - fs::create_dir_all(&dest).unwrap_or_else(|e| { - panic!( - "Failed to create compat store directory {}: {}", - dest.display(), - e - ) - }); - - let output = Command::new("git") - .args(["clone", "--depth", "1", url, dest.to_str().unwrap()]) - .output() - .unwrap_or_else(|e| panic!("Failed to run git clone: {}", e)); - - if !output.status.success() { - panic!( - "Failed to clone repo {}: {}\nstdout: {}\nstderr: {}", - url, - output.status, - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - } - - dest -} - -/// Patch the `version` field in `/rheo.toml` to match -/// `env!("CARGO_PKG_VERSION")`. Overrides whatever version the external -/// project declares, so version-mismatch errors don't mask real failures. -/// -/// Does nothing if no rheo.toml is present. 
-pub fn patch_rheo_version(project_path: &Path) { - let toml_path = project_path.join("rheo.toml"); - if !toml_path.exists() { - return; - } - - let content = fs::read_to_string(&toml_path) - .unwrap_or_else(|e| panic!("Failed to read {}: {}", toml_path.display(), e)); - - let version = env!("CARGO_PKG_VERSION"); - let had_trailing_newline = content.ends_with('\n'); - - let patched: String = content - .lines() - .map(|line| { - let key = line.trim_start().split('=').next().unwrap_or("").trim(); - if key == "version" { - format!("version = \"{}\"", version) - } else { - line.to_string() - } - }) - .collect::>() - .join("\n"); - - let patched = if had_trailing_newline { - patched + "\n" - } else { - patched - }; - - fs::write(&toml_path, patched) - .unwrap_or_else(|e| panic!("Failed to write {}: {}", toml_path.display(), e)); -} - -/// Clone the repo, patch its version, run `rheo compile `, -/// and panic with full stdout+stderr if exit code is non-zero or if errors -/// are present in the output. 
-pub fn run_compat(url: &str, name: &str) { - let cloned_path = clone_repo(url, name); - patch_rheo_version(&cloned_path); - - let output = rheo_cli_command() - .args(["compile", cloned_path.to_str().unwrap()]) - .env("TYPST_IGNORE_SYSTEM_FONTS", "1") - .output() - .unwrap_or_else(|e| panic!("Failed to run rheo compile: {}", e)); - - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - let combined = format!("{}\n{}", stdout, stderr); - - // Check exit code - if !output.status.success() { - panic!("rheo compile failed for {} ({}):\n{}", name, url, combined); - } - - // Check for ERROR strings in output - if combined.contains("ERROR") || combined.contains("error:") { - panic!( - "rheo compile for {} ({}) produced errors despite success exit code:\n{}", - name, url, combined - ); - } -} diff --git a/crates/tests/src/helpers/test_store.rs b/crates/tests/src/helpers/test_store.rs deleted file mode 100644 index 2363c42e..00000000 --- a/crates/tests/src/helpers/test_store.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::fs; -use std::path::Path; -use walkdir::WalkDir; - -/// Copies project source files to test store directory, excluding build artifacts. -/// -/// For `rheo.toml` files missing a `version` field, automatically injects -/// `version = ""` so test cases don't need to hardcode it. 
-pub fn copy_project_to_test_store(project_path: &Path, test_store: &Path) -> Result<(), String> { - // Create test store - fs::create_dir_all(test_store).map_err(|e| format!("Failed to create test store: {}", e))?; - - // Copy all project files except build/ - for entry in WalkDir::new(project_path) { - let entry = entry.map_err(|e| format!("Walk error: {}", e))?; - let rel_path = entry - .path() - .strip_prefix(project_path) - .map_err(|e| format!("Path error: {}", e))?; - - // Skip build directory - if rel_path.starts_with("build") { - continue; - } - - let dest = test_store.join(rel_path); - - if entry.file_type().is_dir() { - fs::create_dir_all(&dest).map_err(|e| format!("Dir creation error: {}", e))?; - } else if entry.file_type().is_symlink() { - continue; // skip symlinks - } else if entry.path().file_name().is_some_and(|n| n == "rheo.toml") { - copy_rheo_toml_with_version(entry.path(), &dest)?; - } else { - fs::copy(entry.path(), &dest).map_err(|e| format!("File copy error: {}", e))?; - } - } - - Ok(()) -} - -/// Copies a rheo.toml file, injecting the version field if missing. 
-fn copy_rheo_toml_with_version(src: &Path, dest: &Path) -> Result<(), String> { - let content = - fs::read_to_string(src).map_err(|e| format!("Failed to read {}: {}", src.display(), e))?; - - if content.contains("version =") || content.contains("version=") { - // Already has a version field, copy as-is - fs::write(dest, content) - .map_err(|e| format!("Failed to write {}: {}", dest.display(), e))?; - } else { - // Inject version at the top - let versioned = format!("version = \"{}\"\n{}", env!("CARGO_PKG_VERSION"), content); - fs::write(dest, versioned) - .map_err(|e| format!("Failed to write {}: {}", dest.display(), e))?; - } - - Ok(()) -} diff --git a/crates/tests/src/lib.rs b/crates/tests/src/lib.rs deleted file mode 100644 index 2990d774..00000000 --- a/crates/tests/src/lib.rs +++ /dev/null @@ -1,4 +0,0 @@ -// Test-only library for rheo integration tests - -// Re-export test helpers at the crate root for easy access -pub mod helpers; diff --git a/crates/tests/store/.gitignore b/crates/tests/store/.gitignore deleted file mode 100644 index 4f65b44d..00000000 --- a/crates/tests/store/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!compat -!.gitignore diff --git a/crates/tests/store/compat/.gitignore b/crates/tests/store/compat/.gitignore deleted file mode 100644 index 05bb3f29..00000000 --- a/crates/tests/store/compat/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!merged-imports -!.gitignore diff --git a/crates/tests/store/compat/merged-imports/content/chapters/ch01.typ b/crates/tests/store/compat/merged-imports/content/chapters/ch01.typ deleted file mode 100644 index 7c079d92..00000000 --- a/crates/tests/store/compat/merged-imports/content/chapters/ch01.typ +++ /dev/null @@ -1,7 +0,0 @@ -#import "../shared/macros.typ": greeting - -= Chapter One - -#greeting("Reader") - -This is chapter one. 
diff --git a/crates/tests/store/compat/merged-imports/content/chapters/ch02.typ b/crates/tests/store/compat/merged-imports/content/chapters/ch02.typ deleted file mode 100644 index bb1ae50a..00000000 --- a/crates/tests/store/compat/merged-imports/content/chapters/ch02.typ +++ /dev/null @@ -1,5 +0,0 @@ -#include "../shared/helpers.typ" - -= Chapter Two - -This is chapter two. diff --git a/crates/tests/store/compat/merged-imports/content/shared/helpers.typ b/crates/tests/store/compat/merged-imports/content/shared/helpers.typ deleted file mode 100644 index 06ab6601..00000000 --- a/crates/tests/store/compat/merged-imports/content/shared/helpers.typ +++ /dev/null @@ -1,2 +0,0 @@ -// Shared helper functions -#let greeting(name) = [*Hi, #name!*] diff --git a/crates/tests/store/compat/merged-imports/content/shared/macros.typ b/crates/tests/store/compat/merged-imports/content/shared/macros.typ deleted file mode 100644 index 1ddc74b7..00000000 --- a/crates/tests/store/compat/merged-imports/content/shared/macros.typ +++ /dev/null @@ -1,4 +0,0 @@ -// Shared macros -#let emph-box(body) = rect(fill: blue.darken(10%), text(body)) - -#let greeting(name) = [*Hi, #name!*] diff --git a/crates/tests/store/compat/merged-imports/rheo.toml b/crates/tests/store/compat/merged-imports/rheo.toml deleted file mode 100644 index 68f83071..00000000 --- a/crates/tests/store/compat/merged-imports/rheo.toml +++ /dev/null @@ -1,13 +0,0 @@ -version = "0.2.1" - -content_dir = "content" -build_dir = "build" -formats = ["pdf"] - -[pdf.spine] -title = "Merged Imports" -vertebrae = [ - "chapters/ch01.typ", - "chapters/ch02.typ" -] -merge = true diff --git a/crates/tests/tests/build_api.rs b/crates/tests/tests/build_api.rs deleted file mode 100644 index 09bb1f09..00000000 --- a/crates/tests/tests/build_api.rs +++ /dev/null @@ -1,65 +0,0 @@ -//! Proves `rheo_core::Build` can compile a project from a `ProjectConfig` plus -//! plugins, with no CLI involvement — the library entry point introduced when the -//! 
orchestrator moved out of `crates/cli`. - -use rheo_core::manifest_version; -use rheo_core::project::ProjectConfig; -use rheo_core::{Build, BuildOptions}; -use std::fs; -use tempfile::TempDir; - -/// Write a minimal one-page rheo project into a fresh temp dir and return it. -fn scaffold_project() -> TempDir { - let temp = TempDir::new().unwrap(); - let root = temp.path(); - - fs::write( - root.join("rheo.toml"), - format!("version = \"{}\"\n", manifest_version::CURRENT), - ) - .unwrap(); - - let content = root.join("content"); - fs::create_dir_all(&content).unwrap(); - fs::write(content.join("index.typ"), "= Hello\n\nA paragraph.\n").unwrap(); - - temp -} - -#[test] -fn build_compiles_html_without_cli() { - // SAFETY: tests in this binary run single-threaded for this env var; mirrors - // the harness which sets the same flag for deterministic font behaviour. - unsafe { - std::env::set_var("TYPST_IGNORE_SYSTEM_FONTS", "1"); - } - - let temp = scaffold_project(); - let build_dir = temp.path().join("build"); - - let project = ProjectConfig::from_path(temp.path(), None).expect("load project"); - - let mut build = Build::prepare( - project, - vec![Box::new(rheo_html::HtmlPlugin)], - BuildOptions { - formats: vec!["html".to_string()], - build_dir: Some(build_dir.clone()), - font_dirs: vec![], - }, - ) - .expect("prepare build"); - - // Only the requested format is selected. 
- let names: Vec<&str> = build.plugins().iter().map(|p| p.name()).collect(); - assert_eq!(names, vec!["html"]); - - let results = build.run().expect("run build"); - assert_eq!(results.get("html").succeeded, 1); - assert_eq!(results.get("html").failed, 0); - - let html = build_dir.join("html").join("index.html"); - assert!(html.is_file(), "expected {} to exist", html.display()); - let body = fs::read_to_string(&html).unwrap(); - assert!(body.contains("Hello"), "compiled HTML missing heading text"); -} diff --git a/crates/tests/tests/compat.rs b/crates/tests/tests/compat.rs deleted file mode 100644 index 511f2bf4..00000000 --- a/crates/tests/tests/compat.rs +++ /dev/null @@ -1,27 +0,0 @@ -use rheo_tests::helpers::remote::run_compat; - -fn compat_enabled() -> bool { - std::env::var("RUN_COMPAT_TESTS").as_deref() == Ok("1") -} - -macro_rules! smoke_tests { - ( $( ($name:ident, $url:expr) ),* $(,)? ) => { - $( - ::paste::paste! { - #[test] - fn []() { - if !compat_enabled() { return; } - run_compat($url, stringify!($name)); - } - } - )* - }; -} - -smoke_tests! 
{ - (maths_ohrg_org, "https://github.com/freecomputinglab/maths.ohrg.org"), - (rheo_ohrg_org, "https://github.com/freecomputinglab/rheo.ohrg.org"), - (freecomputinglab_ohrg_org, "https://github.com/freecomputinglab/freecomputinglab.ohrg.org"), - (lolm_ohrg_org, "https://github.com/freecomputinglab/lolm.ohrg.org"), - (digitaltheory_dot_org, "https://github.com/digitaltheorylab/digitaltheory-dot-org"), -} diff --git a/crates/tests/tests/harness.rs b/crates/tests/tests/harness.rs deleted file mode 100644 index 26fa9766..00000000 --- a/crates/tests/tests/harness.rs +++ /dev/null @@ -1,1732 +0,0 @@ -use ntest::test_case; -use rheo_core::{RheoConfig, project::ProjectConfig}; -use rheo_tests::helpers::{ - cli::rheo_cli_command, - comparison::{verify_epub_output, verify_html_output, verify_pdf_output}, - fixtures::TestCase, - reference::{update_epub_references, update_html_references, update_pdf_references}, - test_store::copy_project_to_test_store, -}; -use std::env; -use std::path::PathBuf; - -#[test_case("../../examples/blog_site")] -#[test_case("../../examples/blog_post")] -#[test_case("../../examples/cover-letter.typ")] -#[test_case("../../examples/blog_site/content/index.typ")] -#[test_case("../../examples/blog_site/content/severance-ep-1.typ")] -#[test_case("../../examples/blog_post/portable_epubs.typ")] -#[test_case("cases/code_blocks_with_links")] -#[test_case("cases/cross_directory_links")] -#[test_case("cases/epub_inferred_spine")] -#[test_case("cases/link_path_edge_cases")] -#[test_case("cases/link_transformation")] -#[test_case("cases/links_with_fragments")] -#[test_case("cases/multiple_links_inline.typ")] -#[test_case("cases/pdf_individual")] -#[test_case("cases/pdf_merge_false")] -#[test_case("cases/script_injection")] -#[test_case("cases/script_injection_no_css")] -#[test_case("cases/relative_path_links")] -#[test_case("cases/target_function")] -#[test_case("cases/target_function_in_module")] -#[test_case("cases/target_function_in_package")] 
-#[test_case("cases/error_formatting/type_error.typ")] -#[test_case("cases/error_formatting/undefined_var.typ")] -#[test_case("cases/error_formatting/syntax_error.typ")] -#[test_case("cases/error_formatting/function_arg_error.typ")] -#[test_case("cases/error_formatting/import_error.typ")] -#[test_case("cases/error_formatting/unknown_function.typ")] -#[test_case("cases/error_formatting/invalid_method.typ")] -#[test_case("cases/error_formatting/invalid_field.typ")] -#[test_case("cases/error_formatting/multiple_errors.typ")] -#[test_case("cases/error_formatting/array_index_error.typ")] -#[test_case("cases/merged_subdir_imports")] -#[test_case("store/compat/merged-imports")] -fn run_test_case(name: &str) { - let test_case = TestCase::new(name); - let update_mode = env::var("UPDATE_REFERENCES").is_ok(); - let test_name = test_case.name(); - let original_project_path = test_case.project_path(); - - // Create isolated test store - let test_store = PathBuf::from("store").join(test_name); - - // Clean previous test artifacts - if test_store.exists() { - std::fs::remove_dir_all(&test_store).expect("Failed to clean test store"); - } - std::fs::create_dir_all(&test_store).expect("Failed to create test store"); - - // Copy project to test store for isolation - if test_case.is_single_file() { - // For single-file tests, copy just the file and its parent directory structure - let parent = original_project_path - .parent() - .expect("Single file should have parent"); - copy_project_to_test_store(parent, &test_store) - .expect("Failed to copy project to test store"); - } else { - // For directory tests, copy the whole project - copy_project_to_test_store(original_project_path, &test_store) - .expect("Failed to copy project to test store"); - } - - // Patch rheo.toml version to match current crate version - let store_toml = test_store.join("rheo.toml"); - if store_toml.exists() { - let content = std::fs::read_to_string(&store_toml).expect("Failed to read rheo.toml"); - let patched = 
content.replace( - &format!( - "version = \"{}\"", - content - .lines() - .find_map(|l| l - .strip_prefix("version = \"") - .and_then(|s| s.strip_suffix('"'))) - .unwrap_or("") - ), - &format!("version = \"{}\"", env!("CARGO_PKG_VERSION")), - ); - std::fs::write(&store_toml, patched).expect("Failed to patch rheo.toml version"); - } - - // Use test store as project path - let project_path = if test_case.is_single_file() { - let rel_path = original_project_path - .strip_prefix( - original_project_path - .parent() - .expect("Single file should have parent"), - ) - .expect("Failed to get relative path"); - test_store.join(rel_path) - } else { - test_store.clone() - }; - - // Load project from isolated copy - let project = ProjectConfig::from_path(&project_path, None).expect("Failed to load project"); - let config = RheoConfig::load(&project.root); - - // Get declared formats from test case (respects markers for single-file tests) - let declared_formats = test_case.formats(); - - // Check environment variables for format filtering - let env_html = env::var("RUN_HTML_TESTS").is_ok(); - let env_pdf = env::var("RUN_PDF_TESTS").is_ok(); - let env_epub = env::var("RUN_EPUB_TESTS").is_ok(); - - // If no env vars set, run all declared formats - let run_all = !env_html && !env_pdf && !env_epub; - - // Compute which formats to actually run - // For single-file tests: use declared formats (config check optional, markers are authoritative) - // For directory tests: require config support (preserve existing behavior) - let run_html = declared_formats.iter().any(|f| f == "html") - && (run_all || env_html) - && (test_case.is_single_file() || config.as_ref().is_ok_and(|cfg| cfg.has_format("html"))); - let run_pdf = declared_formats.iter().any(|f| f == "pdf") - && (run_all || env_pdf) - && (test_case.is_single_file() || config.as_ref().is_ok_and(|cfg| cfg.has_format("pdf"))); - let run_epub = declared_formats.iter().any(|f| f == "epub") - && (run_all || env_epub) - && 
(test_case.is_single_file() || config.as_ref().is_ok_and(|cfg| cfg.has_format("epub"))); - - // Get build directory in test store - let build_dir = test_store.join("build"); - - // Build compile command with format flags - let mut compile_args = vec!["compile", project_path.to_str().unwrap()]; - - // Use isolated build directory - compile_args.push("--build-dir"); - compile_args.push(build_dir.to_str().unwrap()); - - // For single-file tests, add explicit format flags based on declared formats - // For directory tests, let rheo use config/defaults (no flags = backward compatible) - if test_case.is_single_file() { - if run_html { - compile_args.push("--html"); - } - if run_pdf { - compile_args.push("--pdf"); - } - if run_epub { - compile_args.push("--epub"); - } - } - - // Compile the project using rheo CLI logic - let output = rheo_cli_command() - .args(&compile_args) - .env("TYPST_IGNORE_SYSTEM_FONTS", "1") - .output() - .expect("Failed to run rheo compile"); - - // Check if test expects compilation error - let expects_error = test_case - .metadata() - .and_then(|m| m.expect.as_ref()) - .map(|e| e == "error") - .unwrap_or(false); - - if expects_error { - // Test expects compilation to fail - assert!( - !output.status.success(), - "Expected compilation to fail for {}, but it succeeded", - test_name - ); - - let stderr = String::from_utf8_lossy(&output.stderr); - - // Check all required error patterns - if let Some(metadata) = test_case.metadata() { - for pattern in &metadata.error_patterns { - assert!( - stderr.contains(pattern), - "Expected error output to contain pattern '{}', but it was not found.\nFull stderr:\n{}", - pattern, - stderr - ); - } - } - - // For error cases, skip reference comparison and return early - // Clean test store before returning - if test_store.exists() { - std::fs::remove_dir_all(&test_store).ok(); - } - return; - } - - // For success cases, continue with existing logic - if !output.status.success() { - panic!( - "Compilation failed for 
{}: {}", - test_name, - String::from_utf8_lossy(&output.stderr) - ); - } - - // let run_epub = env::var("RUN_EPUB_TESTS").is_ok() || env::var("RUN_EPUB_TESTS").is_err(); - - // Test HTML output - if run_html { - let html_output = build_dir.join("html"); - if html_output.exists() { - if update_mode { - update_html_references(test_name, &html_output, &project_path) - .expect("Failed to update HTML references"); - } else { - verify_html_output(test_name, &html_output); - } - } - } - - // Test PDF output - if run_pdf { - let pdf_output = build_dir.join("pdf"); - if pdf_output.exists() { - if update_mode { - update_pdf_references(test_name, &pdf_output) - .expect("Failed to update PDF references"); - } else { - verify_pdf_output(test_name, &pdf_output); - } - } - } - - // Test EPUB output - if run_epub { - let epub_output = build_dir.join("epub"); - if epub_output.exists() { - if update_mode { - update_epub_references(test_name, &epub_output) - .expect("Failed to update EPUB references"); - } else { - verify_epub_output(test_name, &epub_output); - } - } - } - - // Clean test store after test - if test_store.exists() { - std::fs::remove_dir_all(&test_store).ok(); - } -} - -/// Test PDF merge functionality specifically -#[test] -fn test_pdf_merge() { - use lopdf::Document; - use rheo_tests::helpers::comparison::extract_pdf_metadata; - - let test_name = "pdf_merge"; - let test_case = TestCase::new(&format!("cases/{}", test_name)); - let original_project_path = test_case.project_path(); - - // Create isolated test store - let test_store = PathBuf::from("store").join(test_name); - if test_store.exists() { - std::fs::remove_dir_all(&test_store).expect("Failed to clean test store"); - } - std::fs::create_dir_all(&test_store).expect("Failed to create test store"); - copy_project_to_test_store(original_project_path, &test_store) - .expect("Failed to copy project to test store"); - - let project_path = test_store.clone(); - let build_dir = test_store.join("build"); - - // Compile 
with PDF merge - let output = rheo_cli_command() - .args([ - "compile", - project_path.to_str().unwrap(), - "--pdf", - "--build-dir", - build_dir.to_str().unwrap(), - ]) - .env("TYPST_IGNORE_SYSTEM_FONTS", "1") - .output() - .expect("Failed to run rheo compile"); - - if !output.status.success() { - panic!( - "Compilation failed: {}", - String::from_utf8_lossy(&output.stderr) - ); - } - - // Verify merged PDF created with correct name - let pdf_path = build_dir.join("pdf/pdf_merge.pdf"); - assert!(pdf_path.exists(), "Merged PDF not created at expected path"); - - // Verify valid PDF format and can be loaded - let doc = Document::load(&pdf_path).expect("Failed to load merged PDF"); - let page_count = doc.get_pages().len(); - assert!(page_count > 0, "PDF has no pages"); - - // Verify we have at least 1 page - // Note: With minimal content, Typst may fit everything on one page - assert!( - page_count >= 1, - "Expected at least 1 page, got {}", - page_count - ); - - // Verify PDF metadata can be extracted - let metadata = extract_pdf_metadata(&pdf_path).expect("Failed to extract PDF metadata"); - assert_eq!( - metadata.page_count, - Some(page_count as u32), - "Page count mismatch" - ); - - // Clean up - if test_store.exists() { - std::fs::remove_dir_all(&test_store).ok(); - } -} - -/// Test error case: link to file not in spine -#[test] -fn test_pdf_merge_link_not_in_spine() { - // Create a test case with a file that links to a non-spine file - let test_dir = PathBuf::from("tests/cases/pdf_merge_error_nonspine"); - std::fs::create_dir_all(&test_dir).expect("Failed to create test directory"); - - // Create rheo.toml with only intro.typ in spine - std::fs::write( - test_dir.join("rheo.toml"), - r#"[pdf.merge] -spine = ["intro.typ"] -title = "Test Error Case" -"#, - ) - .expect("Failed to write rheo.toml"); - - // Create intro.typ that links to chapter1.typ (not in spine) - std::fs::write( - test_dir.join("intro.typ"), - r#"= Introduction - -This links to #link()[Chapter 
1] which is not in the spine. -"#, - ) - .expect("Failed to write intro.typ"); - - // Create chapter1.typ (not in spine, but referenced) - std::fs::write( - test_dir.join("chapter1.typ"), - r#"= Chapter 1 - -Content here. -"#, - ) - .expect("Failed to write chapter1.typ"); - - // Try to compile - should fail or warn - let output = rheo_cli_command() - .args(["compile", test_dir.to_str().unwrap(), "--pdf"]) - .env("TYPST_IGNORE_SYSTEM_FONTS", "1") - .output() - .expect("Failed to run rheo compile"); - - // Clean up - std::fs::remove_dir_all(&test_dir).ok(); - - // Check if compilation failed with link error - let stderr = String::from_utf8_lossy(&output.stderr); - let stdout = String::from_utf8_lossy(&output.stdout); - let combined = format!("{}{}", stderr, stdout); - - // The compilation should fail because chapter1.typ is not in the spine - // The transform_typ_links_to_labels function should detect this and return an error - assert!( - !output.status.success() || combined.contains("not found in spine"), - "Expected error about link target not in spine, got:\nstderr: {}\nstdout: {}", - stderr, - stdout - ); -} - -/// Test error case: duplicate filenames in spine -#[test] -fn test_pdf_merge_duplicate_filenames() { - // Create a test case with duplicate filenames in different directories - let test_dir = PathBuf::from("tests/cases/pdf_merge_error_duplicate"); - let dir1 = test_dir.join("dir1"); - let dir2 = test_dir.join("dir2"); - std::fs::create_dir_all(&dir1).expect("Failed to create dir1"); - std::fs::create_dir_all(&dir2).expect("Failed to create dir2"); - - // Create rheo.toml with both files in spine - std::fs::write( - test_dir.join("rheo.toml"), - r#"[pdf.merge] -spine = ["dir1/chapter.typ", "dir2/chapter.typ"] -title = "Test Duplicate Error" -"#, - ) - .expect("Failed to write rheo.toml"); - - // Create dir1/chapter.typ with a label - std::fs::write( - dir1.join("chapter.typ"), - r#"= Chapter from Dir1 - -Content from dir1. 
-"#, - ) - .expect("Failed to write dir1/chapter.typ"); - - // Create dir2/chapter.typ with the same label - std::fs::write( - dir2.join("chapter.typ"), - r#"= Chapter from Dir2 - -Content from dir2. -"#, - ) - .expect("Failed to write dir2/chapter.typ"); - - // Try to compile - should fail with duplicate label error - let output = rheo_cli_command() - .args(["compile", test_dir.to_str().unwrap(), "--pdf"]) - .env("TYPST_IGNORE_SYSTEM_FONTS", "1") - .output() - .expect("Failed to run rheo compile"); - - // Clean up - std::fs::remove_dir_all(&test_dir).ok(); - - // Typst will detect duplicate labels and fail - // Check for error in output - let stderr = String::from_utf8_lossy(&output.stderr); - let stdout = String::from_utf8_lossy(&output.stdout); - let combined = format!("{}{}", stderr, stdout); - - // Typst should report duplicate label error - assert!( - !output.status.success() || combined.contains("duplicate") || combined.contains("label"), - "Expected error about duplicate labels, got:\nstderr: {}\nstdout: {}", - stderr, - stdout - ); -} - -/// Test HTML post-processing: CSS link injection -#[test] -fn test_html_css_link_injection() { - let test_case = TestCase::new("../../examples/blog_site"); - let project_path = test_case.project_path(); - - // Clean and compile - let clean_output = rheo_cli_command() - .args(["clean", project_path.to_str().unwrap()]) - .output() - .expect("Failed to run rheo clean"); - - if !clean_output.status.success() { - eprintln!( - "Warning: Clean failed: {}", - String::from_utf8_lossy(&clean_output.stderr) - ); - } - - let output = rheo_cli_command() - .args(["compile", project_path.to_str().unwrap(), "--html"]) - .env("TYPST_IGNORE_SYSTEM_FONTS", "1") - .output() - .expect("Failed to run rheo compile"); - - if !output.status.success() { - panic!( - "Compilation failed: {}", - String::from_utf8_lossy(&output.stderr) - ); - } - - // Read compiled HTML - let html_path = project_path.join("build/html/index.html"); - let html = 
std::fs::read_to_string(&html_path).expect("Failed to read HTML file"); - - // Test 1: CSS is inlined as a