diff --git a/.Rprofile b/.Rprofile
new file mode 100644
index 0000000..81b960f
--- /dev/null
+++ b/.Rprofile
@@ -0,0 +1 @@
+source("renv/activate.R")
diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 0000000..feaa6b8
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,31 @@
+{
+ "permissions": {
+ "allow": [
+ "Bash(sed 's/.*\"\"Package\"\": \"\"//')",
+ "Bash(R_CONFIG_ACTIVE=draft Rscript -e \":*)",
+ "Bash(ps -p 50426 -o pid,command)",
+ "Bash(while kill -0 50426)",
+ "Bash(do sleep 3)",
+ "Bash(DOCSTYLE_DEBUG=1 quarto render full-study-protocol.qmd)",
+ "Bash(Rscript -e \"cat\\(system.file\\(package=''docstyle''\\)\\)\")",
+ "Bash(Rscript -e \"ls\\(getNamespace\\(''docstyle''\\)\\)\")",
+ "Bash(Rscript -e \"packageVersion\\(''docstyle''\\)\")",
+ "Bash(Rscript -e \"renv::install\\(''~/github/docstyle''\\)\")",
+ "Bash(Rscript -e \"renv::install\\(''splines2''\\)\")",
+ "Bash(Rscript -e \"testthat::test_file\\(''tests/testthat/test-apc-data.R''\\)\")",
+ "Bash(Rscript -e \"testthat::test_dir\\(''tests/testthat/''\\)\")",
+ "Bash(R_CONFIG_ACTIVE=draft Rscript -e \"targets::tar_make\\(names = c\\(''apc_data'', ''apc_model_initiation_men'', ''apc_model_initiation_women'', ''apc_model_cessation_men'', ''apc_model_cessation_women''\\)\\)\")",
+ "Bash(Rscript -e \"docstyle::update_extension\\(\\)\")",
+ "Bash(Rscript -e \"renv::install\\(''DougManuel/docstyle''\\)\")",
+ "Bash(Rscript -e \"docstyle::update_extension\\(''/Users/dmanuel/github/cshgm-dev/docs''\\)\")",
+ "Bash(R_CONFIG_ACTIVE=draft Rscript -e \"targets::tar_make\\(names = c\\(''apc_model_initiation_men'', ''apc_model_initiation_women'', ''apc_model_cessation_men'', ''apc_model_cessation_women''\\)\\)\")",
+ "Read(//Users/dmanuel/github/cshgm-dev/**)",
+ "Bash(du -h data/dev/*.RData)",
+ "Read(//Users/dmanuel/github/cchsflow/.claude/worktrees/smoking-cleanup/**)",
+ "Read(//Users/dmanuel/github/cchsflow/.claude/worktrees/smoking-cleanup/$1 ~ /^\\(age_start_smoking|age_first_cigarette|cigs_per_day|time_quit_smoking|pack_years|SMKDSTY_cat5|smoke_simple\\)$/**)"
+ ],
+ "additionalDirectories": [
+ "/Users/dmanuel/.claude/projects/-Users-dmanuel-github-cshgm-dev"
+ ]
+ }
+}
diff --git a/.gitignore b/.gitignore
index def92c1..f313e75 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,11 +6,60 @@
# Built
_targets
dist
+_site/
+_freeze/
+
+# renv library files (large)
+renv/library/
# Mac
.DS_Store
-data/prod
-.prettierrc
+# Data (raw source files — never commit; pipeline outputs kept locally only)
+data/dev/*.RData
+data/dev/*.rdata
+data/*.rds
+
+# Secure config (RDC paths — never commit)
+config/statscan.yml
+
+# Legacy SAS code (ICES copyright — not for distribution)
+resources/legacy-code/*.sas
+# Editor configs
+.prettierrc
.vscode/
+
+# Manuscript rendered output
+manuscript/output/
+manuscript/_docstyle/
+
+# Generated files
+*.html
+*.pdf
+*.log
+*.aux
+
+/.quarto/
+docs/.quarto/
+
+/.luarc.json
+
+# Internal development notes (working documents, not for collaborators)
+docs/development/
+
+# Working copies
+worksheets/cshm-variables-working-copy.csv
+
+# Runtime
+logs/
+
+# Machine-local Claude Code settings
+.claude/settings.local.json
+
+resources/
+
+# Quarto project caches
+manuscript/.quarto/
+
+**/*.quarto_ipynb
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..f19ce29
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,160 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+For project overview, methodology, and architecture see [README.md](README.md) and, for maintainers, `docs/development/project-architecture.md` (local only; `docs/development/` is gitignored).
+
+## Common commands
+
+```r
+# Run the full pipeline
+targets::tar_make()
+
+# Visualize pipeline DAG
+targets::tar_visnetwork()
+
+# Run a single target / inspect / check outdated
+targets::tar_make(study_data)
+targets::tar_read(study_data)
+targets::tar_outdated()
+
+# Run tests
+testthat::test_dir("tests/testthat/")
+testthat::test_file("tests/testthat/test-apc-data.R")
+
+# Switch config profile (dev = 10% sample; draft = 5% from cchsflow-data release)
+Sys.setenv(R_CONFIG_ACTIVE = "dev")
+targets::tar_make()
+
+# Add a new dependency
+renv::install("package-name")
+renv::snapshot()
+```
+
+```bash
+# Preview / build documentation website
+quarto preview
+quarto render
+
+# Render manuscript to Word (docstyle)
+quarto render manuscript/manuscript.qmd
+```
+
+## Developer context
+
+### Pipeline and configuration
+
+The pipeline follows the DemPoRT-V2-dev pattern (`~/github/DemPoRT-V2-dev`); its CLAUDE.md is the reference implementation.
+
+| File | Purpose |
+|------|---------|
+| [_targets.R](_targets.R) | Pipeline definition (stages 1–8 active; 9–10 stubbed) |
+| [config.yml](config.yml) | Environment profiles (`default`, `draft`, `dev`, `prod`, `statscan`) |
+| [worksheets/cshm-variables.csv](worksheets/cshm-variables.csv) | Study variable list — `role`, `source`, and `purpose` columns |
+| [worksheets/cshm-variable-details.csv](worksheets/cshm-variable-details.csv) | CSHM extension rows: GEOGPRV and WTS_M for cchs2019_2020_p and cchs2022_p (DHH_SEX/DHHGAGE_cont rows removed — cchsflow v3 now covers them) |
+| [R/study-data.R](R/study-data.R) | `load_study_data()` — load + harmonize CCHS cycles |
+| [R/data-cleaning.R](R/data-cleaning.R) | `clean_study_data()` — distribution checks, truncation |
+| [R/imputation.R](R/imputation.R) | `impute_data()` — MICE imputation |
+| [R/descriptive-data.R](R/descriptive-data.R) | `get_cshm_desc_data()` — Table 1 statistics wrapper |
+| [R/get-descriptive-data.R](R/get-descriptive-data.R) | `get_descriptive_data()` — core stats engine (ported from DemPoRT) |
+| [R/create-descriptive-tables.R](R/create-descriptive-tables.R) | `create_descriptive_table()`, `create_cycle_specific_descriptive_table()` |
+| [R/variables-sheet-utils.R](R/variables-sheet-utils.R) | Variables worksheet helpers (ported from DemPoRT) |
+| [R/variable-details-sheet-utils.R](R/variable-details-sheet-utils.R) | Variable details helpers (ported from DemPoRT) |
+| [R/apc-model.R](R/apc-model.R) | APC data prep + model fitting |
+| [R/smoking-histories.R](R/smoking-histories.R) | Rate table generation (Stage 9, stub) |
+| [R/validation.R](R/validation.R) | Prevalence validation |
+| [docs/results/table-1.qmd](docs/results/table-1.qmd) | Table 1a, 1b, and cycle appendix |
+| [R/legacy/smoking.R](R/legacy/smoking.R) | Interim smoking variables (pre-cchsflow v3) |
+| [R/legacy/process_smoking_initiation.R](R/legacy/process_smoking_initiation.R) | APC data prep (pre-pipeline; superseded by R/apc-model.R) |
+| resources/legacy-code/Modeling2013.sas | Original SAS implementation (Manuel et al. 2020; local only — `resources/` is gitignored) |
+| docs/references/Manuel_HR_2020.pdf | Key reference paper (local only; PDFs are gitignored) |
+| [config/statscan.yml.example](config/statscan.yml.example) | RDC config template (copy to `config/statscan.yml`, gitignored) |
+
+**Documentation structure** (three purposes):
+
+| Location | Purpose |
+|----------|---------|
+| [docs/protocol/full-protocol.qmd](docs/protocol/full-protocol.qmd) | Prespecified study protocol |
+| [docs/protocol/study-summary.qmd](docs/protocol/study-summary.qmd) | One-page protocol summary |
+| [docs/workflow/](docs/workflow/) | Step QMDs — one per pipeline stage (Stages 1–8) |
+| [manuscript/manuscript.qmd](manuscript/manuscript.qmd) | Study manuscript (all numbers inline R from pipeline) |
+| [docs/how-to/](docs/how-to/) | Task-oriented guides |
+| [docs/explanation/](docs/explanation/) | Conceptual explanations of APC methodology |
+| [docs/reference/](docs/reference/) | Variable, function, and model reference |
+
+**Development artefacts** (`docs/development/` — gitignored, local only): planning documents, meeting notes, protocol drafts, pipeline progress notes.
+
+`config.yml` profiles (set via `R_CONFIG_ACTIVE`):
+- **default** — PUMF data from `~/github/cchsflow-data/data/sources/rdata/` (renamed via scripts/rename-pumf-objects.R); full sample
+- **draft** — 5% sample from `cchsflow-data` release files (`CCHS_2001.RData` naming, internal object `table`)
+- **dev** — 10% sample from default PUMF source; single imputation; fast iteration
+- **prod** — full PUMF sample; WARN logging
+- **statscan** — delegates to `config/statscan.yml` (gitignored); Master file paths at RDC
+
+APC spline knots: Age `[10, 15, 20, 50, 60]` · Period `[1940, 1950, 1960, 1970, 1980]` · Cohort `[1930, 1940, 1945, 1950, 1955, 1960, 1965, 1970, 1975, 1980]`
+
+### Data paths
+
+PUMF `.RData` files: `~/github/cchsflow/data/` (cycles 2001–2017/18 with correct `cchs*_p` naming). Cycles 2019–20 and 2022 require renaming from `cchsflow-data` GH release via `scripts/rename-pumf-objects.R`.
+
+CCHS metadata CLI:
+```bash
+python3 ~/github/cchsflow-docs/mcp-server/cli.py search smoking
+python3 ~/github/cchsflow-docs/mcp-server/cli.py detail SMKDSTY
+python3 ~/github/cchsflow-docs/mcp-server/cli.py compare cchs2013_2014_p cchs2013_2014_m
+```
+
+### Variable naming
+
+The `variableStart` worksheet column uses cchsflow notation: `cchs2001_p::SMKA_01A, cchs2007_2008_p::SMK_01A, [SMK_01A]` — `_p` = PUMF, `_m` = Master, `[VAR]` = fallback name.
+
+**Unified variables (preferred):** `age_first_cigarette`, `age_start_smoking`, `time_quit_smoking`
+
+**Master-only continuous:** `SMK_01C`, `SMK_040`, `SMK_09C` / `SMK_06C` / `SMK_10C`
+
+**PUMF pseudo-continuous (midpoint imputed):** `SMKG01C_cont`, `SMKG040_cont`, `SMK_09A_cont` / `SMK_06A_cont` / `SMK_10A_cont`
+
+**Deprecated aliases:** `SMK_005` → `SMK_202`; `SMK_030` → `SMK_05D`
+
+**APC model variables (internal):** `age`, `cohort`, `period`, `init`, `weighting`, `ont_id`
+
+### cchsflow dependency
+
+Branch: `v3` (smoking work merged 2026-04-29, commit bd0df3ac; PR #163 closed in favour of direct merge). The pipeline reads recoding rules from the in-repo snapshot `worksheets/cchsflow-variable-details.csv` (taken from `~/github/cchsflow/inst/extdata/variable_details.csv` with local fixes for cchsflow #184/#185); refresh it when upstream merges the fixes. renv installs the cchsflow *package* from the local `~/github/cchsflow` checkout on `v3` (CRAN 2.1.0 lacks the v3 derivation functions). Key smoking files:
+`R/smoke-start.R`, `R/smoke-stop.R`, `R/smoke-intensity.R`, `R/smoking-status.R`, `R/smoking-cessation.R`, `R/clean-variables.R`, `R/missing-data-functions.R`
+
+cchsflow must be *attached* (not just `::`-qualified) when calling `rec_with_table()` — v3 derivation functions use unqualified dplyr/rlang helpers that resolve through its `Depends`. `_targets.R` handles this with `tar_option_set(packages = "cchsflow")`.
+
+### Variable roles
+
+Roles are comma-separated in `cshm-variables.csv`. A variable may carry multiple roles. `select_vars_by_role(role, variables_sheet)` handles this correctly.
+
+| Role | Group | Purpose |
+|------|-------|---------|
+| `design` | Survey design | Survey infrastructure (SurveyCycle, WTS_M) |
+| `intermediate` | Harmonization | Raw cchsflow input needed to derive a unified variable; not used directly by pipeline code |
+| `predictor` | Model | Covariate in the APC model or descriptive analysis |
+| `model-stratifier` | Model | Stratifies APC into separate fits (e.g. DHH_SEX) |
+| `table1` | Descriptive | Row in Table 1 descriptive statistics |
+| `table1-stratifier` | Descriptive | Stratifies Table 1 columns |
+| `apc-numerator` | APC data prep | Defines the event indicator in Stage 7 |
+| `apc-denominator` | APC data prep | Constructs the at-risk person-year denominator in Stage 7 |
+| `imputation-predictor` | Imputation | Included in MICE imputation model |
+| `sensitivity-analysis` | Analysis | Used in sensitivity analyses only |
+
+Role vocabulary (single source of truth): [schemas/cshm-variables.yaml](schemas/cshm-variables.yaml) `VariableRoleEnum`. Role helpers in `R/variables-sheet-utils.R` are project-local for now; long-term home is cchsflow (skill branch `skills/review-validation`).
+
+### Missing data conventions
+
+`haven::tagged_na()` throughout: **NA(a)** = not applicable · **NA(b)** = don't know/refused · **NA(c)** = not asked this cycle
+
+## Code style
+
+- Follow tidyverse design principles; snake_case for all function and variable names
+- Format code with the `styler` package
+
+## Editorial style
+
+- Canadian English: "modelling", "behaviour", "analyse"
+- Sentence case for all headings (except document title)
+- Provide DOI or PMID for references
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..8c8f82a
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,160 @@
+# Contributing to CSHM
+
+Thank you for your interest in contributing to the Canadian Smoking Histories Model (CSHM) project! This document provides guidelines for contributions to help maintain consistency and quality across the project.
+
+## Code of Conduct
+
+All contributors are expected to adhere to the project's code of conduct, which promotes a respectful and inclusive environment for collaboration.
+
+## How to Contribute
+
+There are many ways to contribute to the CSHM project:
+
+1. **Report issues**: Submit bug reports or feature requests through the GitHub issue tracker.
+2. **Suggest improvements**: Share ideas for enhancing the code, documentation, or workflow.
+3. **Submit code**: Contribute code improvements via pull requests.
+4. **Improve documentation**: Help make the documentation more comprehensive and clear.
+5. **Test the software**: Report unexpected behaviour or performance issues.
+
+## Development Workflow
+
+### 1. Fork the Repository
+
+Start by forking the repository and creating a local clone of your fork:
+
+```bash
+git clone https://github.com/your-username/cshgm.git
+cd cshgm
+
+# Install renv if you don't have it (R commands, run from the shell)
+Rscript -e 'install.packages("renv", repos = "https://cloud.r-project.org")'
+
+# Set up the project environment with renv
+Rscript -e 'renv::restore()'
+```
+
+This will set up all the required package dependencies in a project-specific library.
+
+### 2. Create a Branch
+
+Create a branch for your changes:
+
+```bash
+git checkout -b your-branch-name
+```
+
+Use descriptive branch names that reflect the purpose of your changes:
+- `feature/add-cessation-model`
+- `fix/initiation-calculation-bug`
+- `docs/improve-apc-explanation`
+
+### 3. Make Your Changes
+
+Follow these guidelines when making changes:
+
+- Adhere to the code style guidelines in the [Styleguides](#styleguides) section below
+- Use clear, descriptive variable and function names
+- Add comprehensive documentation for new functions
+- Include tests for new functionality
+- Follow Canadian spelling conventions
+
+#### Package Management
+
+When adding new package dependencies:
+
+```r
+# Install a new package with renv
+renv::install("packagename")
+
+# Update the renv.lock file to record the dependency
+renv::snapshot()
+```
+
+Include the updated `renv.lock` file in your pull request so other contributors will get the same dependencies.
+
+### 4. Test Your Changes
+
+Before submitting a pull request, ensure that:
+
+- All tests pass
+- New functionality is tested
+- Documentation is updated and complete
+
+Run tests with:
+
+```r
+testthat::test_dir("tests/testthat/")
+```
+
+### 5. Update Documentation
+
+If your changes require documentation updates:
+
+1. Edit the appropriate `.qmd` files in the `docs/` directory
+2. Preview your changes locally:
+ ```bash
+ quarto preview
+ ```
+3. Make sure your documentation changes render correctly before submitting your PR
+
+#### Documentation Structure
+
+- `docs/reference/`: Technical reference documentation
+- `docs/how-to/`: Task-oriented guides
+- `docs/explanation/`: Conceptual explanations
+- `docs/tutorials/`: Learning-oriented tutorials
+
+#### Writing Style
+
+- Use sentence case for headings
+- Follow Canadian spelling conventions
+- Be clear, concise, and direct
+- Use examples where appropriate
+
+### 6. Submit a Pull Request
+
+1. Push your changes to your fork:
+ ```bash
+ git push origin your-branch-name
+ ```
+
+2. Create a pull request from your branch to the main CSHM repository
+3. Provide a clear title and description for your pull request, explaining:
+ - What changes you've made
+ - Why these changes are necessary
+ - Any dependencies or potential issues
+
+### 7. Code Review
+
+All pull requests will be reviewed by project maintainers. Be prepared to:
+- Answer questions about your implementation
+- Make requested changes to meet project standards
+- Be patient during the review process
+
+## Styleguides
+
+### Code Style
+
+- Follow tidyverse design principles
+- Use snake_case for function and variable names
+- Include roxygen2 documentation for all functions
+- Format code with the styler package
+
+### Commit Messages
+
+Write clear, concise commit messages that explain what the commit does and why:
+
+```
+Add smoking cessation function
+
+Implements the processing for smoking cessation data based on the APC model.
+Function extracts cessation age and calculates cessation probabilities.
+```
+
+## Licence
+
+By contributing to CSHM, you agree that your contributions will be licensed under the [MIT License](LICENSE).
+
+## Questions?
+
+If you have questions about contributing, please open an issue on GitHub for clarification.
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 03ffbba..64b7db2 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,28 +1,21 @@
-BSD 3-Clause License
+MIT License
-Copyright (c) 2025, Doug Manuel
+Copyright (c) 2025 Doug Manuel and The Ottawa Hospital Research Institute
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
-1. Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
-2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-3. Neither the name of the copyright holder nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/R/apc-model.R b/R/apc-model.R
new file mode 100644
index 0000000..052f970
--- /dev/null
+++ b/R/apc-model.R
@@ -0,0 +1,518 @@
+# apc-model.R
+# Stage 7: Prepare APC numerator/denominator datasets
+# Stage 8: Fit constrained cubic spline age-period-cohort models
+#
+# Based on the Canadian Smoking Histories Model (Manuel et al., Health Reports 2020)
+# and the Holford constrained spline APC framework (Holford et al., Cancer Epidemiol
+# Biomarkers Prev 2014).
+#
+# CCHS data used here is accessed and adapted in accordance with the Statistics Canada
+# Open Licence (https://www.statcan.gc.ca/eng/reference/licence).
+#
+# References:
+# Manuel DG et al. Health Reports 2020. doi:10.25318/82-003-x202001100002-eng
+# Holford TR et al. Cancer Epidemiol Biomarkers Prev. 2014;23(11):2356-65.
+
+
+# ---------------------------------------------------------------------------
+# Stage 7 entry point
+# ---------------------------------------------------------------------------
+
+#' Prepare APC datasets for model fitting
+#'
+#' Adds survey_year/cohort via derive_survey_year(), splits rows by the
+#' configured sex column (1 and 2), builds initiation and cessation tables
+#' per sex, then runs each through apply_survival_correction(). Rows with a
+#' missing sex code are dropped (not expanded into all-NA rows).
+#'
+#' @param analysis_data Output of impute_data()
+#' @param cfg Config object from config::get()
+#' @return Named list of data frames: initiation_men, initiation_women,
+#'   cessation_men, cessation_women (columns age, cohort, period, event, weight).
+prepare_apc_data <- function(analysis_data, cfg) {
+  data <- derive_survey_year(analysis_data, cfg)
+
+  # which() discards NA comparisons; data[NA, ] would otherwise yield NA rows
+  sex <- data[[survey_var(cfg, "sex")]]
+  men <- data[which(sex == 1), , drop = FALSE]
+  women <- data[which(sex == 2), , drop = FALSE]
+
+  apc <- list(
+    initiation_men = build_initiation_data(men, cfg),
+    initiation_women = build_initiation_data(women, cfg),
+    cessation_men = build_cessation_data(men, cfg),
+    cessation_women = build_cessation_data(women, cfg)
+  )
+  lapply(apc, apply_survival_correction, cfg = cfg)
+}
+
+
+# ---------------------------------------------------------------------------
+# Stage 7 sub-functions
+# ---------------------------------------------------------------------------
+
+#' Add survey_year and cohort columns to analysis data
+#'
+#' Maps survey-cycle codes to calendar years using cfg$cycle_survey_years and
+#' defines cohort as survey_year - round(age). Codes are coerced with
+#' as.character() first because SurveyCycle is a factor.
+#'
+#' @param data Data frame containing the configured cycle and age columns
+#' @param cfg Config object
+#' @return data with survey_year (integer) and cohort (integer) columns added
+derive_survey_year <- function(data, cfg) {
+  cycle_col <- survey_var(cfg, "cycle")
+  age_col <- survey_var(cfg, "age")
+  year_map <- cfg$cycle_survey_years
+  cycle_keys <- as.character(data[[cycle_col]])
+
+  # Resolve each distinct code once; unmapped codes become NA (not NULL)
+  distinct_keys <- unique(cycle_keys)
+  distinct_years <- vapply(distinct_keys, function(k) {
+    v <- year_map[[k]]
+    if (is.null(v)) NA_integer_ else as.integer(v)
+  }, integer(1), USE.NAMES = FALSE)
+
+  unmapped <- distinct_keys[is.na(distinct_years)]
+  if (length(unmapped) > 0) {
+    stop("Unknown SurveyCycle codes with no year mapping: ", paste(unmapped, collapse = ", "))
+  }
+
+  data$survey_year <- distinct_years[match(cycle_keys, distinct_keys)]
+  # as.integer() keeps cohort integer as documented; round() alone is double
+  data$cohort <- data$survey_year - as.integer(round(data[[age_col]]))
+  data
+}
+
+
+#' Build combined initiation numerator + denominator dataset
+#'
+#' Numerator: one event row per ever-smoker (status 1-5) whose age at first
+#' cigarette is non-missing, >= the configured floor, and <= current age.
+#' Denominator: at-risk person-years built by expand_denominator().
+#'
+#' @param data Data frame for one sex, with survey_year and cohort columns
+#' @param cfg Config object
+#' @return Long-format data frame: age, cohort, period, event, weight
+build_initiation_data <- function(data, cfg) {
+  status_col <- survey_var(cfg, "smoking_status")
+  age_col <- survey_var(cfg, "age_first_cigarette")
+  weight_col <- survey_var(cfg, "weight")
+  min_age <- survey_bound(cfg, "age_first_cigarette", "min")
+
+  # Restrict to valid cohorts. which() drops rows whose cohort is NA; plain
+  # logical subsetting would turn each of them into an all-NA row.
+  data <- data[which(data$cohort >= cfg$apc$cohort_min), , drop = FALSE]
+
+  # Identify ever-smokers: SMKDSTY_original %in% 1:5 (%in% is never NA).
+  # Never-smokers (SMKDSTY_original = 6) carry NA(a) for age_first_cigarette;
+  # 55 is the legitimate midpoint of the "50+ years" category among ever-smokers.
+  # SMKDSTY_original categories: 1=daily, 2=occ(fmr daily), 3=always occ, 4=fmr daily, 5=fmr occ, 6=never
+  ever_smoker <- data[[status_col]] %in% 1:5
+  age_init_raw <- data[[age_col]]
+  age_survey <- data[[survey_var(cfg, "age")]]
+
+  # The analytic floor is survey_bound(cfg, "age_first_cigarette", "min"):
+  # 13 for PUMF, 8 for Master per config.yml. Note SMKG01C_cont has a 5-11
+  # category (midpoint 8) in all PUMF cycles, so a floor of 13 excludes that
+  # group — whether to lower the PUMF floor to 8 is an open study decision.
+  # Source of truth for category midpoints: cchsflow variable_details.csv (recEnd).
+  ages_among_smokers <- age_init_raw[ever_smoker & !is.na(age_init_raw)]
+  if (length(ages_among_smokers) > 0 && min(ages_among_smokers) > 10) {
+    warning(
+      "min(age_first_cigarette) = ", min(ages_among_smokers),
+      " among ever-smokers — early-initiation categories appear absent or ",
+      "excluded by the configured floor (", min_age, "). RDC Master run will ",
+      "use exact ages."
+    )
+  }
+
+  # Issue 2: flag implausible initiation ages (age_first > current age)
+  implausible <- ever_smoker & !is.na(age_init_raw) & age_init_raw > age_survey
+  n_implausible <- sum(implausible, na.rm = TRUE)
+  if (n_implausible > 0) {
+    message("Excluding ", n_implausible, " rows with age_first_cigarette > current age.")
+  }
+
+  # Valid initiators: ever-smoker, plausible age, age >= min_age.
+  # `%in% TRUE` maps an NA mask entry (missing current age) to FALSE so the
+  # subsetting below cannot emit all-NA numerator rows.
+  valid_init <- (ever_smoker & !is.na(age_init_raw) &
+    age_init_raw >= min_age & !implausible) %in% TRUE
+
+  # Numerator: one row per initiator.
+  # NOTE(review): event periods are not clipped to period_min/period_max,
+  # whereas expand_denominator() clips at-risk years — confirm intended.
+  age_num <- as.integer(round(age_init_raw[valid_init]))
+  cohort_num <- data$cohort[valid_init]
+  numerator <- data.frame(
+    age = age_num,
+    cohort = cohort_num,
+    period = cohort_num + age_num,
+    event = rep(1L, length(age_num)),
+    weight = data[[weight_col]][valid_init]
+  )
+
+  # Denominator: person-years at risk before initiation.
+  # NOTE(review): every row without a valid initiation age — never-smokers,
+  # but also ever-smokers whose age is missing, below min_age or implausible
+  # — stays at risk through survey age, despite the "Excluding" message.
+  age_init <- ifelse(valid_init, as.integer(round(age_init_raw)), NA_integer_)
+  denom_source <- data.frame(
+    person_id = seq_len(nrow(data)),
+    cohort = data$cohort,
+    age_denom_max = ifelse(
+      is.na(age_init),
+      as.integer(round(age_survey)), # never initiated — at risk through observed age
+      age_init - 1L # initiated — at risk until year before initiation
+    ),
+    weight = data[[weight_col]]
+  )
+
+  # Calendar window applied to every person's at-risk span
+  period_range <- seq(cfg$apc$period_min, cfg$apc$period_max)
+  denominator <- expand_denominator(denom_source, period_range, min_age)
+
+  rbind(numerator, denominator)
+}
+
+
+#' Expand person-level at-risk spans into one row per person-year
+#'
+#' Each person is at risk from age min_age through age_denom_max, clipped to
+#' the first/last element of period_range. Persons with a missing cohort or
+#' age_denom_max, or with an empty clipped window, contribute no rows.
+#'
+#' @param denom_source Data frame with: cohort, age_denom_max, weight
+#' @param period_range Integer vector of calendar years (first/last are used)
+#' @param min_age Minimum age for being at risk
+#' @return Data frame: age, cohort, period, event = 0, weight
+expand_denominator <- function(denom_source, period_range, min_age) {
+  empty <- data.frame(
+    age = integer(0), cohort = integer(0), period = integer(0),
+    event = integer(0), weight = numeric(0)
+  )
+  if (nrow(denom_source) == 0 || length(period_range) == 0) {
+    return(empty)
+  }
+  cohort <- denom_source$cohort
+  window_end <- period_range[length(period_range)]
+  # First/last at-risk calendar year per person, clipped to the window
+  first_year <- pmax(period_range[1], cohort + min_age)
+  last_year <- pmin(window_end, cohort + denom_source$age_denom_max)
+
+  # NA cohort or age_denom_max propagates to NA here and is dropped
+  keep <- which(!is.na(first_year) & !is.na(last_year) & last_year >= first_year)
+  if (length(keep) == 0) {
+    return(empty)
+  }
+  # Whole person-years per kept person (same 1e-10 fuzz as seq()/`:`)
+  n_years <- floor(last_year[keep] - first_year[keep] + 1e-10) + 1
+  person <- rep(keep, times = n_years)
+  years <- first_year[person] + sequence(n_years) - 1
+
+  data.frame(
+    age = as.integer(years - cohort[person]),
+    cohort = cohort[person],
+    period = as.integer(years),
+    event = rep(0L, length(person)),
+    weight = denom_source$weight[person]
+  )
+}
+
+
+#' Build combined cessation numerator + denominator dataset
+#'
+#' Restricted to ever-daily smokers using SMKDSTY_original categories:
+#' 1 = daily, 2 = occasional (formerly daily), 4 = former daily.
+#' Category 3 (always occasional) and 5 (former occasional) are excluded
+#' because they never smoked daily. See GH#1.
+#'
+#' @param data Data frame for one sex, with survey_year and cohort columns
+#' @param cfg Config object
+#' @return Long-format data frame: age, cohort, period, event, weight
+build_cessation_data <- function(data, cfg) {
+  status_col <- survey_var(cfg, "smoking_status")
+  quit_col <- survey_var(cfg, "years_since_quit")
+  age_col <- survey_var(cfg, "age")
+  weight_col <- survey_var(cfg, "weight")
+  # NOTE(review): this floor is the configured minimum of years_since_quit,
+  # yet it is applied below as a minimum *age* (cessation age filter and
+  # at-risk start age for every person) — confirm this is the intended bound.
+  min_age <- survey_bound(cfg, "years_since_quit", "min")
+
+  # SMKDSTY_original: 1=daily, 2=occ(fmr daily), 3=always occ, 4=fmr daily, 5=fmr occ, 6=never
+  # Cessation scope: ever-daily smokers only (1, 2, 4). Excludes always-occasional (3)
+  # and former-occasional (5) — they never smoked daily so cessation timing is undefined.
+  # which() also drops rows with an NA cohort; a logical NA index would
+  # otherwise produce all-NA rows.
+  in_scope <- which(
+    data[[status_col]] %in% c(1, 2, 4) & data$cohort >= cfg$apc$cohort_min
+  )
+  data <- data[in_scope, , drop = FALSE]
+
+  smkdsty <- data[[status_col]]
+  age_survey <- data[[age_col]]
+  age_cessation <- age_survey - data[[quit_col]]
+
+  former_daily <- smkdsty == 4
+  current_daily <- smkdsty %in% c(1, 2)
+
+  # Issue 7: plausibility filter for former daily smokers.
+  # PUMF: time_quit_smoking_daily top-coded at 15 years; cessation ages below
+  # approximately (survey_age - 15) are not directly observed. Master has exact values.
+  # Source of truth for bounds: config.yml survey: years_since_quit: pumf/master: max.
+  # A missing cessation age is also counted here.
+  implausible_cess <- former_daily &
+    (is.na(age_cessation) | age_cessation < min_age | age_cessation < 0)
+  n_implausible <- sum(implausible_cess, na.rm = TRUE)
+  if (n_implausible > 0) {
+    message("Excluding ", n_implausible,
+            " cessation rows with age_cessation < ", min_age, " or negative.")
+  }
+
+  valid_cess <- former_daily & !implausible_cess & !is.na(age_cessation)
+
+  # Numerator: one row per quitter
+  age_cess_int <- as.integer(round(age_cessation[valid_cess]))
+  cohort_num <- data$cohort[valid_cess]
+  numerator <- data.frame(
+    age = age_cess_int,
+    cohort = cohort_num,
+    period = cohort_num + age_cess_int,
+    event = rep(1L, length(age_cess_int)),
+    weight = data[[weight_col]][valid_cess]
+  )
+
+  # Denominator: current and valid former daily smokers at risk of cessation.
+  # Quitters: at risk until the year before cessation; others: to survey age.
+  in_denom <- valid_cess | current_daily
+  denom_source <- data.frame(
+    person_id = seq_len(sum(in_denom)),
+    cohort = data$cohort[in_denom],
+    age_denom_max = ifelse(
+      valid_cess[in_denom],
+      as.integer(round(age_cessation[in_denom])) - 1L,
+      as.integer(round(age_survey[in_denom]))
+    ),
+    weight = data[[weight_col]][in_denom]
+  )
+
+  period_range <- seq(cfg$apc$period_min, cfg$apc$period_max)
+  denominator <- expand_denominator(denom_source, period_range, min_age)
+
+  rbind(numerator, denominator)
+}
+
+
+#' Apply mortality survival correction to APC dataset
+#'
+#' Dispatches on cfg$apc$mortality_method:
+#'   "peto"  — stub: apc_data is returned unchanged (weight × 1.0)
+#'   "mport" — not yet implemented; always raises an error
+#'
+#' @param apc_data Data frame with weight column
+#' @param cfg Config object; cfg$apc$mortality_method must be a single string
+#' @return apc_data, unmodified, for "peto"; every other value is an error
+apply_survival_correction <- function(apc_data, cfg) {
+  method <- cfg$apc$mortality_method
+
+  # A missing key gives NULL, and `NULL == "peto"` is logical(0), which makes
+  # if() fail with "argument is of length zero"; report the real problem.
+  if (!is.character(method) || length(method) != 1L || is.na(method)) {
+    stop("cfg$apc$mortality_method must be a single string: 'peto' or 'mport'.")
+  }
+  switch(method,
+    peto = apc_data, # Peto stub: weights unchanged
+    mport = stop(
+      "MPoRT mortality correction not yet implemented. ",
+      "Set cfg$apc$mortality_method = 'peto' for current pipeline runs. ",
+      "See protocol-todo.md issue #4 for interaction with WTS_M."
+    ),
+    stop("Unknown mortality_method: '", method, "'. Expected 'peto' or 'mport'.")
+  )
+}
+
+
+# ---------------------------------------------------------------------------
+# Stage 8 entry point
+# ---------------------------------------------------------------------------
+
+#' Fit APC model for one sex × transition combination
+#'
+#' Builds the spline design matrix with build_spline_basis() (which applies
+#' the period/cohort clamping), fits it with fit_binomial_apc() on the
+#' dataset's event and weight columns, and stores the configured (unfiltered)
+#' knots, the constraints, model_type, spline_type and sex as attributes.
+#'
+#' @param apc_dataset One element of the list returned by prepare_apc_data()
+#' @param model_type Character: "initiation" or "cessation"
+#' @param sex Integer: 1 (men) or 2 (women)
+#' @param cfg Config object from config::get()
+#' @return Fitted glm object with attributes: knots, constraints, model_type,
+#'   spline_type, sex
+fit_apc_model <- function(apc_dataset, model_type, sex, cfg) {
+  design <- build_spline_basis(apc_dataset, model_type, sex, cfg)
+  model <- fit_binomial_apc(design, apc_dataset$event, apc_dataset$weight)
+  apc_cfg <- cfg$apc
+  # Tag the fit with the configuration it was built from.
+  attr(model, "knots") <- list(
+    age = apc_cfg$age_knots,
+    period = apc_cfg$period_knots,
+    cohort = apc_cfg$cohort_knots
+  )
+  attr(model, "constraints") <- list(
+    period_max = get_period_constraint(model_type, sex, cfg),
+    cohort_min = apc_cfg$cohort_constraints$initiation_prior_to,
+    cohort_max = apc_cfg$cohort_constraints$cessation_from
+  )
+  attr(model, "model_type") <- model_type
+  attr(model, "spline_type") <- apc_cfg$spline_type
+  attr(model, "sex") <- sex
+  model
+}
+
+
+# ---------------------------------------------------------------------------
+# Stage 8 sub-functions
+# ---------------------------------------------------------------------------
+
+#' Keep only the candidate knots strictly inside the observed range of x
+#'
+#' Knots equal to, or outside, min(x)/max(x) (NAs in x ignored) are dropped.
+#' nsp() places boundary knots at min/max(x) and raises an error for interior
+#' knots outside that range, e.g. for cessation models whose clamped period
+#' range (1965–2013) excludes the early knots (1940, 1950, 1960).
+#'
+#' @param x Numeric vector of observed values
+#' @param knots Numeric vector of candidate interior knot positions
+#' @return The elements of knots with min(x) < knot < max(x), order preserved
+interior_knots <- function(x, knots) {
+  bounds <- range(x, na.rm = TRUE)
+  inside <- knots > bounds[1] & knots < bounds[2]
+  knots[inside]
+}
+
+
+#' Build combined age-period-cohort spline basis matrix
+#'
+#' Caps period at get_period_constraint(), clamps cohort to the
+#' cfg$apc$cohort_constraints bounds, filters knots via interior_knots(), and
+#' builds one basis per dimension (cfg$apc$spline_type: "nsp" or "rcs").
+#'
+#' @param apc_dataset Data frame with age, period, cohort columns
+#' @param model_type "initiation" or "cessation"
+#' @param sex 1 or 2
+#' @param cfg Config object
+#' @return Column-bound bases with columns age_1.., period_1.., cohort_1..
+build_spline_basis <- function(apc_dataset, model_type, sex, cfg) {
+ period_constraint <- get_period_constraint(model_type, sex, cfg)
+ cohort_prior <- cfg$apc$cohort_constraints$initiation_prior_to
+ cohort_from <- cfg$apc$cohort_constraints$cessation_from
+
+ period_clamped <- pmin(apc_dataset$period, period_constraint) # capped from above only
+ cohort_clamped <- pmin(pmax(apc_dataset$cohort, cohort_prior), cohort_from) # clamped on both sides
+
+ # Keep only the configured knots strictly inside each (clamped) data range.
+ # nsp() boundary knots are set automatically at min/max(x); interior knots
+ # outside that range cause an error. Example: cessation periods clamp to
+ # 1965–2013, which excludes the early period knots (1940, 1950, 1960).
+ # NOTE(review): the same filtered knots are also passed to rms::rcs().
+ age_knots <- interior_knots(apc_dataset$age, cfg$apc$age_knots)
+ period_knots <- interior_knots(period_clamped, cfg$apc$period_knots)
+ cohort_knots <- interior_knots(cohort_clamped, cfg$apc$cohort_knots)
+
+ spline_type <- cfg$apc$spline_type
+
+ if (spline_type == "nsp") {
+ if (!requireNamespace("splines2", quietly = TRUE)) {
+ stop("Package 'splines2' required for spline_type = 'nsp'. Install with renv::install('splines2').")
+ }
+ age_basis <- splines2::nsp(apc_dataset$age, knots = age_knots, intercept = FALSE)
+ period_basis <- splines2::nsp(period_clamped, knots = period_knots, intercept = FALSE)
+ cohort_basis <- splines2::nsp(cohort_clamped, knots = cohort_knots, intercept = FALSE)
+
+ } else if (spline_type == "rcs") {
+ if (!requireNamespace("rms", quietly = TRUE)) {
+ stop("Package 'rms' required for spline_type = 'rcs'. Install with renv::install('rms').")
+ }
+ age_basis <- rms::rcs(apc_dataset$age, knots = age_knots)
+ period_basis <- rms::rcs(period_clamped, knots = period_knots)
+ cohort_basis <- rms::rcs(cohort_clamped, knots = cohort_knots)
+
+ } else {
+ stop("Unknown spline_type: '", spline_type, "'. Expected 'nsp' or 'rcs'.")
+ }
+
+ colnames(age_basis) <- paste0("age_", seq_len(ncol(age_basis))) # age_1, age_2, ...
+ colnames(period_basis) <- paste0("period_", seq_len(ncol(period_basis)))
+ colnames(cohort_basis) <- paste0("cohort_", seq_len(ncol(cohort_basis)))
+
+ cbind(age_basis, period_basis, cohort_basis)
+}
+
+
+#' Look up the period constraint year for a model type and sex
+#'
+#' Reads cfg$apc$period_constraints: cessation uses cessation_from for both
+#' sexes; initiation uses initiation_men_from (sex 1) or
+#' initiation_women_from (sex 2). Any other model_type or sex is an error.
+#'
+#' @param model_type "initiation" or "cessation"
+#' @param sex 1 (men) or 2 (women); only consulted for initiation
+#' @param cfg Config object
+#' @return The configured year (NULL if the key is absent) beyond which the
+#'   period effect is held constant
+get_period_constraint <- function(model_type, sex, cfg) {
+  limits <- cfg$apc$period_constraints
+  if (model_type == "cessation") return(limits$cessation_from)
+  if (model_type != "initiation") {
+    stop("model_type must be 'initiation' or 'cessation', got: ", model_type)
+  }
+  if (sex == 1) return(limits$initiation_men_from)
+  if (sex == 2) return(limits$initiation_women_from)
+  stop("sex must be 1 or 2, got: ", sex)
+}
+
+
+#' Aggregate person-years to age-period-cohort cells and fit binomial APC model
+#'
+#' Follows the SAS PROC MEANS → PROC GENMOD pattern from Modeling2013.sas:
+#' survey weights are summed within each unique basis row (= one
+#' age-period-cohort cell after clamping) to give a weighted numerator (d,
+#' rows with event == 1) and denominator (pop, all rows), then fitted as
+#' glm(cbind(d, pop - d) ~ basis, family = binomial).
+#'
+#' Fitting on individual-level rows with raw weights causes numerical failure
+#' because large survey weights (~10,000) create extreme leverage, driving
+#' glm.fit to push some probabilities to exactly 0 or 1.
+#'
+#' @param basis_matrix Named matrix from build_spline_basis(); at least one row
+#' @param event Vector of 0/1 outcomes, one per row of basis_matrix
+#' @param weight Numeric vector of survey weights, one per row of basis_matrix
+#' @return Fitted glm object (family = binomial) on the aggregated cells.
+#'   Errors if basis_matrix is empty or event/weight lengths do not match
+#'   (a missing event column would otherwise silently yield d = 0).
+fit_binomial_apc <- function(basis_matrix, event, weight) {
+  n_rows <- NROW(basis_matrix)
+  if (n_rows == 0L || length(event) != n_rows || length(weight) != n_rows) {
+    stop("fit_binomial_apc: basis_matrix must have rows, and event/weight ",
+         "must each have one value per row.")
+  }
+
+  # One key per row; rows sharing a key belong to the same cell.
+  cell_key <- do.call(paste, c(as.data.frame(basis_matrix), sep = "|"))
+  first_in_cell <- !duplicated(cell_key)
+  cell_id <- match(cell_key, cell_key[first_in_cell])
+
+  # Single-pass grouped sums (cells in first-appearance order). Rows with
+  # event != 1 add 0 to d, so d equals the sum over the event == 1 rows.
+  sum_by_cell <- function(x) unname(vapply(split(x, cell_id), sum, numeric(1)))
+  cell_df <- as.data.frame(basis_matrix[first_in_cell, , drop = FALSE])
+  cell_df$.d <- sum_by_cell(ifelse(event == 1, weight, 0))
+  cell_df$.pop <- sum_by_cell(weight)
+
+  glm(cbind(.d, .pop - .d) ~ . - .d - .pop, data = cell_df,
+      family = binomial(),
+      control = glm.control(maxit = 100, epsilon = 1e-8))
+}
diff --git a/R/config-utils.R b/R/config-utils.R
new file mode 100644
index 0000000..d8fe1b6
--- /dev/null
+++ b/R/config-utils.R
@@ -0,0 +1,32 @@
+# config-utils.R — helpers for accessing config.yml values
+#
+# survey_var(cfg, "age") → variable name for current data_source
+# survey_bound(cfg, "age", "min") → analytical bound for a survey variable
+
+# Null-coalescing fallback: base R >= 4.4 ships %||%; defined here for R >= 4.2.
+`%||%` <- function(x, y) if (!is.null(x)) x else y
+
+# Variable name for survey `key` under the active cfg$data_source
+# (default "pumf"); values stored as plain scalars are returned directly.
+survey_var <- function(cfg, key) {
+  spec <- cfg$survey[[key]]
+  if (is.null(spec)) stop("survey_var: unknown key '", key, "'")
+  if (!is.list(spec)) return(spec) # scalar (e.g. cycle), not a pumf/master list
+  source_name <- cfg$data_source %||% "pumf"
+  per_source <- spec[[source_name]]
+  if (is.null(per_source)) stop("survey_var: no '", source_name, "' entry for key '", key, "'")
+  if (is.list(per_source)) per_source[["var"]] else per_source # list entry -> its `var`
+}
+
+# Bound (min/max) for the active data source; scalar keys (e.g. cycle) have none.
+# e.g. survey_bound(cfg, "age_first_cigarette", "min") → 13 (pumf) or 8 (master)
+survey_bound <- function(cfg, key, bound) {
+  entry <- cfg$survey[[key]]
+  if (is.null(entry)) stop("survey_bound: unknown key '", key, "'")
+  src <- cfg$data_source %||% "pumf"
+  src_entry <- if (is.list(entry)) entry[[src]] else NULL # `[[src]]` on a scalar: "subscript out of bounds"
+  if (is.null(src_entry)) stop("survey_bound: no '", src, "' entry for key '", key, "'")
+  val <- if (is.list(src_entry)) src_entry[[bound]] else NULL
+  if (is.null(val)) stop("survey_bound: no '", bound, "' for key '", key, "' source '", src, "'")
+  val
+}
diff --git a/R/create-descriptive-tables.R b/R/create-descriptive-tables.R
new file mode 100644
index 0000000..497a119
--- /dev/null
+++ b/R/create-descriptive-tables.R
@@ -0,0 +1,482 @@
+# create-descriptive-tables.R
+# Format descriptive statistics into publication-ready gt tables.
+# Ported from DemPoRT-V2-dev (origin/dev).
+
+# Stub label used for the "NA::c" category row of each variable.
+NA_c_label <- "Missing from survey"
+# Footnote sentences describing how table cells are formatted.
+categorical_predictor_footnote <- paste(
+  "For categorical variables the values are displayed as N (percent).",
+  "Percents may not sum to 100 due to missingness."
+)
+continuous_predictor_footnote <-
+  "For continuous predictors, the values are displayed as min - max, median (IQR)."
+
+# ---- Formatting helpers -----------------------------------------------------
+
+# Cell text for a categorical row: n with thousands separators, then "\n (pct)"
+format_cat_descriptive_data <- function(descriptive_data_row) { # "No data" if n is NA/0
+  count <- descriptive_data_row[1, "n"]
+  if (is.na(count) || count == 0) return("No data")
+  pct <- round(descriptive_data_row[1, "percent"] * 100, 1)
+  paste0(format(count, big.mark = ","), "\n (", pct, ")")
+}
+
+# Cell text for a continuous row: "min - max,\nmedian (p25 - p75)". Returns
+# "No data" when n is 0 or NA (an NA n used to make the `if` fail), matching
+# format_cat_descriptive_data().
+format_cont_descriptive_data <- function(descriptive_data_row) {
+  cell <- function(col) descriptive_data_row[1, col]
+  if (is.na(cell("n")) || cell("n") == 0) return("No data")
+  iqr <- paste0(cell("percentile25"), " - ", cell("percentile75"))
+  paste0(cell("min"), " - ", cell("max"), ",\n", cell("median"), " (", iqr, ")")
+}
+
+# Type label for a continuous variable: "<variableType> (in <units>)".
+.format_cont_type <- function(variables_sheet_row) {
+  stopifnot(is_continuous_variable(variables_sheet_row))
+  u <- variables_sheet_row$units # "N/A" gives an empty suffix
+  paste(variables_sheet_row$variableType, ifelse(u != "N/A", paste0("(in ", u, ")"), ""))
+}
+
+# ---- Row builders -----------------------------------------------------------
+
+# One-row data frame: variable, type, then one column per stratifier row, named
+# by its catLabel verbatim (check.names = FALSE stops "A b" becoming "A.b").
+create_descriptive_table_row <- function(
+  variable, type, stratifier_details_rows, get_stratifier_value
+) {
+  stratifier_cols <- purrr::map(
+    seq_len(nrow(stratifier_details_rows)),
+    function(i) {
+      row <- stratifier_details_rows[i, ]
+      cell <- setNames(list(get_stratifier_value(row)), row$catLabel)
+      data.frame(cell, check.names = FALSE)
+    }
+  ) |> purrr::list_cbind()
+  cbind(data.frame(variable = variable, type = type), stratifier_cols)
+}
+
+# Rows for a variable's missing-data categories: one row per "NA::a"/"NA::b"
+# category from get_unique_rec_end_rows() (labelled by its catLabel), then one
+# "NA::c" row labelled NA_c_label. Each cell formats the first row of
+# data_for_variable matching the stratifier value and the category code.
+create_descriptive_table_missing_rows <- function(
+  variable, variable_details_sheet, stratifier_rows, data_for_variable
+) {
+  # Returns the per-stratifier cell formatter for one category code.
+  cell_formatter <- function(category_code) {
+    force(category_code)
+    function(sr) {
+      matched <- dplyr::filter(
+        data_for_variable,
+        groupBy_1 == sr$variable &
+          groupByValue_1 == sr$recEnd &
+          cat == category_code
+      )
+      format_cat_descriptive_data(matched[1, ])
+    }
+  }
+
+  na_ab_cats <- dplyr::filter(
+    get_unique_rec_end_rows(variable_details_sheet, variable, TRUE),
+    recEnd %in% c("NA::a", "NA::b")
+  )
+  na_ab_rows <- purrr::list_rbind(purrr::map(
+    seq_len(nrow(na_ab_cats)),
+    function(i) {
+      create_descriptive_table_row(
+        na_ab_cats$catLabel[i], "", stratifier_rows,
+        cell_formatter(na_ab_cats$recEnd[i])
+      )
+    }
+  ))
+
+  na_c_row <- create_descriptive_table_row(
+    NA_c_label, "", stratifier_rows, cell_formatter("NA::c")
+  )
+  rbind(na_ab_rows, na_c_row)
+}
+
+# ---- Core table builder -----------------------------------------------------
+# Builds the table rows (section / variable / category) plus one text column per column-stratifier level.
+.build_descriptive_table_data <- function(
+ descriptive_data, # statistics rows; read by variable, cat, groupBy_1, groupByValue_1
+ variables_sheet,
+ variable_details_sheet,
+ variables,
+ column_stratifier = NULL,
+ row_stratifiers = list(), # variables named here are skipped in the main loop
+ sections_order = NULL, # if given, replaces the derived section list
+ include_na = TRUE
+) {
+ stratify_config <- row_stratifiers # NOTE(review): not read again within this function
+ if (!is.null(column_stratifier)) {
+ stratify_config[["all"]] <- c(column_stratifier)
+ }
+
+ unrounded <- descriptive_data # attached unchanged as the "unrounded_data" attribute
+ formatted <- descriptive_data |>
+ dplyr::mutate(dplyr::across(where(is.numeric) & !c(n), ~ signif(.x, 4))) # 4 significant digits; n untouched
+
+ # Determine sections: first-appearance order over `variables`, unless sections_order is supplied
+ sections_in_table <- c()
+ for (v in variables) {
+ s <- variables_sheet[variables_sheet$variable == v, ]$section[1]
+ if (!s %in% sections_in_table) sections_in_table <- c(sections_in_table, s)
+ }
+ if (!is.null(sections_order)) sections_in_table <- sections_order
+
+ stratifier_rows <- get_unique_rec_end_rows(variable_details_sheet, column_stratifier)
+ table_variables <- c()
+ table_type <- c()
+ table_row_types <- c()
+ stratify_by_stats <- list()
+ for (i in seq_len(nrow(stratifier_rows))) {
+ stratify_by_stats[[stratifier_rows[i, "catLabel"]]] <- c() # NOTE(review): assigning NULL adds no element; entries appear on first append
+ }
+
+ merge_stats <- function(stats) { # appends one built row to the accumulators (not table_row_types)
+ table_variables <<- c(table_variables, stats$variable)
+ table_type <<- c(table_type, stats$type)
+ for (i in seq_len(nrow(stratifier_rows))) {
+ cat_label <- stratifier_rows[i, "catLabel"]
+ stratify_by_stats[[cat_label]] <<- c(
+ stratify_by_stats[[cat_label]], stats[[cat_label]]
+ )
+ }
+ }
+
+ for (section in sections_in_table) {
+ table_variables <- c(table_variables, section) # section heading row with empty cells
+ table_type <- c(table_type, "")
+ table_row_types <- c(table_row_types, "section")
+ for (i in seq_len(nrow(stratifier_rows))) {
+ cat_label <- stratifier_rows[i, "catLabel"]
+ stratify_by_stats[[cat_label]] <- c(stratify_by_stats[[cat_label]], "")
+ }
+
+ for (variable in variables) {
+ vrow <- get_row_for_variable(variable, variables_sheet)
+ if (vrow[1, ]$section != section) next
+ if (!is.null(row_stratifiers[[variable]])) next # row-stratified variables are not rendered here
+
+ data_for_var <- formatted[formatted$variable == variable, ]
+
+ if (vrow[1, ]$variableType == "Categorical") {
+ table_variables <- c(table_variables, vrow[1, "label"]) # variable heading row with empty cells
+ table_type <- c(table_type, "Categorical")
+ table_row_types <- c(table_row_types, "variable")
+ for (i in seq_len(nrow(stratifier_rows))) {
+ stratify_by_stats[[stratifier_rows[i, "catLabel"]]] <- c(
+ stratify_by_stats[[stratifier_rows[i, "catLabel"]]], ""
+ )
+ }
+
+ categories <- get_unique_rec_end_rows(
+ variable_details_sheet, variable, include_na
+ )
+ # Append an "NA::c" row: a blanked copy of the first category row, relabelled with NA_c_label
+ na_row <- categories[1, , drop = FALSE]
+ for (col in colnames(na_row)) {
+ if (is.character(na_row[[col]])) na_row[[col]][1] <- ""
+ else na_row[[col]][1] <- NA
+ }
+ na_row$variable[1] <- variable
+ na_row$recEnd[1] <- "NA::c"
+ na_row$catLabel[1] <- NA_c_label
+ na_row$typeEnd[1] <- "cat"
+ na_row$units[1] <- "N/A"
+ categories <- rbind(categories, na_row)
+
+ for (ci in seq_len(nrow(categories))) { # one "category" row per category
+ table_variables <- c(table_variables, categories[ci, "catLabel"])
+ table_type <- c(table_type, "")
+ table_row_types <- c(table_row_types, "category")
+ for (i in seq_len(nrow(stratifier_rows))) {
+ sr <- stratifier_rows[i, ]
+ d <- data_for_var[
+ data_for_var$cat == categories[ci, ]$recEnd &
+ data_for_var$groupBy_1 == column_stratifier &
+ data_for_var$groupByValue_1 == sr$recEnd, ]
+ stratify_by_stats[[sr$catLabel]] <- c(
+ stratify_by_stats[[sr$catLabel]],
+ format_cat_descriptive_data(d)
+ )
+ }
+ }
+ } else {
+ # Continuous (any variableType other than "Categorical"): one summary row, then the missing-data rows
+ first_row <- create_descriptive_table_row(
+ vrow[1, ]$label, .format_cont_type(vrow), stratifier_rows,
+ function(sr) {
+ ixs <- which( # the summary row is the one whose cat is NA
+ is.na(data_for_var$cat) &
+ data_for_var$groupBy_1 == column_stratifier &
+ data_for_var$groupByValue_1 == sr$recEnd
+ )
+ if (length(ixs) != 1) stop(paste(
+ "Expected 1 continuous row for", variable,
+ "stratifier", sr$recEnd, "- found", length(ixs)
+ ))
+ format_cont_descriptive_data(data_for_var[ixs, ])
+ }
+ )
+ merge_stats(first_row)
+ table_row_types <- c(table_row_types, "variable")
+
+ missing_rows <- create_descriptive_table_missing_rows(
+ variable, variable_details_sheet, stratifier_rows, data_for_var
+ )
+ for (i in seq_len(nrow(missing_rows))) {
+ merge_stats(missing_rows[i, ])
+ table_row_types <- c(table_row_types, "category")
+ }
+ }
+ }
+ }
+
+ descriptive_table <- data.frame( # one row per accumulated entry, in insertion order
+ variable = table_variables,
+ type = table_type,
+ row_type = table_row_types
+ )
+ for (i in seq_len(nrow(stratifier_rows))) {
+ cat_label <- stratifier_rows[i, "catLabel"]
+ descriptive_table[[cat_label]] <- stratify_by_stats[[cat_label]]
+ }
+
+ attr(descriptive_table, "unrounded_data") <- unrounded # callers read these attributes back
+ attr(descriptive_table, "stratifier_rows") <- stratifier_rows
+ attr(descriptive_table, "column_stratifier") <- column_stratifier
+
+ descriptive_table
+}
+
+# ---- gt display -------------------------------------------------------------
+
+# Render the row table built by .build_descriptive_table_data() with gt.
+# "section" rows are removed and become row-group headings; stub labels are
+# bold with a 10px indent for "variable" rows and indented 20px for
+# "category" rows.
+create_descriptive_table_display <- function(descriptive_table_data) {
+  tbl <- descriptive_table_data
+  tbl$section_group <- NA_character_
+  section_label <- NA_character_
+  for (row_ix in seq_len(nrow(tbl))) {
+    # Each row takes the label of the most recent "section" row at or above it.
+    if (tbl$row_type[row_ix] == "section") {
+      section_label <- tbl$variable[row_ix]
+    }
+    tbl$section_group[row_ix] <- section_label
+  }
+  body_rows <- tbl[tbl$row_type != "section", ]
+
+  # Style the stub cells of one row type; a no-op when no such rows exist.
+  style_stub <- function(gt_obj, kind, style) {
+    if (!any(body_rows$row_type == kind)) {
+      return(gt_obj)
+    }
+    stub_labels <- body_rows$variable[body_rows$row_type == kind]
+    gt::tab_style(gt_obj, style = style, locations = gt::cells_stub(rows = stub_labels))
+  }
+
+  gt_obj <- gt::gt(
+    dplyr::select(body_rows, -row_type),
+    rowname_col = "variable", groupname_col = "section_group"
+  )
+  gt_obj <- gt::tab_style(
+    gt_obj,
+    style = gt::cell_text(weight = "bold"),
+    locations = gt::cells_row_groups()
+  )
+  gt_obj <- gt::tab_options(gt_obj, table.font.size = gt::px(10))
+  gt_obj <- gt::opt_table_lines(gt_obj, extent = "default")
+
+  bold_indent <- list(
+    gt::cell_text(weight = "bold"),
+    gt::cell_text(indent = gt::px(10))
+  )
+  gt_obj <- style_stub(gt_obj, "variable", bold_indent)
+  style_stub(gt_obj, "category", gt::cell_text(indent = gt::px(20)))
+}
+
+# ---- Public functions -------------------------------------------------------
+
+#' Create the column-stratified (sex) descriptive table as a gt object
+#'
+#' @param descriptive_data Output of get_descriptive_data()
+#' @param variables_sheet Variables worksheet data frame
+#' @param variable_details_sheet Variable details worksheet data frame
+#' @param variables Character vector of variables to include
+#' @param column_stratifier Variable name for column stratification (from config)
+#' @param sections_order Optional character vector to order sections
+#' @param include_na Whether to show missing categories
+#' @return A gt table object
+create_descriptive_table <- function(
+  descriptive_data,
+  variables_sheet,
+  variable_details_sheet,
+  variables,
+  column_stratifier = NULL,
+  sections_order = NULL,
+  include_na = TRUE
+) {
+  table_rows <- .build_descriptive_table_data(
+    descriptive_data, variables_sheet, variable_details_sheet,
+    variables, column_stratifier, list(), sections_order, include_na
+  )
+  stats <- attr(table_rows, "unrounded_data")
+  strata <- attr(table_rows, "stratifier_rows")
+
+  # Header N for one stratum: n / percent from the first row with a positive
+  # percent, or the summed n when the stratum has no such row.
+  stratum_total <- function(rec_end) {
+    in_stratum <- !is.na(stats$groupBy_1) &
+      stats$groupBy_1 == column_stratifier &
+      !is.na(stats$groupByValue_1) &
+      stats$groupByValue_1 == rec_end
+    d <- stats[in_stratum, ]
+    valid <- d[!is.na(d$percent) & d$percent > 0, ]
+    if (nrow(valid) > 0 && valid[1, "percent"] > 0) {
+      return(round(valid[1, "n"] / valid[1, "percent"]))
+    }
+    sum(d$n, na.rm = TRUE)
+  }
+
+  header_labels <- list(variable = "Variable", type = "Type")
+  for (i in seq_len(nrow(strata))) {
+    label <- strata[i, "catLabel"]
+    total_n <- stratum_total(strata[i, "recEnd"])
+    header_labels[[label]] <- gt::html(
+      paste0(label, " (N = ", format(total_n, big.mark = ","), ")a")
+    )
+  }
+  footnote_a <- paste0(
+    "a ", categorical_predictor_footnote, " ", continuous_predictor_footnote
+  )
+
+  gt_obj <- create_descriptive_table_display(table_rows)
+  gt_obj <- gt::cols_label(gt_obj, .list = header_labels)
+  bold <- gt::cell_text(weight = "bold")
+  gt_obj <- gt::tab_style(gt_obj, style = bold, locations = gt::cells_column_labels())
+  gt_obj <- gt::tab_header(gt_obj, title = "Sex-stratified population characteristics")
+  gt_obj <- gt::cols_width(gt_obj, type ~ gt::px(144))
+  gt_obj <- gt::tab_footnote(gt_obj, footnote = gt::html(footnote_a))
+  gt::tab_footnote(gt_obj, footnote = "Abbreviations: IQR, interquartile range; N, number")
+}
+
+#' Create cycle-stratified appendix table (sex within each cycle)
+#'
+#' @param study_data Study data frame; split by the integer value of cycle_col
+#' @param variables_sheet Variables worksheet data frame
+#' @param variable_details_sheet Variable details worksheet data frame
+#' @param variables Character vector of variables to include
+#' @param cycle_col Name of the survey cycle column (from config)
+#' @param cycle_labels Named vector of display labels, indexed by as.character(cycle)
+#' @param column_stratifier Variable for column stratification; also used to count header Ns
+#' @param sections_order Optional character vector to order sections
+#' @param include_na Whether to show missing categories
+#' @return A gt table object with one column spanner per cycle
+create_cycle_specific_descriptive_table <- function(
+ study_data,
+ variables_sheet,
+ variable_details_sheet,
+ variables,
+ cycle_col,
+ cycle_labels,
+ column_stratifier = NULL,
+ sections_order = NULL,
+ include_na = TRUE
+) {
+ cycles <- sort(unique(as.integer(as.character(study_data[[cycle_col]])))) # sort() also drops NAs
+ cycles <- cycles[!is.na(cycles)]
+ if (length(cycles) == 0) stop("No valid cycle values found in study_data[[\"", cycle_col, "\"]]")
+
+ stratify_config <- list()
+ if (!is.null(column_stratifier)) {
+ stratify_config[["all"]] <- list(column_stratifier)
+ }
+
+ cycle_tables <- list() # keyed "Cycle_<cycle>"
+ cycle_data_list <- list() # same keys; raw rows kept for the header N counts
+
+ for (cycle in cycles) {
+ cycle_data <- study_data[
+ as.integer(as.character(study_data[[cycle_col]])) == cycle, ]
+ key <- paste0("Cycle_", cycle)
+ cycle_data_list[[key]] <- cycle_data
+
+ cycle_desc <- get_descriptive_data(
+ cycle_data, variables_sheet, variable_details_sheet,
+ variables, stratify_config
+ )
+ cycle_tables[[key]] <- .build_descriptive_table_data(
+ cycle_desc, variables_sheet, variable_details_sheet,
+ variables, column_stratifier, list(), sections_order, include_na
+ )
+ }
+
+ # Combine tables side by side; the row labels and types come from the first cycle's table
+ combined <- data.frame(
+ variable = cycle_tables[[1]]$variable,
+ type = cycle_tables[[1]]$type,
+ row_type = cycle_tables[[1]]$row_type
+ )
+ for (cycle in cycles) {
+ key <- paste0("Cycle_", cycle)
+ ct <- cycle_tables[[key]]
+ # Columns are named "Cycle<cycle>_<catLabel>" using this cycle table's own stratifier_rows
+ strat_rows <- attr(ct, "stratifier_rows")
+ for (i in seq_len(nrow(strat_rows))) {
+ cat_label <- strat_rows[i, "catLabel"]
+ combined[[paste0("Cycle", cycle, "_", cat_label)]] <- ct[[cat_label]]
+ }
+ }
+
+ strat_rows_ref <- attr(cycle_tables[[1]], "stratifier_rows") # first cycle's levels drive the headers below
+ cat_labels_ref <- strat_rows_ref$catLabel
+
+ gt_table <- create_descriptive_table_display(combined)
+
+ col_labels <- list(type = "Type")
+ for (cycle in cycles) {
+ key <- paste0("Cycle_", cycle)
+ cycle_data <- cycle_data_list[[key]]
+ cycle_lbl <- cycle_labels[as.character(cycle)]
+
+ span_cols <- c()
+ for (cat_label in cat_labels_ref) {
+ col_name <- paste0("Cycle", cycle, "_", cat_label)
+ strat_val <- strat_rows_ref$recEnd[strat_rows_ref$catLabel == cat_label]
+ n_val <- nrow(cycle_data[ # header N = rows of this cycle with this stratifier value
+ !is.na(cycle_data[[column_stratifier]]) &
+ cycle_data[[column_stratifier]] == strat_val, ])
+ col_labels[[col_name]] <- gt::html(paste0(
+ cat_label, " (N = ", format(n_val, big.mark = ","), ")a"
+ ))
+ span_cols <- c(span_cols, col_name)
+ }
+
+ gt_table <- gt_table |>
+ gt::tab_spanner(label = cycle_lbl, columns = dplyr::all_of(span_cols))
+ }
+
+ gt_table |>
+ gt::cols_label(.list = col_labels) |>
+ gt::tab_style(
+ style = gt::cell_text(weight = "bold"),
+ locations = gt::cells_column_labels()
+ ) |>
+ gt::tab_header(
+ title = "Sex and cycle-stratified population characteristics"
+ ) |>
+ gt::cols_width(type ~ gt::px(108)) |>
+ gt::tab_footnote(footnote = gt::html(paste0(
+ "a ", categorical_predictor_footnote, " ",
+ continuous_predictor_footnote
+ ))) |>
+ gt::tab_footnote(
+ footnote = "Abbreviations: IQR, interquartile range; N, number"
+ )
+}
diff --git a/R/data-cleaning.R b/R/data-cleaning.R
new file mode 100644
index 0000000..a2a3f06
--- /dev/null
+++ b/R/data-cleaning.R
@@ -0,0 +1,131 @@
+# data-cleaning.R
+# Age restriction, distribution checks, and out-of-range truncation.
+# Pipeline target: cleaned_data
+
+#' Clean study data
+#'
+#' Three steps:
+#' 1. Age restriction: drops rows whose age (column from survey_var(cfg,
+#'    "age")) is below cfg$age_exclusion_min; rows with missing age are kept.
+#'    Skipped when the bound is NULL or the column is absent.
+#' 2. Selects continuous variables with role "predictor" or
+#'    "model-stratifier" that are present in the data.
+#' 3. Runs check_skewness() with cfg$skewness_threshold, prints the summary,
+#'    and passes flagged variables to truncate_continuous() with
+#'    cfg$truncate_percentile.
+#' Progress is reported via message().
+#'
+#' @param study_data Output of load_study_data()
+#' @param variables_sheet Variables worksheet data frame
+#' @param cfg Config object from config::get()
+#' @return Cleaned data frame
+clean_study_data <- function(study_data, variables_sheet, cfg) {
+  # Step 1: age restriction (skipped when no bound is configured or the age
+  # column is absent). Rows with a missing age are retained.
+  age_col <- survey_var(cfg, "age")
+  min_age <- cfg$age_exclusion_min
+  if (!is.null(min_age) && age_col %in% colnames(study_data)) {
+    n_before <- nrow(study_data)
+    ages <- study_data[[age_col]]
+    study_data <- study_data[is.na(ages) | ages >= min_age, ]
+    message(
+      "Age restriction: excluded ", n_before - nrow(study_data),
+      " respondents younger than ", min_age,
+      " (", nrow(study_data), " remaining)"
+    )
+  }
+
+  # Step 2: continuous variables with a predictor or model-stratifier role
+  # that are present in the data (sheet order is kept by intersect()).
+  is_continuous <- variables_sheet$variableType == "Continuous"
+  continuous_vars <- variables_sheet$variable[is_continuous]
+  role_vars <- unique(c(
+    select_vars_by_role("predictor", variables_sheet),
+    select_vars_by_role("model-stratifier", variables_sheet)
+  ))
+  candidates <- intersect(continuous_vars, role_vars)
+  candidates <- intersect(candidates, colnames(study_data))
+
+  # Step 3: skewness check; flagged variables are capped at the percentile.
+  skew_check <- check_skewness(study_data, candidates, cfg$skewness_threshold)
+
+  message("\nSkewness check for continuous predictors:")
+  print(skew_check$summary)
+
+  flagged <- skew_check$vars_to_truncate
+  if (length(flagged) == 0) {
+    message("\nNo truncation needed.")
+    return(study_data)
+  }
+  message(
+    "\nTruncating at ", cfg$truncate_percentile, "th percentile: ",
+    paste(flagged, collapse = ", ")
+  )
+  truncate_continuous(study_data, flagged, cfg$truncate_percentile)
+}
+
+#' Check skewness of continuous variables
+#'
+#' Uses the type 2 (SAS-style) sample skewness,
+#' n / ((n - 1)(n - 2)) * sum(((x - mean) / sd)^3), on the non-NA values of
+#' each variable. Variables with fewer than 3 valid values or zero standard
+#' deviation are left out of the summary.
+#'
+#' @param data Data frame
+#' @param vars Character vector of continuous variable names to check
+#' @param threshold Absolute skewness threshold (default 1)
+#' @return List with `vars_to_truncate` (character) and `summary` (data frame,
+#'   or NULL when no variable could be summarised)
+check_skewness <- function(data, vars, threshold = 1) {
+  summarise_one <- function(var) {
+    x <- data[[var]]
+    x_valid <- x[!is.na(x)] # is.na() is also TRUE for tagged NAs
+    n <- length(x_valid)
+    if (n < 3) return(NULL)
+    s <- sd(x_valid)
+    if (s == 0) return(NULL)
+
+    # s is the sample sd (n - 1 denominator), so the matching type 2 form is
+    # n / ((n - 1)(n - 2)) * sum(z^3). The earlier m3 / s^3 * sqrt(n(n - 1)) /
+    # (n - 2) form needs the population sd and was low by ((n - 1) / n)^1.5.
+    skew <- n / ((n - 1) * (n - 2)) * sum(((x_valid - mean(x_valid)) / s)^3)
+    data.frame(
+      variable = var,
+      n_valid = n,
+      skewness = round(skew, 3),
+      abs_skewness = round(abs(skew), 3),
+      action = if (abs(skew) >= threshold) "truncate" else "keep",
+      stringsAsFactors = FALSE
+    )
+  }
+
+  # Filter() copes with an empty `vars`; `!sapply(list(), is.null)` errored.
+  rows <- do.call(rbind, Filter(Negate(is.null), lapply(vars, summarise_one)))
+  flagged <- if (is.null(rows)) character(0) else rows$variable[rows$action == "truncate"]
+  list(vars_to_truncate = flagged, summary = rows)
+}
+
+#' Cap continuous variables at a given percentile
+#'
+#' For each variable the cap is quantile(x, percentile / 100, na.rm = TRUE);
+#' non-NA values above it are replaced by the cap, in the order given by
+#' `vars`. NA entries (including haven::tagged_na values) are never selected
+#' for capping, so they are taken from the original vector.
+#'
+#' @param data Data frame
+#' @param vars Character vector of variable names to truncate
+#' @param percentile Percentile to truncate at (0-100)
+#' @return Data frame with truncated values
+truncate_continuous <- function(data, vars, percentile) {
+  stopifnot(percentile >= 0, percentile <= 100)
+  prob <- percentile / 100
+
+  for (var in vars) {
+    values <- data[[var]]
+    upper <- quantile(values, prob, na.rm = TRUE)
+    # The condition is FALSE (never NA) for missing values, so `false` applies.
+    above <- !is.na(values) & values > upper
+    data[[var]] <- dplyr::if_else(above, upper, values, missing = values)
+  }
+  data
+}
diff --git a/R/descriptive-data.R b/R/descriptive-data.R
new file mode 100644
index 0000000..e299307
--- /dev/null
+++ b/R/descriptive-data.R
@@ -0,0 +1,37 @@
+# descriptive-data.R
+# CSHM-specific wrapper for calculating descriptive statistics.
+# Pipeline targets: table_1a_data, table_1b_data
+
+#' Calculate descriptive statistics for the CSHM study population
+#'
+#' Describes every "predictor"-role variable that is present in `data`,
+#' stratified by the first "model-stratifier"-role variable, via
+#' get_descriptive_data(). Predictors missing from `data` are skipped with a
+#' message. No row stratification is applied.
+#'
+#' @param data Cleaned or imputed study data frame
+#' @param variables_sheet Variables worksheet data frame
+#' @param variable_details_sheet Variable details worksheet data frame
+#' @return Data frame of descriptive statistics (input to create_descriptive_table)
+get_cshm_desc_data <- function(data, variables_sheet, variable_details_sheet) {
+  predictors <- select_vars_by_role("predictor", variables_sheet)
+  stratifier <- select_vars_by_role("model-stratifier", variables_sheet)[1]
+
+  # Variables listed in the sheet can be absent from data (e.g. when no
+  # variable_details rows matched during harmonization); describe the rest.
+  data_cols <- colnames(data)
+  available <- intersect(predictors, data_cols)
+  missing_vars <- setdiff(predictors, data_cols)
+  if (length(missing_vars) > 0) {
+    message("Descriptive table: skipping ", length(missing_vars),
+            " variables not in data: ", paste(missing_vars, collapse = ", "))
+  }
+
+  get_descriptive_data(
+    data,
+    variables_sheet,
+    variable_details_sheet,
+    variables = available,
+    stratify_config = list(all = list(stratifier))
+  )
+}
diff --git a/R/get-descriptive-data.R b/R/get-descriptive-data.R
new file mode 100644
index 0000000..b17e512
--- /dev/null
+++ b/R/get-descriptive-data.R
@@ -0,0 +1,218 @@
+# get-descriptive-data.R
+# Calculate descriptive statistics for the study population.
+# Ported from DemPoRT-V2-dev (origin/dev).
+
+#' Calculate descriptive statistics for a set of variables
+#'
+#' For every variable in `variables`, one-row data frames are built for each
+#' stratum that map_stratifier_data() passes to the callback, then stacked:
+#' continuous variables get one summary row (median, quartiles, min, max and
+#' non-missing count `n`); every variable gets one row per missing-data
+#' category ("NA::a", "NA::b", "NA::c"); categorical variables get one row per
+#' `recEnd` value from get_unique_rec_end_rows(). Category rows hold the count
+#' `n` and `percent`, a proportion of the stratum's rows (not multiplied by
+#' 100; `NaN` for an empty stratum). Every row also carries `groupBy_<i>` /
+#' `groupByValue_<i>` columns naming the stratifiers and stratum values it was
+#' computed for; unused slots are `NA`.
+#'
+#' @param data Study data frame.
+#' @param variables_sheet Variables worksheet; needs a `variable` column.
+#' @param variables_details_sheet Variable details worksheet.
+#' @param variables Character vector of variable names to describe.
+#' @param stratify_config Named list of stratifier groups keyed by variable
+#'   name; the key "all" applies to every variable.
+#' @return Data frame with one row per variable, category and stratum; an empty
+#'   data frame when no rows were produced.
+get_descriptive_data <- function(
+  data,
+  variables_sheet,
+  variables_details_sheet,
+  variables,
+  stratify_config
+) {
+  # Widest stratifier group: every row gets this many groupBy column pairs.
+  # The size of the "all" entry is added to each group (length(NULL) is 0).
+  num_all_stratifiers <- length(stratify_config[["all"]])
+  largest_num_stratifiers <- 0
+  for (config_name in names(stratify_config)) {
+    for (group_index in seq_along(stratify_config[[config_name]])) {
+      group_size <- length(stratify_config[[config_name]][[group_index]])
+      largest_num_stratifiers <-
+        max(largest_num_stratifiers, group_size + num_all_stratifiers)
+    }
+  }
+
+  # Rows are collected in a list and bound once at the end; the callbacks
+  # previously grew the result with rbind(), copying it on every append.
+  rows <- list()
+  add_row <- function(row) {
+    rows[[length(rows) + 1L]] <<- row
+  }
+
+  # Skeleton shared by all row types: statistic columns set to NA, then one
+  # groupBy_/groupByValue_ pair per slot, filled for the active stratifiers.
+  # `counts_first` places n/percent before the groupBy columns, which keeps
+  # the column order of a result whose first row is a continuous summary.
+  new_row_for <- function(info, variable, cat_value, counts_first = FALSE) {
+    row <- data.frame(
+      variable = variable, cat = cat_value,
+      median = NA, percentile25 = NA, percentile75 = NA, min = NA, max = NA
+    )
+    if (counts_first) {
+      row$n <- NA
+      row$percent <- NA
+    }
+    for (si in seq_len(largest_num_stratifiers)) {
+      row[[paste0("groupBy_", si)]] <- NA
+      row[[paste0("groupByValue_", si)]] <- NA
+    }
+    for (si in seq_along(info$stratifiers)) {
+      strat <- info$stratifiers[[si]]
+      row[[paste0("groupBy_", si)]] <- strat
+      row[[paste0("groupByValue_", si)]] <-
+        info$stratifier_combination[[strat]][1]
+    }
+    row
+  }
+
+  na_tags <- c("NA::a" = "a", "NA::b" = "b", "NA::c" = "c")
+  for (variable in variables) {
+    variable_sheet_row <- variables_sheet[variables_sheet$variable == variable, ]
+
+    if (is_continuous_variable(variable_sheet_row)) {
+      map_stratifier_data(
+        data, variables_sheet, variables_details_sheet, variable, stratify_config,
+        function(info) {
+          row <- new_row_for(info, variable, NA, counts_first = TRUE)
+          vals <- info$data[[variable]]
+          vals <- vals[!is.na(vals)]
+          s <- summary(vals)
+          row$median <- s[[3]]
+          row$percentile25 <- s[[2]]
+          row$percentile75 <- s[[5]]
+          row$min <- s[[1]]
+          row$max <- s[[6]]
+          row$n <- length(vals)
+          add_row(row)
+        }
+      )
+    }
+
+    for (na_type in names(na_tags)) {
+      tagged_na_type <- na_tags[[na_type]]
+      map_stratifier_data(
+        data, variables_sheet, variables_details_sheet, variable, stratify_config,
+        function(info) {
+          row <- new_row_for(info, variable, na_type)
+          filtered <- dplyr::filter(
+            info$data,
+            haven::is_tagged_na(.data[[variable]], tagged_na_type) |
+              .data[[variable]] == paste0("NA(", tagged_na_type, ")")
+          )
+          row$n <- nrow(filtered)
+          row$percent <- row$n / nrow(info$data)
+          add_row(row)
+        }
+      )
+    }
+
+    if (is_categorical_variable(variable_sheet_row)) {
+      variable_detail_rows <- get_unique_rec_end_rows(
+        variables_details_sheet, variable, FALSE
+      )
+      for (vdi in seq_len(nrow(variable_detail_rows))) {
+        current_rec_end <- variable_detail_rows[vdi, "recEnd"]
+        map_stratifier_data(
+          data, variables_sheet, variables_details_sheet, variable, stratify_config,
+          function(info) {
+            row <- new_row_for(info, variable, current_rec_end)
+            filtered <- dplyr::filter(
+              info$data,
+              .data[[variable]] == current_rec_end
+            )
+            row$n <- nrow(filtered)
+            row$percent <- row$n / nrow(info$data)
+            add_row(row)
+          }
+        )
+      }
+    }
+  }
+
+  if (length(rows) == 0) {
+    return(data.frame())
+  }
+  do.call(rbind, rows)
+}
+
+# Calls `iterator` once per stratum of `variable`'s stratifier configuration
+# (its own groups with the "all" stratifiers prepended). With no stratifiers
+# configured, `iterator` is called once on all of `data` and its value returned.
+map_stratifier_data <- function(
+  data, variables_sheet, variables_details_sheet,
+  variable, stratify_config, iterator
+) {
+  # A bare NA is the "no stratifiers configured" sentinel.
+  is_unset <- function(config) {
+    length(config) == 1 && is.na(config)
+  }
+  global_stratifiers <- stratify_config[["all"]]
+
+  stratifier_groups <- stratify_config[[variable]]
+  if (is.null(stratifier_groups)) {
+    stratifier_groups <- NA
+  }
+  if (!is.null(global_stratifiers)) {
+    if (is_unset(stratifier_groups)) {
+      stratifier_groups <- list(global_stratifiers)
+    } else {
+      # Prepend (after = 0) the global stratifiers to each group.
+      for (gi in seq_along(stratifier_groups)) {
+        stratifier_groups[[gi]] <- append(
+          stratifier_groups[[gi]], global_stratifiers, 0
+        )
+      }
+    }
+  }
+
+  if (is_unset(stratifier_groups)) {
+    return(iterator(list(
+      data = data, stratifiers = list(), stratifier_combination = data.frame()
+    )))
+  }
+
+  for (stratifiers in stratifier_groups) {
+    category_sets <- list()
+    for (strat in stratifiers) {
+      if (is_continuous_variable(get_row_for_variable(strat, variables_sheet))) {
+        stop(paste("Stratifier", strat, "for variable", variable,
+                   "is continuous — not supported"))
+      }
+      category_sets[[strat]] <- c(
+        get_unique_rec_end_rows(variables_details_sheet, strat, TRUE)$recEnd,
+        "NA::c"
+      )
+    }
+    all_combos <- do.call(tidyr::expand_grid, category_sets)
+
+    for (ci in seq_len(nrow(all_combos))) {
+      combo <- all_combos[ci, ]
+      data_for_combo <- data
+      for (strat in stratifiers) {
+        strat_cat <- combo[1, ][[strat]]
+        formatted_cat <- dplyr::case_when(
+          strat_cat == "NA::a" ~ "NA(a)",
+          strat_cat == "NA::b" ~ "NA(b)",
+          strat_cat == "NA::c" ~ "NA(c)",
+          TRUE ~ strat_cat
+        )
+        data_for_combo <-
+          dplyr::filter(data_for_combo, !!as.symbol(strat) == formatted_cat)
+      }
+      iterator(list(
+        data = data_for_combo, stratifiers = stratifiers,
+        stratifier_combination = combo
+      ))
+    }
+  }
+}
diff --git a/R/imputation.R b/R/imputation.R
new file mode 100644
index 0000000..8096a57
--- /dev/null
+++ b/R/imputation.R
@@ -0,0 +1,106 @@
+# imputation.R
+# Multiple imputation via MICE for missing smoking history variables.
+# Pipeline target: analysis_data
+
+#' Impute missing values via MICE
+#'
+#' Applies multiple imputation to predictor variables with random missingness
+#' (NA(b) = don't know/refused). Structural missingness — NA(a) (not
+#' applicable) and NA(c) (not asked this cycle) — is preserved: factor columns
+#' carry it as ordinary levels, and numeric columns get their tagged NAs
+#' restored after imputation, because MICE fills every `NA`, tagged or not.
+#'
+#' Number of imputations and iterations are controlled by cfg$imputation_m and
+#' cfg$imputation_maxit. Use m=1, maxit=1 in draft/dev for fast iteration.
+#'
+#' @param cleaned_data Output of clean_study_data()
+#' @param variables_sheet Variables worksheet data frame
+#' @param cfg Config object from config::get()
+#' @return Data frame with NA(b) values imputed (first completed dataset)
+impute_data <- function(cleaned_data, variables_sheet, cfg) {
+  # "imputation-predictor" covers every variable that should inform imputation
+  impute_vars <- select_vars_by_role("imputation-predictor", variables_sheet)
+  impute_vars <- intersect(impute_vars, colnames(cleaned_data))
+
+  # Check which variables actually have NA(b) — only those need imputation
+  has_na_b <- vapply(impute_vars, function(var) {
+    x <- cleaned_data[[var]]
+    if (is.factor(x)) {
+      "NA(b)" %in% as.character(x)
+    } else {
+      any(haven::is_tagged_na(x, "b"))
+    }
+  }, logical(1))
+  vars_to_impute <- impute_vars[has_na_b]
+
+  message(
+    "Imputation: ", length(vars_to_impute),
+    " variables with NA(b) missingness:\n ",
+    paste(vars_to_impute, collapse = ", ")
+  )
+  if (length(vars_to_impute) == 0) {
+    message("No NA(b) values found — returning cleaned data unchanged.")
+    return(cleaned_data)
+  }
+
+  # Pass all impute_vars so auxiliary predictors (those without NA(b)) are in
+  # the predictor matrix; only vars_to_impute are written back below.
+  mice_input <- prepare_for_mice(cleaned_data, impute_vars)
+  message(
+    "Running MICE: m=", cfg$imputation_m,
+    ", maxit=", cfg$imputation_maxit
+  )
+  mice_result <- mice::mice(
+    mice_input,
+    m = cfg$imputation_m, maxit = cfg$imputation_maxit, printFlag = FALSE
+  )
+  completed <- mice::complete(mice_result, action = 1)  # first imputation
+
+  result <- cleaned_data
+  for (var in vars_to_impute) {
+    original <- cleaned_data[[var]]
+    imputed <- completed[[var]]
+    # Tagged NAs other than NA(b) are structural: put the original values
+    # back. Never TRUE for factors, whose NA(a)/NA(c) are levels, not NAs.
+    structural <- haven::is_tagged_na(original) &
+      !haven::is_tagged_na(original, "b")
+    if (any(structural)) {
+      imputed[structural] <- original[structural]
+    }
+    result[[var]] <- imputed
+  }
+
+  message("Imputation complete.")
+  result
+}
+
+#' Prepare variables for MICE by converting NA(b) to regular NA
+#'
+#' Subsets `data` to `vars` and rewrites each column: factors lose the "NA(b)"
+#' level (those cells become NA) while "NA(a)" and "NA(c)" stay ordinary
+#' levels; numeric columns get tagged NA(b) replaced by an untagged NA. Other
+#' column types are returned untouched. Numeric NA(a)/NA(c) keep their tags
+#' but remain `NA` to `is.na()`, so the caller must restore them if needed.
+#'
+#' @param data Data frame
+#' @param vars Character vector of variable names to pass to MICE
+#' @return Data frame subset with only `vars`, NA(b) converted to regular NA
+prepare_for_mice <- function(data, vars) {
+  strip_na_b <- function(x) {
+    if (is.factor(x)) {
+      kept_levels <- setdiff(levels(x), "NA(b)")
+      values <- as.character(x)
+      values[values == "NA(b)"] <- NA_character_
+      return(factor(values, levels = kept_levels))
+    }
+    if (is.numeric(x)) {
+      x[haven::is_tagged_na(x, "b")] <- NA_real_
+    }
+    x
+  }
+  out <- data[, vars, drop = FALSE]
+  for (var in vars) {
+    out[[var]] <- strip_na_b(out[[var]])
+  }
+  out
+}
diff --git a/R/legacy/process_smoking_initiation.R b/R/legacy/process_smoking_initiation.R
new file mode 100644
index 0000000..85cb83a
--- /dev/null
+++ b/R/legacy/process_smoking_initiation.R
@@ -0,0 +1,248 @@
+#' Process Smoking Initiation Data for Age-Period-Cohort Modeling
+#'
+#' @title Process Smoking Initiation Data
+#'
+#' @description
+#' This function processes data for smoking initiation based on the Canadian Community Health Survey (CCHS)
+#' data that has been harmonized with the cchsflow package. It identifies individuals who have initiated
+#' smoking and creates a dataset with key variables needed for age-period-cohort (APC) modeling of smoking
+#' initiation rates.
+#'
+#' This function implements part of the Canadian Smoking Histories Model in R, specifically
+#' replicating the data preparation step from line 120 in the original SAS code (Modeling2013.sas).
+#'
+#' @param dataset A data frame containing harmonized CCHS smoking history data (from cchsflow).
+#' The dataset must contain the following variables:
+#' \itemize{
+#' \item \code{sex} - Sex of the respondent ("M" or "F")
+#' \item \code{SMK_01A} - Whether respondent has smoked 100+ cigarettes in lifetime (1=Yes, 2=No)
+#' \item \code{agefirst} - Age when respondent first smoked a whole cigarette
+#' \item \code{cchsbdate} - CCHS survey date as a Date object
+#' }
+#' @param sex Character string indicating sex ("M" or "F") for filtering the data
+#'
+#' @return A data frame with the variables needed for smoking initiation APC modeling:
+#' \describe{
+#' \item{ont_id}{Ontario resident identifier - unique ID for each respondent}
+#' \item{weighting}{Survey weighting factor - sampling weight assigned to each respondent}
+#' \item{age}{Age of smoking initiation - age when respondent first smoked}
+#' \item{cohort}{Birth cohort (calendar year) - year of birth}
+#' \item{period}{Period (calendar year) when smoking was initiated - calculated as cohort + age}
+#' \item{init}{Binary indicator of smoking initiation (1=initiated smoking)}
+#' }
+#'
+#' The returned data frame contains ONLY respondents who have initiated smoking (smoked at least
+#' 100 cigarettes in their lifetime) and were at least 8 years old when they started. Never-smokers
+#' are excluded from the final dataset. Only birth cohorts from 1920 onwards are included.
+#'
+#' This dataset serves as the numerator for calculating age-period-cohort rates of smoking initiation.
+#'
+#' @details
+#' This function replicates the SAS code used in the Canadian History Smoking Generator Model.
+#' It filters data by sex, processes the harmonized smoking status variables, and creates a dataset
+#' that serves as the numerator for age-period-cohort rates in smoking initiation modeling.
+#'
+#' The function uses the following workflow:
+#' 1. Filters dataset by sex
+#' 2. Identifies never-smokers (who have not smoked 100+ cigarettes)
+#' 3. Sets initialization status (init=1 for smokers, init=0 for never-smokers)
+#' 4. Calculates smoking initiation date
+#' 5. Filters to include only those born in 1920 or later
+#' 6. Creates a subset containing only those who initiated smoking
+#' 7. Applies age filter (age >= 8)
+#' 8. Calculates period (year of initiation) as cohort + age
+#' 9. Returns final dataset sorted by age, period, and cohort
+#'
+#' The function uses harmonized variable names from the cchsflow package which standardizes variables
+#' across different CCHS cycles.
+#'
+#' For age-period-cohort modeling, this dataset should be paired with a denominator dataset that includes
+#' the entire population at risk for smoking initiation at each age, period, and cohort combination.
+#'
+#' @examples
+#' \dontrun{
+#' # Assuming 'cchs_data' is your harmonized CCHS dataset from cchsflow
+#' inits_male <- process_smoking_initiation(cchs_data, sex = "M")
+#' inits_female <- process_smoking_initiation(cchs_data, sex = "F")
+#'
+#' # Check the structure of the output
+#' str(inits_male)
+#'
+#' # View the first few rows
+#' head(inits_male)
+#'
+#' # Get summary statistics
+#' summary(inits_male)
+#' }
+#'
+#' @section Test Data:
+#' Here's a small example dataset to test the function:
+#'
+#' ```r
+#' # Create test data
+#' test_data <- data.frame(
+#' ont_id = c(1001, 1002, 1003, 1004, 1005),
+#' sex = c("M", "F", "M", "F", "M"),
+#' SMK_01A = c(1, 2, 1, 1, 1), # 1=Yes, 2=No to 100+ cigarettes
+#' agefirst = c(16, NA, 12, 21, 7),
+#' cchsbdate = as.Date(c("2001-06-15", "2001-07-20", "2001-08-10",
+#' "2001-09-05", "2001-10-25")),
+#' weighting = c(150, 200, 175, 225, 190)
+#' )
+#'
+#' # Process the test data for males
+#' result <- process_smoking_initiation(test_data, sex = "M")
+#' print(result)
+#' # Expected output: A dataset with 2 rows (ont_id 1001 and 1003)
+#' # ont_id 1005 is excluded because age < 8
+#' ```
+#'
+#' @seealso
+#' \code{\link[cchsflow]{cchsflow}} for information on the CCHS data harmonization
+#'
+#' @references
+#' Manuel, D.G. et al. (2013) "Canadian Smoking Histories Model"
+#'
+#' @export
+process_smoking_initiation <- function(dataset, sex = "M") {
+  # Workflow:
+  #   1. validate inputs and keep respondents of the requested sex
+  #   2. flag never-smokers (sentinel age 101) and derive init
+  #   3. derive cohort as the calendar year of cchsbdate
+  #   4. keep cohort >= 1920, init == 1 and age >= 8
+  #   5. derive period = cohort + age, select columns, sort
+
+  # Validate inputs
+  if (!is.data.frame(dataset)) {
+    stop("dataset must be a data frame")
+  }
+
+  # length check first: `if` needs a single TRUE/FALSE
+  if (length(sex) != 1 || !(sex %in% c("M", "F"))) {
+    stop("sex must be either 'M' or 'F'")
+  }
+
+  required_vars <- c(
+    "sex", "SMK_01A", "agefirst", "cchsbdate", "ont_id", "weighting"
+  )
+  missing_vars <- setdiff(required_vars, names(dataset))
+  if (length(missing_vars) > 0) {
+    stop(
+      "Missing required variables in dataset: ",
+      paste(missing_vars, collapse = ", ")
+    )
+  }
+
+  # Filter dataset by sex. which() drops rows whose sex is NA; plain logical
+  # indexing would turn each of them into an all-NA row.
+  filtered_data <- dataset[which(dataset$sex == sex), , drop = FALSE]
+
+  # Never-smokers (SMK_01A == 2, "No" to 100+ lifetime cigarettes) get the
+  # sentinel age 101. A missing SMK_01A makes agefirst NA.
+  never_smoker_age <- 101
+  filtered_data$agefirst <- ifelse(
+    filtered_data$SMK_01A == 2,
+    never_smoker_age, filtered_data$agefirst
+  )
+
+  # init = 1: has initiated smoking (smoked 100+ cigarettes)
+  # init = 0: never initiated smoking
+  filtered_data$init <- ifelse(filtered_data$agefirst == never_smoker_age, 0, 1)
+
+  # The randomised init_date from the SAS port is no longer computed: it was
+  # never part of the returned data, yet it advanced the global RNG state on
+  # every call.
+
+  # Cohort is the calendar year of cchsbdate.
+  # NOTE(review): the roxygen describes cchsbdate as the survey date while
+  # cohort is documented as year of birth; confirm which date the column holds.
+  filtered_data$cohort <- as.numeric(format(filtered_data$cchsbdate, "%Y"))
+
+  # Keep cohorts from 1920 onwards, ever-smokers only (this EXCLUDES
+  # never-smokers) and initiation ages of 8 and above. which() drops rows
+  # where a condition is NA, as the subset() calls it replaces did, but
+  # without subset()'s non-standard evaluation.
+  eligible <- which(
+    filtered_data$cohort >= 1920 &
+      filtered_data$init == 1 &
+      filtered_data$agefirst >= 8
+  )
+  inits <- filtered_data[eligible, , drop = FALSE]
+
+  # Age at initiation, and period = calendar year of initiation
+  inits$age <- inits$agefirst
+  inits$period <- inits$cohort + inits$age
+
+  # Select only the required variables for APC modeling
+  apc_columns <- c("ont_id", "weighting", "age", "cohort", "period", "init")
+  result <- inits[, apc_columns]
+
+  # Sort the data by age, period, cohort (as in original SAS code)
+  result[order(result$age, result$period, result$cohort), ]
+}
+
+#' Create Test Data for Smoking Initiation Processing
+#'
+#' @title Create Test CCHS Smoking Data
+#'
+#' @description
+#' Creates a sample dataset mimicking harmonized CCHS data for testing the
+#' smoking initiation processing function.
+#'
+#' @param n Number of observations to generate
+#' @param seed Random seed for reproducibility
+#'
+#' @return A data frame with simulated CCHS data
+#'
+#' @examples
+#' test_data <- create_smoking_test_data(10)
+#' head(test_data)
+#'
+#' @export
+create_smoking_test_data <- function(n = 5, seed = 123) {
+  if (!is.numeric(n) || length(n) != 1 || is.na(n) || n < 0 || n != floor(n)) {
+    stop("n must be a single non-negative whole number")
+  }
+  set.seed(seed)
+
+  # Sequential IDs. seq_len(n) is empty for n = 0, whereas 1:n would count
+  # down (1, 0) and produce two bogus rows.
+  ids <- 1000 + seq_len(n)
+
+  # Generate random sex
+  sexes <- sample(c("M", "F"), n, replace = TRUE)
+
+  # Smoking status (1=Yes, 2=No to 100+ cigarettes), sampled with weights
+  # 0.7 (smoker) and 0.3 (non-smoker)
+  smk_status <- sample(c(1, 2), n, replace = TRUE, prob = c(0.7, 0.3))
+
+  # Age of first cigarette: NA for non-smokers; smokers get an age from 8 to
+  # 30 with the teenage years weighted highest. Ages are still drawn one
+  # smoker at a time, in row order, so the random draws happen in the same
+  # sequence as before for any n >= 1.
+  age_choices <- c(8:14, 15:19, 20:30)
+  age_weights <- c(rep(0.05, 7), rep(0.1, 5), rep(0.03, 11))
+  age_first <- rep(NA_real_, n)
+  for (i in which(smk_status == 1)) {
+    age_first[i] <- sample(age_choices, 1, prob = age_weights)
+  }
+
+  # Survey dates between 2000 and 2018; days stop at 28 so that every
+  # year/month/day combination is a valid date
+  survey_years <- sample(2000:2018, n, replace = TRUE)
+  survey_months <- sample(1:12, n, replace = TRUE)
+  survey_days <- sample(1:28, n, replace = TRUE)
+  survey_dates <- as.Date(paste(survey_years, survey_months, survey_days, sep = "-"))
+
+  # Survey weights between 50 and 300
+  weights <- round(runif(n, 50, 300), 1)
+
+  data.frame(
+    ont_id = ids,
+    sex = sexes,
+    SMK_01A = smk_status,
+    agefirst = age_first,
+    cchsbdate = survey_dates,
+    weighting = weights
+  )
+}
\ No newline at end of file
diff --git a/R/legacy/smoking.R b/R/legacy/smoking.R
new file mode 100644
index 0000000..ba25692
--- /dev/null
+++ b/R/legacy/smoking.R
@@ -0,0 +1,1117 @@
+#' @title Time since quit smoking
+#'
+#' @description This function creates a derived variable (time_quit_smoking_der)
+#' that calculates the approximate time a former smoker has quit smoking based
+#' on various CCHS smoking variables. This variable is for CCHS respondents in
+#' CCHS surveys 2003-2014.
+#'
+#' @param SMK_09A_B number of years since quitting smoking. Variable asked to
+#' former daily smokers who quit <3 years ago.
+#'
+#' @param SMKG09C number of years since quitting smoking. Variable asked to
+#' former daily smokers who quit >=3 years ago.
+#'
+#' @return value for time since quit smoking in time_quit_smoking_der.
+#'
+#' @examples
+#' # Using time_quit_smoking_fun() to derive time since quitting smoking across
+#' # CCHS cycles.
+#' # time_quit_smoking_fun() is specified in variable_details.csv along with the
+#' # CCHS variables and cycles included.
+#'
+#' # To transform time_quit_smoking across cycles, use rec_with_table() for each
+#' # CCHS cycle and specify time_quit_smoking, along with each smoking variable.
+#' # Then by using merge_rec_data(), you can combine time_quit_smoking across
+#' # cycles.
+#'
+#' library(cchsflow)
+#'
+#' time_quit2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, c(
+#' "SMK_09A_B", "SMKG09C", "time_quit_smoking"
+#' )
+#' )
+#'
+#' head(time_quit2009_2010)
+#'
+#' time_quit2011_2012 <- rec_with_table(
+#' cchs2011_2012_p, c(
+#' "SMK_09A_B", "SMKG09C", "time_quit_smoking"
+#' )
+#' )
+#'
+#' tail(time_quit2011_2012)
+#'
+#' combined_time_quit <- suppressWarnings(merge_rec_data(time_quit2009_2010,
+#' time_quit2011_2012))
+#'
+#' head(combined_time_quit)
+#' tail(combined_time_quit)
+#'
+#' # Using time_quit_smoking_fun() to generate a pack-years value with user
+#' # inputted number of years since quitting smoking for both former daily
+#' # smokers who quit <3 and >=3 years ago. time_quit_smoking_fun() can also
+#' # generate a pack-years value if you input a value for both number of years
+#' # since quitting smoking. Let's say you quit smoking <3 years ago and stopped
+#' # smoking daily 2 to <3 years ago and it's been 3 to 5 years since you stopped
+#' # smoking daily, your time since quitting smoking can be calculated as follows:
+#'
+#' library(cchsflow)
+#' time_quit_smoking <- time_quit_smoking_fun(3,1)
+#' print(time_quit_smoking)
+#'
+#' # Additional examples of time since quitting smoking calculations produced
+#' # using time_quit_smoking_fun() can be found below. Multiple instances exist
+#' # where an NA output may be produced such as a negative entry and or
+#' # missing SMKG09C values.
+#'
+#' library(cchsflow)
+#' SMK_09A_B <- c(3, 4, 1, 4)
+#' SMKG09C <- c(1, 2, NA, -2)
+#' time_quit_smoking_data <- data.frame(SMK_09A_B, SMKG09C)
+#' print(time_quit_smoking_data)
+#' time_quit_smoking_data$time_quit_smoking <-
+#' time_quit_smoking_fun(time_quit_smoking_data$SMK_09A_B, time_quit_smoking_data$SMKG09C)
+#' print(time_quit_smoking_data)
+#' @export
+
+time_quit_smoking_fun <- function(SMK_09A_B, SMKG09C) {
+  # Fallback for input matching no category: tagged NA(a) when the input is
+  # the "NA(a)" string, tagged NA(b) for anything else.
+  missing_code <- function(x) {
+    if_else2(x == "NA(a)", tagged_na("a"), tagged_na("b"))
+  }
+  # SMKG09C categories mapped to one value each (1 -> 4, 2 -> 8, 3 -> 12);
+  # used below for respondents in SMK_09A_B category 4.
+  SMKG09C_cont <- if_else2(
+    SMKG09C == 1, 4,
+    if_else2(
+      SMKG09C == 2, 8,
+      if_else2(SMKG09C == 3, 12, missing_code(SMKG09C))
+    )
+  )
+  # SMK_09A_B categories 1-3 map to 0.5, 1.5 and 2.5; category 4 defers to
+  # the SMKG09C-based value computed above.
+  if_else2(
+    SMK_09A_B == 1, 0.5,
+    if_else2(
+      SMK_09A_B == 2, 1.5,
+      if_else2(
+        SMK_09A_B == 3, 2.5,
+        if_else2(SMK_09A_B == 4, SMKG09C_cont, missing_code(SMK_09A_B))
+      )
+    )
+  )
+}
+
+#' # Updating the original function time_quit_smoking_fun_A() generates a
+#' # pack-years value with user inputted number of years since quitting smoking
+#' # for both former daily smokers who quit <3 and >=3 years ago. This function
+#' # version utilizes a continuous version of SMKG09C that is available within
+#' # the shared files. time_quit_smoking_fun_A() can also generate a pack-years
+#' # value if you input a value for both number of years since quitting smoking.
+#' # Let's say you quit smoking <3 years ago and stopped smoking daily 2 to <3
+#' # years ago and it's been 2 years since you stopped smoking daily, your
+#' # time since quitting smoking can be calculated as follows:
+#'
+#' library(cchsflow)
+#' time_quit_smoking <- time_quit_smoking_fun_A(3,2)
+#' print(time_quit_smoking)
+#'
+#' # Additional examples of time since quitting smoking calculations produced
+#' # using time_quit_smoking_fun() can be found below. Multiple instances exist
+#' # where an NA output may be produced such as a negative entry and or
+#' # missing SMKG09C values.
+#'
+#' library(cchsflow)
+#' SMK_09A_B <- c(3, 4, 4, 4, 4, 4)
+#' SMKG09C_cont <- c(1, 62, NA, -2, 2, 85)
+#' time_quit_smoking_data <- data.frame(SMK_09A_B, SMKG09C_cont)
+#' print(time_quit_smoking_data)
+#' time_quit_smoking_data$time_quit_smoking <-
+#' time_quit_smoking_fun_A(time_quit_smoking_data$SMK_09A_B, time_quit_smoking_data$SMKG09C_cont)
+#' print(time_quit_smoking_data)
+#' @export
+
+time_quit_smoking_fun_A <- function(SMK_09A_B, SMKG09C_cont,
+                                    min_SMKG09C_cont = 3,
+                                    max_SMKG09C_cont = 82) {
+  # Fallback for unmatched input: "NA(a)" gives tagged NA(a), else NA(b).
+  missing_code <- function(x) {
+    if_else2(x == "NA(a)", tagged_na("a"), tagged_na("b"))
+  }
+  if_else2(
+    SMK_09A_B == 1, 0.5,
+    if_else2(
+      SMK_09A_B == 2, 1.5,
+      if_else2(
+        SMK_09A_B == 3, 2.5,
+        if_else2( # category 4: value must be > 0 and strictly inside (min, max)
+          SMK_09A_B == 4 & SMKG09C_cont > 0 &
+            SMKG09C_cont > min_SMKG09C_cont & SMKG09C_cont < max_SMKG09C_cont,
+          SMKG09C_cont, missing_code(SMK_09A_B)
+        )
+      )
+    )
+  )
+}
+
+#' @title Simple smoking status
+#'
+#' @description This function creates a derived smoking variable (smoke_simple)
+#' with four categories:
+#'
+#' \itemize{
+#' \item non-smoker (never smoked)
+#' \item current smoker (daily and occasional?)
+#' \item former daily smoker quit =<5 years or former occasional smoker
+#' \item former daily smoker quit >5 years
+#' }
+#'
+#' @param SMKDSTY_cat5 derived variable that classifies an individual's smoking
+#' status. This variable captures cycles 2001-2018.
+#'
+#' @param time_quit_smoking derived variable that calculates the approximate
+#' time a former smoker has quit smoking.
+#' See \code{\link{time_quit_smoking_fun}} for documentation on how variable
+#' was derived.
+#'
+#' @examples
+#' # Using the 'smoke_simple_fun' function to create the derived smoking
+#' # variable across CCHS cycles.
+#' # smoke_simple_fun() is specified in the variable_details.csv
+#'
+#' # To create a harmonized smoke_simple variable across CCHS cycles, use
+#' # rec_with_table() for each CCHS cycle and specify smoke_simple_fun and
+#' # the required base variables. Since time_quit_smoking_der is also a derived
+#' # variable, you will have to specify the variables that are derived from it.
+#' # Using merge_rec_data(), you can combine smoke_simple across cycles.
+#'
+#' library(cchsflow)
+#'
+#' smoke_simple2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, c(
+#' "SMKDSTY", "SMK_09A_B", "SMKG09C", "time_quit_smoking",
+#' "smoke_simple"
+#' )
+#' )
+#'
+#' head(smoke_simple2009_2010)
+#'
+#' smoke_simple2011_2012 <- rec_with_table(
+#' cchs2011_2012_p,c(
+#' "SMKDSTY", "SMK_09A_B", "SMKG09C", "time_quit_smoking",
+#' "smoke_simple"
+#' )
+#' )
+#'
+#' tail(smoke_simple2011_2012)
+#'
+#' combined_smoke_simple <-
+#' suppressWarnings(merge_rec_data(smoke_simple2009_2010,smoke_simple2011_2012))
+#'
+#' head(combined_smoke_simple)
+#' tail(combined_smoke_simple)
+#'
+#' # Using smoke_simple_fun() to generate a derived smoking variable across CCHS
+#' # cycles with user inputted smoking status and approximate time a former smoker
+#' # has quit smoking. smoke_simple_fun() can also generate derived smoking variable
+#' # value if you input a value for both number of smoking status and approximate
+#' # time a former smoker has quit smoking. Let's say your smoking status is former
+#' # daily (3) and your approximate time of quit smoking is 6 to 10 years (2),
+#' # your derived smoking variable can be calculated as follows:
+#'
+#' library(cchsflow)
+#' time_quit_smoking <- time_quit_smoking_fun(3,2)
+#' print(time_quit_smoking)
+#'
+#' smoke_simple <- smoke_simple_fun(3,3)
+#' print(smoke_simple)
+#'
+#' # Additional examples of derived smoking variable using smoke_simple_fun() can
+#' # be found below. Multiple instances exist where an NA output may be produced
+#' # such as a negative entry or missing SMKG09C values.
+#'
+#' library(cchsflow)
+#' library(cchsflow)
+#' SMKDSTY_cat5 <- c(1, 3, 2, 3)
+#' SMKG09C <- c(1, 2, 5, NA)
+#' smoke_simple_data <- data.frame(SMKDSTY_cat5, SMKG09C)
+#' print(smoke_simple_data)
+#' smoke_simple_data$smoke_simple <-
+#' smoke_simple_fun(smoke_simple_data$SMKDSTY_cat5, smoke_simple_data$SMKG09C)
+#' print(smoke_simple_data)
+#' @export
+smoke_simple_fun <- function(SMKDSTY_cat5, time_quit_smoking) {
+  # Current-smoker flag: 1 for categories 1-2, 0 for 3-5, else "NA(a)"/"NA(b)"
+  smoker <- ifelse(
+    SMKDSTY_cat5 %in% c(1, 2), 1,
+    ifelse(
+      SMKDSTY_cat5 %in% c(3, 4, 5), 0,
+      ifelse(SMKDSTY_cat5 == "NA(a)", "NA(a)", "NA(b)")
+    )
+  )
+  # Ever-smoker flag: 1 for categories 1-4, 0 for 5, else "NA(a)"/"NA(b)"
+  eversmoker <- ifelse(
+    SMKDSTY_cat5 %in% c(1, 2, 3, 4), 1,
+    ifelse(
+      SMKDSTY_cat5 == 5, 0,
+      ifelse(SMKDSTY_cat5 == "NA(a)", "NA(a)", "NA(b)")
+    )
+  )
+  ifelse(
+    # 0 = non-smoker
+    smoker == 0 & eversmoker == 0, 0,
+    ifelse(
+      # 1 = current smoker
+      smoker == 1 & eversmoker == 1, 1,
+      ifelse(
+        # 2 = former daily smoker quit =<5 years, or former occasional smoker
+        smoker == 0 & eversmoker == 1 & time_quit_smoking <= 5 |
+          SMKDSTY_cat5 == 4, 2,
+        ifelse(
+          # 3 = former daily smoker quit > 5 years
+          smoker == 0 & eversmoker == 1 & time_quit_smoking > 5, 3,
+          ifelse(
+            smoker == "NA(a)" & eversmoker == "NA(a)" &
+              time_quit_smoking == "NA(a)", "NA(a)", "NA(b)"
+          )
+        )
+      )
+    )
+  )
+}
+
+#' # Updating the original function smoke_simple_fun_A() generates a
+#' # derived smoking variable across CCHS cycles with user inputted smoking
+#' # status and approximate time a former smoker has quit smoking. This function
+#' # version utilizes a continuous version of SMKG09C that is available within
+#' # the shared files. smoke_simple_fun() can also generate derived smoking variable
+#' # value if you input a value for both number of smoking status and approximate
+#' # time a former smoker has quit smoking. Let's say your smoking status is former
+#' # daily (3) and your approximate time of quit smoking is 6 to 10 years (2),
+#' # your derived smoking variable can be calculated as follows:
+#'
+#' library(cchsflow)
+#' time_quit_smoking <- time_quit_smoking_fun_A(4,20)
+#' print(time_quit_smoking)
+#'
+#' smoke_simple <- smoke_simple_fun_A(3,20)
+#' print(smoke_simple)
+#'
+#' # Additional examples of time since quitting smoking calculations produced
+#' # using time_quit_smoking_fun() can be found below. Multiple instances exist
+#' # where an NA output may be produced such as a negative entry and or
+#' # missing SMKG09C values.
+#'
+#' library(cchsflow)
+#' SMKDSTY_cat5 <- c(3, 2, 3, 3)
+#' SMKG09C <- c(2, 1, NA, -2)
+#' smoke_simple_data <- data.frame(SMKDSTY_cat5, SMKG09C)
+#' print(smoke_simple_data)
+#' smoke_simple_data$smoke_simple <-
+#' smoke_simple_fun(smoke_simple_data$SMKDSTY_cat5, smoke_simple_data$SMKG09C)
+#' print(smoke_simple_data)
+#' @export
+smoke_simple_fun_A <- function(SMKDSTY_cat5, time_quit_smoking,
+                               min_time_quit_smoking = 0.5,
+                               max_time_quit_smoking = 82) {
+  # Current-smoker (1-2 vs 3-5) and ever-smoker (1-4 vs 5) flags from
+  # SMKDSTY_cat5; unmatched values become "NA(a)" or "NA(b)".
+  smoker <- ifelse(
+    SMKDSTY_cat5 %in% c(1, 2), 1,
+    ifelse(
+      SMKDSTY_cat5 %in% c(3, 4, 5), 0,
+      ifelse(SMKDSTY_cat5 == "NA(a)", "NA(a)", "NA(b)")
+    )
+  )
+  eversmoker <- ifelse(
+    SMKDSTY_cat5 %in% c(1, 2, 3, 4), 1,
+    ifelse(
+      SMKDSTY_cat5 == 5, 0,
+      ifelse(SMKDSTY_cat5 == "NA(a)", "NA(a)", "NA(b)")
+    )
+  )
+  ifelse(
+    # 0 = non-smoker
+    smoker == 0 & eversmoker == 0, 0,
+    ifelse(
+      # 1 = current smoker
+      smoker == 1 & eversmoker == 1, 1,
+      ifelse(
+        # 2 = former daily quit =<5 years (within bounds) or former occasional
+        smoker == 0 & eversmoker == 1 & time_quit_smoking <= 5 &
+          time_quit_smoking >= min_time_quit_smoking &
+          time_quit_smoking <= max_time_quit_smoking |
+          SMKDSTY_cat5 == 4, 2,
+        ifelse(
+          # 3 = former daily smoker quit > 5 years (within bounds)
+          smoker == 0 & eversmoker == 1 & time_quit_smoking > 5 &
+            time_quit_smoking >= min_time_quit_smoking &
+            time_quit_smoking <= max_time_quit_smoking, 3,
+          ifelse(
+            smoker == "NA(a)" & eversmoker == "NA(a)" &
+              time_quit_smoking == "NA(a)", "NA(a)", "NA(b)"
+          )
+        )
+      )
+    )
+  )
+}
+
+#' @title Smoking pack-years
+#'
+#' @description This function creates a derived variable (pack_years_der) that
+#' measures an individual's smoking pack-years based on various CCHS smoking
+#' variables. This is a popular variable used by researchers to quantify
+#' lifetime exposure to cigarette use.
+#'
+#' @details pack-years is calculated by multiplying the number of cigarette
+#' packs per day (20 cigarettes per pack) by the number of years. Example 1:
+#' a respondent who is a current smoker who smokes 1 package of cigarettes for
+#' the last 10 years has smoked 10 pack-years. Pack-years is also calculated
+#' for former smokers. Example 2: a respondent who started smoking at age
+#' 20 years and smoked half a pack of cigarettes until age 40 years smoked for
+#' 10 pack-years.
+#'
+#' @param SMKDSTY_A variable used in CCHS cycles 2001-2014 that classifies an
+#' individual's smoking status.
+#'
+#' @param DHHGAGE_cont continuous age variable.
+#'
+#' @param time_quit_smoking derived variable that calculates the approximate
+#' time a former smoker has quit smoking.
+#' See \code{\link{time_quit_smoking_fun}} for documentation on how variable
+#' was derived
+#'
+#' @param SMKG203_cont age started smoking daily. Variable asked to daily
+#' smokers.
+#'
+#' @param SMKG207_cont age started smoking daily. Variable asked to former
+#' daily smokers.
+#'
+#' @param SMK_204 number of cigarettes smoked per day. Variable asked to
+#' daily smokers.
+#'
+#' @param SMK_05B number of cigarettes smoked per day. Variable asked to
+#' occasional smokers
+#'
+#' @param SMK_208 number of cigarettes smoked per day. Variable asked to former
+#' daily smokers
+#'
+#' @param SMK_05C number of days smoked at least one cigarette
+#'
+#' @param SMK_01A smoked 100 cigarettes in lifetime (y/n)
+#'
+#' @param SMKG01C_cont age smoked first cigarette
+#'
+#' @return value for smoking pack-years in the pack_years_der variable
+#'
+#' @examples
+#' # Using pack_years_fun() to create pack-years values across CCHS cycles
+#' # pack_years_fun() is specified in variable_details.csv along with the CCHS
+#' # variables and cycles included.
+#'
+#' # To transform pack_years_der across cycles, use rec_with_table() for each
+#' # CCHS cycle and specify pack_years_der, along with each smoking variable.
+#' # Since time_quit_smoking_der is also a derived
+#' # variable, you will have to specify the variables that are derived from it.
+#' # Then by using merge_rec_data(), you can combine pack_years_der across
+#' # cycles
+#'
+#' library(cchsflow)
+#'
+#' pack_years2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, c(
+#' "SMKDSTY_A", "DHHGAGE_cont", "SMK_09A_B", "SMKG09C", "time_quit_smoking",
+#' "SMKG203_cont", "SMKG207_cont", "SMK_204", "SMK_05B", "SMK_208",
+#' "SMK_05C", "SMK_01A", "SMKG01C_cont", "pack_years_der"
+#' )
+#' )
+#'
+#' head(pack_years2009_2010)
+#'
+#' pack_years2011_2012 <- rec_with_table(
+#' cchs2011_2012_p,c(
+#' "SMKDSTY_A", "DHHGAGE_cont", "SMK_09A_B", "SMKG09C", "time_quit_smoking",
+#' "SMKG203_cont", "SMKG207_cont", "SMK_204", "SMK_05B", "SMK_208",
+#' "SMK_05C", "SMK_01A", "SMKG01C_cont", "pack_years_der"
+#' )
+#' )
+#'
+#' tail(pack_years2011_2012)
+#'
+#' combined_pack_years <- suppressWarnings(merge_rec_data(pack_years2009_2010,
+#' pack_years2011_2012))
+#'
+#' head(combined_pack_years)
+#' tail(combined_pack_years)
+#' @export
+pack_years_fun <-
+  function(SMKDSTY_A, DHHGAGE_cont, time_quit_smoking, SMKG203_cont,
+           SMKG207_cont, SMK_204, SMK_05B,
+           SMK_208, SMK_05C, SMKG01C_cont, SMK_01A) {
+    # Age verification: a missing or negative age gives NA(b).
+    # NOTE(review): `if` needs a length-one condition, so this function is
+    # presumably applied to one respondent at a time - confirm with callers.
+    if (is.na(DHHGAGE_cont)) {
+      return(tagged_na("b"))
+    } else if (DHHGAGE_cont < 0) {
+      return(tagged_na("b"))
+    }
+
+    # Floor for pack-years from daily smoking; also the value given to former
+    # occasional smokers who smoked at least 100 cigarettes.
+    min_pack_years <- 0.0137
+    # Packs per day while smoking occasionally: SMK_05B * SMK_05C / 30
+    # cigarettes per day, never below 1, converted to packs of 20.
+    occasional_packs <- function() pmax((SMK_05B * SMK_05C / 30), 1) / 20
+    # Pack-years accumulated between starting and quitting daily smoking.
+    former_daily_years <- function() {
+      pmax(((DHHGAGE_cont - SMKG207_cont - time_quit_smoking) *
+        (SMK_208 / 20)), min_pack_years)
+    }
+
+    if_else2(
+      # Daily smoker
+      SMKDSTY_A == 1,
+      pmax(((DHHGAGE_cont - SMKG203_cont) * (SMK_204 / 20)), min_pack_years),
+      if_else2(
+        # Occasional smoker (former daily): daily period plus the occasional
+        # period since quitting daily smoking. The occasional term is in
+        # packs (divided by 20), as it is for never-daily occasional smokers.
+        SMKDSTY_A == 2,
+        former_daily_years() + occasional_packs() * time_quit_smoking,
+        if_else2(
+          # Occasional smoker (never daily)
+          SMKDSTY_A == 3, occasional_packs() * (DHHGAGE_cont - SMKG01C_cont),
+          if_else2(
+            # Former daily smoker (non-smoker now)
+            SMKDSTY_A == 4, former_daily_years(),
+            if_else2(
+              # Former occasional smoker, at least 100 cigarettes in lifetime
+              SMKDSTY_A == 5 & SMK_01A == 1, min_pack_years,
+              if_else2(
+                # Former occasional smoker, fewer than 100 cigarettes
+                SMKDSTY_A == 5 & SMK_01A == 2, 0.007,
+                # Non-smoker, then NA(a); everything else is NA(b)
+                if_else2(SMKDSTY_A == 6, 0,
+                  if_else2(SMKDSTY_A == "NA(a)", tagged_na("a"),
+                    tagged_na("b")))))))))
+  }
+
+#' # Updating the original function pack_years_fun_A() generates pack-years values
+#' # across CCHS cycles. Updated pack_years_fun_A() incorporates minimum and maximum
+#' # values of all inputted variables. Note that while minimum and maximum values are
+#' # preset, they can be altered to researcher-specific values if needed.
+#'
+#' # Additional examples of pack years calculations produced
+#' # using pack_years_fun_A() can be found below. Multiple instances exist
+#' # where an NA output may be produced such as a negative/missing entry or an
+#' # entry existing outside of the allowed variable minimum and maximum values.
+#'
+#' # Examples demonstrating output when preset min/max values are exceeded within data
+#' SMKDSTY_A <- c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5)
+#' DHHGAGE_cont <- c(50, 50, 10, 105, 50, 50, 50, 50, 50, 50,
+#' 50, 50, 50, 50, 50, 50, 50, 50, 50)
+#' time_quit_smoking <- c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.5, 0, 85,
+#' 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5)
+#' SMKG203_cont <- c(15, 15, 15, 15, 15, 3, 90, 15, 15,
+#' 15, 15, 15, 15, 15, 15, 15,15, 15, 15)
+#' SMKG207_cont <- c(NA, NA, NA, NA, NA, NA, NA, 35, 35,
+#' 35, 35, 3, 86, 35, 35, 35, 35, 35, 35)
+#' SMK_204 <- c(25, 25, 25, 25, 25, 25, 25, 25, 25,
+#' 25, 25, 25, 25, 25, 25, 25, 25, 25, 25)
+#' SMK_05B <- c(NA, NA, NA, NA, NA, NA, NA, 10, 10,
+#' 10, 10, 10, 10, 10, 10, 10, 10, 10, 10)
+#' SMK_208 <- c(NA, NA, NA, NA, NA, NA, NA, 10, 10,
+#' 10, 10, 10, 10, 10, 0, 150, 10, 10, 10)
+#' SMK_05C <- c(12, 12, 12, 12, 12, 12, 12, 12, 12,
+#' 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)
+#' SMKG01C_cont <- c(18, 18, 18, 18, 18, 18, 18, 18, 18,
+#' 18, 18, 18, 18, 18, 18, 18, 18, 18, 18)
+#' SMK_01A <- c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
+#' NA, NA, NA, NA, NA, NA, NA, 1, 2, 3)
+#'
+#' pack_years_data <- data.frame(SMKDSTY_A, DHHGAGE_cont, time_quit_smoking,
+#' SMKG203_cont, SMKG207_cont, SMK_204, SMK_05B,
+#' SMK_208, SMK_05C, SMKG01C_cont, SMK_01A)
+#' print(pack_years_data)
+#' pack_years_data$pack_years <-
+#' pack_years_fun_A(pack_years_data$SMKDSTY_A,
+#' pack_years_data$DHHGAGE_cont,
+#' pack_years_data$time_quit_smoking,
+#' pack_years_data$SMKG203_cont,
+#' pack_years_data$SMKG207_cont,
+#' pack_years_data$SMK_204,
+#' pack_years_data$SMK_05B,
+#' pack_years_data$SMK_208,
+#' pack_years_data$SMK_05C,
+#' pack_years_data$SMKG01C_cont,
+#' pack_years_data$SMK_01A)
+#' print(pack_years_data)
+#'
+#' # Examples demonstrating output when min/max values set by user are exceeded
+#' SMKDSTY_A <- c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5)
+#' DHHGAGE_cont <- c(50, 50, 10, 105, 50, 50, 50, 50, 50, 50,
+#' 50, 50, 50, 50, 50, 50, 50, 50, 50)
+#' time_quit_smoking <- c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.5, 0, 85,
+#' 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5)
+#' SMKG203_cont <- c(15, 15, 15, 15, 15, 3, 90, 15, 15,
+#' 15, 15, 15, 15, 15, 15, 15,15, 15, 15)
+#' SMKG207_cont <- c(NA, NA, NA, NA, NA, NA, NA, 35, 35,
+#' 35, 35, 3, 86, 35, 35, 35, 35, 35, 35)
+#' SMK_204 <- c(25, 25, 25, 25, 25, 25, 25, 25, 25,
+#' 25, 25, 25, 25, 25, 25, 25, 25, 25, 25)
+#' SMK_05B <- c(NA, NA, NA, NA, NA, NA, NA, 10, 10,
+#' 10, 10, 10, 10, 10, 10, 10, 10, 10, 10)
+#' SMK_208 <- c(NA, NA, NA, NA, NA, NA, NA, 10, 10,
+#' 10, 10, 10, 10, 10, 0, 150, 10, 10, 10)
+#' SMK_05C <- c(12, 12, 12, 12, 12, 12, 12, 12, 12,
+#' 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)
+#' SMKG01C_cont <- c(18, 18, 18, 18, 18, 18, 18, 18, 18,
+#' 18, 18, 18, 18, 18, 18, 18, 18, 18, 18)
+#' SMK_01A <- c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
+#' NA, NA, NA, NA, NA, NA, NA, 1, 2, 3)
+#'
+#' pack_years_data <- data.frame(SMKDSTY_A, DHHGAGE_cont, time_quit_smoking,
+#'   SMKG203_cont, SMKG207_cont, SMK_204, SMK_05B,
+#'   SMK_208, SMK_05C, SMKG01C_cont, SMK_01A)
+#' print(pack_years_data)
+#' pack_years_data$pack_years <-
+#'   pack_years_fun_A(pack_years_data$SMKDSTY_A,
+#'     pack_years_data$DHHGAGE_cont,
+#'     pack_years_data$time_quit_smoking,
+#'     pack_years_data$SMKG203_cont,
+#'     pack_years_data$SMKG207_cont,
+#'     pack_years_data$SMK_204,
+#'     pack_years_data$SMK_05B,
+#'     pack_years_data$SMK_208,
+#'     pack_years_data$SMK_05C,
+#'     pack_years_data$SMKG01C_cont,
+#'     pack_years_data$SMK_01A,
+#'     min_DHHGAGE_cont = 11, max_DHHGAGE_cont = 103,
+#'     min_time_quit_smoking = 1.0, max_time_quit_smoking = 84,
+#'     min_SMKG203_cont = 4, max_SMKG203_cont = 88,
+#'     min_SMKG207_cont = 4, max_SMKG207_cont = 85,
+#'     min_SMK_204 = 0, max_SMK_204 = 100, min_SMK_05B = 0, max_SMK_05B = 100,
+#'     min_SMK_208 = 0, max_SMK_208 = 100, min_SMK_05C = 0, max_SMK_05C = 31,
+#'     min_SMKG01C_cont = 5, max_SMKG01C_cont = 80)
+#' print(pack_years_data)
+#' @export
+pack_years_fun_A <-
+  function(SMKDSTY_A, DHHGAGE_cont, time_quit_smoking, SMKG203_cont,
+           SMKG207_cont, SMK_204, SMK_05B,
+           SMK_208, SMK_05C, SMKG01C_cont, SMK_01A,
+           min_DHHGAGE_cont = 12, max_DHHGAGE_cont = 102,
+           min_time_quit_smoking = 0.5, max_time_quit_smoking = 82,
+           min_SMKG203_cont = 5, max_SMKG203_cont = 84,
+           min_SMKG207_cont = 5, max_SMKG207_cont = 80,
+           min_SMK_204 = 1, max_SMK_204 = 99, min_SMK_05B = 1, max_SMK_05B = 99,
+           min_SMK_208 = 1, max_SMK_208 = 99, min_SMK_05C = 0, max_SMK_05C = 31,
+           min_SMKG01C_cont = 5, max_SMKG01C_cont = 80) {
+    # Range test with inclusive limits: a value equal to its minimum or
+    # maximum (for example time_quit_smoking = 0.5) is accepted.
+    in_range <- function(x, lower, upper) x >= lower & x <= upper
+    # Age verification: missing, negative or out-of-range ages become NA(b).
+    DHHGAGE_cont <- if_else2(
+      is.na(DHHGAGE_cont) | DHHGAGE_cont < 0 |
+        !in_range(DHHGAGE_cont, min_DHHGAGE_cont, max_DHHGAGE_cont),
+      tagged_na("b"), DHHGAGE_cont
+    )
+    # Floor for pack-years from daily smoking; also given to former
+    # occasional smokers who smoked at least 100 cigarettes.
+    min_pack_years <- 0.0137
+    # Packs per day while smoking occasionally: SMK_05B * SMK_05C / 30
+    # cigarettes per day, never below 1, converted to packs of 20.
+    occasional_packs <- function() pmax((SMK_05B * SMK_05C / 30), 1) / 20
+    # Pack-years accumulated between starting and quitting daily smoking.
+    former_daily_years <- function() {
+      pmax(((DHHGAGE_cont - SMKG207_cont - time_quit_smoking) *
+        (SMK_208 / 20)), min_pack_years)
+    }
+    # Range checks shared by both groups of former daily smokers.
+    former_daily_ok <- function() {
+      in_range(SMKG207_cont, min_SMKG207_cont, max_SMKG207_cont) &
+        in_range(time_quit_smoking, min_time_quit_smoking,
+                 max_time_quit_smoking) &
+        in_range(SMK_208, min_SMK_208, max_SMK_208)
+    }
+    if_else2(
+      # Daily smoker
+      SMKDSTY_A == 1 &
+        in_range(SMKG203_cont, min_SMKG203_cont, max_SMKG203_cont) &
+        in_range(SMK_204, min_SMK_204, max_SMK_204),
+      pmax(((DHHGAGE_cont - SMKG203_cont) * (SMK_204 / 20)), min_pack_years),
+      if_else2(
+        # Occasional smoker (former daily): daily period plus the occasional
+        # period since quitting daily smoking, both expressed in packs.
+        SMKDSTY_A == 2 & former_daily_ok() &
+          in_range(SMK_05B, min_SMK_05B, max_SMK_05B),
+        former_daily_years() + occasional_packs() * time_quit_smoking,
+        if_else2(
+          # Occasional smoker (never daily): uses, and checks, SMKG01C_cont
+          SMKDSTY_A == 3 & in_range(SMK_05C, min_SMK_05C, max_SMK_05C) &
+            in_range(SMKG01C_cont, min_SMKG01C_cont, max_SMKG01C_cont),
+          occasional_packs() * (DHHGAGE_cont - SMKG01C_cont),
+          if_else2(
+            # Former daily smoker (non-smoker now)
+            SMKDSTY_A == 4 & former_daily_ok(), former_daily_years(),
+            if_else2(
+              # Former occasional (SMK_01A: 1 = 100+ cigarettes, 2 = fewer)
+              SMKDSTY_A == 5 & SMK_01A == 1, min_pack_years,
+              if_else2(SMKDSTY_A == 5 & SMK_01A == 2, 0.007,
+                if_else2(SMKDSTY_A == 6, 0,
+                  if_else2(SMKDSTY_A == "NA(a)", tagged_na("a"),
+                    tagged_na("b")))))))))
+  }
+
+#' @title Age started smoking daily - daily/former daily smokers
+#'
+#' @description This function creates a continuous derived variable
+#' (SMKG040_fun) that calculates the approximate age that a daily or former
+#' daily smoker began smoking daily.
+#'
+#' @details SMKG203 (daily smoker) and SMKG207 (former daily) are present in
+#' CCHS 2001-2014, and are separate variables. For CCHS 2015 and onward, SMKG040
+#' (daily/former daily) combines the two previous variables. SMKG040_fun takes
+#' the continuous functions (SMKG203_cont and SMKG207_cont) to create SMKG040
+#' for 2001-2014.
+#'
+#' @note In previous cycles, both SMKG203 and SMKG207 included respondents who
+#' did not state their smoking status. From CCHS 2015 and onward, SMKG040 only
+#' included respondents who specified daily smoker or former daily smoker. As
+#' a result, SMKG040 has a large number of missing respondents for CCHS 2015
+#' survey cycles and onward.
+#'
+#' @param SMKG203_cont age started smoking daily. Variable asked to daily
+#' smokers.
+#'
+#' @param SMKG207_cont age started smoking daily. Variable asked to former
+#' daily smokers.
+#'
+#' @return value for age started smoking daily for daily/former daily smokers in
+#' the SMKG040_cont variable
+#'
+#' @examples
+#' # Using SMKG040_fun() to create age values across CCHS cycles
+#' # SMKG040_fun() is specified in variable_details.csv under SMKG040_cont.
+#'
+#' # To create a continuous harmonized variable for SMKG040, use rec_with_table()
+#' # for each CCHS cycle and specify SMKG040_cont.
+#'
+#' library(cchsflow)
+#'
+#' age_smoke_dfd_2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, c(
+#' "SMKG203_cont", "SMKG207_cont","SMKG040_cont"
+#' )
+#' )
+#'
+#' head(age_smoke_dfd_2009_2010)
+#'
+#' age_smoke_dfd_2011_2012 <- rec_with_table(
+#' cchs2011_2012_p,c(
+#' "SMKG203_cont", "SMKG207_cont","SMKG040_cont"
+#' )
+#' )
+#'
+#' tail(age_smoke_dfd_2011_2012)
+#'
+#' combined_age_smoke_dfd <- suppressWarnings(merge_rec_data
+#' (age_smoke_dfd_2009_2010,age_smoke_dfd_2011_2012))
+#'
+#' head(combined_age_smoke_dfd)
+#' tail(combined_age_smoke_dfd)
+#'
+#' SMKG203_cont <- c(30, NA, NA)
+#' SMKG207_cont <- c(NA, 42, NA)
+#' SMKG040_data <- data.frame(SMKG203_cont, SMKG207_cont)
+#' SMKG040_data$SMKG040 <- SMKG040_fun(SMKG040_data$SMKG203_cont, SMKG040_data$SMKG207_cont)
+#' print(SMKG040_data)
+#' @export
+
+SMKG040_fun <- function(SMKG203_cont, SMKG207_cont) {
+  # Age started smoking daily: SMKG203_cont when present, else SMKG207_cont.
+  # When both are missing the result is NA(a) only if both carry the NA(a)
+  # tag, and NA(b) otherwise. Tags are tested with haven::is_tagged_na()
+  # because `x == tagged_na("a")` is always NA and can never be TRUE.
+  both_na_a <- haven::is_tagged_na(SMKG203_cont, "a") &
+    haven::is_tagged_na(SMKG207_cont, "a")
+  if_else2(!is.na(SMKG203_cont), SMKG203_cont,
+    if_else2(!is.na(SMKG207_cont), SMKG207_cont,
+      if_else2(both_na_a, tagged_na("a"), tagged_na("b"))))
+}
+
+#' # Additional examples of SMKG040_cont calculations produced
+#' # using SMKG040_fun_A() can be found below. Multiple instances exist
+#' # where an NA output may be produced such as a negative/missing entry or an
+#' # entry existing outside of the allowed variable minimum and maximum values.
+#'
+#' SMKG203_cont <- c(30, NA, -2, 95, NA, NA, NA)
+#' SMKG207_cont <- c(NA, 42, NA, NA, -3, 86, NA)
+#' SMKG040_data <- data.frame(SMKG203_cont, SMKG207_cont)
+#' SMKG040_data$SMKG040 <- SMKG040_fun_A(SMKG040_data$SMKG203_cont, SMKG040_data$SMKG207_cont)
+#' print(SMKG040_data)
+#' @export
+
+SMKG040_fun_A <- function(SMKG203_cont, SMKG207_cont,
+                          min_SMKG203_cont = 5, max_SMKG203_cont = 84,
+                          min_SMKG207_cont = 5, max_SMKG207_cont = 80) {
+  # SMKG203_cont is returned when within its inclusive limits, else
+  # SMKG207_cont when within its own; otherwise NA(a) if both inputs are
+  # tagged NA(a) (`x == tagged_na("a")` is always NA, so tags are tested
+  # with haven::is_tagged_na()) and NA(b) in every other case.
+  both_na_a <- haven::is_tagged_na(SMKG203_cont, "a") &
+    haven::is_tagged_na(SMKG207_cont, "a")
+  if_else2(!is.na(SMKG203_cont) & SMKG203_cont >= min_SMKG203_cont &
+    SMKG203_cont <= max_SMKG203_cont, SMKG203_cont,
+    if_else2(!is.na(SMKG207_cont) & SMKG207_cont >= min_SMKG207_cont &
+      SMKG207_cont <= max_SMKG207_cont, SMKG207_cont,
+      if_else2(both_na_a, tagged_na("a"), tagged_na("b"))))
+}
+
+#' @title Categorical smoking pack-years
+#'
+#' @description This function creates a categorical derived variable
+#' (pack_years_cat) that categorizes smoking pack-years (pack_years_der).
+#'
+#' @details pack-years is calculated by multiplying the number of cigarette
+#' packs per day (20 cigarettes per pack) by the number of years. The categories
+#' were based on the Cardiovascular Disease Population Risk Tool
+#' (Douglas Manuel et al. 2018).
+#'
+#' pack_years_cat uses the derived variable pack_years_der. Pack_years_der uses
+#' age and various smoking variables that have been transformed by cchsflow (see
+#' documentation on pack_year_der). In order to categorize pack years across CCHS
+#' cycles, age and smoking variables must be transformed and harmonized.
+#'
+#' @param pack_years_der derived variable that calculates smoking pack-years
+#' See \code{\link{pack_years_fun}} for documentation on how variable
+#' was derived.
+#'
+#' @return value for pack year categories in the pack_years_cat variable.
+#'
+#' @examples
+#' # Using pack_years_fun_cat() to categorize pack year values across CCHS cycles
+#' # pack_years_fun_cat() is specified in variable_details.csv along with the
+#' # CCHS variables and cycles included.
+#'
+#' # To transform pack_years_cat across cycles, use rec_with_table() for each
+#' # CCHS cycle and specify pack_years_cat.
+#' # Since pack_years_der is also a derived variable, you will have to specify
+#' # the variables that it is derived from.
+#' # Since time_quit_smoking_der is also a derived variable in pack_year_der,
+#' # you will have to specify the variables that are derived from it.
+#' # Then by using merge_rec_data(), you can combine pack_years_cat across
+#' # cycles.
+#'
+#' library(cchsflow)
+#'
+#' pack_years_cat_2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, c(
+#' "SMKDSTY_A", "DHHGAGE_cont", "SMK_09A_B", "SMKG09C", "time_quit_smoking",
+#' "SMKG203_cont", "SMKG207_cont", "SMK_204", "SMK_05B", "SMK_208",
+#' "SMK_05C", "SMK_01A", "SMKG01C_cont", "pack_years_der", "pack_years_cat"
+#' )
+#' )
+#'
+#' head(pack_years_cat_2009_2010)
+#'
+#' pack_years_cat_2011_2012 <- rec_with_table(
+#' cchs2011_2012_p,c(
+#' "SMKDSTY_A", "DHHGAGE_cont", "SMK_09A_B", "SMKG09C", "time_quit_smoking",
+#' "SMKG203_cont", "SMKG207_cont", "SMK_204", "SMK_05B", "SMK_208",
+#' "SMK_05C", "SMK_01A", "SMKG01C_cont", "pack_years_der", "pack_years_cat"
+#' )
+#' )
+#'
+#' tail(pack_years_cat_2011_2012)
+#'
+#' combined_pack_years_cat <- suppressWarnings(merge_rec_data
+#' (pack_years_cat_2009_2010,pack_years_cat_2011_2012))
+#'
+#' head(combined_pack_years_cat)
+#' tail(combined_pack_years_cat)
+#' @export
+#'
+pack_years_fun_cat <- function(pack_years_der) {
+  # Bands: 1 = 0, 2 = (0, 0.01], 3 = (0.01, 3], 4 = (3, 9], 5 = (9, 16.2],
+  # 6 = (16.2, 25.7], 7 = (25.7, 40], 8 = > 40; else "NA(a)"/"NA(b)" by tag.
+  py <- pack_years_der
+  if_else2(py == 0, 1,
+    if_else2(py <= 0.01 & py > 0, 2,
+      if_else2(py <= 3.0 & py > 0.01, 3,
+        if_else2(py <= 9.0 & py > 3.0, 4,
+          if_else2(py <= 16.2 & py > 9.0, 5,
+            if_else2(py <= 25.7 & py > 16.2, 6,
+              if_else2(py <= 40.0 & py > 25.7, 7,
+                if_else2(py > 40.0, 8, if_else2(
+                  haven::is_tagged_na(py, "a"), "NA(a)", "NA(b)")))))))))
+}
+
+#' @title Type of smokers
+#'
+#' @description This function creates a derived variable (SMKDSTY_A) for
+#' smoker type with 6 categories:
+#'
+#' \itemize{
+#' \item daily smoker
+#' \item current occasional smoker (former daily)
+#' \item current occasional smoker (never daily)
+#' \item current nonsmoker (former daily)
+#' \item current nonsmoker (never daily)
+#' \item nonsmoker
+#' }
+#'
+#' @details For CCHS 2001-2014, smoker type is derived from smoking more than
+#' 100 cigarettes in lifetime, type of smoker at present time, and ever smoked
+#' daily. For CCHS 2015-2018, smoker type was derived differently with different
+#' variables and categories. A function was created for a consistent smoker
+#' status across all cycles.
+#'
+#' @param SMK_005 type of smoker presently
+#'
+#' @param SMK_030 smoked daily - lifetime (occasional/former smoker)
+#'
+#' @param SMK_01A smoked 100 or more cigarettes in lifetime
+#'
+#' @return value for smoker type in the SMKDSTY_A variable
+#'
+#' @examples
+#' # Using SMKDSTY_fun() to derive smoke type values across CCHS cycles
+#' # SMKDSTY_fun() is specified in variable_details.csv along with the
+#' # CCHS variables and cycles included.
+#'
+#' # To transform SMKDSTY_A across cycles, use rec_with_table() for each
+#' # CCHS cycle and specify SMKDSTY_A.
+#' # For CCHS 2001-2014, only specify SMKDSTY_A for smoker type.
+#' # For CCHS 2015-2018, specify the parameters and SMKDSTY_A for smoker type.
+#'
+#' library(cchsflow)
+#'
+#' smoker_type_2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, "SMKDSTY_A")
+#'
+#' head(smoker_type_2009_2010)
+#'
+#' smoker_type_2017_2018 <- rec_with_table(
+#' cchs2017_2018_p,c(
+#' "SMK_01A", "SMK_005","SMK_030","SMKDSTY_A"
+#' )
+#' )
+#'
+#' tail(smoker_type_2017_2018)
+#'
+#' combined_smoker_type <- suppressWarnings(merge_rec_data
+#' (smoker_type_2009_2010,smoker_type_2017_2018))
+#'
+#' head(combined_smoker_type)
+#' tail(combined_smoker_type)
+#'
+#' @export
+
+SMKDSTY_fun <- function(SMK_005, SMK_030, SMK_01A) {
+  not_former_daily <- SMK_030 == 2 | SMK_030 == "NA(a)" | SMK_030 == "NA(b)"
+  if_else2(SMK_005 == 1, 1, # daily smoker
+    if_else2(SMK_005 == 2 & SMK_030 == 1, 2, # occasional (former daily)
+    if_else2(SMK_005 == 2 & not_former_daily, 3, # occasional (never daily)
+    if_else2(SMK_005 == 3 & SMK_030 == 1, 4, # former daily
+    if_else2(SMK_005 == 3 & SMK_030 == 2 & SMK_01A == 1, 5, # former occasional
+    if_else2(SMK_005 == 3 & SMK_01A == 2, 6, # never smoked
+    if_else2(SMK_005 == "NA(a)", tagged_na("a"), tagged_na("b"))))))))
+}
+
+#' @title Age started to smoke daily - daily smoker
+#'
+#' @description This function creates a continuous derived variable
+#' (SMKG203_cont) for age started to smoke daily for daily smokers.
+#'
+#' @details For CCHS 2015-2018, age started to smoke daily was combined for daily
+#' and former daily smokers. Previous cycles had separate variables for age
+#' started to smoke daily. Type of smoker presently is used to define daily
+#' smoker.
+#'
+#' @param SMK_005 type of smoker presently
+#'
+#' @param SMKG040 age started to smoke daily - daily/former daily smoker
+#'
+#' @return value for continuous age started to smoke daily for daily smokers
+#' in the SMKG203_cont variable
+#'
+#' @examples
+#' # Using SMKG203_fun() to derive age started to smoke daily values across
+#' # CCHS cycles.
+#' # SMKG203_fun() is specified in variable_details.csv along with the
+#' # CCHS variables and cycles included.
+#'
+#' # To transform SMKG203_A across cycles, use rec_with_table() for each
+#' # CCHS cycle and specify SMKG203_A.
+#' # For CCHS 2001-2014, only specify SMKG203_A.
+#' # For CCHS 2015-2018, specify the parameters and SMKG203_A for daily smoker
+#' # age.
+#'
+#' library(cchsflow)
+#'
+#' agecigd_2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, "SMKG203_A")
+#'
+#' head(agecigd_2009_2010)
+#'
+#' agecigd_2017_2018 <- rec_with_table(
+#' cchs2017_2018_p,c(
+#' "SMK_005","SMKG040","SMKG203_A"
+#' )
+#' )
+#'
+#' tail(agecigd_2017_2018)
+#'
+#' combined_agecigd <- suppressWarnings(merge_rec_data
+#' (agecigd_2009_2010,agecigd_2017_2018))
+#'
+#' head(combined_agecigd)
+#' tail(combined_agecigd)
+#'
+#' @export
+
+SMKG203_fun <- function(SMK_005, SMKG040) {
+  # Converts the categorical SMKG040 (age started to smoke daily) into a
+  # continuous age for respondents who are daily smokers (SMK_005 == 1).
+  #
+  # Result, decided in this order for every respondent:
+  #   1. daily smoker with SMKG040 in 1-11: the age mapped to that category
+  #   2. SMK_005 or SMKG040 equal to "NA(a)": NA(a)
+  #   3. anything else (not a daily smoker, other codes, missing): NA(b)
+  #
+  # The NA(a) test reads the inputs directly. Testing an intermediate
+  # recoded value instead would lose the tag, because a tagged NA compared
+  # with "NA(a)" is NA rather than TRUE.
+  # Age assigned to each SMKG040 category (name = category code).
+  category_age <- c(
+    "1" = 8,
+    "2" = 13,
+    "3" = 16,
+    "4" = 18.5,
+    "5" = 22,
+    "6" = 27,
+    "7" = 32,
+    "8" = 37,
+    "9" = 42,
+    "10" = 47,
+    "11" = 55
+  )
+  # NA wherever SMKG040 is not one of the category codes.
+  age <- unname(category_age[match(SMKG040, names(category_age))])
+
+  # Daily smoker at present.
+  is_daily <- SMK_005 == 1
+  # Either input is coded "NA(a)".
+  coded_na_a <- SMK_005 == "NA(a)" | SMKG040 == "NA(a)"
+
+  if_else2(
+    is_daily & !is.na(age),
+    age,
+    if_else2(
+      coded_na_a,
+      tagged_na("a"),
+      tagged_na("b")
+    )
+  )
+}
+
+#' @title Age started to smoke daily - former daily smoker
+#'
+#' @description This function creates a continuous derived variable
+#' (SMKG207_cont) for age started to smoke daily for former daily smokers.
+#'
+#' @details For CCHS 2015-2018, age started to smoke daily was combined for daily
+#' and former daily smokers. Previous cycles had separate variables for age
+#' started to smoke daily. Smoked daily in lifetime is used to define former
+#' daily smoker.
+#'
+#' @param SMK_030 smoked daily - lifetime (occasional/former smoker)
+#'
+#' @param SMKG040 age started to smoke daily - daily/former daily smoker
+#'
+#' @return value for continuous age started to smoke daily for former daily
+#' smokers in the SMKG207_cont variable
+#'
+#' @examples
+#' # Using SMKG207_fun() to derive age started to smoke daily values across
+#' # CCHS cycles.
+#' # SMKG207_fun() is specified in variable_details.csv along with the
+#' # CCHS variables and cycles included.
+#'
+#' # To transform SMKG207_A across cycles, use rec_with_table() for each
+#' # CCHS cycle and specify SMKG207_A.
+#' # For CCHS 2001-2014, only specify SMKG207_A.
+#' # For CCHS 2015-2018, specify the parameters and SMKG207_A for former daily
+#' # smoker age.
+#'
+#' library(cchsflow)
+#'
+#' agecigfd_2009_2010 <- rec_with_table(
+#' cchs2009_2010_p, "SMKG207_A")
+#'
+#' head(agecigfd_2009_2010)
+#'
+#' agecigfd_2017_2018 <- rec_with_table(
+#' cchs2017_2018_p,c(
+#' "SMK_030","SMKG040","SMKG207_A"
+#' )
+#' )
+#'
+#' tail(agecigfd_2017_2018)
+#'
+#' combined_agecigfd <- suppressWarnings(merge_rec_data
+#' (agecigfd_2009_2010,agecigfd_2017_2018))
+#'
+#' head(combined_agecigfd)
+#' tail(combined_agecigfd)
+#'
+#'
+#' @export
+SMKG207_fun <- function(SMK_030, SMKG040) {
+  # Converts the categorical SMKG040 (age started to smoke daily) into a
+  # continuous age for respondents who smoked daily in the past
+  # (SMK_030 == 1).
+  #
+  # Result, decided in this order for every respondent:
+  #   1. SMK_030 == 1 with SMKG040 in 1-11: the age mapped to that category
+  #   2. SMK_030 or SMKG040 equal to "NA(a)": NA(a)
+  #   3. anything else (SMK_030 not 1, other codes, missing): NA(b)
+  #
+  # The NA(a) test reads the inputs directly. Testing an intermediate
+  # recoded value instead would lose the tag, because a tagged NA compared
+  # with "NA(a)" is NA rather than TRUE.
+  # Age assigned to each SMKG040 category (name = category code).
+  category_age <- c(
+    "1" = 8,
+    "2" = 13,
+    "3" = 16,
+    "4" = 18.5,
+    "5" = 22,
+    "6" = 27,
+    "7" = 32,
+    "8" = 37,
+    "9" = 42,
+    "10" = 47,
+    "11" = 55
+  )
+  # NA wherever SMKG040 is not one of the category codes.
+  age <- unname(category_age[match(SMKG040, names(category_age))])
+
+  # Smoked daily at some point (occasional or former smoker).
+  was_daily <- SMK_030 == 1
+  # Either input is coded "NA(a)".
+  coded_na_a <- SMK_030 == "NA(a)" | SMKG040 == "NA(a)"
+
+  if_else2(
+    was_daily & !is.na(age),
+    age,
+    if_else2(
+      coded_na_a,
+      tagged_na("a"),
+      tagged_na("b")
+    )
+  )
+}
\ No newline at end of file
diff --git a/R/smoking-histories.R b/R/smoking-histories.R
new file mode 100644
index 0000000..2ce36d9
--- /dev/null
+++ b/R/smoking-histories.R
@@ -0,0 +1,20 @@
+# smoking-histories.R
+# Generate initiation/cessation rate tables from fitted APC models (Stage 9)
+# for the shg-rcpp simulation (Stage 10, separate repo).
+# Pipeline target: rate_tables
+
+#' Generate Canadian rate tables for shg-rcpp
+#'
+#' Extracts initiation and cessation probability tables from fitted APC models,
+#' formatted to match the CISNET shg-rcpp input specification. PUMF-derived
+#' tables can be shared internationally under the Statistics Canada Open Licence.
+#'
+#' @param apc_models List of fitted APC models (initiation + cessation, by sex)
+#' @param cfg Config object from config::get()
+#' @return List of rate tables (initiation_men, initiation_women, cessation)
+generate_rate_tables <- function(apc_models, cfg) {
+  # TODO: implement. Until then this is a stub that always raises an error.
+  # Output format: shg-rcpp input specification
+  # See: https://github.com/NCI-CISNET/shg-rcpp
+  stop("Not yet implemented")
+}
diff --git a/R/study-data.R b/R/study-data.R
new file mode 100644
index 0000000..7116b8c
--- /dev/null
+++ b/R/study-data.R
@@ -0,0 +1,116 @@
+# study-data.R
+# Load and harmonize CCHS cycles via cchsflow.
+# Pipeline target: study_data
+
+#' Survey cycle numeric code from dataset name
+#'
+#' @param data_name cchsflow dataset name (e.g. "cchs2001_p")
+#' @return Integer cycle code (1 = 2001, ..., 11 = 2022)
+survey_cycle_code <- function(data_name) {
+  # Datasets in chronological order: position in this vector = cycle code.
+  known_cycles <- c(
+    "cchs2001_p",
+    "cchs2003_p",
+    "cchs2005_p",
+    "cchs2007_2008_p",
+    "cchs2009_2010_p",
+    "cchs2011_2012_p",
+    "cchs2013_2014_p",
+    "cchs2015_2016_p",
+    "cchs2017_2018_p",
+    "cchs2019_2020_p",
+    "cchs2022_p"
+  )
+  code <- match(data_name, known_cycles)
+  if (is.na(code)) stop("Unknown CCHS dataset name: ", data_name)
+  code
+}
+
+#' Load and harmonize CCHS PUMF cycles
+#'
+#' Reads all configured CCHS cycles from raw_data_dir, harmonizes variables
+#' using cchsflow worksheets, adds SurveyCycle, and combines into a single
+#' data frame. Cycles whose file is missing are skipped with a warning.
+#'
+#' @param cfg Config object from config::get()
+#' @param variables_sheet variables worksheet (data frame)
+#' @param variable_details_sheet variable_details worksheet (data frame)
+#' @param coverage_check Not used in the body. NOTE(review): presumably a
+#'   pipeline dependency hook for the coverage validation - confirm.
+#' @return Combined harmonized data frame (all cycles, study variables only)
+load_study_data <- function(cfg, variables_sheet, variable_details_sheet,
+                            coverage_check = NULL) {
+  # The cycle variable is derived manually below (rec_with_table cannot
+  # derive it); the rest is filtered to this data source (pumf/master/both).
+  cycle_var <- survey_var(cfg, "cycle")
+  study_vars <- variables_sheet[
+    variables_sheet$variable != cycle_var &
+      variables_sheet$source %in% c(cfg$data_source, "both"),
+  ]
+
+  # A config without sample_proportion means "use every row".
+  sample_proportion <- cfg$sample_proportion
+  if (is.null(sample_proportion)) sample_proportion <- 1
+
+  cycle_frames <- list()
+  for (cycle in cfg$cchs_cycles) {
+    # Mapped file name (cchsflow-data naming) or the default <cycle>.RData
+    filename <- cfg$raw_data_file_map[[cycle]]
+    if (is.null(filename)) filename <- paste0(cycle, ".RData")
+    rdata_path <- file.path(cfg$raw_data_dir, filename)
+
+    if (!file.exists(rdata_path)) {
+      warning("Cycle file not found, skipping: ", rdata_path)
+      next
+    }
+
+    message("Harmonizing ", cycle)
+    # Fresh environment per cycle so every object in the file is released.
+    data_env <- new.env()
+    loaded_name <- load(rdata_path, envir = data_env)
+    # cchsflow-data release files use `table` as the internal object name
+    obj_name <- if ("table" %in% loaded_name) "table" else loaded_name[1]
+    raw_data <- get(obj_name, envir = data_env)
+    rm(data_env)
+
+    if (sample_proportion < 1) {
+      n_sample <- round(nrow(raw_data) * sample_proportion)
+      raw_data <- raw_data[sample(nrow(raw_data), n_sample), ]
+    }
+
+    # rec_with_table (cchsflow v3) requires a plain data.frame, not a tibble.
+    # When data is a tibble, data[logi_idx, "col"] returns a 1-column tibble
+    # (a list), which breaks the copy recEnd assignment with a range recStart.
+    raw_data <- as.data.frame(raw_data)
+    cycle_data <- cchsflow::rec_with_table(
+      data = raw_data,
+      variables = study_vars,
+      database_name = cycle,
+      variable_details = variable_details_sheet,
+      notes = FALSE
+    )
+    cycle_data[[cycle_var]] <- survey_cycle_code(cycle)
+
+    # Collected here and bound once after the loop, which avoids re-copying
+    # the accumulated rows on every iteration.
+    cycle_frames[[length(cycle_frames) + 1L]] <- cycle_data
+    message(" Done: ", cycle, " (", nrow(cycle_data), " rows)")
+  }
+
+  if (length(cycle_frames) == 0) {
+    stop("No CCHS cycles loaded — check cfg$raw_data_dir")
+  }
+  harmonized <- dplyr::bind_rows(cycle_frames)
+
+  # Convert types per variables worksheet
+  for (var in colnames(harmonized)) {
+    # Type from the first worksheet row for this variable; NA when absent.
+    var_type <- variables_sheet$variableType[match(var, variables_sheet$variable)]
+    # isTRUE() keeps a blank (NA) variableType from aborting the conversion.
+    if (isTRUE(var_type == "Categorical")) {
+      harmonized[[var]] <- as.factor(harmonized[[var]])
+    } else if (isTRUE(var_type == "Continuous")) {
+      harmonized[[var]] <- as.numeric(harmonized[[var]])
+    }
+  }
+  harmonized
+}
diff --git a/R/validate-coverage.R b/R/validate-coverage.R
new file mode 100644
index 0000000..c93bd0b
--- /dev/null
+++ b/R/validate-coverage.R
@@ -0,0 +1,152 @@
+# validate-coverage.R — Pre-flight validation of variable coverage
+#
+# Checks that every variable declared in cshm-variables.csv has matching
+# rows in the combined variable_details for each cycle it claims.
+# This is the CCHS-specific validation layer (the "spur"): it verifies
+# that config.yml variable mappings resolve correctly against cchsflow's
+# variable_details before the pipeline runs.
+#
+# Two checks:
+# 1. Declared coverage: does variable_details deliver what cshm-variables.csv
+# claims in its databaseStart column?
+# 2. Critical coverage: do variables with pipeline-critical roles
+# (design, model-stratifier, apc-numerator, apc-denominator) have
+# complete coverage across all pipeline cycles?
+
+#' Validate variable coverage against variable_details
+#'
+#' @param variables_sheet Data frame from cshm-variables.csv
+#' @param variable_details_sheet Data frame: rbind of cchsflow base +
+#' CSHM extension variable_details
+#' @param cfg Config list (needs cfg$cchs_cycles, cfg$data_source)
+#' @param strict If TRUE, stop on any critical coverage gap.
+#' If FALSE (default), warn and return results.
+#' @return Invisibly, a list with two data frames:
+#' \describe{
+#' \item{declared}{Gaps where cshm-variables.csv claims a cycle but
+#' variable_details cannot deliver it.}
+#' \item{critical}{Pipeline cycles missing for critical-role variables.}
+#' }
+validate_cycle_coverage <- function(variables_sheet,
+                                    variable_details_sheet,
+                                    cfg,
+                                    strict = FALSE) {
+  # Default data source, spelled out so the function does not rely on %||%
+  data_source <- if (is.null(cfg$data_source)) "pumf" else cfg$data_source
+
+  # Empty result table, also returned on early exit so both paths share columns
+  empty_gaps <- data.frame(
+    variable = character(0), cycle = character(0), role = character(0),
+    stringsAsFactors = FALSE
+  )
+
+  # Pipeline cycles (e.g. cchs2001_p ... cchs2022_p)
+  if (is.null(cfg$cchs_cycles)) {
+    warning("validate_cycle_coverage: cfg$cchs_cycles is NULL; skipping")
+    return(invisible(list(declared = empty_gaps, critical = empty_gaps)))
+  }
+  pipeline_cycles <- as.character(unlist(cfg$cchs_cycles))
+
+  # Split comma-separated worksheet cells into trimmed entries. as.character()
+  # guards against factor or all-NA (logical) columns, which strsplit() rejects.
+  split_csv <- function(cell) trimws(unlist(strsplit(as.character(cell), ",")))
+
+  # Gap rows for one variable: one row per missing cycle (scalars are recycled)
+  gap_rows <- function(var_name, cycles, var_role) {
+    data.frame(variable = var_name, cycle = cycles, role = as.character(var_role),
+               stringsAsFactors = FALSE)
+  }
+
+  # Filter variables sheet to active data source
+  source_col <- variables_sheet$source
+  active_vars <- variables_sheet[
+    !is.na(source_col) & (source_col == data_source | source_col == "both"),
+  ]
+
+  # Build lookup: for each variable in variable_details, which cycles are covered?
+  vd_coverage <- lapply(
+    split(variable_details_sheet$databaseStart, variable_details_sheet$variable),
+    function(db_strings) unique(split_csv(db_strings))
+  )
+
+  # Roles that must have complete pipeline coverage
+  critical_roles <- c("design", "model-stratifier", "apc-numerator", "apc-denominator")
+
+  # Collect gaps per variable and bind once after the loop (no rbind() growth)
+  declared_parts <- vector("list", nrow(active_vars))
+  critical_parts <- vector("list", nrow(active_vars))
+
+  for (i in seq_len(nrow(active_vars))) {
+    var_name <- as.character(active_vars$variable[i])
+    var_start <- active_vars$variableStart[i]
+    var_role <- active_vars$role[i]
+
+    # Skip variables with empty variableStart (resolved internally by cchsflow)
+    if (is.na(var_start) || var_start == "") next
+    # Skip DerivedVar-only variables (e.g. SurveyCycle)
+    if (grepl("^DerivedVar::", var_start)) next
+
+    covered_cycles <- vd_coverage[[var_name]]
+    if (is.null(covered_cycles)) covered_cycles <- character(0)
+
+    # --- Check 1: Declared coverage ---
+    # Pipeline cycles that cshm-variables.csv claims but variable_details lacks
+    declared_pipeline <- intersect(split_csv(active_vars$databaseStart[i]), pipeline_cycles)
+    declared_missing <- declared_pipeline[!(declared_pipeline %in% covered_cycles)]
+    if (length(declared_missing) > 0) {
+      declared_parts[[i]] <- gap_rows(var_name, declared_missing, var_role)
+    }
+
+    # --- Check 2: Critical coverage ---
+    # Critical-role variables must be covered in every pipeline cycle
+    if (any(split_csv(var_role) %in% critical_roles)) {
+      critical_missing <- pipeline_cycles[!(pipeline_cycles %in% covered_cycles)]
+      if (length(critical_missing) > 0) {
+        critical_parts[[i]] <- gap_rows(var_name, critical_missing, var_role)
+      }
+    }
+  }
+
+  # NULL entries (variables without gaps) are ignored by rbind()
+  declared_gaps <- do.call(rbind, c(list(empty_gaps), declared_parts))
+  critical_gaps <- do.call(rbind, c(list(empty_gaps), critical_parts))
+
+  # Collapse each variable's missing cycles into one comma-separated string
+  cycles_by_variable <- function(gaps) {
+    tapply(gaps$cycle, gaps$variable, function(cycles) paste(cycles, collapse = ", "))
+  }
+
+  # --- Report declared gaps ---
+  if (nrow(declared_gaps) > 0) {
+    gap_summary <- cycles_by_variable(declared_gaps)
+    msg_lines <- sprintf(" %s: missing %s", names(gap_summary), as.vector(gap_summary))
+    warning(
+      "Declared coverage gaps (cshm-variables.csv claims cycle but variable_details lacks it):\n",
+      paste(msg_lines, collapse = "\n"),
+      call. = FALSE
+    )
+  }
+
+  # --- Report critical gaps ---
+  if (nrow(critical_gaps) > 0) {
+    gap_summary <- cycles_by_variable(critical_gaps)
+    # Role text of each variable's first gap row, as shown in the message
+    first_role <- critical_gaps$role[match(names(gap_summary), critical_gaps$variable)]
+    msg_lines <- sprintf(" %s [%s]: missing %s", names(gap_summary), first_role, as.vector(gap_summary))
+    msg <- paste0(
+      "Critical coverage gaps (pipeline-essential variables missing for some cycles):\n",
+      paste(msg_lines, collapse = "\n")
+    )
+    if (strict) {
+      stop(msg, call. = FALSE)
+    } else {
+      warning(msg, call. = FALSE)
+    }
+  }
+
+  if (nrow(declared_gaps) == 0 && nrow(critical_gaps) == 0) {
+    message("All variables pass coverage validation.")
+  }
+
+  invisible(list(declared = declared_gaps, critical = critical_gaps))
+}
diff --git a/R/validation.R b/R/validation.R
new file mode 100644
index 0000000..42b6abf
--- /dev/null
+++ b/R/validation.R
@@ -0,0 +1,16 @@
+# validation.R
+# Compare modelled smoking prevalence against independent historic surveys.
+# Pipeline target: validation_results
+
+#' Validate modelled prevalence against historic estimates
+#'
+#' Compares CSHM-modelled smoking prevalence by sex, age group, and year
+#' against observed CCHS estimates and other independent data sources.
+#'
+#' @param smoking_histories Output of generate_rate_tables()
+#' @param cfg Config object from config::get()
+#' @return Data frame of validation metrics (bias, RMSE by subgroup)
+validate_model <- function(smoking_histories, cfg) {
+ # TODO: implement. Placeholder: every call currently raises the error below.
+ stop("Not yet implemented")
+}
diff --git a/R/variable-details-sheet-utils.R b/R/variable-details-sheet-utils.R
new file mode 100644
index 0000000..115c215
--- /dev/null
+++ b/R/variable-details-sheet-utils.R
@@ -0,0 +1,41 @@
+# variable-details-sheet-utils.R
+# Utility functions for working with the variable_details worksheet.
+# Ported from DemPoRT-V2-dev (origin/dev).
+
+#' Return unique recEnd rows for a variable
+#'
+#' @param variable_details_sheet Variable details worksheet data frame
+#' @param for_variable Variable name to look up
+#' @param include_NA Whether to include NA:: rows (default FALSE)
+#' @return Data frame of unique recEnd rows
+get_unique_rec_end_rows <- function(
+  variable_details_sheet,
+  for_variable,
+  include_NA = FALSE
+) {
+  # Rows for the requested variable, reduced to the first row per recEnd value
+  matching_rows <- dplyr::filter(variable_details_sheet, variable == for_variable)
+  unique_rows <- dplyr::distinct(matching_rows, recEnd, .keep_all = TRUE)
+  # Rows whose recEnd contains "Func::" are never returned
+  unique_rows <- dplyr::filter(unique_rows, !grepl("Func::", recEnd))
+
+  if (!include_NA) {
+    # Default: also drop the NA::a and NA::b rows
+    unique_rows <- dplyr::filter(unique_rows, recEnd != "NA::a" & recEnd != "NA::b")
+  }
+  unique_rows
+}
+
+# TRUE if any row for `variable` has typeEnd "cat"; `[[` is tibble-safe.
+is_categorical <- function(variable, variable_details_sheet) {
+  is_row <- variable_details_sheet[["variable"]] == variable
+  "cat" %in% variable_details_sheet[["typeEnd"]][is_row]
+}
+
+get_variable_type <- function(variable, variable_details_sheet) {
+  get_variable_rows(variable, variable_details_sheet)[["typeEnd"]][1]  # first typeEnd as a scalar, also for tibbles
+}
+
+get_variable_rows <- function(variable, variable_details_sheet) {
+  variable_details_sheet[which(variable_details_sheet$variable == variable), ]  # which() skips NA names
+}
diff --git a/R/variables-sheet-utils.R b/R/variables-sheet-utils.R
new file mode 100644
index 0000000..caae324
--- /dev/null
+++ b/R/variables-sheet-utils.R
@@ -0,0 +1,37 @@
+# variables-sheet-utils.R
+# Utility functions for working with the variables worksheet.
+# Ported from DemPoRT-V2-dev (origin/dev).
+
+get_row_for_variable <- function(variable, variables_sheet) {
+  variables_sheet[which(variables_sheet$variable == variable), ]  # which() skips NA names
+}
+
+is_continuous_variable <- function(variables_sheet_row) {
+  "Continuous" == variables_sheet_row[1, "variableType"]  # only the first row is inspected
+}
+
+is_categorical_variable <- function(variables_sheet_row) {
+  "Categorical" == variables_sheet_row[1, "variableType"]  # only the first row is inspected
+}
+
+is_value_na <- function(value) {
+  value %in% "N/A"  # unlike ==, %in% maps NA input to FALSE, so if() stays safe
+}
+
+#' Return variable names with a given role
+#'
+#' Roles are comma-separated in the worksheet, so a variable can carry
+#' multiple roles (e.g. "predictor, table1, apc-numerator").
+#' This function matches any variable whose role field contains `role`.
+#'
+#' @param role Character role value (e.g. "predictor", "table1", "apc-numerator")
+#' @param variables_sheet Variables worksheet data frame
+#' @return Character vector of matching variable names
+select_vars_by_role <- function(role, variables_sheet) {
+  # as.character() keeps strsplit() working when the role column was read as a
+  # factor or as an all-NA logical column (strsplit() rejects non-character input).
+  role_text <- as.character(variables_sheet$role)
+  in_role <- vapply(strsplit(role_text, ","), function(parts) role %in% trimws(parts), logical(1))
+  has_role <- !is.na(role_text) & in_role
+  variables_sheet$variable[has_role]
+}
diff --git a/README.md b/README.md
index 71c4310..ac6754a 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,195 @@
-# CHSGM-dev
-Canadian History Smoking Generator Model
+# Canadian Smoking Histories Model (CSHM)
+
+[MIT License](LICENSE)
+
+## Overview
+
+CSHM is an R implementation of an Age-Period-Cohort (APC) model that generates Canadian smoking histories from Canadian Community Health Survey (CCHS) data. It modernizes and extends the Ontario SHGM study (Manuel et al. 2020), updating the CCHS cycles, extending coverage from Ontario to all of Canada, and reimplementing the analysis in R from the original SAS code.
+
+**Key reference:**
+
+> Manuel DG, Wilton AS, Bennett C, Dass R, Laporte A, Holford TR. Smoking patterns based on birth-cohort-specific histories from 1965 to 2013, with projections to 2041. *Health Reports*. 2020;31(11):16–31. doi:[10.25318/82-003-x202001100002-eng](https://doi.org/10.25318/82-003-x202001100002-eng)
+
+## How it works
+
+The model has two stages:
+
+**Stage 1 — Estimate rates from CCHS data.** Two separate APC logistic regression models are fit using harmonized CCHS survey cycles:
+- **Initiation model** — probability of transitioning from never smoker to current smoker at age *a*, conditional on being a never smoker at *a*−1 (zero before age 8)
+- **Cessation model** — conditional probability of a current smoker quitting at age *a* (zero before age 15)
+
+Both models use constrained cubic splines with the same knot structure as Holford et al. (2014). The fundamental APC identity is `cohort = period − age`.
+
+**Stage 2 — Simulate smoking histories.** Estimated initiation and cessation rates are used to simulate individual-level smoking histories for synthetic Canadian populations.
+
+### APC model constraints
+
+Period and cohort effects are held constant outside the observed data range:
+
+| Model | Constraint |
+|-------|-----------|
+| Initiation — men | Constant from 1999 forward |
+| Initiation — women | Constant from 2003 forward |
+| Cessation | Constant from 2013 forward |
+| Cohort (initiation) | Constant prior to 1920 |
+| Cohort (cessation) | Constant from 1985 forward |
+
+### Mortality adjustment
+
+Ever-smokers are less likely than never-smokers to survive to the survey date. This survival bias is corrected using MPoRT weights adjusted for age, smoking status, years since quitting, immigration, and sex. A sensitivity analysis uses the Peto constant mortality risk ratio, consistent with the original Holford et al. (2014) US implementation.
+
+### Smoking status definitions
+
+| Status | Definition |
+|--------|-----------|
+| Never smoker | <100 lifetime cigarettes AND never smoked a whole cigarette |
+| Current smoker | ≥100 lifetime cigarettes AND currently smokes daily or occasionally |
+| Former smoker | ≥100 lifetime cigarettes AND not currently smoking |
+
+## Data
+
+### Two computing environments
+
+| Environment | Data | Notes |
+|-------------|------|-------|
+| Development | CCHS PUMF `.RData` files | Open licence; used for model development and validation |
+| Production | CCHS Master files | Statistics Canada RDC; exact continuous variables; final model run |
+
+PUMF files use midpoint-estimated pseudo-continuous variables (e.g., `SMKG01C_cont`, `SMKG040_cont`). Master files provide exact continuous values (`SMK_01C`, `SMK_040`). PUMF-derived results are the shareable international artifact; Master data produces the definitive estimates.
+
+**CCHS cycles used:** 2001, 2003, 2005, 2007–08, 2009–10, 2011–12, 2013–14, 2015–16, 2017–18, 2019–20, 2022 (PUMF); 2001–2023 (Master).
+
+### CCHS harmonization
+
+Variables are harmonized across CCHS cycles using the [cchsflow](https://github.com/Big-Life-Lab/cchsflow) R package. The active development version (v3, PR #163) introduces unified smoking variables that route automatically to the appropriate source depending on file type:
+
+- `age_first_cigarette` — age first smoked whole cigarette (Master: exact; PUMF: midpoint estimate)
+- `age_start_smoking` — age started smoking daily (Master: exact; PUMF: midpoint ±3 years)
+- `time_quit_smoking` — years since quit smoking
+
+## Pipeline
+
+The analysis uses a [`{targets}`](https://docs.ropensci.org/targets/) pipeline with environment-specific configuration via `config.yml`. This pattern follows the [DemPoRT v2](https://github.com/Big-Life-Lab/DemPoRT-V2-dev) project.
+
+| Stage | Target | Status |
+|-------|--------|--------|
+| 1 | `variables_sheet`, `variable_details_sheet` | ✅ Active |
+| 2 | `study_data` | ✅ Active |
+| 3 | `cleaned_data` | ✅ Active |
+| 4 | `table_1a_data` | ✅ Active |
+| 5 | `analysis_data` (MICE imputation) | ✅ Active |
+| 6 | `table_1b_data` | ✅ Active |
+| 7 | `apc_data` | ✅ Active |
+| 8 | `apc_model_initiation_men/women`, `apc_model_cessation_men/women` | ✅ Active |
+| 9 | `rate_tables` | 🔲 Stub |
+| 10 | `validation_results` | 🔲 Stub |
+
+## Project layout
+
+A guide for new collaborators.
+
+### Configuration (`config.yml`)
+
+`config.yml` controls all environment-specific settings. Set the active profile before running:
+
+```r
+Sys.setenv(R_CONFIG_ACTIVE = "dev") # 10% sample, debug logging
+Sys.setenv(R_CONFIG_ACTIVE = "draft") # 5% sample from cchsflow-data release
+Sys.setenv(R_CONFIG_ACTIVE = "prod") # Full PUMF sample
+# statscan: delegates to config/statscan.yml (gitignored) for RDC paths
+```
+
+Key sections in `config.yml`:
+- **`cchs_cycles`** — 11 PUMF cycles (2001–2022); statscan profile adds 2023
+- **`apc:`** — spline knots, period/cohort constraints, mortality method, projection horizon
+- **`sensitivity:`** — documents all prespecified sensitivity analyses with protocol rationale
+- **`survey:`** — maps conceptual variable roles to actual CCHS variable names; change these (and the variable worksheets) to adapt the pipeline to a different survey
+
+### Schemas (`schemas/`)
+
+LinkML YAML schemas define the data contracts for key pipeline inputs and outputs:
+
+| Schema | Describes |
+|--------|-----------|
+| `cshm-variables.yaml` | Study variable dictionary (extends cchsflow `variables.csv`) |
+| `cshm-rate-tables.yaml` | APC model output — initiation/cessation probability tables |
+| `cshm-cohort.yaml` | Synthetic population used in Stage 9 simulation |
+
+Schemas are documentation and validation specs, not runtime code. The `role`, `source`, and `purpose` fields in `cshm-variables.yaml` are the CSHM additions to the cchsflow variable convention.
+
+### Variable worksheets (`worksheets/`)
+
+Two CSVs define which CCHS variables are loaded at each pipeline stage:
+
+| File | Purpose |
+|------|---------|
+| `worksheets/cshm-variables.csv` | Study variable list: `variable`, `role`, `source`, `purpose` |
+| `worksheets/cshm-variable-details.csv` | CSHM extension rows for 2019–20 and 2022 cycles not yet in cchsflow v3 |
+
+The `role` column is comma-separated and controls pipeline behaviour — e.g., `apc-numerator` variables define the event indicator in Stage 7; `imputation-predictor` variables go into MICE. See `schemas/cshm-variables.yaml` for the full role vocabulary.
+
+`cchsflow`'s `variable_details.csv` (the recoding rules) is loaded separately from `~/github/cchsflow/inst/extdata/` and combined with the CSHM extension rows at pipeline start.
+
+### R functions (`R/`)
+
+| File | Stage | Entry point |
+|------|-------|-------------|
+| `study-data.R` | 2 | `load_study_data()` |
+| `data-cleaning.R` | 3 | `clean_study_data()` |
+| `descriptive-data.R` | 4, 6 | `get_cshm_desc_data()` |
+| `imputation.R` | 5 | `impute_data()` |
+| `apc-model.R` | 7–8 | `prepare_apc_data()`, `fit_apc_model()` |
+| `variables-sheet-utils.R` | — | Variable role helpers |
+| `variable-details-sheet-utils.R` | — | Variable details helpers |
+
+### `{targets}` workflow
+
+```r
+targets::tar_make() # Run full pipeline
+targets::tar_make(apc_data) # Run through a specific target
+targets::tar_read(apc_data) # Inspect a target's output
+targets::tar_outdated() # See what needs rerunning
+targets::tar_visnetwork() # Visualise pipeline DAG
+```
+
+Changing `config.yml` invalidates all downstream targets. Changing an R function invalidates only targets that call it.
+
+## Relationship to CISNET shg-rcpp
+
+The [CISNET Smoking History Generator](https://github.com/NCI-CISNET/shg-rcpp) (`SmokingHistoryGenerator` R package) implements the same APC methodology for the US population. CSHM is the Canadian adaptation.
+
+CSHM generates individual-level smoking histories directly from the estimated rate tables (pipeline stage 9), following the approach of Manuel et al. (2020) and Holford et al. (2014). Because CCHS PUMF data have an open Statistics Canada licence, PUMF-derived Canadian rate tables can be shared internationally.
+
+## Setup
+
+```r
+# Restore R environment
+renv::restore()
+
+# Run the pipeline
+targets::tar_make()
+```
+
+See `CONTRIBUTING.md` for the development workflow. Full documentation is in the `docs/` directory and rendered at [GitHub Pages](https://big-life-lab.github.io/cshm-dev/).
+
+## Licence
+
+The code in this repository is licensed under the [MIT License](LICENSE).
+
+## Statistics Canada attribution
+
+CCHS data used in this project is accessed and adapted in accordance with the
+[Statistics Canada Open Licence](https://www.statcan.gc.ca/eng/reference/licence).
+
+Source: Statistics Canada, Canadian Community Health Survey 2001 to 2022 PUMF, accessed 2025.
+Reproduced and distributed on an "as is" basis with the permission of Statistics Canada.
+
+Adapted from Statistics Canada, Canadian Community Health Surveys 2001 to 2022 PUMF, accessed 2025.
+This does not constitute an endorsement by Statistics Canada of this product.
+
+## Acknowledgements
+
+- Dr. Ted Holford for the foundational APC methodology
+- Statistics Canada for the CCHS data
+- The CISNET Lung Working Group for the foundational US Smoking History Generator
+- All contributors to the CSHM Consortium
diff --git a/_docstyle/field-codes.json b/_docstyle/field-codes.json
new file mode 100644
index 0000000..65e2941
--- /dev/null
+++ b/_docstyle/field-codes.json
@@ -0,0 +1,496 @@
+{
+ "documentProperties": {
+ "custom": []
+ },
+ "citations": {
+ "HealthCanada_SmokingMortality_2024": {
+ "itemData": {
+ "id": 80521,
+ "type": "document",
+ "citation-key": "HealthCanada_SmokingMortality_2024",
+ "language": "en",
+ "title": "Smoking and mortality",
+ "URL": "https://www.canada.ca/en/health-canada/services/health-concerns/tobacco/legislation/tobacco-product-labelling/smoking-mortality.html",
+ "author": [
+ {
+ "literal": "Health Canada"
+ }
+ ],
+ "issued": {
+ "date-parts": [["2024"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/BIVEE78X"
+ ]
+ },
+ "CSUCH_2023": {
+ "itemData": {
+ "id": 80520,
+ "type": "report",
+ "citation-key": "CanadianSubstanceUseCostsandHarmsScientificWorkingGroup_CanadianSubstanceUse_2023",
+ "language": "en",
+ "publisher": "Canadian Institute for Substance Use Research and Canadian Centre on Substance Use and Addiction",
+ "publisher-place": "Victoria, BC",
+ "title": "Canadian substance use costs and harms 2007-2020",
+ "URL": "https://csuch.ca/publications/csuch-report/",
+ "author": [
+ {
+ "literal": "Canadian Substance Use Costs and Harms Scientific Working Group"
+ }
+ ],
+ "issued": {
+ "date-parts": [["2023"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/JERACS4C"
+ ]
+ },
+ "mitra2015": {
+ "itemData": {
+ "id": 99901,
+ "type": "article-journal",
+ "citation-key": "mitra2015",
+ "container-title": "Health Reports",
+ "DOI": "10.25318/82-003-x201500614195-eng",
+ "ISSN": "0840-6529",
+ "issue": "6",
+ "page": "12-20",
+ "title": "Social determinants of lung cancer incidence in Canada: A 13-year prospective study",
+ "volume": "26",
+ "author": [
+ {"family": "Mitra", "given": "Dipjyoti"},
+ {"family": "Shaw", "given": "Amanda"},
+ {"family": "Tjepkema", "given": "Michael"},
+ {"family": "Peters", "given": "Paul"}
+ ],
+ "issued": {
+ "date-parts": [["2015", "6"]]
+ }
+ }
+ },
+ "hennessy2015": {
+ "itemData": {
+ "id": 4042,
+ "type": "article-journal",
+ "citation-key": "hennessy2015",
+ "container-title": "Population Health Metrics",
+ "DOI": "10.1186/s12963-015-0057-x",
+ "ISSN": "1478-7954",
+ "issue": "1",
+ "page": "24",
+ "title": "The Population Health Model (POHEM): an overview of rationale, methods and applications",
+ "volume": "13",
+ "author": [
+ {"family": "Hennessy", "given": "Deirdre A."},
+ {"family": "Flanagan", "given": "William M."},
+ {"family": "Tanuseputro", "given": "Peter"},
+ {"family": "Bennett", "given": "Carol"},
+ {"family": "Tuna", "given": "Meltem"},
+ {"family": "Kopec", "given": "Jacek"},
+ {"family": "Wolfson", "given": "Michael C."},
+ {"family": "Manuel", "given": "Douglas G."}
+ ],
+ "issued": {
+ "date-parts": [["2015"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/PVTTRS4W"
+ ]
+ },
+ "gauvreau2017": {
+ "itemData": {
+ "id": 3626,
+ "type": "article-journal",
+ "citation-key": "gauvreauOncoSimModelDevelopment2017",
+ "container-title": "Current Oncology",
+ "issue": "6",
+ "page": "401",
+ "title": "The OncoSim model: development and use for better decision-making in Canadian cancer control",
+ "volume": "24",
+ "author": [
+ {"family": "Gauvreau", "given": "C. L."},
+ {"family": "Fitzgerald", "given": "N. R."},
+ {"family": "Memon", "given": "S."},
+ {"family": "Flanagan", "given": "W. M."},
+ {"family": "Nadeau", "given": "C."},
+ {"family": "Asakawa", "given": "K."},
+ {"family": "Garner", "given": "R."},
+ {"family": "Miller", "given": "A. B."},
+ {"family": "Evans", "given": "W. K."},
+ {"family": "Popadiuk", "given": "C. M."}
+ ],
+ "issued": {
+ "date-parts": [["2017"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/RHRBI9I6"
+ ]
+ },
+ "chaiton2021": {
+ "itemData": {
+ "id": 80251,
+ "type": "article-journal",
+ "citation-key": "Chaiton_F_2021",
+ "container-title": "Forecasting",
+ "DOI": "10.3390/forecast3020017",
+ "ISSN": "2571-9394",
+ "issue": "2",
+ "page": "267-275",
+ "title": "Tobacco endgame simulation modelling: assessing the impact of policy changes on smoking prevalence in 2035",
+ "volume": "3",
+ "author": [
+ {"family": "Chaiton", "given": "Michael"},
+ {"family": "Dubray", "given": "Jolene"},
+ {"family": "Guindon", "given": "G. Emmanuel"},
+ {"family": "Schwartz", "given": "Robert"}
+ ],
+ "issued": {
+ "date-parts": [["2021", 4, 13]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/groups/5363837/items/7HY84EER"
+ ]
+ },
+ "beland2002": {
+ "itemData": {
+ "id": 7459,
+ "type": "article-journal",
+ "citation-key": "belandCanadianCommunityHealth2002",
+ "container-title": "Health Reports",
+ "issue": "2",
+ "page": "9-14",
+ "title": "Canadian Community Health Survey - Methodological overview",
+ "volume": "13",
+ "author": [
+ {"family": "Beland", "given": "Y."}
+ ],
+ "issued": {
+ "date-parts": [["2002"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/AJA8XSK2"
+ ]
+ },
+ "kopasker2023": {
+ "itemData": {
+ "id": 47313,
+ "type": "article-journal",
+ "citation-key": "Kopasker_TLRH-E_2023",
+ "container-title": "The Lancet Regional Health - Europe",
+ "DOI": "10.1016/j.lanepe.2023.100758",
+ "ISSN": "2666-7762",
+ "PMID": "37876527",
+ "PMCID": "PMC10590730",
+ "title": "Microsimulation as a flexible tool to evaluate policies and their impact on socioeconomic inequalities in health",
+ "volume": "34",
+ "author": [
+ {"family": "Kopasker", "given": "Daniel"},
+ {"family": "Katikireddi", "given": "Srinivasa Vittal"},
+ {"family": "Santos", "given": "João Vasco"},
+ {"family": "Richiardi", "given": "Matteo"},
+ {"family": "Bronka", "given": "Patryk"},
+ {"family": "Rostila", "given": "Mikael"},
+ {"family": "Cecchini", "given": "Michele"},
+ {"family": "Ali", "given": "Shehzad"},
+ {"family": "Emmert-Fees", "given": "Karl"},
+ {"family": "Bambra", "given": "Clare"},
+ {"family": "Hoven", "given": "Hanno"},
+ {"family": "Backhaus", "given": "Insa"},
+ {"family": "Balaj", "given": "Mirza"},
+ {"family": "Eikemo", "given": "Terje Andreas"}
+ ],
+ "issued": {
+ "date-parts": [["2023"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/UV6ASDG2"
+ ]
+ },
+ "vasquezlavin2022": {
+ "itemData": {
+ "id": 80230,
+ "type": "article-journal",
+ "citation-key": "Vasquez-Lavin_AE_2022",
+ "container-title": "Applied Economics",
+ "DOI": "10.1080/00036846.2021.2019186",
+ "issue": "34",
+ "page": "3972-3988",
+ "title": "Assessing the use of pseudo-panels to estimate the value of statistical life",
+ "volume": "54",
+ "author": [
+ {"family": "Vasquez-Lavin", "given": "Felipe"},
+ {"family": "Bratti", "given": "Luna"},
+ {"family": "Orrego", "given": "Sergio"},
+ {"family": "Barrientos", "given": "Manuel"}
+ ],
+ "issued": {
+ "date-parts": [["2022"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/IZGGUTPK"
+ ]
+ },
+ "Holford_AJPM_2014": {
+ "itemData": {
+ "id": 2357,
+ "type": "article-journal",
+ "citation-key": "Holford_AJPM_2014",
+ "container-title": "American Journal of Preventive Medicine",
+ "DOI": "10.1016/j.amepre.2013.10.022",
+ "issue": "2",
+ "page": "e31-7",
+ "title": "Patterns of birth cohort-specific smoking histories, 1965-2009",
+ "volume": "46",
+ "author": [
+ {"family": "Holford", "given": "T. R."},
+ {"family": "Levy", "given": "D. T."},
+ {"family": "McKay", "given": "L. A."},
+ {"family": "Clarke", "given": "L."},
+ {"family": "Racine", "given": "B."},
+ {"family": "Meza", "given": "R."},
+ {"family": "Land", "given": "S."},
+ {"family": "Jeon", "given": "J."},
+ {"family": "Feuer", "given": "E. J."}
+ ],
+ "issued": {
+ "date-parts": [["2014"]]
+ }
+ }
+ },
+ "Manuel_HR_2020": {
+ "itemData": {
+ "id": 2358,
+ "type": "article-journal",
+ "citation-key": "Manuel_HR_2020",
+ "container-title": "Health Reports",
+ "DOI": "10.25318/82-003-x202001100002-eng",
+ "ISSN": "0840-6529",
+ "issue": "11",
+ "page": "16-31",
+ "title": "Smoking patterns based on birth-cohort-specific histories from 1965 to 2013, with projections to 2041",
+ "URL": "https://www150.statcan.gc.ca/n1/pub/82-003-x/2020011/article/00002-eng.htm",
+ "volume": "31",
+ "author": [
+ {"family": "Manuel", "given": "Douglas G."},
+ {"family": "Wilton", "given": "Andrew S."},
+ {"family": "Bennett", "given": "Carol"},
+ {"family": "Dass", "given": "Rohit"},
+ {"family": "Laporte", "given": "Audrey"},
+ {"family": "Holford", "given": "Theodore R."}
+ ],
+ "issued": {
+ "date-parts": [["2020"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/BIVEE78X"
+ ]
+ },
+ "Tam_AJPM_2023": {
+ "itemData": {
+ "id": 80522,
+ "type": "article-journal",
+ "citation-key": "Tam_AJPM_2023",
+ "container-title": "American Journal of Preventive Medicine",
+ "DOI": "10.1016/j.amepre.2022.12.002",
+ "issue": "4",
+ "page": "S63-S71",
+ "title": "Patterns of birth cohort-specific smoking histories in Brazil",
+ "volume": "64",
+ "author": [
+ {"family": "Tam", "given": "Jamie"},
+ {"family": "Jaffri", "given": "Mohammed A."},
+ {"family": "Mok", "given": "Yoonseo"},
+ {"family": "Jeon", "given": "Jihyoun"},
+ {"family": "Szklo", "given": "André S."},
+ {"family": "Souza", "given": "Mirian C."},
+ {"family": "Holford", "given": "Theodore R."},
+ {"family": "Levy", "given": "David T."},
+ {"family": "Cao", "given": "Pianpian"},
+ {"family": "Sánchez-Romero", "given": "Luz M."},
+ {"family": "Meza", "given": "Rafael"}
+ ],
+ "issued": {
+ "date-parts": [["2023"]]
+ }
+ }
+ },
+ "gagne2017": {
+ "itemData": {
+ "id": 99903,
+ "type": "article-journal",
+ "citation-key": "gagne2017",
+ "container-title": "Canadian Journal of Public Health",
+ "DOI": "10.17269/CJPH.108.5895",
+ "ISSN": "0008-4263",
+ "PMID": "28910259",
+ "PMCID": "PMC6972049",
+ "issue": "3",
+ "page": "e331-e334",
+ "title": "Estimation of smoking prevalence in Canada: Implications of survey characteristics in the CCHS and CTUMS/CTADS",
+ "volume": "108",
+ "author": [
+ {"family": "Gagné", "given": "Thierry"}
+ ],
+ "issued": {
+ "date-parts": [["2017"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/FZNFHVCK"
+ ]
+ },
+ "chen2020joinpoint": {
+ "itemData": {
+ "id": 99904,
+ "type": "article-journal",
+ "citation-key": "chen2020joinpoint",
+ "container-title": "Journal of Official Statistics",
+ "DOI": "10.2478/jos-2020-0003",
+ "PMCID": "PMC7380682",
+ "issue": "1",
+ "page": "49-62",
+ "title": "The Joinpoint-Jump and Joinpoint-Comparability Ratio Model for Trend Analysis with Applications to Coding Changes in Health Statistics",
+ "volume": "36",
+ "author": [
+ {"family": "Chen", "given": "Huann-Sheng"},
+ {"family": "Zeichner", "given": "Samantha"},
+ {"family": "Anderson", "given": "Robert N."},
+ {"family": "Espey", "given": "Donald K."},
+ {"family": "Kim", "given": "Hyune-Ju"},
+ {"family": "Feuer", "given": "Eric J."}
+ ],
+ "issued": {
+ "date-parts": [["2020"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/9QEJUJRX"
+ ]
+ },
+ "opazobretton2022": {
+ "itemData": {
+ "id": 99905,
+ "type": "article-journal",
+ "citation-key": "opazobretton2022",
+ "container-title": "Addiction",
+ "DOI": "10.1111/add.15696",
+ "ISSN": "0965-2140",
+ "PMID": "34590368",
+ "issue": "5",
+ "page": "1392-1403",
+ "title": "Understanding long-term trends in smoking in England, 1972–2019: an age-period-cohort approach",
+ "volume": "117",
+ "author": [
+ {"family": "Opazo Breton", "given": "Magdalena"},
+ {"family": "Gillespie", "given": "Duncan"},
+ {"family": "Pryce", "given": "Robert"},
+ {"family": "Bogdanovica", "given": "Ilze"},
+ {"family": "Angus", "given": "Colin"},
+ {"family": "Brennan", "given": "Alan"},
+ {"family": "Britton", "given": "John"}
+ ],
+ "issued": {
+ "date-parts": [["2022"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/TZ6UPAGW"
+ ]
+ },
+ "wade2025": {
+ "itemData": {
+ "id": 99906,
+ "type": "article-journal",
+ "citation-key": "wade2025",
+ "container-title": "Statistical Methods in Medical Research",
+ "DOI": "10.1177/09622802241310326",
+ "ISSN": "0962-2802",
+ "PMCID": "PMC11951451",
+ "title": "Using Bayesian evidence synthesis to quantify uncertainty in population trends in smoking behaviour",
+ "author": [
+ {"family": "Wade", "given": "Stephanie"},
+ {"family": "Sarich", "given": "Patricia"},
+ {"family": "Vaneckova", "given": "Petra"}
+ ],
+ "issued": {
+ "date-parts": [["2025"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/X73KCFT7"
+ ]
+ },
+ "backinger2008": {
+ "itemData": {
+ "id": 99902,
+ "type": "article-journal",
+ "citation-key": "backinger2008",
+ "container-title": "Epidemiologic Perspectives & Innovations",
+ "DOI": "10.1186/1742-5573-5-8",
+ "ISSN": "1742-5573",
+ "PMID": "19055824",
+ "PMCID": "PMC2627846",
+ "page": "8",
+ "title": "Using the National Health Interview Survey to understand and address the impact of tobacco in the United States: past perspectives and future considerations",
+ "volume": "5",
+ "author": [
+ {"family": "Backinger", "given": "Cathy L."},
+ {"family": "Lawrence", "given": "Deirdre"},
+ {"family": "Swan", "given": "Judith"},
+ {"family": "Winn", "given": "Deborah M."},
+ {"family": "Breen", "given": "Nancy"},
+ {"family": "Hartman", "given": "Anne"},
+ {"family": "Grana", "given": "Rachel"},
+ {"family": "Tran", "given": "David"},
+ {"family": "Farrell", "given": "Samantha"}
+ ],
+ "issued": {
+ "date-parts": [["2008"]]
+ }
+ },
+ "uris": [
+ "http://zotero.org/users/6858935/items/SKNEJQ5C"
+ ]
+ },
+ "Meza_J_2021": {
+ "itemData": {
+ "id": 80523,
+ "type": "article-journal",
+ "citation-key": "Meza_J_2021",
+ "container-title": "JAMA",
+ "DOI": "10.1001/jama.2021.1077",
+ "issue": "10",
+ "page": "988",
+ "title": "Evaluation of the benefits and harms of lung cancer screening with low-dose computed tomography: modeling study for the US preventive services task force",
+ "volume": "325",
+ "author": [
+ {"family": "Meza", "given": "Rafael"},
+ {"family": "Jeon", "given": "Jihyoun"},
+ {"family": "Toumazis", "given": "Iakovos"},
+ {"family": "Ten Haaf", "given": "Kevin"},
+ {"family": "Cao", "given": "Pianpian"},
+ {"family": "Bastani", "given": "Mehrad"},
+ {"family": "Han", "given": "Summer S."},
+ {"family": "Blom", "given": "Erik F."},
+ {"family": "Jonas", "given": "Daniel E."},
+ {"family": "Feuer", "given": "Eric J."},
+ {"family": "Plevritis", "given": "Sylvia K."},
+ {"family": "De Koning", "given": "Harry J."},
+ {"family": "Kong", "given": "Chung Yin"}
+ ],
+ "issued": {
+ "date-parts": [["2021"]]
+ }
+ }
+ }
+ }
+}
diff --git a/_docstyle/page-config.json b/_docstyle/page-config.json
new file mode 100644
index 0000000..752e345
--- /dev/null
+++ b/_docstyle/page-config.json
@@ -0,0 +1,75 @@
+{
+ "footer": {
+ "enabled": true,
+ "first_page": false,
+ "style": "footer",
+ "rPr_xml": "<\/w:rPr>",
+ "left": "DRAFT",
+ "center": "",
+ "right": "Page {page} of {pages}"
+ },
+ "header": {
+ "enabled": true,
+ "first_page": false,
+ "style": "header",
+ "rPr_xml": "<\/w:rPr>",
+ "left": "CSHM Study Protocol",
+ "center": "",
+ "right": ""
+ },
+ "table_styles": {
+ "table-formal": {
+ "borders": {
+ "top": {
+ "val": "single",
+ "sz": "8",
+ "color": "7F7F7F"
+ },
+ "bottom": {
+ "val": "single",
+ "sz": "8",
+ "color": "7F7F7F"
+ }
+ },
+ "header_shading": "D9D9D9",
+ "header_bold": false,
+ "font_size_half_pts": 22
+ },
+ "table-grid": {
+ "borders": {
+ "top": {
+ "val": "single",
+ "sz": "8",
+ "color": "000000"
+ },
+ "bottom": {
+ "val": "single",
+ "sz": "8",
+ "color": "000000"
+ },
+ "left": {
+ "val": "single",
+ "sz": "8",
+ "color": "000000"
+ },
+ "right": {
+ "val": "single",
+ "sz": "8",
+ "color": "000000"
+ },
+ "insideH": {
+ "val": "single",
+ "sz": "8",
+ "color": "000000"
+ },
+ "insideV": {
+ "val": "single",
+ "sz": "8",
+ "color": "000000"
+ }
+ },
+ "header_bold": true,
+ "font_size_half_pts": 22
+ }
+ }
+}
diff --git a/_docstyle/reference.docx b/_docstyle/reference.docx
new file mode 100644
index 0000000..49d00a6
Binary files /dev/null and b/_docstyle/reference.docx differ
diff --git a/_docstyle/reference.docx.hash b/_docstyle/reference.docx.hash
new file mode 100644
index 0000000..7347d0c
--- /dev/null
+++ b/_docstyle/reference.docx.hash
@@ -0,0 +1 @@
+df16fcecdd10278402b00fd6628083a56c8eb6d3d63d2ea55398b08e95ea7787
diff --git a/_extensions/docstyle/_extension.yml b/_extensions/docstyle/_extension.yml
new file mode 100644
index 0000000..7c85750
--- /dev/null
+++ b/_extensions/docstyle/_extension.yml
@@ -0,0 +1,28 @@
+title: docstyle
+author: POPCORN Initiative
+version: 0.1.0
+quarto-required: ">=1.4.0"
+contributes:
+ formats:
+ docx:
+ # Reference document generated by pre-render hook from CSS configuration
+ reference-doc: _docstyle/reference.docx
+ # Suppress Quarto's default title block; author-plate.lua renders it instead
+ title-block-style: none
+
+ # Lua filters applied in order
+ # page-section injects section breaks for named page styles (landscape, etc.)
+ # char-style runs at post-quarto to see expanded shortcodes
+ filters:
+ - page-section.lua
+ - toc-field.lua
+ - table-style.lua
+ - figure.lua
+ - list-style.lua
+ - version-history.lua
+ - author-plate.lua
+ - at: post-quarto
+ path: char-style.lua
+ - comment-inject.lua
+ - revisions-inject.lua
+ - zotero-inject.lua
diff --git a/_extensions/docstyle/author-plate.lua b/_extensions/docstyle/author-plate.lua
new file mode 100644
index 0000000..76df490
--- /dev/null
+++ b/_extensions/docstyle/author-plate.lua
@@ -0,0 +1,432 @@
+-- author-plate.lua
+-- Pandoc Lua filter that generates a formatted author plate from YAML metadata
+--
+-- Usage in QMD:
+-- ::: author-plate
+-- :::
+--
+-- Configuration in _quarto.yml (under docstyle.author-plate):
+-- enabled: true # When false, suppresses the docstyle.authors/affiliations deprecation warnings
+-- corresponding-marker: "*" # Symbol for corresponding author
+-- equal-marker: "†" # Symbol for equal contributors
+-- show-orcid: false # Show ORCID after author name
+-- show-email: true # Show corresponding author email
+-- affiliation-style: numbered # numbered (superscripts) or inline
+--
+-- Author metadata in QMD YAML front matter (Quarto manuscript format):
+-- author:
+-- - name:
+-- given: "First"
+-- family: "Last"
+-- orcid: "0000-0000-0000-0000"
+-- email: "author@example.com"
+-- corresponding: true
+-- equal-contributor: true
+-- affiliations:
+-- - ref: inst1
+-- affiliations:
+-- - id: inst1
+-- name: "Institution Name"
+-- department: "Department"
+-- city: "City"
+-- region: "Province"
+-- country: "Country"
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+-- File-local FORMAT, initialised to "openxml".
+-- NOTE(review): being a local, this shadows any global named FORMAT for the
+-- rest of this file, so the FORMAT comparisons in Div() and Pandoc() below
+-- read this value rather than a global one — confirm that is intended.
+local FORMAT = "openxml"
+
+-- Store metadata
+-- `authors` and `affiliations` are filled in by Meta() and read by the plate
+-- builder; both stay nil when the document carries no such metadata.
+local authors = nil
+local affiliations = nil
+-- Defaults for the plate options; Meta() overrides them from
+-- docstyle.author-plate when those keys are present.
+local config = {
+ corresponding_marker = "*",
+ equal_marker = "†",
+ show_orcid = false,
+ show_email = true,
+ affiliation_style = "numbered"
+}
+
+-- Unicode superscript digits
+-- Keyed by the ASCII digit character; used by to_superscript().
+local superscripts = {
+ ["0"] = "⁰", ["1"] = "¹", ["2"] = "²", ["3"] = "³", ["4"] = "⁴",
+ ["5"] = "⁵", ["6"] = "⁶", ["7"] = "⁷", ["8"] = "⁸", ["9"] = "⁹"
+}
+
+-- Use shared xml_escape from field-code-utils
+local xml_escape = fcu.xml_escape
+
+-- Render a value with Unicode superscript digits (e.g. 12 -> "¹²").
+-- The value is passed through tostring() first; any character that has no
+-- entry in `superscripts` is kept unchanged.
+local function to_superscript(num)
+  -- gsub with a table replacement keeps the matched character whenever the
+  -- lookup yields nil. The outer parentheses drop gsub's second result
+  -- (the substitution count) so only the string is returned.
+  return (tostring(num):gsub(".", superscripts))
+end
+
+-- Resolve the display name for one author metadata entry.
+--
+-- Lookup order:
+--   1. name.literal (Quarto's normalized by-author form)
+--   2. name.given / name.family ("Given Family", or whichever part exists)
+--   3. the name value itself, stringified — covers a name supplied as plain
+--      text (`name: "Jane Doe"`) instead of a given/family map
+--   4. the whole author entry, stringified (entries without a name field)
+-- Returns "" when nothing usable is found.
+local function get_author_name(author)
+  local stringify = pandoc.utils.stringify
+  local name = author["name"]
+
+  if name then
+    if name["literal"] then
+      return stringify(name["literal"])
+    end
+
+    local given = name["given"] and stringify(name["given"]) or ""
+    local family = name["family"] and stringify(name["family"]) or ""
+    if given ~= "" and family ~= "" then
+      return given .. " " .. family
+    end
+    if family ~= "" then
+      return family
+    end
+    if given ~= "" then
+      return given
+    end
+
+    -- No structured parts at all: previously this returned "", silently
+    -- dropping a name written as plain text. Use the name's own text.
+    return stringify(name) or ""
+  end
+
+  -- No name field: fall back to the text of the whole entry.
+  local whole = stringify(author)
+  if whole and whole ~= "" then
+    return whole
+  end
+
+  return ""
+end
+
+-- Index affiliation metadata for lookup and for ordered display.
+-- Accepts raw affiliations as well as Quarto's by-affiliation entries.
+--
+-- Returns two tables:
+--   * a map from affiliation id to its record (only entries that have an id)
+--   * a list of all records in input order
+-- Each record is { id, number, display }, where `number` is the 1-based
+-- position in the input and `display` joins the available parts with ", ".
+-- Both tables are empty when `affs` is nil.
+local function build_affiliation_map(affs)
+  local by_id, in_order = {}, {}
+  if not affs then
+    return by_id, in_order
+  end
+
+  -- Display order: Department, Name, City, Region, Country
+  local display_fields = { "department", "name", "city", "region", "country" }
+
+  for position, aff in ipairs(affs) do
+    local pieces = {}
+    for _, field in ipairs(display_fields) do
+      local value = aff[field]
+      if value then
+        pieces[#pieces + 1] = pandoc.utils.stringify(value)
+      end
+    end
+
+    local record = {
+      id = aff["id"] and pandoc.utils.stringify(aff["id"]) or nil,
+      number = position,
+      display = table.concat(pieces, ", ")
+    }
+
+    if record.id then
+      by_id[record.id] = record
+    end
+    in_order[#in_order + 1] = record
+  end
+
+  return by_id, in_order
+end
+
+-- Collect the affiliation numbers that belong to one author.
+--
+-- Each entry of author.affiliations is expected to be a table carrying either
+-- an `id` (Quarto's resolved form) or a `ref`; `id` wins when both exist.
+-- Entries that are not tables, or whose key is missing from `aff_map`, are
+-- skipped. Returns a list of numbers in the author's own order (empty when
+-- the author has no affiliations).
+local function get_author_affiliations(author, aff_map)
+  local found = {}
+  local refs = author["affiliations"]
+  if not refs then
+    return found
+  end
+
+  for _, ref in ipairs(refs) do
+    local key = nil
+    if type(ref) == "table" then
+      local raw = ref["id"] or ref["ref"]
+      if raw then
+        key = pandoc.utils.stringify(raw)
+      end
+    end
+
+    local entry = key and aff_map[key]
+    if entry then
+      found[#found + 1] = entry.number
+    end
+  end
+
+  return found
+end
+
+-- Report whether an author entry has a flag such as "corresponding" set.
+--
+-- The flag is looked up in author.attributes first (Quarto's normalized
+-- location) and then at the top level of the entry. A boolean is returned
+-- as-is; any other value counts as set only when its text is "true" or "1".
+-- Returns false when the flag is absent from both places.
+local function has_attribute(author, attr)
+  local function is_set(val)
+    if type(val) == "boolean" then
+      return val
+    end
+    local text = pandoc.utils.stringify(val)
+    return text == "true" or text == "1"
+  end
+
+  local nested = author["attributes"]
+  if nested and nested[attr] then
+    return is_set(nested[attr])
+  end
+
+  if author[attr] then
+    return is_set(author[attr])
+  end
+
+  return false
+end
+
+-- Collect authors, affiliations and plate options from document metadata.
+--
+-- Author source priority: docstyle.authors (avoids Pandoc's native title
+-- block), then by-author (Quarto's normalized format), then author.
+-- Affiliation priority mirrors it: docstyle.affiliations, by-affiliation,
+-- affiliations. Keys under docstyle.author-plate override the defaults held
+-- in `config`. Counts and deprecation notices are written to stderr.
+-- Always returns nil, leaving the metadata itself unchanged.
+function Meta(meta)
+  local stringify = pandoc.utils.stringify
+  local docstyle = meta.docstyle
+  local plate_opts = docstyle and docstyle["author-plate"]
+
+  -- Accept either a real boolean or the text "true".
+  local function as_flag(val)
+    return (type(val) == "boolean" and val) or (stringify(val) == "true")
+  end
+
+  -- With the plate disabled, docstyle.authors in _quarto.yml is the correct
+  -- pattern, so the deprecation warnings below are suppressed in that case.
+  local plate_enabled = true
+  if plate_opts and plate_opts["enabled"] ~= nil then
+    plate_enabled = as_flag(plate_opts["enabled"])
+  end
+
+  -- Authors
+  if docstyle and docstyle["authors"] then
+    authors = docstyle["authors"]
+    if plate_enabled then
+      io.stderr:write("[author-plate] Warning: docstyle.authors is deprecated. " ..
+        "Use standard Quarto author: metadata instead. " ..
+        "See https://quarto.org/docs/journals/authors.html\n")
+    end
+    io.stderr:write("[author-plate] Found " .. #authors .. " authors (from docstyle.authors)\n")
+  elseif meta["by-author"] then
+    authors = meta["by-author"]
+    io.stderr:write("[author-plate] Found " .. #authors .. " authors (from by-author)\n")
+  elseif meta.author then
+    authors = meta.author
+    io.stderr:write("[author-plate] Found " .. #authors .. " authors (from author - basic)\n")
+  end
+
+  -- Affiliations
+  if docstyle and docstyle["affiliations"] then
+    affiliations = docstyle["affiliations"]
+    if plate_enabled then
+      io.stderr:write("[author-plate] Warning: docstyle.affiliations is deprecated. " ..
+        "Use standard Quarto affiliations: metadata instead.\n")
+    end
+    io.stderr:write("[author-plate] Found " .. #affiliations .. " affiliations (from docstyle.affiliations)\n")
+  elseif meta["by-affiliation"] then
+    affiliations = meta["by-affiliation"]
+    io.stderr:write("[author-plate] Found " .. #affiliations .. " affiliations (from by-affiliation)\n")
+  elseif meta.affiliations then
+    affiliations = meta.affiliations
+    io.stderr:write("[author-plate] Found " .. #affiliations .. " affiliations\n")
+  end
+
+  -- Plate options
+  if plate_opts then
+    local marker = plate_opts["corresponding-marker"]
+    if marker then
+      config.corresponding_marker = stringify(marker)
+    end
+
+    marker = plate_opts["equal-marker"]
+    if marker then
+      config.equal_marker = stringify(marker)
+    end
+
+    if plate_opts["show-orcid"] ~= nil then
+      config.show_orcid = as_flag(plate_opts["show-orcid"])
+    end
+
+    if plate_opts["show-email"] ~= nil then
+      config.show_email = as_flag(plate_opts["show-email"])
+    end
+
+    local style = plate_opts["affiliation-style"]
+    if style then
+      config.affiliation_style = stringify(style)
+    end
+  end
+
+  return nil
+end
+
+-- Build the author plate XML
+--
+-- Assembles, in order: one author paragraph (names followed by affiliation
+-- numbers and corresponding/equal markers), a spacer, one line per
+-- affiliation, a spacer, then the optional "Corresponding author" and
+-- "contributed equally" lines. Reads the file-level `authors`,
+-- `affiliations` and `config`; config.affiliation_style is not consulted.
+-- Returns the concatenated string, or nil when there are no authors.
+-- NOTE(review): several string literals below are empty ('') at the points
+-- where the adjacent comments describe run/paragraph markup — confirm they
+-- carry the intended OpenXML.
+local function build_author_plate_xml()
+ if not authors or #authors == 0 then
+ return nil
+ end
+
+ local aff_map, aff_ordered = build_affiliation_map(affiliations)
+ local blocks = {}
+
+ -- Build author line with superscript affiliations
+ local author_runs = {}
+ local corresponding_email = nil
+ local has_equal_contributors = false
+
+ for i, author in ipairs(authors) do
+ local name = get_author_name(author)
+ local aff_nums = get_author_affiliations(author, aff_map)
+ local is_corresponding = has_attribute(author, "corresponding")
+ local is_equal = has_attribute(author, "equal-contributor")
+
+ if is_equal then has_equal_contributors = true end
+
+ -- Get email for corresponding author
+ -- (assigned on every match, so the last corresponding author with an
+ -- email is the one reported)
+ if is_corresponding and author.email then
+ corresponding_email = pandoc.utils.stringify(author.email)
+ end
+
+ -- Build superscript string
+ -- Order: affiliation numbers, corresponding marker, equal marker.
+ local superscript_parts = {}
+ for _, num in ipairs(aff_nums) do
+ table.insert(superscript_parts, to_superscript(num))
+ end
+ if is_corresponding then
+ table.insert(superscript_parts, config.corresponding_marker)
+ end
+ if is_equal then
+ table.insert(superscript_parts, config.equal_marker)
+ end
+ local superscript_str = table.concat(superscript_parts, ",")
+
+ -- Add ORCID if configured
+ local orcid_str = ""
+ if config.show_orcid and author.orcid then
+ orcid_str = " " .. pandoc.utils.stringify(author.orcid)
+ end
+
+ -- Build run XML for this author
+ local author_xml = '' .. xml_escape(name) .. ''
+
+ -- Add superscript
+ if superscript_str ~= "" then
+ author_xml = author_xml ..
+ '' ..
+ '' .. xml_escape(superscript_str) .. ''
+ end
+
+ -- Add ORCID
+ if orcid_str ~= "" then
+ author_xml = author_xml .. '' .. xml_escape(orcid_str) .. ''
+ end
+
+ -- Add separator (comma) unless last author
+ if i < #authors then
+ author_xml = author_xml .. ', '
+ end
+
+ table.insert(author_runs, author_xml)
+ end
+
+ -- Author paragraph (centered, Author style)
+ local author_para = '' ..
+ '' ..
+ table.concat(author_runs) ..
+ ''
+ table.insert(blocks, author_para)
+
+ -- Empty paragraph for spacing
+ table.insert(blocks, '')
+
+ -- Affiliation lines (using Affiliation style)
+ -- One line per entry of aff_ordered, whether or not any author cites it.
+ for _, aff in ipairs(aff_ordered) do
+ local aff_line = to_superscript(aff.number) .. " " .. aff.display
+ local aff_para = '' ..
+ '' ..
+ '' .. xml_escape(aff_line) .. '' ..
+ ''
+ table.insert(blocks, aff_para)
+ end
+
+ -- Empty paragraph for spacing before footnotes
+ table.insert(blocks, '')
+
+ -- Corresponding author line (using Affiliation style for consistency)
+ if config.show_email and corresponding_email then
+ local corr_line = config.corresponding_marker .. "Corresponding author: " .. corresponding_email
+ local corr_para = '' ..
+ '' ..
+ '' .. xml_escape(corr_line) .. '' ..
+ ''
+ table.insert(blocks, corr_para)
+ end
+
+ -- Equal contributors line (using Affiliation style for consistency)
+ if has_equal_contributors then
+ local equal_line = config.equal_marker .. "These authors contributed equally to this work"
+ local equal_para = '' ..
+ '' ..
+ '' .. xml_escape(equal_line) .. '' ..
+ ''
+ table.insert(blocks, equal_para)
+ end
+
+ return table.concat(blocks)
+end
+
+-- Replace a `::: author-plate` Div with the generated author plate.
+--
+-- Returns nil (element left unchanged) for Divs without the class and for
+-- non-Word output; returns {} (Div removed) when there is no author metadata
+-- or nothing could be built; otherwise returns three openxml RawBlocks:
+-- the DOCSTYLE field start, the plate XML, and the field end.
+function Div(div)
+  -- Check if this div has the "author-plate" class
+  if not div.classes:includes("author-plate") then
+    return nil
+  end
+
+  -- Only process for Word output. Pandoc's output format is read from the
+  -- global table explicitly: this file also declares a file-local FORMAT
+  -- that is always "openxml", and testing that local made this guard a
+  -- no-op, so raw OpenXML was emitted for every output format.
+  local target = _G.FORMAT
+  if target ~= "docx" and target ~= "openxml" then
+    io.stderr:write("[author-plate] Skipping (not docx output)\n")
+    return nil
+  end
+
+  if not authors or #authors == 0 then
+    io.stderr:write("[author-plate] No author metadata found\n")
+    return {} -- Remove the div entirely
+  end
+
+  io.stderr:write("[author-plate] Generating author plate with " .. #authors .. " authors\n")
+
+  -- Build the author plate XML
+  local plate_xml = build_author_plate_xml()
+  if not plate_xml then
+    return {}
+  end
+
+  -- Wrap in ADDIN DOCSTYLE field code (using shared utility)
+  return {
+    pandoc.RawBlock("openxml", fcu.build_div_field_start("author-plate")),
+    pandoc.RawBlock("openxml", plate_xml),
+    pandoc.RawBlock("openxml", fcu.build_block_field_end())
+  }
+end
+
+-- Normalise FORMAT to "openxml" when it names a Word target.
+-- The document itself is never modified (always returns nil).
+-- NOTE(review): this function is not listed in the filter table returned at
+-- the end of this file, and FORMAT here resolves to the file-local variable
+-- declared near the top — confirm whether this function is still needed.
+function Pandoc(doc)
+  local is_word = (FORMAT == "docx") or (FORMAT == "openxml")
+  if is_word then
+    FORMAT = "openxml"
+  end
+  return nil
+end
+
+-- Filter passes, applied in list order: Meta first (collects authors,
+-- affiliations and options), then Div (renders the plate from them).
+-- Only these two handlers are registered.
+return {
+ { Meta = Meta },
+ { Div = Div }
+}
diff --git a/_extensions/docstyle/char-style.lua b/_extensions/docstyle/char-style.lua
new file mode 100644
index 0000000..07c36a8
--- /dev/null
+++ b/_extensions/docstyle/char-style.lua
@@ -0,0 +1,172 @@
+-- char-style.lua
+-- Pandoc Lua filter that converts spans with style classes to Word character styles
+--
+-- Usage in QMD:
+-- Date: [{{< meta version-summary.date >}}]{.date} -- Shortcode with styling
+-- Date: []{.date} -- Auto-populated from metadata
+-- Custom: [my text]{.date} -- Explicit content
+--
+-- All three syntaxes work. Empty spans auto-populate from version-summary metadata.
+-- Shortcode syntax is preferred as it's explicit and works with any metadata field.
+--
+-- This applies w:rStyle to the run, creating a character-level style in Word.
+-- The style must exist in reference.docx (generated from CSS via docstyle).
+--
+-- Round-trip support: Each styled span is wrapped in an ADDIN DOCSTYLE field code
+-- that carries the original QMD source as JSON metadata. During harvest, the field
+-- code's instrText is parsed to restore the exact QMD source (e.g., shortcodes).
+-- See development/spec-round-trip-mechanism.md for the full specification.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+-- Debug logging: messages go to stderr only when DOCSTYLE_DEBUG=1.
+-- The caller supplies any trailing newline; nothing is appended here.
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write(msg)
+end
+
+-- Metadata values for auto-population (set in Meta filter)
+local meta_values = {
+ date = nil,
+ version = nil
+}
+
+-- Map a span class to the Word character style ID it should use.
+-- The schema entry from fcu.get_char_class() wins when it defines
+-- `word_style`; otherwise a built-in table covers the four standard classes.
+-- Returns nil for a class known to neither.
+local function get_style_id(class)
+  local schema_entry = fcu.get_char_class(class)
+  local from_schema = schema_entry and schema_entry.word_style
+  if from_schema then
+    return from_schema
+  end
+
+  -- Fallback for classes not in schema
+  local builtin = {
+    date = "Date",
+    version = "Version",
+    author = "Author",
+    affiliation = "Affiliation"
+  }
+  return builtin[class]
+end
+
+-- List of supported style classes (for iteration)
+local supported_classes = {"date", "version", "author", "affiliation"}
+
+-- Convert a span carrying a supported style class into a Word field code.
+--
+-- The first class from `supported_classes` found on the span decides the
+-- style. A span with text uses that text; an empty span is filled from
+-- `meta_values` for its class. Returns an openxml RawInline built by
+-- fcu.build_char_field_code, or nil (span left untouched) when the output is
+-- not Word, no supported class matches, or an empty span has no metadata
+-- value to fall back on.
+function Span(el)
+  -- Only process for Word output
+  if not (FORMAT == "docx" or FORMAT == "openxml") then
+    return nil
+  end
+
+  -- Find the first supported class present on this span
+  local matched_class, style_id
+  for _, candidate in ipairs(supported_classes) do
+    if el.classes:includes(candidate) then
+      matched_class, style_id = candidate, get_style_id(candidate)
+      break
+    end
+  end
+  if not style_id then
+    return nil
+  end
+
+  local text = fcu.inlines_to_text(el.content)
+  local has_text = not (text == "" or text == nil)
+
+  if has_text then
+    debug("[char-style] Applying style '" .. style_id .. "' to: " .. text .. "\n")
+  elseif matched_class and meta_values[matched_class] then
+    -- Auto-populate empty spans from metadata
+    text = meta_values[matched_class]
+    debug("[char-style] Auto-populated '" .. style_id .. "' from metadata: " .. text .. "\n")
+  else
+    debug("[char-style] Warning: Empty span with style '" .. style_id .. "' and no metadata value\n")
+    return nil -- Keep the span as-is if we can't populate it
+  end
+
+  -- Build field code XML using shared utility
+  local field_xml = fcu.build_char_field_code(style_id, text, matched_class)
+  debug("[char-style] Emitting field code for '" .. matched_class .. "'\n")
+
+  return pandoc.RawInline('openxml', field_xml)
+end
+
+-- Process Div elements with .center class for paragraph alignment
+--
+-- Returns nil (Div unchanged) when the output is not Word or the Div lacks
+-- the `center` class. Otherwise returns the Div's blocks as a flat list:
+-- each Para is replaced by an openxml RawBlock built from its inlines, and
+-- every other block is passed through untouched.
+-- NOTE(review): inside a Para only RawInline(openxml), Str and Space inlines
+-- are copied into `runs`; any other inline type is not carried over. Also,
+-- the string literals wrapping the runs and the paragraph are empty ('') —
+-- confirm they carry the intended OpenXML.
+function Div(el)
+ -- Only process for Word output
+ if FORMAT ~= "docx" and FORMAT ~= "openxml" then
+ return nil
+ end
+
+ -- Check if this div has the center class
+ if not el.classes:includes('center') then
+ return nil
+ end
+
+ debug("[char-style] Applying center alignment to div\n")
+
+ -- For each paragraph in the div, add custom-style="Centered" attribute
+ -- This requires a "Centered" style in reference.docx with center alignment
+ -- Alternatively, we can directly inject the alignment via RawBlock
+ local result = {}
+ for _, block in ipairs(el.content) do
+ if block.t == "Para" then
+ -- Convert paragraph content to runs, wrapped in a centered paragraph
+ local runs = {}
+ for _, inline in ipairs(block.content) do
+ -- If it's already a RawInline openxml (from Span filter), keep it
+ if inline.t == "RawInline" and inline.format == "openxml" then
+ table.insert(runs, inline.text)
+ elseif inline.t == "Str" then
+ table.insert(runs, '' .. fcu.xml_escape(inline.text) .. '')
+ elseif inline.t == "Space" then
+ table.insert(runs, ' ')
+ end
+ end
+
+ local para_xml = '' .. table.concat(runs) .. ''
+ table.insert(result, pandoc.RawBlock('openxml', para_xml))
+ else
+ -- Keep other blocks as-is
+ table.insert(result, block)
+ end
+ end
+
+ return result
+end
+
+-- Capture version-summary.date and version-summary.version for later use.
+-- The values are stored as plain strings in `meta_values`, where Span() reads
+-- them to fill empty styled spans. Always returns nil (metadata unchanged).
+function Meta(meta)
+  if FORMAT == "docx" or FORMAT == "openxml" then
+    debug("[char-style] Filter active for Word output\n")
+  end
+
+  local summary = meta["version-summary"]
+  if not summary then
+    return nil
+  end
+
+  if summary.date then
+    meta_values.date = pandoc.utils.stringify(summary.date)
+    debug("[char-style] Found version-summary.date: " .. meta_values.date .. "\n")
+  end
+
+  if summary.version then
+    meta_values.version = pandoc.utils.stringify(summary.version)
+    debug("[char-style] Found version-summary.version: " .. meta_values.version .. "\n")
+  end
+
+  return nil
+end
+
+-- Filter order: Meta first (to extract values), then Span (character styles), then Div (centering)
+return {
+ { Meta = Meta },
+ { Span = Span },
+ { Div = Div }
+}
diff --git a/_extensions/docstyle/comment-inject.lua b/_extensions/docstyle/comment-inject.lua
new file mode 100644
index 0000000..403770d
--- /dev/null
+++ b/_extensions/docstyle/comment-inject.lua
@@ -0,0 +1,162 @@
+-- comment-inject.lua
+-- Pandoc Lua filter that converts comment markers to OpenXML comment markers
+--
+-- Supported formats (HTML comments only):
+-- 1. Range comment: text
+-- 2. Point comment:
+--
+-- The HTML comment format is robust because it can span complex structures
+-- like tracked changes, multiple paragraphs, and nested formatting without
+-- breaking Pandoc's parsing.
+--
+-- Fallback behavior:
+-- - Start marker without end: Converts to point comment at document end
+-- - End marker without start: Ignored (orphan end markers are harmless)
+--
+-- After rendering, R post-processing (inject_comments) adds the actual
+-- comments.xml file to the DOCX container.
+
+-- Debug logging: messages go to stderr only when DOCSTYLE_DEBUG=1.
+-- The caller supplies any trailing newline; nothing is appended here.
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write(msg)
+end
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+local xml_escape = fcu.xml_escape
+local parse_comment_marker = fcu.parse_comment_marker
+
+-- Track comment states for fallback handling:
+-- "started" = saw start marker, waiting for end
+-- "completed" = saw both start and end (proper range comment)
+-- "point" = point comment (no range, just a marker)
+local comment_states = {}
+
+-- Generate OpenXML for comment range start
+-- Called by RawInline() when a start marker is found.
+-- NOTE(review): as written this returns an empty string and never uses
+-- `id` — confirm the literal carries the intended markup.
+local function comment_start_xml(id)
+ return ''
+end
+
+-- Generate OpenXML for comment range end (includes the clickable reference marker)
+-- Called by RawInline() for a matching end marker and by Pandoc() when
+-- closing orphan start markers.
+-- NOTE(review): as written this concatenates two empty strings and never
+-- uses `id` — confirm the literals carry the intended markup.
+local function comment_end_xml(id)
+ return '' ..
+ ''
+end
+
+-- Generate OpenXML for point comment (start + end + reference together)
+-- Called by RawInline() when a point marker is found.
+-- NOTE(review): as written this concatenates three empty strings and never
+-- uses `id` — confirm the literals carry the intended markup.
+local function comment_point_xml(id)
+ return '' ..
+ '' ..
+ ''
+end
+
+-- Turn HTML comment markers into OpenXML comment markup.
+--
+-- Only html RawInlines are examined, and only for Word output; anything
+-- parse_comment_marker() does not recognise is left alone (nil). Each marker
+-- updates `comment_states[id]`:
+--   point -> "point",     emits the complete point markup
+--   start -> "started",   emits the range start
+--   end   -> "completed", emits the range end, but only after a start
+-- A duplicate or orphan end marker is replaced by an empty openxml inline,
+-- which removes it from the output.
+function RawInline(el)
+  local word_output = (FORMAT == "docx" or FORMAT == "openxml")
+  if not word_output or el.format ~= "html" then
+    return nil
+  end
+
+  local id, marker_type = parse_comment_marker(el.text)
+  if not id then
+    return nil -- Not a comment marker, leave as-is
+  end
+
+  if marker_type == "point" then
+    debug("[comment-inject] Found point comment id=" .. id .. "\n")
+    comment_states[id] = "point"
+    return pandoc.RawInline('openxml', comment_point_xml(id))
+  end
+
+  if marker_type == "start" then
+    debug("[comment-inject] Found comment start marker id=" .. id .. "\n")
+    comment_states[id] = "started"
+    -- The end arrives later, or the Pandoc pass closes it at document end
+    return pandoc.RawInline('openxml', comment_start_xml(id))
+  end
+
+  if marker_type ~= "end" then
+    return nil
+  end
+
+  local state = comment_states[id]
+  if state == "started" then
+    -- Normal case: matching end for a start we saw
+    debug("[comment-inject] Found comment end marker id=" .. id .. "\n")
+    comment_states[id] = "completed"
+    return pandoc.RawInline('openxml', comment_end_xml(id))
+  end
+
+  if state == "completed" then
+    debug("[comment-inject] Warning: duplicate end marker id=" .. id .. " (ignoring)\n")
+  else
+    debug("[comment-inject] Warning: orphan end marker id=" .. id .. " (no matching start, ignoring)\n")
+  end
+  return pandoc.RawInline('openxml', '') -- Empty, effectively removes it
+end
+
+-- Handle orphan start markers at document end
+--
+-- Runs after the RawInline pass has filled `comment_states`. Every id still
+-- in the "started" state gets its end markup (comment_end_xml) appended at
+-- the end of the document: onto the last block when that is a Para or
+-- Plain, otherwise inside a new trailing Plain block. Those ids are then
+-- recorded as "point". Returns nil when the output is not Word or nothing
+-- needs closing; otherwise returns the modified document.
+function Pandoc(doc)
+  if FORMAT ~= "docx" and FORMAT ~= "openxml" then
+    return nil
+  end
+
+  debug("[comment-inject] Filter active for Word output\n")
+
+  -- Collect ids that were started but never completed
+  local orphan_ids = {}
+  for id, state in pairs(comment_states) do
+    if state == "started" then
+      orphan_ids[#orphan_ids + 1] = id
+    end
+  end
+
+  if #orphan_ids == 0 then
+    return nil -- No orphans, document unchanged
+  end
+
+  -- pairs() visits keys in no defined order, so sort the ids to make the
+  -- emitted closing markers identical from one render to the next. Comparing
+  -- the string forms keeps the sort safe even if id types were ever mixed.
+  table.sort(orphan_ids, function(a, b)
+    return tostring(a) < tostring(b)
+  end)
+
+  debug("[comment-inject] Warning: " .. #orphan_ids .. " orphan start marker(s) found, closing at document end\n")
+  for _, id in ipairs(orphan_ids) do
+    debug("[comment-inject] - Orphan comment id=" .. id .. "\n")
+    comment_states[id] = "point"
+  end
+
+  -- Inject the end+reference markers at the very end of the document
+  if #doc.blocks > 0 then
+    local last_block = doc.blocks[#doc.blocks]
+
+    -- Create the closing XML for all orphan comments
+    local closing_inlines = {}
+    for _, id in ipairs(orphan_ids) do
+      table.insert(closing_inlines, pandoc.RawInline('openxml', comment_end_xml(id)))
+    end
+
+    -- Append to last block based on its type
+    if last_block.t == "Para" or last_block.t == "Plain" then
+      for _, inline in ipairs(closing_inlines) do
+        table.insert(last_block.content, inline)
+      end
+    else
+      -- For other block types, add a new Plain block with the closures
+      table.insert(doc.blocks, pandoc.Plain(closing_inlines))
+    end
+  end
+
+  return doc
+end
+
+return {
+ -- First pass: process inline elements (populates comment_states)
+ { RawInline = RawInline },
+ -- Second pass: handle orphan comments at document level
+ { Pandoc = Pandoc }
+}
diff --git a/_extensions/docstyle/default.css b/_extensions/docstyle/default.css
new file mode 100644
index 0000000..320ba97
--- /dev/null
+++ b/_extensions/docstyle/default.css
@@ -0,0 +1,339 @@
+/* docstyle Default Styles
+ * CIHR-compliant formatting for Canadian health research documents
+ *
+ * Based on CIHR Application Formatting Requirements:
+ * https://cihr-irsc.gc.ca/e/29300.html
+ *
+ * Requirements:
+ * - Font: minimum 12pt Times New Roman, black (can be larger)
+ * - Margins: 2cm (0.79in) minimum, strictly enforced
+ * - Line spacing: single minimum
+ * - Page size: letter (8.5" x 11")
+ */
+
+/* ==========================================================================
+ TYPOGRAPHY - CIHR Compliant
+ ========================================================================== */
+
+/* Body text - 12pt Times New Roman, single spaced */
+p, body {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+ line-height: 1;
+}
+
+/* Headings - Times New Roman, all 12pt; levels are distinguished by bold (h1, h2), bold italic (h3), italic (h4) and underline (h5) */
+h1 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+h2 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+h3 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ font-style: italic;
+ color: #000000;
+}
+
+h4 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ font-style: italic;
+ color: #000000;
+}
+
+h5 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ text-decoration: underline;
+ color: #000000;
+}
+
+/* Links - blue for visibility */
+a {
+ color: #0000EE;
+}
+
+/* ==========================================================================
+ DOCUMENT STRUCTURE
+ ========================================================================== */
+
+/* Title - bold, slightly larger */
+.title {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 14pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+/* Subtitle */
+.subtitle {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+}
+
+/* Author and affiliation blocks */
+.author {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ text-align: center;
+}
+
+.affiliation {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-style: italic;
+ text-align: center;
+ color: #000000;
+}
+
+/* Date and version display */
+.date {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ text-align: center;
+}
+
+.version {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ text-align: center;
+}
+
+/* Header and footer */
+.header {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ color: #000000;
+}
+
+.footer {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ color: #000000;
+}
+
+/* ==========================================================================
+ TABLE OF CONTENTS
+ ========================================================================== */
+
+.toc-heading {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+.toc-1 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+ line-height: 1;
+}
+
+.toc-2 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+ margin-left: 12pt;
+ line-height: 1;
+}
+
+.toc-3 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+ margin-left: 24pt;
+ line-height: 1;
+}
+
+/* ==========================================================================
+ BODY TEXT VARIANTS
+ Pandoc assigns content to BodyText/FirstParagraph/Compact — not Normal.
+ These styles inherit from Normal via basedOn chains in reference.docx.
+ Use these selectors only when you need to override the inherited values.
+ ========================================================================== */
+
+/* .body-text — inherits from Normal; override only if body paragraphs
+ need different formatting than headings or other Normal-based styles */
+/* .body-text { } */
+
+/* .first-paragraph — first paragraph after a heading (basedOn BodyText) */
+/* .first-paragraph { } */
+
+/* .compact — tight list paragraphs (basedOn BodyText) */
+/* .compact { margin-bottom: 0; } */
+
+/* blockquote — block quotations (basedOn BodyText) */
+/* blockquote { margin-left: 0.5in; margin-right: 0.5in; } */
+
+/* ==========================================================================
+ CAPTIONS AND FIGURES
+ ========================================================================== */
+
+caption, .caption {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ color: #000000;
+}
+
+.table-caption {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+.image-caption {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-style: italic;
+ color: #000000;
+}
+
+/* .figure — image container paragraph */
+/* .figure { text-align: center; } */
+
+/* .captioned-figure — image with caption, keeps image and caption together */
+/* .captioned-figure { text-align: center; } */
+
+/* ==========================================================================
+ BIBLIOGRAPHY
+ ========================================================================== */
+
+.bibliography {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ color: #000000;
+ text-indent: -0.5in;
+ padding-left: 0.5in;
+}
+
+/* ==========================================================================
+ DEFINITION LISTS
+ ========================================================================== */
+
+dt {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+dd {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ color: #000000;
+ margin-left: 0.25in;
+}
+
+/* ==========================================================================
+ FOOTNOTES
+ ========================================================================== */
+
+.footnote-text {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-weight: normal;
+ color: #000000;
+ line-height: 1;
+}
+
+/* ==========================================================================
+ LISTS
+ ========================================================================== */
+
+ol {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+}
+
+ul {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+}
+
+/* ==========================================================================
+ TABLES
+ ========================================================================== */
+
+/* Formal table - top/bottom borders, shaded header */
+.table-formal {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ border-top: 1pt solid #000000;
+ border-bottom: 1pt solid #000000;
+ border-left: none;
+ border-right: none;
+}
+
+.table-formal th {
+ background-color: #D9D9D9;
+ font-weight: bold;
+ padding: 4pt;
+}
+
+.table-formal td {
+ padding: 4pt;
+}
+
+/* Grid table - all borders */
+.table-grid {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ border: 1pt solid #000000;
+ border-collapse: collapse;
+}
+
+.table-grid th,
+.table-grid td {
+ border: 1pt solid #000000;
+ padding: 4pt;
+}
+
+.table-grid th {
+ font-weight: bold;
+}
+
+/* ==========================================================================
+ REVISION STYLES (Track Changes Preview)
+ ========================================================================== */
+
+.del {
+ background-color: #ffebe9;
+ color: #6a737d;
+ text-decoration: line-through;
+}
+
+.ins {
+ background-color: #e6ffec;
+ text-decoration: underline;
+}
+
+/* ==========================================================================
+ COMMENT STYLES (Preview)
+ ========================================================================== */
+
+.comment {
+ background-color: #fff3cd;
+ border-bottom: 2px solid #ffc107;
+}
diff --git a/_extensions/docstyle/field-code-utils.lua b/_extensions/docstyle/field-code-utils.lua
new file mode 100644
index 0000000..48a131d
--- /dev/null
+++ b/_extensions/docstyle/field-code-utils.lua
@@ -0,0 +1,682 @@
+-- field-code-utils.lua
+-- Shared utilities for ADDIN DOCSTYLE field code generation
+--
+-- This module provides:
+-- 1. Schema loading from inst/schema/docstyle-field-codes.json
+-- 2. XML and JSON escaping functions
+-- 3. Field code XML builders for all types (char, div, list, section)
+--
+-- All Lua filters should require this module instead of reimplementing
+-- these functions locally.
+
+local M = {} -- module table; populated below and returned at the end of the file
+
+-- Current schema version (must match R's DOCSTYLE_SCHEMA_VERSION).
+-- v2: R-First Assembly - Lua emits text markers, R builds sectPr. Emitted as "version" by build_payload_json.
+M.SCHEMA_VERSION = 2
+
+-- Diagnostics go to stderr and are emitted only when DOCSTYLE_DEBUG=1 is set
+-- in the environment at the time this module is loaded.
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function log_debug(msg)
+  if not DEBUG then return end
+  io.stderr:write("[field-code-utils] " .. msg .. "\n")
+end
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Schema Loading
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Cached schema (loaded once per filter run)
+local cached_schema = nil
+
+-- Locate docstyle-field-codes.json. Candidates are probed in this order and
+-- the first one that can be opened for reading wins:
+--   1. <dir of this Lua file>/../../inst/schema/   (development layout)
+--   2. <dir of this Lua file>/                     (schema copied into the extension)
+--   3. $QUARTO_PROJECT_DIR/_extensions/docstyle/
+--   4. $QUARTO_PROJECT_DIR/inst/schema/
+-- Returns the matching path, or nil when no candidate can be opened.
+local function find_schema_path()
+  local schema_file = "docstyle-field-codes.json"
+  local candidates = {}
+
+  -- A chunk loaded from a file reports its source as "@<path>"; keep the part up to the last "/"
+  local chunk_source = debug.getinfo(1, "S").source
+  local here = nil
+  if chunk_source:sub(1, 1) == "@" then
+    here = chunk_source:sub(2):match("(.*/)")
+  end
+  if here then
+    candidates[#candidates + 1] = here .. "../../inst/schema/" .. schema_file
+    candidates[#candidates + 1] = here .. schema_file
+  end
+
+  local project_root = os.getenv("QUARTO_PROJECT_DIR")
+  if project_root then
+    candidates[#candidates + 1] = project_root .. "/_extensions/docstyle/" .. schema_file
+    candidates[#candidates + 1] = project_root .. "/inst/schema/" .. schema_file
+  end
+
+  for i = 1, #candidates do
+    local handle = io.open(candidates[i], "r")
+    if handle then
+      handle:close()
+      log_debug("Found schema at: " .. candidates[i])
+      return candidates[i]
+    end
+  end
+
+  return nil
+end
+
+-- Minimal JSON decoder for the schema file: objects, arrays, strings, numbers
+-- (with optional exponent), true, false and null; no external library needed.
+-- String escapes are decoded in one left-to-right pass; chained gsub calls
+-- would misread an escaped backslash followed by "n" as a newline.
+-- JSON null becomes Lua nil, so null members and elements are simply absent.
+-- Text after the first value is ignored. Malformed input raises a Lua error.
+local function parse_json(str)
+  local pos = 1
+  local parse_value -- forward declaration: containers recurse into it
+  local simple_escapes = { ['"'] = '"', ["\\"] = "\\", ["/"] = "/", b = "\b", f = "\f", n = "\n", r = "\r", t = "\t" }
+  local function fail()
+    error("JSON parse error at position " .. pos .. ": " .. str:sub(pos, pos + 20))
+  end
+  local function skip_whitespace()
+    pos = str:match("^%s*()", pos)
+  end
+  -- Skip whitespace, then return the next character without consuming it
+  local function peek()
+    skip_whitespace()
+    return str:sub(pos, pos)
+  end
+
+  local function parse_string()
+    local buf = {}
+    pos = pos + 1 -- skip the opening quote
+    while pos <= #str do
+      local ch = str:sub(pos, pos)
+      if ch == '"' then
+        pos = pos + 1
+        return table.concat(buf)
+      elseif ch == "\\" then
+        local esc = str:sub(pos + 1, pos + 1)
+        local hex = esc == "u" and str:match("^%x%x%x%x", pos + 2)
+        if hex then
+          -- Decoded per code unit; surrogate pairs are not combined
+          buf[#buf + 1] = utf8.char(tonumber(hex, 16))
+          pos = pos + 6
+        else
+          -- Unrecognised escapes are kept verbatim rather than rejected
+          buf[#buf + 1] = simple_escapes[esc] or ("\\" .. esc)
+          pos = pos + 2
+        end
+      else
+        local run = str:match('^[^"\\]+', pos)
+        buf[#buf + 1] = run
+        pos = pos + #run
+      end
+    end
+    fail() -- ran off the end of the input: unterminated string
+  end
+
+  -- Parse "{...}" (is_object) or "[...]"; closer is the matching bracket
+  local function parse_container(is_object, closer)
+    local out = {}
+    pos = pos + 1 -- skip the opening bracket
+    if peek() == closer then pos = pos + 1; return out end
+    while true do
+      if is_object then
+        if peek() ~= '"' then fail() end
+        local key = parse_string()
+        if peek() ~= ":" then fail() end
+        pos = pos + 1
+        out[key] = parse_value()
+      else
+        out[#out + 1] = parse_value()
+      end
+      local sep = peek()
+      if sep ~= "," and sep ~= closer then fail() end
+      pos = pos + 1
+      if sep == closer then return out end
+    end
+  end
+
+  function parse_value()
+    local c = peek()
+    if c == '"' then return parse_string() end
+    if c == "{" then return parse_container(true, "}") end
+    if c == "[" then return parse_container(false, "]") end
+    local word = str:match("^%a+", pos)
+    if word == "true" or word == "false" or word == "null" then
+      pos = pos + #word
+      if word == "null" then return nil end
+      return word == "true"
+    end
+    local num_str = str:match("^-?%d+%.?%d*[eE][+-]?%d+", pos) or str:match("^-?%d+%.?%d*", pos)
+    if not num_str then fail() end
+    pos = pos + #num_str
+    return tonumber(num_str)
+  end
+
+  return parse_value()
+end
+
+-- Load the field-code schema, caching it after the first successful load.
+-- Returns the parsed schema table; a minimal built-in schema when no file is found; or nil when
+-- the file cannot be opened or does not decode to a JSON object (not cached, so the next call retries).
+function M.load_schema()
+  if cached_schema then
+    return cached_schema
+  end
+  local schema_path = find_schema_path()
+  if not schema_path then
+    log_debug("Schema file not found, using built-in defaults")
+    cached_schema = {
+      schema_version = M.SCHEMA_VERSION,
+      char_classes = {},
+      div_types = {},
+      list_classes = {},
+      table_classes = {}
+    }
+    return cached_schema
+  end
+
+  local f = io.open(schema_path, "r")
+  if not f then
+    log_debug("Could not open schema file: " .. schema_path)
+    return nil
+  end
+  local content = f:read("*a")
+  f:close()
+
+  local ok, schema = pcall(parse_json, content)
+  if not ok or type(schema) ~= "table" then
+    -- A top-level null, number or boolean cannot be indexed and would raise below
+    log_debug("Failed to parse schema JSON: " .. tostring(schema))
+    return nil
+  end
+  cached_schema = schema
+  log_debug("Loaded schema version " .. tostring(schema.schema_version or "unknown"))
+  return cached_schema
+end
+
+-- Look up the schema entry for a character-style class.
+-- Returns nil when the schema or its char_classes table is unavailable.
+function M.get_char_class(class)
+  local schema = M.load_schema()
+  local registry = schema and schema.char_classes
+  if not registry then return nil end
+  return registry[class]
+end
+
+-- Look up the schema entry for a div type by name.
+-- Returns nil when the schema or its div_types table is unavailable.
+function M.get_div_type(name)
+  local schema = M.load_schema()
+  local registry = schema and schema.div_types
+  if not registry then return nil end
+  return registry[name]
+end
+
+-- Look up the schema entry for a list class.
+-- Returns nil when the schema or its list_classes table is unavailable.
+function M.get_list_class(class)
+  local schema = M.load_schema()
+  local registry = schema and schema.list_classes
+  if not registry then return nil end
+  return registry[class]
+end
+
+-- Look up the schema entry for a table class.
+-- Returns nil when the schema or its table_classes table is unavailable.
+function M.get_table_class(class)
+  local schema = M.load_schema()
+  local registry = schema and schema.table_classes
+  if not registry then return nil end
+  return registry[class]
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Escaping Functions
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Escape XML special characters for use in OOXML content.
+-- A single pass replaces & < > " ' with their predefined entities, so the
+-- ampersands introduced by one replacement are never escaped again.
+-- Returns "" for nil input.
+function M.xml_escape(text)
+  if not text then return "" end
+  local entities = { ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;", ['"'] = "&quot;", ["'"] = "&apos;" }
+  local escaped = text:gsub("[&<>\"']", entities)
+  return escaped
+end
+
+-- Escape a string for use inside a JSON string value; nil yields "".
+-- Handles backslash, double-quote and control characters (as \n, \r, \t or \u00XX), which JSON forbids raw.
+function M.json_escape(text)
+  if not text then return "" end
+  local named = { ['\\'] = '\\\\', ['"'] = '\\"', ['\n'] = '\\n', ['\r'] = '\\r', ['\t'] = '\\t' }
+  local escaped = text:gsub('[%c"\\]', function(ch) return named[ch] or string.format('\\u%04x', ch:byte()) end)
+  return escaped
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Field Code Builders
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Build the JSON payload stored in a field code's instrText.
+-- @param payload_type: "char", "div", "list", "section", ...
+-- @param fields: extra members (may be nil); only string/number/boolean values are emitted
+-- @return JSON object string (not XML-escaped): "type" and "version" first,
+--   then the extra members sorted by key, because pairs() has no defined order
+function M.build_payload_json(payload_type, fields)
+  local parts = { '"type":"' .. M.json_escape(payload_type) .. '"', '"version":' .. M.SCHEMA_VERSION }
+  local keys = {}
+  for key in pairs(fields or {}) do keys[#keys + 1] = key end
+  table.sort(keys, function(a, b) return tostring(a) < tostring(b) end)
+  for _, key in ipairs(keys) do
+    local value = fields[key]
+    local member = '"' .. M.json_escape(tostring(key)) .. '":'
+    if type(value) == "string" then
+      parts[#parts + 1] = member .. '"' .. M.json_escape(value) .. '"'
+    elseif type(value) == "number" or type(value) == "boolean" then
+      parts[#parts + 1] = member .. tostring(value)
+    end
+  end
+  return "{" .. table.concat(parts, ",") .. "}"
+end
+
+-- Return the QMD source form recorded for a char-styled run.
+-- When the schema entry for `class` has a source_template, that template is
+-- returned unchanged (text is not substituted into it); otherwise the
+-- explicit bracketed-span form "[text]{.class}" is built.
+function M.build_char_source(class, text)
+  local definition = M.get_char_class(class)
+  local template = definition and definition.source_template
+  if template then return template end
+  return "[" .. text .. "]{." .. class .. "}"
+end
+
+-- Build complete ADDIN DOCSTYLE field code XML for char type.
+-- @param style_id: Word style ID (e.g., "Date"); NOTE(review): not referenced by the code as it appears here
+-- @param text: Display text (XML-escaped before use)
+-- @param class: CSS class name (e.g., "date"); stored in the payload together with the QMD source
+-- @return OOXML string
+function M.build_char_field_code(style_id, text, class)
+ local source = M.build_char_source(class, text)
+ local json = M.build_payload_json("char", {
+ class = class,
+ source = source
+ })
+ local json_xml = M.xml_escape(json) -- the JSON payload is XML-escaped before being embedded
+ -- NOTE(review): the quoted fragments below are empty in this copy; they presumably held the run markup — confirm against upstream
+ local display_run = '' ..
+ '' ..
+ '' .. M.xml_escape(text) .. '' ..
+ ''
+
+ return '' ..
+ ' ADDIN DOCSTYLE ' .. json_xml .. ' ' ..
+ '' ..
+ display_run ..
+ ''
+end
+
+-- Build field code start marker for block types.
+-- @param payload_type: payload "type" value, e.g. "div", "list", "section" (this module also passes "table" and "figure")
+-- @param fields: payload fields (e.g., {name = "toc"} or {class = "list-alpha"})
+-- @return OOXML paragraph string embedding ' ADDIN DOCSTYLE ' followed by the XML-escaped JSON payload
+function M.build_block_field_start(payload_type, fields)
+ local json = M.build_payload_json(payload_type, fields)
+ local json_xml = M.xml_escape(json)
+ -- NOTE(review): the empty quoted fragments look like markup lost from this copy — confirm against upstream
+ return '' ..
+ ' ADDIN DOCSTYLE ' .. json_xml .. ' ' ..
+ ''
+end
+
+-- Build field code end marker for block types (counterpart of build_block_field_start).
+-- @return OOXML paragraph string; NOTE(review): the literal is empty in this copy — presumably the closing markup, confirm upstream
+function M.build_block_field_end()
+ return ''
+end
+
+function M.build_div_field_start(name) -- convenience wrapper: opening field code for a "div" payload
+  local fields = { name = name }
+  return M.build_block_field_start("div", fields)
+end
+
+-- Opening field code for a styled list.
+-- `start` is added to the payload only when start_num is given and greater
+-- than 1; otherwise the payload carries the class alone.
+function M.build_list_field_start(class, start_num)
+  local needs_start = start_num and start_num > 1
+  local payload = { class = class, start = needs_start and start_num or nil }
+  return M.build_block_field_start("list", payload)
+end
+
+-- Opening field code for a styled table.
+-- @param class: table class (e.g., "table-formal")
+-- @param attrs: optional table of extra payload members (widths, width,
+--   font-size, etc.); copied after `class`, so an attrs.class entry would
+--   replace it
+function M.build_table_field_start(class, attrs)
+  local payload = { class = class }
+  for key, value in pairs(attrs or {}) do
+    payload[key] = value
+  end
+  return M.build_block_field_start("table", payload)
+end
+
+-- Opening field code for a figure div.
+-- @param id: QMD figure ID (e.g. "fig-consort-flow")
+-- @param attrs: optional table of extra payload members (docpr_id, width,
+--   align, wrap, original_path); copied after `id`, so an attrs.id entry
+--   would replace it
+function M.build_figure_field_start(id, attrs)
+  local payload = { id = id }
+  for key, value in pairs(attrs or {}) do
+    payload[key] = value
+  end
+  return M.build_block_field_start("figure", payload)
+end
+
+-- Opening field code for a section marker.
+-- `attrs` (optional) is copied into the payload after `class`, so an
+-- attrs.class entry would replace it.
+function M.build_section_field_start(class, attrs)
+  local payload = { class = class }
+  for key, value in pairs(attrs or {}) do
+    payload[key] = value
+  end
+  return M.build_block_field_start("section", payload)
+end
+
+-- Build complete section field code in a SINGLE paragraph for R-First Assembly.
+-- This prevents the 3-line gap by emitting BEGIN/instrText/SEPARATE/marker/END
+-- all in one paragraph rather than three separate paragraphs.
+-- @param class: section class (e.g., "section-body")
+-- @param attrs: optional attributes table; copied into the payload after class, so attrs.class would replace it
+-- @param marker_text: the DOCSTYLE_SECTION::... marker text (XML-escaped before use)
+-- @return OOXML paragraph string
+function M.build_section_marker_para(class, attrs, marker_text)
+ local fields = {class = class}
+ if attrs then
+ for k, v in pairs(attrs) do
+ fields[k] = v
+ end
+ end
+ local json = M.build_payload_json("section", fields)
+ local json_xml = M.xml_escape(json)
+ local marker_xml = M.xml_escape(marker_text)
+ -- NOTE(review): the empty quoted fragments look like markup lost from this copy — confirm against upstream
+ return '' ..
+ '' ..
+ ' ADDIN DOCSTYLE ' .. json_xml .. ' ' ..
+ '' ..
+ '' .. marker_xml .. '' ..
+ ''
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Helper Functions
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Flatten inline content to plain text. Str and Code contribute their text; Space, SoftBreak and
+-- LineBreak become one space; any other inline with inline content (Span, Emph, Strong, Link, ...)
+-- is descended into, so nested formatting keeps its text. Notes and content-less inlines add nothing.
+function M.inlines_to_text(inlines)
+  local pieces = {}
+  for _, inline in ipairs(inlines) do
+    if inline.t == "Str" or inline.t == "Code" then
+      pieces[#pieces + 1] = inline.text
+    elseif inline.t == "Space" or inline.t == "SoftBreak" or inline.t == "LineBreak" then
+      pieces[#pieces + 1] = " "
+    elseif inline.t ~= "Note" and inline.content then
+      pieces[#pieces + 1] = M.inlines_to_text(inline.content)
+    end
+  end
+  return table.concat(pieces)
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Page Config Loading (shared across filters)
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Cached page config (set on the first successful load, reused afterwards)
+local cached_page_config = nil
+
+--- Load page-config.json from the _docstyle/ directory.
+-- Looks under the current working directory first, then under $QUARTO_PROJECT_DIR,
+-- so a document rendered from a project subdirectory still finds the file.
+-- Returns the decoded table, or nil if no candidate could be read and decoded.
+function M.load_page_config()
+  if cached_page_config then return cached_page_config end
+
+  local config_paths = { "_docstyle/page-config.json" }
+  local project_dir = os.getenv("QUARTO_PROJECT_DIR")
+  if project_dir and project_dir ~= "" then
+    table.insert(config_paths, project_dir .. "/_docstyle/page-config.json")
+  end
+
+  for _, path in ipairs(config_paths) do
+    local file = io.open(path, "r")
+    if file then
+      local content = file:read("*a")
+      file:close()
+      local ok, config = pcall(function() return pandoc.json.decode(content) end)
+      if ok and config then
+        log_debug("Loaded page config from " .. path)
+        cached_page_config = config
+        return config
+      end
+    end
+  end
+
+  log_debug("No page-config.json found")
+  return nil
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Inline Renderer: Pandoc AST → OOXML runs
+-- ═══════════════════════════════════════════════════════════════════════════
+--
+-- Converts Pandoc inline elements to OOXML run XML strings.
+-- Handles the pre-conversion AST forms present when table-style.lua runs
+-- (before char-style.lua and comment-inject.lua in the filter chain).
+--
+-- base_rPr_parts: array of XML fragments included in the run properties of every generated run.
+-- Each entry is one complete XML element string; entries are concatenated in order.
+
+-- Join run-property fragments into one string; returns "" when `parts` is empty
+local function build_rPr(parts)
+ if #parts == 0 then return "" end
+ return "" .. table.concat(parts) .. "" -- NOTE(review): both wrapper literals are empty in this copy; presumably the rPr open/close tags — confirm upstream
+end
+
+-- Build the markup for one text run from rPr parts and text (the text is XML-escaped here)
+local function build_text_run(rPr_parts, text)
+ return "" .. build_rPr(rPr_parts) .. -- NOTE(review): the literals around these calls are empty in this copy — confirm upstream markup
+ '' .. M.xml_escape(text) .. ""
+end
+
+-- Span class → Word character style ID: the schema entry's word_style wins, else this fallback table
+local char_style_fallback = {
+  date = "Date", version = "Version", author = "Author",
+  affiliation = "Affiliation", sc = "SmallCaps"
+}
+local function get_char_style_id(class)
+  local schema_entry = M.get_char_class(class)
+  local from_schema = schema_entry and schema_entry.word_style
+  return from_schema or char_style_fallback[class]
+end
+
+-- Parse comment marker from HTML text.
+-- Returns id and type ("start", "end", "point"), or nil, nil when text is nil or nothing matches.
+-- Exported as M.parse_comment_marker for use by comment-inject.lua
+local function parse_comment_marker(text)
+ if not text then return nil, nil end
+ local start_id = text:match('') -- NOTE(review): all three patterns are empty in this copy, so this first match always succeeds with ""; the marker patterns look lost — confirm upstream
+ if start_id then return start_id, "start" end
+ local end_id = text:match('')
+ if end_id then return end_id, "end" end
+ local point_id = text:match('')
+ if point_id then return point_id, "point" end
+ return nil, nil
+end
+
+-- Forward declaration: recurse_with_rPr (below) calls render_inline, which is defined after it
+local render_inline
+-- Recurse into inline content with extra rPr fragment(s) (string or array) appended; returns array of XML strings
+local function recurse_with_rPr(content, rPr_parts, extra_rPr)
+ local new_rPr = {}
+ for _, p in ipairs(rPr_parts) do table.insert(new_rPr, p) end
+ if type(extra_rPr) == "table" then
+ for _, e in ipairs(extra_rPr) do table.insert(new_rPr, e) end
+ else
+ table.insert(new_rPr, extra_rPr)
+ end
+ local results = {}
+ for _, child in ipairs(content) do
+ for _, xml in ipairs(render_inline(child, new_rPr)) do
+ table.insert(results, xml)
+ end
+ end
+ return results
+end
+
+-- Render a single Pandoc inline element to OOXML run(s); returns array of XML strings
+function render_inline(inline, rPr_parts)
+ local results = {}
+
+ if inline.t == "Str" then
+ table.insert(results, build_text_run(rPr_parts, inline.text))
+
+ elseif inline.t == "Space" or inline.t == "SoftBreak" then
+ table.insert(results, build_text_run(rPr_parts, " "))
+
+ elseif inline.t == "Strong" then
+ return recurse_with_rPr(inline.content, rPr_parts, "") -- NOTE(review): markup literals in this function are empty in this copy — confirm upstream
+
+ elseif inline.t == "Emph" then
+ return recurse_with_rPr(inline.content, rPr_parts, "")
+
+ elseif inline.t == "Strikeout" then
+ return recurse_with_rPr(inline.content, rPr_parts, "")
+
+ elseif inline.t == "Superscript" then
+ return recurse_with_rPr(inline.content, rPr_parts, '')
+
+ elseif inline.t == "Subscript" then
+ return recurse_with_rPr(inline.content, rPr_parts, '')
+
+ elseif inline.t == "Span" then
+ -- Check for char-style class via schema lookup
+ local matched_class = nil
+ local matched_style_id = nil
+ for _, class in ipairs(inline.classes) do
+ local sid = get_char_style_id(class)
+ if sid then
+ matched_class = class
+ matched_style_id = sid
+ break
+ end
+ end
+
+ if matched_class then
+ -- Emit char field code (replicates char-style.lua)
+ local text = M.inlines_to_text(inline.content)
+ if text ~= "" then
+ local field_xml = M.build_char_field_code(
+ matched_style_id, text, matched_class)
+ table.insert(results, field_xml)
+ end
+ else
+ -- Unknown span class — recurse into children with current rPr
+ for _, child in ipairs(inline.content) do
+ for _, xml in ipairs(render_inline(child, rPr_parts)) do
+ table.insert(results, xml)
+ end
+ end
+ end
+
+ elseif inline.t == "Link" then
+ return recurse_with_rPr(inline.content, rPr_parts,
+ {'', ''})
+
+ elseif inline.t == "RawInline" then
+ if inline.format == "html" then
+ -- Check for comment markers (replicates comment-inject.lua)
+ local id, marker_type = parse_comment_marker(inline.text)
+ if id then
+ if marker_type == "start" then
+ table.insert(results,
+ '')
+ elseif marker_type == "end" then
+ table.insert(results,
+ '' ..
+ '')
+ elseif marker_type == "point" then
+ table.insert(results,
+ '' ..
+ '' ..
+ '')
+ end
+ end
+ -- Other HTML raw inlines are dropped (no meaningful OOXML equivalent)
+ elseif inline.format == "openxml" then
+ -- Already OOXML — pass through unchanged
+ table.insert(results, inline.text)
+ end
+
+ elseif inline.t == "LineBreak" then
+ -- Line break within a paragraph
+ table.insert(results, "")
+
+ elseif inline.t == "Code" then
+ table.insert(results, build_text_run(rPr_parts, inline.text))
+
+ -- Fallback: try to recurse into content, or stringify
+ elseif inline.content then
+ for _, child in ipairs(inline.content) do
+ for _, xml in ipairs(render_inline(child, rPr_parts)) do
+ table.insert(results, xml)
+ end
+ end
+ else
+ -- Leaf node we don't handle — stringify
+ local text = pandoc.utils.stringify(pandoc.Inlines({inline}))
+ if text ~= "" then
+ table.insert(results, build_text_run(rPr_parts, text))
+ end
+ end
+
+ return results
+end
+
+--- Render Pandoc inlines to a single string of OOXML run markup.
+-- @param inlines Array of Pandoc inline elements, rendered in order
+-- @param base_rPr_parts Optional array of run-property XML fragments passed
+--   to every render_inline call; defaults to an empty array
+-- @return Concatenated OOXML string (runs only, no paragraph wrapper)
+function M.render_inlines(inlines, base_rPr_parts)
+  local rPr = base_rPr_parts or {}
+  local buffer = {}
+  for _, inline in ipairs(inlines) do
+    local fragments = render_inline(inline, rPr)
+    for j = 1, #fragments do buffer[#buffer + 1] = fragments[j] end
+  end
+  return table.concat(buffer)
+end
+
+-- Export parse_comment_marker for use by comment-inject.lua
+M.parse_comment_marker = parse_comment_marker
+
+
+return M
diff --git a/_extensions/docstyle/figure.lua b/_extensions/docstyle/figure.lua
new file mode 100644
index 0000000..79b9583
--- /dev/null
+++ b/_extensions/docstyle/figure.lua
@@ -0,0 +1,108 @@
+-- figure.lua
+-- Pandoc Lua filter that wraps .figure divs in ADDIN DOCSTYLE field codes
+-- for round-trip harvest fidelity.
+--
+-- Usage in QMD:
+-- ::: {#fig-consort-flow .figure width="80%" align="center"}
+-- ![](figures/consort-flow.png)
+--
+-- **Figure 1.** Caption text with [@citation].
+-- :::
+--
+-- The filter:
+-- 1. Detects divs with class "figure"
+-- 2. Emits opening ADDIN DOCSTYLE field code carrying id and attributes
+-- 3. Passes through all inner blocks (image + caption paragraph) unchanged
+-- 4. Emits closing field code
+--
+-- On re-harvest, detect_docstyle_field_codes() finds these markers,
+-- handle_docstyle_figure() reconstructs the div_open with the original id,
+-- and the harvest loop emits the figure div with the correct QMD id.
+
+local fcu = require("field-code-utils")
+
+-- Normalise FORMAT: Quarto passes "docx" at runtime; map it to the canonical "openxml".
+-- Any other writer keeps its real name, so the FORMAT guard in Div() can actually skip non-Word output.
+local FORMAT = (FORMAT == nil or FORMAT == "openxml" or FORMAT:match("^docx")) and "openxml" or FORMAT
+
+-- Diagnostics: written to stderr only when DOCSTYLE_DEBUG=1 (this local shadows Lua's debug library in this file)
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+  if not DEBUG then return end
+  io.stderr:write("[figure] " .. msg .. "\n")
+end
+
+-- Div attributes excluded from the field code payload (Pandoc-internal)
+local skip_attr_keys = { ["data-pos"] = true }
+
+-- Return the src of the first Image that sits directly inside a Para of
+-- `blocks`, or nil when there is none. Other block types and images nested
+-- inside other inlines are not searched.
+local function first_image_src(blocks)
+  for _, block in ipairs(blocks) do
+    if block.t == "Para" then
+      for _, inline in ipairs(block.content) do
+        if inline.t == "Image" then
+          return inline.src
+        end
+      end
+    end
+  end
+  return nil
+end
+
+-- Wrap a ".figure" div in an opening/closing ADDIN DOCSTYLE field-code pair.
+-- The opening field code's payload carries:
+--   * the div identifier as the figure id ("fig-unknown" when it is empty),
+--   * every non-empty div attribute except the keys in skip_attr_keys,
+--   * original_path: the path of the first image, kept so re-harvest can
+--     restore it.
+-- The div's inner blocks are emitted unchanged between the two markers; the
+-- div wrapper itself is not kept.
+-- Returns nil (element left untouched) when FORMAT is not "openxml" or the
+-- div has no "figure" class.
+function Div(div)
+  if FORMAT ~= "openxml" then
+    return nil
+  end
+
+  local has_figure_class = false
+  for _, class in ipairs(div.classes) do
+    if class == "figure" then has_figure_class = true end
+  end
+  if not has_figure_class then
+    return nil
+  end
+
+  local fig_id = div.identifier
+  if not fig_id or fig_id == "" then fig_id = "fig-unknown" end
+
+  local attrs = {}
+  for key, val in pairs(div.attributes) do
+    local keep = val and val ~= "" and not skip_attr_keys[key]
+    if keep then attrs[key] = val end
+  end
+
+  local original_path = first_image_src(div.content)
+  if original_path and original_path ~= "" then
+    attrs["original_path"] = original_path
+  end
+
+  debug("Processing .figure div: id=" .. fig_id)
+
+  -- Sequence: field start | inner blocks | field end
+  local blocks = { pandoc.RawBlock("openxml", fcu.build_figure_field_start(fig_id, attrs)) }
+  for _, block in ipairs(div.content) do
+    blocks[#blocks + 1] = block
+  end
+  blocks[#blocks + 1] = pandoc.RawBlock("openxml", fcu.build_block_field_end())
+  return pandoc.Blocks(blocks)
+end
+
+-- Document-level hook intended to canonicalise FORMAT ("docx" → "openxml"); FORMAT here is the file-level local, not Pandoc's global.
+function Pandoc(_)
+ if FORMAT == "docx" or FORMAT == "openxml" then
+ FORMAT = "openxml" -- NOTE(review): the local is never set to "docx" in this file, so this only re-assigns "openxml" — confirm the hook is still needed
+ end
+ return nil -- nil leaves the document itself unchanged
+end
diff --git a/_extensions/docstyle/generate-reference.R b/_extensions/docstyle/generate-reference.R
new file mode 100644
index 0000000..75d131d
--- /dev/null
+++ b/_extensions/docstyle/generate-reference.R
@@ -0,0 +1,332 @@
+#!/usr/bin/env Rscript
+# Pre-render hook: Generate reference.docx from CSS
+#
+# This script is called by Quarto before rendering to generate a reference.docx
+# from the docstyle CSS configuration. Uses hash-based caching to avoid
+# regeneration when nothing has changed.
+#
+# Usage in _quarto.yml:
+# project:
+# pre-render: _extensions/docstyle/generate-reference.R
+#
+# Behaviour:
+# - If user explicitly sets reference-doc in YAML, skip generation (use theirs)
+# - Otherwise, generate reference.docx from docstyle.css configuration
+# - Cache by hash: only regenerate when CSS or docstyle config changes
+#
+# Output:
+# - _docstyle/reference.docx (cached reference document)
+# - _docstyle/reference.docx.hash (hash of inputs for cache validation)
+
+# Null-coalescing helper (defined first because it is used throughout): `x` unless it is NULL, otherwise `y`
+`%||%` <- function(x, y) if (!is.null(x)) x else y
+
+# Project root: QUARTO_PROJECT_DIR when set and non-empty; otherwise (not
+# running as a Quarto hook) the current working directory.
+project_dir <- Sys.getenv("QUARTO_PROJECT_DIR", unset = "")
+if (!nzchar(project_dir)) {
+  project_dir <- getwd()
+}
+
+# Locate _quarto.yml: first in the project directory, then one level up (which then becomes the project root)
+quarto_yml <- file.path(project_dir, "_quarto.yml")
+in_project_dir <- file.exists(quarto_yml)
+if (!in_project_dir) {
+  quarto_yml <- file.path(dirname(project_dir), "_quarto.yml")
+}
+if (!file.exists(quarto_yml)) {
+  message("[generate-reference] No _quarto.yml found, skipping reference generation")
+  quit(save = "no", status = 0)
+}
+if (!in_project_dir) project_dir <- dirname(quarto_yml)
+
+# Parse _quarto.yml; on a read/parse error, report it and exit with status 0
+config <- tryCatch(
+  yaml::read_yaml(quarto_yml),
+  error = function(e) {
+    message("[generate-reference] Error reading _quarto.yml: ", conditionMessage(e))
+    quit(save = "no", status = 0)
+  })
+
+# Report whether the user configured their own reference-doc. TRUE only when
+# format.docx, format.docstyle-docx or the top level names a reference-doc
+# other than the generated <sidecar_dir>/reference.docx (which still has to
+# be built). Scalar sections such as `format: html` are tolerated.
+has_custom_reference_doc <- function(cfg, sidecar_dir) {
+  generated_path <- file.path(sidecar_dir, "reference.docx")
+  # Walk nested keys; NULL as soon as a level is not a list (where `$` would stop with an error)
+  lookup <- function(x, ...) {
+    for (key in c(...)) {
+      if (!is.list(x)) return(NULL)
+      x <- x[[key]]
+    }
+    x
+  }
+  is_custom <- function(path) {
+    if (!is.character(path) || length(path) != 1L || is.na(path)) return(FALSE)
+    same_file <- normalizePath(path, mustWork = FALSE) ==
+      normalizePath(generated_path, mustWork = FALSE)
+    in_sidecar <- basename(dirname(path)) == basename(sidecar_dir) &&
+      basename(path) == "reference.docx"
+    !(same_file || path == generated_path || in_sidecar)
+  }
+  is_custom(lookup(cfg, "format", "docx", "reference-doc")) ||
+    is_custom(lookup(cfg, "format", "docstyle-docx", "reference-doc")) ||
+    is_custom(lookup(cfg, "reference-doc"))
+}
+
+# Early exits (status 0: skipping is not an error): custom reference-doc, or no `docstyle:` section
+sidecar_dir_name <- config$docstyle$`sidecar-dir` %||% "_docstyle"
+skip_reason <- if (has_custom_reference_doc(config, sidecar_dir_name)) {
+  "[generate-reference] User specified custom reference-doc, skipping CSS generation"
+} else if (is.null(config$docstyle)) {
+  "[generate-reference] No docstyle: section found, skipping reference generation"
+}
+if (!is.null(skip_reason)) {
+  message(skip_reason)
+  quit(save = "no", status = 0)
+}
+
+# Resolve the stylesheet(s) that drive the reference document.
+# `docstyle.css` may hold one path or several; when it is absent, the
+# default.css located next to this script is used instead. A stylesheet that
+# cannot be found ends the script quietly (status 0).
+css_config <- config$docstyle$css
+
+if (!is.null(css_config)) {
+  # Each path is tried relative to the project root first; one that does not
+  # exist there is kept exactly as written.
+  css_paths <- vapply(css_config, function(p) {
+    in_project <- file.path(project_dir, p)
+    if (file.exists(in_project)) in_project else p
+  }, character(1))
+
+  missing_css <- css_paths[!file.exists(css_paths)]
+  if (length(missing_css) > 0) {
+    message("[generate-reference] CSS file(s) not found: ", paste(missing_css, collapse = ", "))
+    quit(save = "no", status = 0)
+  }
+} else {
+  # Directory of this script: Rscript passes the script path as a --file=
+  # argument. Without it (or on any error) assume the conventional
+  # <project>/_extensions/docstyle location.
+  extension_dir <- file.path(project_dir, "_extensions", "docstyle")
+  script_dir <- tryCatch({
+    file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
+    if (length(file_arg) > 0) {
+      dirname(normalizePath(sub("^--file=", "", file_arg)))
+    } else {
+      extension_dir
+    }
+  }, error = function(e) extension_dir)
+
+  default_css <- file.path(script_dir, "default.css")
+  if (!file.exists(default_css)) {
+    message("[generate-reference] No docstyle.css specified and default.css not found")
+    quit(save = "no", status = 0)
+  }
+
+  css_paths <- default_css
+  message("[generate-reference] Using default CIHR-compliant CSS")
+}
+
+# Output locations. The sidecar directory (default "_docstyle") lives under
+# the project root and is created on first use; it receives the generated
+# reference.docx and the hash file used to validate the cache.
+sidecar_dir <- config$docstyle$`sidecar-dir` %||% "_docstyle"
+sidecar_path <- file.path(project_dir, sidecar_dir)
+if (!dir.exists(sidecar_path)) dir.create(sidecar_path, recursive = TRUE)
+
+reference_path <- file.path(sidecar_path, "reference.docx")
+hash_path <- paste0(reference_path, ".hash")
+
+# Fingerprint everything that influences the generated reference.docx.
+#   css_paths        paths of the CSS files; their text is hashed in order
+#   docstyle_config  the parsed `docstyle:` section of _quarto.yml
+# Returns the SHA-256 digest (digest::digest) of the CSS text, the relevant
+# config sections serialised as JSON, and the installed docstyle version.
+compute_input_hash <- function(css_paths, docstyle_config) {
+  read_css_text <- function(p) paste(readLines(p, warn = FALSE), collapse = "\n")
+  css_content <- paste(
+    vapply(css_paths, read_css_text, character(1)),
+    collapse = "\n---CSS-SEPARATOR---\n"
+  )
+
+  # Only these sections affect reference.docx (page includes line-numbers)
+  relevant_config <- list(
+    page = docstyle_config$page, header = docstyle_config$header,
+    footer = docstyle_config$footer, sections = docstyle_config$sections,
+    toc = docstyle_config$toc, `base-doc` = docstyle_config$`base-doc`
+  )
+  config_json <- jsonlite::toJSON(relevant_config, auto_unbox = TRUE)
+
+  # The package version is part of the key, so a template change forces a rebuild
+  template_version <- if (requireNamespace("docstyle", quietly = TRUE)) {
+    as.character(utils::packageVersion("docstyle"))
+  } else {
+    ""
+  }
+  digest::digest(
+    paste0(css_content, "\n---\n", config_json, "\n---TEMPLATE---\n", template_version),
+    algo = "sha256"
+  )
+}
+
+# Skip regeneration when the stored hash matches and reference.docx exists
+current_hash <- compute_input_hash(css_paths, config$docstyle)
+cached_hash <- ""
+if (file.exists(hash_path)) {
+  # An empty hash file yields character(0); fall back to "" so the test below
+  # always compares single strings instead of failing on a zero-length value
+  cached_hash <- c(trimws(readLines(hash_path, n = 1, warn = FALSE)), "")[1]
+}
+if (isTRUE(current_hash == cached_hash) && file.exists(reference_path)) {
+  message("[generate-reference] Reference doc up to date (hash match)")
+  quit(save = "no", status = 0)
+}
+message("[generate-reference] Generating reference.docx from CSS...")
+
+# Make the docstyle package available. Prefer the installed package;
+# otherwise look for a development checkout (a DESCRIPTION whose Package
+# field is "docstyle") in the project directory or up to four levels above
+# it and load it with devtools::load_all(). With neither, skip quietly.
+docstyle_loaded <- requireNamespace("docstyle", quietly = TRUE)
+
+if (!docstyle_loaded) {
+  search_dirs <- c(
+    project_dir,
+    dirname(project_dir),
+    dirname(dirname(project_dir)),
+    dirname(dirname(dirname(project_dir))),
+    dirname(dirname(dirname(dirname(project_dir))))
+  )
+
+  for (dir in search_dirs) {
+    desc_path <- file.path(dir, "DESCRIPTION")
+    if (!file.exists(desc_path)) next
+    # read.dcf finds the Package field wherever it sits in the file and
+    # copes with an empty file, unlike a match against the first line only
+    pkg <- tryCatch(read.dcf(desc_path, fields = "Package"), error = function(e) NULL)
+    is_docstyle <- length(pkg) > 0 && isTRUE(pkg[[1]] == "docstyle")
+    if (!is_docstyle || !requireNamespace("devtools", quietly = TRUE)) next
+    # A checkout that fails to load is skipped; the search continues upwards
+    docstyle_loaded <- tryCatch({
+      devtools::load_all(dir, quiet = TRUE)
+      TRUE
+    }, error = function(e) FALSE)
+    if (docstyle_loaded) break
+  }
+}
+
+if (!docstyle_loaded) {
+  message("[generate-reference] docstyle package not found, skipping reference generation")
+  quit(save = "no", status = 0)
+}
+
+# Build reference.docx, then record the input hash so the next run can skip
+# the work. The hash is written only after generation succeeded; any error
+# here is fatal for the render (exit status 1).
+report_generation_failure <- function(e) {
+  message("[generate-reference] Error generating reference.docx: ", e$message)
+  quit(save = "no", status = 1)
+}
+tryCatch(
+  {
+    docstyle::generate_reference_doc(config_path = quarto_yml, output_path = reference_path)
+    writeLines(current_hash, hash_path)
+    message("[generate-reference] Generated: ", reference_path)
+    message("[generate-reference] Hash: ", substr(current_hash, 1, 12), "...")
+  },
+  error = report_generation_failure
+)
+
+# Generate page-config.json for the Lua filters and the R finisher.
+# The file carries the page layout parsed from the CSS plus, when configured,
+# header/footer settings, per-section style overrides and table styles; run
+# properties are pre-rendered to XML (rPr_xml).
+# Any failure is reported and swallowed: page-config.json is optional.
+tryCatch({
+  page_config_path <- file.path(sidecar_path, "page-config.json")
+
+  # Page layout comes from the "page" attribute of the parsed CSS
+  css_styles <- docstyle::read_css(css_paths)
+  page_config <- attr(css_styles, "page") %||% list()
+
+  # rPr XML for the CSS rule `.<style_name>`; "" when no style is named or
+  # the CSS has no such rule
+  resolve_rPr <- function(style_name) {
+    if (is.null(style_name) || is.null(css_styles)) return("")
+    rule <- css_styles[[paste0(".", style_name)]]
+    if (is.null(rule)) return("")
+    docstyle::build_rPr_xml(docstyle::css_to_rPr(rule))
+  }
+
+  # Export settings for a header or footer band; NULL unless the band is
+  # configured with `enabled: true`. `first-page` defaults to TRUE, the
+  # three text slots default to "", and `content` is accepted as a fallback
+  # for `center`.
+  export_band <- function(band) {
+    if (is.null(band) || !isTRUE(band$enabled)) return(NULL)
+    list(
+      enabled = TRUE,
+      first_page = band$`first-page` %||% TRUE,
+      style = band$style,
+      rPr_xml = resolve_rPr(band$style),
+      left = band$left %||% "",
+      center = band$center %||% band$content %||% "",
+      right = band$right %||% ""
+    )
+  }
+
+  ds <- config$docstyle
+
+  # Footer first, then header: this is the order the members are added to
+  # page_config and therefore the order they appear in the JSON
+  footer_export <- export_band(ds$footer)
+  if (!is.null(footer_export)) page_config$footer <- footer_export
+  header_export <- export_band(ds$header)
+  if (!is.null(header_export)) page_config$header <- header_export
+
+  # Per-section header/footer style overrides, each with its rPr_xml
+  if (!is.null(ds$sections)) {
+    section_exports <- list()
+    for (sec_name in names(ds$sections)) {
+      sec <- ds$sections[[sec_name]]
+      # A missing style stays a NULL member; resolve_rPr() maps it to ""
+      footer_style <- sec$`footer-style`
+      header_style <- sec$`header-style`
+      section_exports[[sec_name]] <- list(
+        footer_style = footer_style,
+        footer_rPr_xml = resolve_rPr(footer_style),
+        header_style = header_style,
+        header_rPr_xml = resolve_rPr(header_style)
+      )
+    }
+    page_config$sections <- section_exports
+  }
+
+  # Table styles found in the CSS (e.g., .table-formal, .table-grid)
+  table_styles <- docstyle::extract_table_styles(css_styles)
+  if (length(table_styles) > 0) {
+    page_config$table_styles <- table_styles
+    message("[generate-reference] Table styles: ", paste(names(table_styles), collapse = ", "))
+  }
+
+  # Write page config as JSON
+  jsonlite::write_json(
+    page_config,
+    page_config_path,
+    auto_unbox = TRUE,
+    pretty = TRUE
+  )
+
+  # Report available named page styles
+  named_styles <- names(page_config$named)
+  if (length(named_styles) > 0) {
+    message("[generate-reference] Named page styles: ", paste(named_styles, collapse = ", "))
+  }
+}, error = function(e) {
+  # Non-fatal: page-config.json is optional
+  message("[generate-reference] Note: Could not generate page-config.json: ", e$message)
+})
diff --git a/_extensions/docstyle/list-style.lua b/_extensions/docstyle/list-style.lua
new file mode 100644
index 0000000..41838f7
--- /dev/null
+++ b/_extensions/docstyle/list-style.lua
@@ -0,0 +1,166 @@
+-- list-style.lua
+-- Pandoc Lua filter for CSS-defined list styles in Word output
+--
+-- Approach: AST rewriting + ADDIN DOCSTYLE field code markers
+-- 1. Converts BulletList → OrderedList with correct ListNumberStyle
+-- (Pandoc's docx writer generates proper numbering.xml definitions)
+-- 2. Wraps styled lists in ADDIN DOCSTYLE field codes
+-- (harvest detects field codes to recover CSS class on round-trip)
+--
+-- Usage in QMD:
+-- ::: {.list-alpha}
+-- - First item (renders as a.)
+-- - Second item (renders as b.)
+-- :::
+--
+-- Supported list classes:
+-- .list-bullet - Bullet list (explicit)
+-- .list-decimal - Numbered 1. 2. 3. at all levels
+-- .list-alpha - Lettered a. b. c. at all levels
+-- .list-roman - Roman i. ii. iii. at all levels
+-- .list-formal - Hierarchical: 1. / a. / i. per level
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+
+-- Write one "[list-style]"-prefixed diagnostic line to stderr.
+-- Does nothing unless the DOCSTYLE_DEBUG environment variable equals "1".
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write("[list-style] " .. msg .. "\n")
+end
+
+-- File-local output-format flag, initialised to "openxml". Because it is
+-- declared `local`, it shadows Pandoc's global FORMAT for every later
+-- reference to FORMAT in this file.
+local FORMAT = "openxml"
+
+-- Map CSS class → Pandoc ListNumberStyle per indent level
+-- Pandoc styles: DefaultStyle, Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman
+-- Numeric keys 0..2 are nesting depths (0 = outermost list); convert_list
+-- falls back to the level-0 entry when a depth has no entry of its own.
+local list_styles = {
+ ["list-bullet"] = nil, -- keep as BulletList (nil stores no entry, so lookups return nil)
+ ["list-decimal"] = {
+ [0] = "Decimal", [1] = "Decimal", [2] = "Decimal"
+ },
+ ["list-alpha"] = {
+ [0] = "LowerAlpha", [1] = "LowerAlpha", [2] = "LowerAlpha"
+ },
+ ["list-roman"] = {
+ [0] = "LowerRoman", [1] = "LowerRoman", [2] = "LowerRoman"
+ },
+ ["list-formal"] = {
+ [0] = "Decimal", [1] = "LowerAlpha", [2] = "LowerRoman"
+ }
+}
+
+-- Pick the list-style class that applies to a div.
+-- classes: the div's class list.
+-- Returns the first class that has a numbering definition in list_styles;
+-- failing that, "list-bullet" when that class is present; otherwise nil.
+local function find_list_style(classes)
+  local saw_bullet = false
+  for _, name in ipairs(classes) do
+    if list_styles[name] ~= nil then
+      return name
+    end
+    -- list-bullet has no table entry, so remember it separately; a numbered
+    -- style anywhere in the class list still wins over it.
+    saw_bullet = saw_bullet or name == "list-bullet"
+  end
+  if saw_bullet then
+    return "list-bullet"
+  end
+  return nil
+end
+
+-- Convert a BulletList or OrderedList into an OrderedList numbered per style_name.
+-- Nested lists are converted recursively, one level deeper each time.
+--
+-- block:      the BulletList/OrderedList element to convert
+-- style_name: key into list_styles (e.g. "list-formal")
+-- level:      nesting depth, 0 = outermost (defaults to 0)
+-- div_start:  optional start number from the enclosing div; applied at level 0 only
+--
+-- Returns block unchanged when style_name has no numbering definition
+-- (list-bullet); otherwise a new OrderedList using the "Period" delimiter.
+local function convert_list(block, style_name, level, div_start)
+  level = level or 0
+  local style_def = list_styles[style_name]
+
+  -- list-bullet (no numbering definition): keep as-is
+  if not style_def then
+    return block
+  end
+
+  -- Depths without their own entry reuse the level-0 style
+  local pandoc_style = style_def[level] or style_def[0]
+
+  -- Rebuild every item, converting nested lists (nested lists don't inherit div_start)
+  local new_items = {}
+  for _, item in ipairs(block.content) do
+    local new_blocks = {}
+    for _, b in ipairs(item) do
+      if b.t == "BulletList" or b.t == "OrderedList" then
+        table.insert(new_blocks, convert_list(b, style_name, level + 1, nil))
+      else
+        table.insert(new_blocks, b)
+      end
+    end
+    table.insert(new_items, new_blocks)
+  end
+
+  -- Start number: div_start at level 0, then the list's own start, then 1.
+  -- The start is read through the named `start` field. ListAttributes is
+  -- documented with named fields (start, style, delimiter); positional access
+  -- ([1]) is not part of that interface and can yield nil, which would
+  -- silently reset a list authored as "3." back to 1.
+  local start_num = 1
+  if div_start and level == 0 then
+    start_num = div_start
+  elseif block.t == "OrderedList" and block.listAttributes then
+    start_num = block.listAttributes.start or 1
+  end
+
+  return pandoc.OrderedList(new_items, pandoc.ListAttributes(start_num, pandoc_style, "Period"))
+end
+
+-- Div handler: restyle the lists directly inside a `.list-*` div.
+--
+-- Returns nil (div left untouched) when the format flag is not "openxml", when
+-- the div carries no list-style class, or when it contains no top-level list.
+-- Otherwise returns the div's blocks (the div wrapper itself is dropped) with
+-- every top-level list converted, bracketed by a field-code start block and a
+-- field-code end block so the class can be recovered later.
+function Div(div)
+  if FORMAT ~= "openxml" then
+    return nil
+  end
+
+  local style_name = find_list_style(div.classes)
+  if style_name == nil then
+    return nil
+  end
+
+  -- Optional start="N" attribute for list continuation (nil when absent or non-numeric)
+  local div_start = tonumber(div.attributes.start)
+
+  local start_note = ""
+  if div_start then
+    start_note = " start=" .. div_start
+  end
+  debug("Found ." .. style_name .. " div" .. start_note)
+
+  -- Convert top-level lists; every other block passes through untouched
+  local converted_blocks = {}
+  local modified = false
+  for _, block in ipairs(div.content) do
+    local is_list = block.t == "BulletList" or block.t == "OrderedList"
+    if is_list then
+      converted_blocks[#converted_blocks + 1] = convert_list(block, style_name, 0, div_start)
+      modified = true
+    else
+      converted_blocks[#converted_blocks + 1] = block
+    end
+  end
+
+  if not modified then
+    debug("No lists found in ." .. style_name .. " div")
+    return nil
+  end
+
+  -- Field-code start marker, converted content, field-code end marker
+  local result = {
+    pandoc.RawBlock("openxml", fcu.build_list_field_start(style_name, div_start))
+  }
+  for _, block in ipairs(converted_blocks) do
+    result[#result + 1] = block
+  end
+  result[#result + 1] = pandoc.RawBlock("openxml", fcu.build_block_field_end())
+
+  debug("Converted lists in ." .. style_name .. " div (field code marker added)")
+
+  return result
+end
+
+-- Check output format.
+-- Listed first in the returned filter table, so it runs before the Div pass
+-- and records the real writer format in the file-local FORMAT flag.
+--
+-- The file-local FORMAT shadows Pandoc's global of the same name, so the
+-- global has to be read through _G; comparing the local with itself can never
+-- switch the filter off for non-Word output. "docx" is normalised to
+-- "openxml" (the raw-block format Div emits); any other writer keeps its own
+-- name, which makes Div return early. If no global FORMAT exists the flag is
+-- left as it was.
+-- Always returns nil (document unchanged).
+function Pandoc(doc)
+  local writer = _G.FORMAT
+  if writer == "docx" or writer == "openxml" then
+    FORMAT = "openxml"
+  elseif writer ~= nil then
+    FORMAT = writer
+  end
+  return nil
+end
+
+-- Two filter passes, applied in this order: Pandoc (format check), then Div
+-- (list conversion and field-code wrapping).
+return {
+ { Pandoc = Pandoc },
+ { Div = Div }
+}
diff --git a/_extensions/docstyle/page-section.lua b/_extensions/docstyle/page-section.lua
new file mode 100644
index 0000000..03d85b0
--- /dev/null
+++ b/_extensions/docstyle/page-section.lua
@@ -0,0 +1,643 @@
+-- page-section.lua
+-- Pandoc Lua filter that injects Word section breaks for named page styles
+--
+-- Usage in QMD (use .section-* class prefix):
+-- ::: {.section-landscape}
+-- | Wide | Table | Here |
+-- :::
+--
+-- ::: {.section-body page-break="true" line-numbers="continuous"}
+-- :::
+--
+-- Attributes:
+-- page-break="true" Start section on new page (default: false)
+-- line-numbers="page" Line numbers restart each page
+-- line-numbers="section" Line numbers restart each section
+-- line-numbers="continuous" Line numbers never restart
+-- line-numbers="false" Disable line numbers for this section
+--
+-- CSS configuration (optional - attributes can override):
+-- @page landscape {
+-- size: letter landscape;
+-- margin: 0.5in;
+-- }
+-- @page body {
+-- --docstyle-line-numbers: every 1;
+-- --docstyle-line-numbers-restart: page;
+-- }
+--
+-- The filter reads named page rules from a JSON file generated by the pre-render
+-- hook. If no @page rule exists for a section name, the filter falls back to
+-- default page properties (allowing sections to work without explicit CSS).
+--
+-- Section breaks and Pandoc's docx writer
+-- ----------------------------------------
+-- Word's section model: sectPr defines properties for the section that ENDS at
+-- that point. The document's final sectPr (in <w:body>) defines the last section.
+-- Pandoc's docx writer always emits this final sectPr from the reference document.
+--
+-- This filter inserts mid-document sectPr elements to create section breaks. The
+-- final section's properties (including line numbers) come from the reference
+-- document, generated by generate-reference.R with settings from page-config.json.
+--
+-- IMPORTANT: Page breaks use an explicit <w:br w:type="page"/> paragraph followed
+-- by a continuous sectPr, NOT the nextPage section type. The nextPage type alone
+-- is unreliable when Pandoc's docx writer inserts bookmark elements around the
+-- break. The explicit break + continuous sectPr pattern matches how Word itself
+-- creates page breaks in natively-authored documents.
+--
+-- IMPORTANT: Do NOT insert an additional sectPr at document end (in the Pandoc()
+-- function). For empty marker divs this creates three sections instead of two,
+-- and Word silently drops the earlier page break. Line numbers for the final
+-- section must be configured in the reference document, not via an extra sectPr.
+--
+-- Wrapping divs (non-empty) DO insert a closing sectPr after their content.
+-- This creates three sections (before | wrapped content | after), which is the
+-- correct structure for scoping line numbers to the wrapped content only.
+--
+-- Round-trip support
+-- ------------------
+-- Section divs are wrapped in ADDIN DOCSTYLE field codes for harvest round-trip.
+-- The JSON payload preserves the class name, page-break, and line-numbers
+-- attributes so they can be reconstructed during docx_to_qmd() harvest.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+-- Debug logging (enabled when DOCSTYLE_DEBUG=1)
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+
+-- Write one "[page-section]"-prefixed diagnostic line to stderr; silent when
+-- debugging is off.
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write("[page-section] " .. msg .. "\n")
+end
+
+local FORMAT = "openxml"
+
+-- Recursively clone a value.
+-- Non-table values are returned as-is. For a table, a fresh table is built
+-- whose values are cloned recursively; keys are reused as-is and metatables
+-- are not copied.
+-- Lets several section markers share one named style without writing through
+-- to the shared configuration table.
+local function deep_copy(t)
+  if type(t) == "table" then
+    local clone = {}
+    for key, value in pairs(t) do
+      clone[key] = deep_copy(value)
+    end
+    return clone
+  end
+  return t
+end
+
+-- Module-level state shared by the handlers below.
+-- page_config: configuration table, assigned in Meta()
+local page_config = nil
+-- default_page: default page properties (size, orientation, margins), assigned in Meta()
+local default_page = nil
+local final_section_style = nil -- Only read by Pandoc() for debug output; nothing in this file assigns it
+local current_section_props = nil -- Set in Meta() to a deep copy of default_page; not read elsewhere in this file
+
+-- Convert a CSS length to twips (1/20th of a point), rounded to the nearest twip.
+--   1in = 1440, 1pt = 20, 1cm = 567, 1mm = 56.7, 1px = 15 (CSS: 96px = 1in)
+--
+-- value: a length such as "0.5in", "12pt" or "2.54cm" (unit is matched
+--        case-insensitively); a bare number is taken as inches.
+-- Returns an integer twip count, or nil when value is nil/false, malformed,
+-- or uses an unsupported unit (callers substitute their own defaults).
+--
+-- Rounding instead of truncating matters because binary floating point puts
+-- some products just below the exact integer: 0.7 * 1440 evaluates to
+-- 1007.9999999999999, which math.floor alone turns into 1007 instead of 1008.
+local function css_to_twips(value)
+  if not value then return nil end
+
+  local twips_per_unit = { ["in"] = 1440, pt = 20, cm = 567, mm = 56.7, px = 15 }
+
+  -- Round half up to the nearest whole twip
+  local function to_twips(amount, factor)
+    return math.floor(amount * factor + 0.5)
+  end
+
+  local num, unit = string.match(value, "^([%d%.]+)(%a+)$")
+  if not num then
+    -- No unit suffix: accept a plain number and assume inches
+    num = tonumber(value)
+    if num then return to_twips(num, 1440) end
+    return nil
+  end
+
+  -- The character class also admits strings like "1.2.3"; tonumber rejects them
+  num = tonumber(num)
+  if not num then return nil end
+
+  local factor = twips_per_unit[string.lower(unit)]
+  if not factor then return nil end
+  return to_twips(num, factor)
+end
+
+-- Look up page dimensions in twips.
+-- size:        page size name ("letter", "a4", "legal"), matched
+--              case-insensitively so CSS spellings such as "A4" or "Letter"
+--              resolve instead of silently becoming letter. Unknown, nil, or
+--              non-string sizes fall back to letter.
+-- orientation: "landscape" swaps width and height; anything else is portrait.
+-- Returns two values: width, height.
+local function get_page_size(size, orientation)
+  -- Standard page sizes in twips (width x height in portrait)
+  local sizes = {
+    letter = { w = 12240, h = 15840 }, -- 8.5" x 11"
+    a4 = { w = 11906, h = 16838 }, -- 210mm x 297mm
+    legal = { w = 12240, h = 20160 } -- 8.5" x 14"
+  }
+
+  local dims = nil
+  if type(size) == "string" then
+    dims = sizes[string.lower(size)]
+  end
+  dims = dims or sizes.letter
+
+  if orientation == "landscape" then
+    return dims.h, dims.w -- Swap width and height
+  end
+  return dims.w, dims.h
+end
+
+-- Build Word section properties XML
+--
+-- page_props: table with optional fields size, orientation, margins
+--             (top/bottom/left/right/gutter as CSS lengths) and "line-numbers"
+--             (enabled, count-by, restart, distance).
+-- sect_type:  section type name; defaults to "nextPage".
+-- Returns the concatenation of the collected XML fragments as one string.
+--
+-- Within this file the only caller is build_section_break_paras.
+-- NOTE(review): several string literals below appear empty in this revision,
+-- and sect_type is not referenced after its default is applied — confirm the
+-- XML templates are intact in the checked-in file.
+local function build_sect_pr(page_props, sect_type)
+ sect_type = sect_type or "nextPage" -- nextPage, continuous, evenPage, oddPage
+
+ local size = page_props.size or "letter"
+ local orientation = page_props.orientation or "portrait"
+ local margins = page_props.margins or {}
+
+ local w, h = get_page_size(size, orientation)
+
+ -- Default margins (1 inch = 1440 twips); the gutter defaults to 0.
+ -- Each default also applies when css_to_twips returns nil for the value.
+ local margin_top = css_to_twips(margins.top) or 1440
+ local margin_bottom = css_to_twips(margins.bottom) or 1440
+ local margin_left = css_to_twips(margins.left) or 1440
+ local margin_right = css_to_twips(margins.right) or 1440
+ local gutter = css_to_twips(margins.gutter) or 0
+
+ -- Build the sectPr XML
+ local xml_parts = {
+ ''
+ }
+
+ -- Section type
+ table.insert(xml_parts, '')
+
+ -- Page size with orientation
+ -- The orientation attribute is only added for landscape pages
+ local orient_attr = ""
+ if orientation == "landscape" then
+ orient_attr = ' w:orient="landscape"'
+ end
+ table.insert(xml_parts, string.format(
+ '',
+ w, h, orient_attr
+ ))
+
+ -- Page margins (arguments are passed in the order top, right, bottom, left, gutter)
+ table.insert(xml_parts, string.format(
+ '',
+ margin_top, margin_right, margin_bottom, margin_left, gutter
+ ))
+
+ -- Line numbers if configured
+ if page_props["line-numbers"] and page_props["line-numbers"].enabled then
+ local ln = page_props["line-numbers"]
+ local count_by = ln["count-by"] or 1
+ local restart = ln.restart or "newPage"
+ local distance = css_to_twips(ln.distance) or 360 -- Default 0.25in
+
+ debug(" build_sect_pr: adding line numbers with restart=" .. tostring(restart))
+
+ -- Map restart values to Word
+ -- Anything other than page/section/continuous (including the "newPage"
+ -- default assigned above) resolves to "newPage".
+ local restart_map = {
+ page = "newPage",
+ section = "newSection",
+ continuous = "continuous"
+ }
+ restart = restart_map[restart] or "newPage"
+
+ table.insert(xml_parts, string.format(
+ '',
+ count_by, restart, distance
+ ))
+ else
+ debug(" build_sect_pr: no line numbers (page_props['line-numbers']=" ..
+ tostring(page_props["line-numbers"]) .. ")")
+ end
+
+ table.insert(xml_parts, '')
+
+ return table.concat(xml_parts)
+end
+
+-- Build section break paragraphs as a list of XML strings.
+-- Returns a table of XML strings, each to be emitted as a separate RawBlock.
+--
+-- page_props: page properties forwarded to build_sect_pr.
+-- sect_type:  "nextPage" yields two strings (page-break paragraph, then the
+--             sectPr paragraph); any other value yields the sectPr paragraph only.
+--
+-- IMPORTANT: Each XML string must be a separate RawBlock. When concatenated
+-- into a single RawBlock, Pandoc's docx writer may not process the page break
+-- correctly (the break is silently dropped). Separate RawBlocks match how
+-- raw openxml blocks in markdown are processed.
+--
+-- For page breaks: Word reliably renders an explicit page-break paragraph
+-- followed by a continuous sectPr. The nextPage sect type alone is unreliable
+-- in some contexts (e.g., when Pandoc bookmark elements surround the break).
+-- This matches the pattern Word itself uses in natively-authored documents.
+--
+-- NOTE(review): no caller of this function is visible in this file, and the
+-- paragraph wrapper literals below appear empty in this revision — confirm.
+local function build_section_break_paras(page_props, sect_type)
+ -- The sectPr itself is always built as "continuous", whatever sect_type is
+ local sect_pr = build_sect_pr(page_props, "continuous")
+ if sect_type == "nextPage" then
+ -- Two separate paragraphs: page break + continuous sectPr
+ return {
+ '',
+ '' .. sect_pr .. ''
+ }
+ else
+ return {
+ '' .. sect_pr .. ''
+ }
+ end
+end
+
+-- Read page configuration from JSON file (delegates to shared loader)
+-- Takes no arguments and returns whatever fcu.load_page_config() returns.
+-- Meta() treats a falsy result as "no configuration available".
+local function load_page_config()
+ return fcu.load_page_config()
+end
+
+-- Meta handler: establish page_config and default_page before any Div runs.
+--
+-- page_config comes from the shared loader. default_page is taken from the
+-- first source available:
+--   1. inline `docstyle.page` metadata (size, orientation, margins; intended
+--      for testing),
+--   2. the top-level fields of page_config (including line-numbers),
+--   3. letter / portrait with 1in margins.
+-- current_section_props is then set to a deep copy of default_page.
+-- Returns nil (metadata unchanged).
+function Meta(meta)
+  page_config = load_page_config()
+
+  -- Stringify a metadata value, or use the fallback when it is absent
+  local function text_or(value, fallback)
+    if value then
+      return pandoc.utils.stringify(value)
+    end
+    return fallback
+  end
+
+  local inline_page = meta.docstyle and meta.docstyle.page
+  if inline_page then
+    -- Guarantee a config table with a `named` map even when no JSON was loaded
+    page_config = page_config or { named = {} }
+
+    local inline_margins = {}
+    if inline_page.margins then
+      for side, raw in pairs(inline_page.margins) do
+        inline_margins[side] = pandoc.utils.stringify(raw)
+      end
+    end
+
+    default_page = {
+      size = text_or(inline_page.size, "letter"),
+      orientation = text_or(inline_page.orientation, "portrait"),
+      margins = inline_margins
+    }
+  end
+
+  if not default_page then
+    if page_config then
+      -- Defaults from the loaded configuration
+      default_page = {
+        size = page_config.size or "letter",
+        orientation = page_config.orientation or "portrait",
+        margins = page_config.margins or {},
+        ["line-numbers"] = page_config["line-numbers"]
+      }
+    else
+      -- Nothing configured anywhere: built-in defaults
+      default_page = {
+        size = "letter",
+        orientation = "portrait",
+        margins = { top = "1in", bottom = "1in", left = "1in", right = "1in" }
+      }
+    end
+  end
+
+  current_section_props = deep_copy(default_page)
+
+  local shown_size = default_page.size or "letter"
+  local shown_orientation = default_page.orientation or "portrait"
+  debug("Default page: " .. shown_size .. " " .. shown_orientation)
+
+  local named_styles = page_config and page_config.named
+  if named_styles then
+    for name in pairs(named_styles) do
+      debug("Named page style available: " .. name)
+    end
+  end
+
+  return nil
+end
+
+-- Process Div elements looking for .section-* classes
+--
+-- Word Section Model:
+-- - A section runs from one section break to the next (or document end)
+-- - Section properties are defined by the sectPr that ENDS the section
+-- - The document's final sectPr defines properties for the LAST section
+--
+-- Usage (empty div as marker - recommended):
+-- ::: section-body
+-- :::
+--
+-- # 1. Introduction
+-- Content here is in the body section...
+--
+-- Usage (div wrapping content - also supported):
+-- ::: {.section-body}
+-- # 1. Introduction
+-- Content here is in the body section...
+-- :::
+--
+-- The section marker inserts a section break that ENDS the previous section.
+-- Everything after the marker is in the NEW section until the next marker or document end.
+--
+-- Add page-break="true" to start the new section on a new page:
+-- ::: {.section-body page-break="true"}
+-- :::
+
+-- Div handler for `.section-<name>` and `.page-break` divs.
+--
+-- Returns nil (div untouched) when the format flag is not "openxml" or the div
+-- has neither class. For `.page-break`, returns a raw openxml block followed by
+-- the div's content. For `.section-<name>`, returns: an optional page-break
+-- raw block, an opening marker paragraph, and — for non-empty divs — the div
+-- content, an optional trailing page-break block, and a closing marker paragraph.
+--
+-- Attributes read: line-numbers, page-break, page-break-after. All other
+-- attributes are copied into the marker payloads (see skip lists below).
+function Div(div)
+ -- Only process for docx output
+ if FORMAT ~= "openxml" then
+ return nil
+ end
+
+ -- Look for section-* classes
+ -- The first matching class wins; section_style is the part after "section-"
+ local section_style = nil
+ for _, class in ipairs(div.classes) do
+ local name = string.match(class, "^section%-(.+)$")
+ if name then
+ section_style = name
+ break
+ end
+ end
+
+ -- Handle standalone page break: ::: {.page-break} :::
+ -- Emits an explicit page-break raw block that Pandoc passes through reliably.
+ -- Use this instead of \newpage which Pandoc drops near headings/bookmarks.
+ -- Harvest detects that break and restores ::: {.page-break} :::
+ -- Only reached when the div has no section-* class.
+ if not section_style then
+ for _, class in ipairs(div.classes) do
+ if class == "page-break" then
+ debug("Found .page-break div")
+ local blocks = {}
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ ''))
+ -- Pass through any content inside the div
+ for _, block in ipairs(div.content) do
+ table.insert(blocks, block)
+ end
+ return blocks
+ end
+ end
+ return nil
+ end
+
+ debug("Found .section-" .. section_style .. " div")
+
+ -- Get the named page properties for the NEW section
+ -- IMPORTANT: Use deep_copy to avoid aliasing issues when multiple markers
+ -- use the same named style. Without copy, modifying new_section_props
+ -- (e.g., setting line-numbers) would corrupt the shared config table.
+ -- NOTE(review): new_section_props is built and adjusted below but is not
+ -- read again in this function; the emitted markers carry only the class
+ -- name, the page-break flag and the line-numbers value.
+ local new_section_props = nil
+ if page_config and page_config.named and page_config.named[section_style] then
+ new_section_props = deep_copy(page_config.named[section_style])
+ debug(" Using @page " .. section_style .. " properties")
+ else
+ -- No named @page rule found - fall back to default with style-specific inference
+ -- (this branch indexes default_page, so it relies on Meta() having run first)
+ debug(" No @page " .. section_style .. " rule found, using defaults")
+
+ if section_style == "landscape" then
+ -- Landscape: swap orientation
+ new_section_props = {
+ size = default_page.size or "letter",
+ orientation = "landscape",
+ margins = default_page.margins or {}
+ }
+ else
+ -- Other styles (body, appendix, etc.): inherit from default_page
+ -- This allows sections to work even without explicit @page rules
+ new_section_props = {
+ size = default_page.size or "letter",
+ orientation = default_page.orientation or "portrait",
+ margins = default_page.margins or {},
+ ["line-numbers"] = default_page["line-numbers"]
+ }
+ end
+ end
+
+ -- Line numbers: opt-in only via div attribute.
+ -- Default is NO line numbers. The @page CSS config provides page geometry
+ -- (size, margins, orientation) but line numbers require explicit opt-in:
+ -- ::: {.section-body line-numbers="continuous"}
+ -- This matches user expectation: "open it, start writing" = no line numbers.
+ -- Supports: line-numbers="true", "false", "page", "section", "continuous"
+ local ln_attr = div.attributes["line-numbers"]
+ if ln_attr then
+ ln_attr = pandoc.utils.stringify(ln_attr)
+ end
+
+ -- Clear any inherited line numbers from CSS config; only div attribute controls
+ new_section_props["line-numbers"] = nil
+
+ -- "false" and "none" both mean off. Values outside the supported set set no
+ -- line-number properties here, but are still forwarded verbatim in the
+ -- marker text and payload further down.
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ if ln_attr == "true" or ln_attr == "page" then
+ new_section_props["line-numbers"] = {
+ enabled = true,
+ ["count-by"] = 1,
+ restart = "page"
+ }
+ debug(" Line numbers enabled (restart per page) via attribute")
+ elseif ln_attr == "section" then
+ new_section_props["line-numbers"] = {
+ enabled = true,
+ ["count-by"] = 1,
+ restart = "section"
+ }
+ debug(" Line numbers enabled (restart per section) via attribute")
+ elseif ln_attr == "continuous" then
+ new_section_props["line-numbers"] = {
+ enabled = true,
+ ["count-by"] = 1,
+ restart = "continuous"
+ }
+ debug(" Line numbers enabled (continuous) via attribute")
+ end
+ else
+ debug(" Line numbers: off (default, no attribute)")
+ end
+
+ -- In Word, sectPr defines properties for the section that ENDS at that point.
+ -- To start a new section:
+ -- 1. Insert sectPr with PREVIOUS section's properties (ends the previous section)
+ -- 2. Content after that sectPr is in the new section
+ -- 3. New section's properties come from the NEXT sectPr (or document's final sectPr)
+
+ -- Check for page-break attribute: ::: {.section-body page-break="true"}
+ -- Only the exact string "true" switches the break type to nextPage
+ local sect_type = "continuous"
+ if pandoc.utils.stringify(div.attributes["page-break"] or "") == "true" then
+ sect_type = "nextPage"
+ debug(" Using nextPage section break")
+ end
+
+ -- Build result blocks
+ local blocks = {}
+
+ -- For page breaks: emit the explicit page-break paragraph BEFORE the field code
+ -- and sectPr. This prevents the page break from being sandwiched between two
+ -- consecutive sectPr elements (e.g., a closing sectPr from a previous wrapping
+ -- div and this opening sectPr), which causes Word to silently drop the break.
+ if sect_type == "nextPage" then
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ ''))
+ debug(" Page break emitted before section break")
+ end
+
+ -- === R-FIRST ASSEMBLY (v2) ===
+ -- Instead of emitting complex sectPr XML that Pandoc wraps in container
+ -- paragraphs (causing the 3-line gap), emit a simple text marker that
+ -- the R finisher will find and process.
+ --
+ -- The R finisher will:
+ -- 1. Find this marker
+ -- 2. Build sectPr using page-config.json
+ -- 3. Attach sectPr to the PRECEDING paragraph
+ -- 4. Delete the marker paragraph (eliminates the gap)
+
+ -- Determine line-numbers for marker
+ -- "none" unless the attribute is present and not "false"/"none"
+ local ln_for_marker = "none"
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ ln_for_marker = ln_attr
+ end
+
+ -- Build text marker: DOCSTYLE_SECTION::{class}::{page-break}::{line-numbers}
+ local marker_text = string.format("DOCSTYLE_SECTION::section-%s::%s::%s",
+ section_style,
+ sect_type == "nextPage" and "true" or "false",
+ ln_for_marker
+ )
+ debug(" Emitting marker: " .. marker_text)
+
+ -- Build attributes for field code
+ -- page-break is stored as boolean true; line-numbers as the attribute string
+ local field_attrs = {}
+ if sect_type == "nextPage" then
+ field_attrs["page-break"] = true
+ end
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ field_attrs["line-numbers"] = ln_attr
+ end
+ -- Pass through all remaining div attributes (footer-left, header-left,
+ -- page-start, etc.) so the R finisher can read them from the JSON payload
+ -- skip_attrs lists the attributes this function interprets itself
+ local skip_attrs = {["line-numbers"]=true, ["page-break"]=true, ["page-break-after"]=true}
+ for k, v in pairs(div.attributes) do
+ if not field_attrs[k] and not skip_attrs[k] then
+ field_attrs[k] = pandoc.utils.stringify(v)
+ end
+ end
+
+ -- Emit field code + marker in a SINGLE paragraph to prevent 3-line gap
+ -- Uses build_section_marker_para which combines BEGIN/instrText/SEPARATE/marker/END
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ fcu.build_section_marker_para("section-" .. section_style, field_attrs, marker_text)))
+
+ -- For wrapping divs (non-empty): emit content, then closing marker.
+ -- The opening marker already contains complete field code (BEGIN/SEPARATE/marker/END).
+ -- For empty marker divs: field code is already complete, nothing more needed.
+ if #div.content > 0 then
+ -- Div content
+ for _, block in ipairs(div.content) do
+ table.insert(blocks, block)
+ end
+
+ -- Closing marker: R finisher will attach sectPr to last content paragraph
+ -- The closing marker's page-break field comes from page-break-after, not page-break
+ local close_page_break = pandoc.utils.stringify(div.attributes["page-break-after"] or "") == "true"
+ local close_marker = string.format("DOCSTYLE_SECTION_END::section-%s::%s::%s",
+ section_style,
+ close_page_break and "true" or "false",
+ ln_for_marker
+ )
+ debug(" Emitting closing marker: " .. close_marker)
+
+ -- Page break after content if requested
+ if close_page_break then
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ ''))
+ end
+
+ -- Closing marker in single paragraph with field code wrapper
+ -- Strip footer/header/page-start attributes from closing markers — the
+ -- finisher reads these from opening markers only (via payload shift)
+ -- close_skip starts as a copy of skip_attrs so skip_attrs itself is not modified
+ local close_skip = {}
+ for k, v in pairs(skip_attrs) do close_skip[k] = v end
+ close_skip["footer-left"]=true; close_skip["footer-center"]=true; close_skip["footer-right"]=true
+ close_skip["header-left"]=true; close_skip["header-center"]=true; close_skip["header-right"]=true
+ close_skip["footer"]=true; close_skip["header"]=true
+ close_skip["page-start"]=true
+ local close_attrs = {}
+ if close_page_break then
+ close_attrs["page-break"] = true
+ end
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ close_attrs["line-numbers"] = ln_attr
+ end
+ for k, v in pairs(div.attributes) do
+ if not close_attrs[k] and not close_skip[k] then
+ close_attrs[k] = pandoc.utils.stringify(v)
+ end
+ end
+ -- The closing field code uses the class name with an "-end" suffix
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ fcu.build_section_marker_para("section-" .. section_style .. "-end", close_attrs, close_marker)))
+
+ debug("Wrapping div: section '" .. section_style .. "' opened and closed")
+ else
+ -- Empty marker div: field code already complete in single paragraph
+ debug("Empty marker: section '" .. section_style .. "' started")
+ end
+
+ return blocks
+end
+
+-- Document-level hook; listed last in this filter's pass table.
+-- Normalises the file-local FORMAT flag and, when a final section style has
+-- been recorded, logs it. Always returns nil (document unchanged).
+--
+-- No sectPr is appended here, on purpose. An earlier version added a sectPr
+-- paragraph at the end of the document to give the final section its line
+-- numbers, which produced THREE sections:
+--   1. the mid-document sectPr from Div()   - ends the front matter
+--   2. the sectPr added by this function    - ends the body section
+--   3. Pandoc's own body sectPr             - ends the document
+-- With that layout Word silently ignored the explicit page break placed
+-- before sectPr #1. Line numbers for the final section are configured in the
+-- reference document instead (generate-reference.R), so that only two
+-- sections remain (front matter + body), page breaks work reliably, and line
+-- numbers still apply to body content.
+--
+-- See: R/page_layout.R apply_line_numbers() function
+function Pandoc(doc)
+  local is_word = FORMAT == "docx" or FORMAT == "openxml"
+  if is_word then
+    FORMAT = "openxml"
+  end
+
+  -- Nothing to report unless a final section style was tracked for Word output
+  if not final_section_style or FORMAT ~= "openxml" then
+    return nil
+  end
+
+  local ln_note = "no line numbers"
+  if final_section_style["line-numbers"] then
+    ln_note = "with line numbers"
+  end
+  debug("Final section style tracked: " .. ln_note)
+  debug("Line numbers should be configured in reference doc, not via extra sectPr")
+
+  return nil
+end
+
+-- Drop stray fence paragraphs.
+-- A standalone ":::" line that Pandoc parsed as literal text (rather than as
+-- a fenced-div closer) would otherwise show up in the output. Removing it
+-- means authors only have to mark where a section STARTS.
+--
+-- A paragraph is removed when it consists of exactly one Str inline whose
+-- text is "::" or ":::" (the pattern makes the third colon optional).
+-- Returns {} to delete the paragraph, nil to keep it unchanged.
+function Para(para)
+  -- Only process for docx output
+  if FORMAT ~= "openxml" then
+    return nil
+  end
+
+  local inlines = para.content
+  if #inlines ~= 1 then
+    return nil
+  end
+
+  local only = inlines[1]
+  if only.t ~= "Str" or not only.text:match("^:::?$") then
+    return nil
+  end
+
+  debug("Consuming stray ':::' paragraph")
+  return {}
+end
+
+-- Filter execution order (one pass per entry, top to bottom):
+-- 1. Meta - load configuration
+-- 2. Div - process .section-* and .page-break divs
+-- 3. Para - consume stray ::: paragraphs
+-- 4. Pandoc - normalise FORMAT and log the tracked final section style (adds no sectPr)
+return {
+ { Meta = Meta },
+ { Div = Div },
+ { Para = Para },
+ { Pandoc = Pandoc }
+}
diff --git a/_extensions/docstyle/reference.docx b/_extensions/docstyle/reference.docx
new file mode 100644
index 0000000..590ff7d
Binary files /dev/null and b/_extensions/docstyle/reference.docx differ
diff --git a/_extensions/docstyle/revisions-inject.lua b/_extensions/docstyle/revisions-inject.lua
new file mode 100644
index 0000000..94e6f33
--- /dev/null
+++ b/_extensions/docstyle/revisions-inject.lua
@@ -0,0 +1,315 @@
+-- revisions-inject.lua
+-- Pandoc Lua filter that converts revision spans to OpenXML track changes
+--
+-- Usage in QMD:
+-- Insertions: [inserted text]{.ins id="rev_101"}
+-- Deletions: [~~deleted text~~]{.del id="rev_102"}
+--
+-- The filter reads revision metadata from a sidecar JSON file (revisions.json)
+-- which contains author, date, and other metadata for each revision.
+--
+-- Metadata loading (in priority order):
+-- 1. -M revisions-file:path/to/revisions.json (explicit path)
+-- 2. Auto-detect _docstyle/revisions.json (convention-based)
+--
+-- Note: Deletions use strikethrough syntax with a .del class wrapper.
+-- This is achieved via a Span around the Strikeout:
+-- [~~deleted~~]{.del id="x"}
+
+-- Debug logging (set DOCSTYLE_DEBUG=1 to enable)
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+ if DEBUG then
+ io.stderr:write(msg)
+ end
+end
+
+-- Metadata storage for revisions (loaded from revisions.json)
+local revisions_meta = {}
+local revisions_loaded = false
+
+-- Helper function to escape XML special characters
+local function xml_escape(text)
+  if not text then return "" end
+  -- "&" must be replaced first so the entities added below are not re-escaped
+  local escaped = text:gsub("&", "&amp;")
+  escaped = escaped:gsub("<", "&lt;")
+  escaped = escaped:gsub(">", "&gt;")
+  escaped = escaped:gsub('"', "&quot;")
+  return (escaped:gsub("'", "&apos;"))  -- parentheses drop gsub's match count
+end
+
+-- Helper to generate xml:space attribute for whitespace preservation
+-- Word requires xml:space="preserve" when text has leading/trailing whitespace
+local function get_space_attr(text)
+  if not text then return "" end
+  -- Significant whitespace: leading, trailing, or a run of two or more
+  local significant = text:find("^%s") or text:find("%s$") or text:find("%s%s")
+  return significant and ' xml:space="preserve"' or ""
+end
+
+-- Helper to extract numeric ID from revision ID string (e.g., "rev_9" -> "9")
+-- Word requires numeric w:id values
+local function get_numeric_id(id)
+  if not id then return "0" end
+  -- Prefer the digits that follow "rev_"; otherwise accept an all-digit id
+  local digits = id:match("rev_(%d+)") or id:match("^%d+$")
+  if digits then return digits end
+  return "0"
+end
+
+-- Helper to get revision metadata by ID
+local function get_revision(id)
+  -- Known revision: hand back the stored record. Otherwise build placeholder
+  -- metadata (a fresh table per call) so callers always get author and date.
+  local known = revisions_meta[id]
+  if known then return known end
+  local fallback = {}
+  fallback.author = "Unknown"
+  fallback.date = "2025-01-01T00:00:00Z"
+  return fallback
+end
+
+-- Flatten inline elements into plain deletion text plus OpenXML RawInlines.
+-- Returns: { text = "plain text", raw_before = { RawInline, ... }, raw_after = { RawInline, ... } }
+-- RawInlines (like comment markers) are preserved for output outside the deletion
+local function extract_deletion_content(inlines)
+  local text_parts = {}
+  local raw_inlines_before = {}  -- RawInlines seen before any text
+  local raw_inlines_after = {}   -- RawInlines seen once text has started
+  local seen_text = false
+
+  -- Append a text fragment and remember that text has started
+  local function add_text(fragment)
+    table.insert(text_parts, fragment)
+    seen_text = true
+  end
+
+  local function process_inlines(items)
+    for _, inline in ipairs(items) do
+      local tag = inline.t
+      if tag == "Str" then
+        add_text(inline.text)
+      elseif tag == "Space" or tag == "SoftBreak" then
+        add_text(" ")
+      elseif tag == "LineBreak" then
+        add_text("\n")
+      elseif tag == "Code" or tag == "Math" then
+        -- Leaf inlines that keep their payload in .text, not .content; without
+        -- this branch inline code inside a deletion silently disappeared
+        add_text(inline.text)
+      elseif tag == "RawInline" and inline.format == "openxml" then
+        -- Preserve OpenXML RawInlines (e.g., comment markers from comment-inject.lua)
+        table.insert(seen_text and raw_inlines_after or raw_inlines_before, inline)
+      elseif inline.content then
+        -- Any container (Strikeout, Emph, Strong, nested Span, ...): descend.
+        -- Elements with neither text handled above nor content are skipped.
+        process_inlines(inline.content)
+      end
+    end
+  end
+
+  process_inlines(inlines)
+
+  return {
+    text = table.concat(text_parts),
+    raw_before = raw_inlines_before,
+    raw_after = raw_inlines_after
+  }
+end
+
+-- Plain-text view of inline elements (the text part of extract_deletion_content)
+local function stringify_inlines(inlines)
+  -- RawInlines gathered by the extractor are deliberately discarded here
+  return extract_deletion_content(inlines).text
+end
+
+-- Process Span elements with .ins class (insertions)
+function Span(el)
+  -- Only process for Word output; every other format keeps the Span as-is
+  if FORMAT ~= "docx" and FORMAT ~= "openxml" then
+    return nil
+  end
+
+  -- Handle insertions (.ins class): content is kept and wrapped in <w:ins>
+  if el.classes:includes('ins') then
+    -- Pandoc parses {.ins id="x"} with "id" as the identifier, not an attribute
+    local id = el.identifier
+    if (not id or id == "") then
+      id = el.attributes['id'] or "0"
+    end
+    local rev = get_revision(id)
+
+    debug("[revisions-inject] Processing insertion id=" .. id .. "\n")
+
+    -- Build w:ins wrapper (use numeric ID for Word compatibility)
+    local numeric_id = get_numeric_id(id)
+    local start_xml = string.format(
+      '<w:ins w:id="%s" w:author="%s" w:date="%s">',
+      xml_escape(numeric_id),
+      xml_escape(rev.author),
+      xml_escape(rev.date)
+    )
+    local end_xml = '</w:ins>'
+
+    local result = { pandoc.RawInline('openxml', start_xml) }
+
+    -- Add content unchanged between the opening and closing markers
+    for _, item in ipairs(el.content) do
+      table.insert(result, item)
+    end
+
+    table.insert(result, pandoc.RawInline('openxml', end_xml))
+    return result
+  end
+
+  -- Handle deletions (.del class wrapping strikethrough)
+  -- Pattern: [~~deleted text~~]{.del id="x"}
+  if el.classes:includes('del') then
+    -- Pandoc parses {.del id="x"} with "id" as the identifier, not an attribute
+    local id = el.identifier
+    if (not id or id == "") then
+      id = el.attributes['id'] or "0"
+    end
+    local rev = get_revision(id)
+
+    debug("[revisions-inject] Processing deletion id=" .. id .. "\n")
+
+    -- Extract text and any RawInlines (like comment markers) from content
+    local content = extract_deletion_content(el.content)
+    local del_text = content.text
+
+    -- Remove any remaining strikethrough markers (~~ ) that may have leaked through
+    del_text = del_text:gsub("~~", "")
+
+    -- Build w:del with w:delText (use numeric ID for Word compatibility)
+    -- space_attr is inserted verbatim: it is either "" or ' xml:space="preserve"'
+    local numeric_id = get_numeric_id(id)
+    local space_attr = get_space_attr(del_text)
+    local del_xml = string.format(
+      '<w:del w:id="%s" w:author="%s" w:date="%s">' ..
+      '<w:r><w:delText%s>%s</w:delText></w:r>' ..
+      '</w:del>',
+      xml_escape(numeric_id),
+      xml_escape(rev.author),
+      xml_escape(rev.date),
+      space_attr,
+      xml_escape(del_text)
+    )
+
+    -- Build result: RawInlines before + deletion + RawInlines after
+    -- This preserves comment markers that were inside the deletion
+    local result = {}
+
+    -- Add any RawInlines that appeared before text (e.g., comment start markers)
+    for _, raw in ipairs(content.raw_before) do
+      table.insert(result, raw)
+      debug("[revisions-inject] Preserving RawInline before deletion\n")
+    end
+
+    -- Add the deletion itself
+    table.insert(result, pandoc.RawInline('openxml', del_xml))
+
+    -- Add any RawInlines that appeared after text started (e.g., comment end markers)
+    for _, raw in ipairs(content.raw_after) do
+      table.insert(result, raw)
+      debug("[revisions-inject] Preserving RawInline after deletion\n")
+    end
+
+    -- Return single element or list depending on whether we have RawInlines
+    if #result == 1 then
+      return result[1]
+    else
+      return result
+    end
+  end
+
+  return nil
+end
+
+-- Parse JSON file content into revisions_meta table
+-- Uses regex-based parsing that handles our flat JSON structure
+local function parse_revisions_json(content, source_path)
+  local loaded = 0
+
+  -- Each entry looks like "rev_N": { ... }. %b{} grabs the balanced object and
+  -- the fields are then picked out one by one, so their order is irrelevant.
+  for rev_id, body in content:gmatch('"(rev_[^"]+)":%s*(%b{})') do
+    local who = body:match('"author":%s*"([^"]*)"')
+    if who then
+      local when = body:match('"date":%s*"([^"]*)"')
+      local kind = body:match('"type":%s*"([^"]*)"')
+      revisions_meta[rev_id] = {
+        author = who,
+        date = when or os.date("!%Y-%m-%dT%H:%M:%SZ"),
+        type = kind
+      }
+      loaded = loaded + 1
+      debug("[revisions-inject] Loaded revision: " .. rev_id .. " by " .. who .. "\n")
+    end
+  end
+
+  -- The loaded flag is raised only when at least one entry was stored
+  if loaded > 0 then
+    debug("[revisions-inject] Loaded " .. loaded .. " revisions from: " .. source_path .. "\n")
+    revisions_loaded = true
+  end
+
+  return loaded
+end
+
+-- Try to load revisions from a file path
+local function try_load_revisions(path)
+  local handle = io.open(path, "r")
+  -- A file that cannot be opened counts as zero revisions loaded
+  if not handle then return 0 end
+  -- Read everything, release the handle, then let the parser report its count
+  local json_text = handle:read("*all")
+  handle:close()
+  return parse_revisions_json(json_text, path)
+end
+
+-- Load revision metadata from document metadata or auto-detect
+-- Priority:
+-- 1. -M revisions-file:path (explicit)
+-- 2. _docstyle/revisions.json (convention)
+function Meta(meta)
+  -- Skip if already loaded
+  if revisions_loaded then
+    return nil
+  end
+
+  -- Priority 1: Explicit path via metadata
+  if meta['revisions-file'] then
+    local path = pandoc.utils.stringify(meta['revisions-file'])
+    debug("[revisions-inject] Trying explicit path: " .. path .. "\n")
+    if try_load_revisions(path) > 0 then
+      return nil
+    end
+    debug("[revisions-inject] Warning: No revisions loaded from: " .. path .. " (unreadable, or no entries with an author)\n")
+  end
+
+  -- Priority 2: Auto-detect _docstyle/revisions.json (also the fallback when the explicit path yields nothing)
+  local auto_path = "_docstyle/revisions.json"
+  debug("[revisions-inject] Trying auto-detect: " .. auto_path .. "\n")
+  if try_load_revisions(auto_path) > 0 then
+    return nil
+  end
+
+  debug("[revisions-inject] No revisions.json found (checked _docstyle/revisions.json)\n")
+  return nil
+end
+
+-- Check output format
+function Pandoc(doc)
+ if FORMAT == "docx" or FORMAT == "openxml" then
+ debug("[revisions-inject] Filter active for Word output\n")
+ end
+ return nil
+end
+
+return {
+ { Meta = Meta },
+ { Pandoc = Pandoc },
+ { Span = Span }
+}
diff --git a/_extensions/docstyle/table-style.lua b/_extensions/docstyle/table-style.lua
new file mode 100644
index 0000000..70aae5f
--- /dev/null
+++ b/_extensions/docstyle/table-style.lua
@@ -0,0 +1,598 @@
+-- table-style.lua
+-- Pandoc Lua filter that applies CSS-defined table styles to Word output
+--
+-- Usage in QMD:
+-- ::: {.table-formal}
+-- | Column 1 | Column 2 |
+-- |----------|----------|
+-- | Data | Data |
+-- :::
+--
+-- Supported table classes:
+-- .table-formal - Top/bottom borders, shaded header row
+-- .table-grid - Full grid borders on all cells
+--
+-- Table styles are loaded from page-config.json (CSS-derived) at runtime.
+-- Built-in defaults are used as fallback when no CSS config is available.
+
+-- Load field-code-utils for ADDIN DOCSTYLE field code emission
+local fcu = require("field-code-utils")
+
+-- Debug logging (set DOCSTYLE_DEBUG=1 to enable)
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+ if DEBUG then
+ io.stderr:write(msg)
+ end
+end
+
+local FORMAT = (FORMAT == "docx" or FORMAT == "openxml") and "openxml" or FORMAT  -- normalise Pandoc's global FORMAT; hard-coding "openxml" made the docx-only guards always pass
+
+-- Built-in fallback table style definitions (used when CSS config not available)
+local builtin_table_styles = {
+ ["table-formal"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "7F7F7F" },
+ bottom = { val = "single", sz = "4", color = "7F7F7F" },
+ left = nil,
+ right = nil,
+ insideH = nil,
+ insideV = nil
+ },
+ header_shading = "D9D9D9"
+ },
+ ["table-grid"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "000000" },
+ bottom = { val = "single", sz = "4", color = "000000" },
+ left = { val = "single", sz = "4", color = "000000" },
+ right = { val = "single", sz = "4", color = "000000" },
+ insideH = { val = "single", sz = "4", color = "000000" },
+ insideV = { val = "single", sz = "4", color = "000000" }
+ },
+ header_shading = nil,
+ header_bold = true
+ }
+}
+
+-- Active table styles (populated from CSS config or fallback)
+local table_styles = nil
+
+-- Load table styles from page-config.json (CSS-derived) via shared loader
+local function load_table_styles()
+  local styles = (fcu.load_page_config() or {}).table_styles
+  if not styles then
+    debug("[table-style] No CSS table config found, using built-in defaults\n")
+    return nil
+  end
+  debug("[table-style] Loaded CSS table styles from page-config.json\n")
+  return styles
+end
+
+-- Initialise table_styles: CSS config with built-in fallback
+local function init_table_styles()
+  if table_styles then return end  -- already initialised
+
+  local css_styles = load_table_styles()
+  if not css_styles then
+    -- No CSS config available: use the built-in table directly
+    table_styles = builtin_table_styles
+    return
+  end
+  table_styles = {}
+  local merged = table_styles
+
+  -- Pass 1: copy built-ins two levels deep so later writes never touch builtin_table_styles
+  for name, style in pairs(builtin_table_styles) do
+    local copy = {}
+    for key, value in pairs(style) do
+      if type(value) == "table" then
+        local inner = {}
+        for sub_key, sub_value in pairs(value) do
+          inner[sub_key] = sub_value
+        end
+        copy[key] = inner
+      else
+        copy[key] = value
+      end
+    end
+    merged[name] = copy
+  end
+
+  -- Pass 2: overlay CSS field by field; tables present on both sides merge key by key, anything else is replaced
+  for name, css_style in pairs(css_styles) do
+    local target = merged[name] or {}
+    merged[name] = target
+    for key, value in pairs(css_style) do
+      local existing = target[key]
+      if type(value) == "table" and type(existing) == "table" then
+        for sub_key, sub_value in pairs(value) do
+          existing[sub_key] = sub_value
+        end
+      else
+        target[key] = value
+      end
+    end
+  end
+end
+
+-- Build border XML element
+local function build_border_xml(name, border)
+  if not border then return "" end  -- an absent edge emits nothing
+  return string.format('<w:%s w:val="%s" w:sz="%s" w:space="0" w:color="%s"/>',
+    name, border.val, border.sz, border.color)
+end
+
+-- Build table borders XML
+local function build_tblBorders_xml(borders)
+  if not borders then return "" end
+
+  -- Edges are emitted in a fixed order; edges missing from `borders` are skipped
+  local parts = { "<w:tblBorders>" }
+  for _, edge in ipairs({ "top", "left", "bottom", "right", "insideH", "insideV" }) do
+    if borders[edge] then
+      table.insert(parts, build_border_xml(edge, borders[edge]))
+    end
+  end
+  table.insert(parts, "</w:tblBorders>")
+
+  return table.concat(parts)
+end
+
+-- Build cell shading XML
+local function build_shading_xml(color)
+  if not color then return "" end  -- no shading requested
+  return string.format('<w:shd w:val="clear" w:color="auto" w:fill="%s"/>', color)
+end
+
+
+-- Parse widths attribute (e.g., "30,70" or "25,50,25")
+-- Returns array of percentages or nil if not specified
+local function parse_widths(widths_str)
+  if not widths_str or widths_str == "" then
+    return nil
+  end
+
+  local widths = {}
+  for w in string.gmatch(widths_str, "([^,]+)") do
+    local num = tonumber(w)
+    -- Keep positive finite numbers only: widths are later divided by their sum, and 0/0 breaks math.floor
+    if num and num > 0 and num < math.huge then
+      table.insert(widths, num)
+    end
+  end
+  return #widths > 0 and widths or nil
+end
+
+-- Convert Pandoc table to OpenXML with custom styling
+-- widths_str: optional comma-separated percentages (e.g., "30,70")
+-- width_pct: optional table width as percentage of page (e.g., "50" for half width)
+-- font_size_pt: optional font size in points (e.g., 9)
+-- overrides: optional table of per-table overrides (header_bold, header_shading)
+local function styled_table_to_openxml(tbl, style_name, widths_str, width_pct, font_size_pt, overrides)
+  local style = table_styles[style_name]
+  if not style then
+    debug("[table-style] Unknown table style: " .. style_name .. "\n")
+    return nil
+  end
+
+  -- Apply per-table overrides from div attributes (header-bold, header-shading)
+  overrides = overrides or {}
+  local eff_header_bold = style.header_bold
+  local eff_header_shading = style.header_shading
+  if overrides.header_bold ~= nil then
+    eff_header_bold = overrides.header_bold
+  end
+  if overrides.header_shading then
+    eff_header_shading = overrides.header_shading
+  end
+
+  debug("[table-style] Applying style '" .. style_name .. "' to table\n")
+
+  -- Get table dimensions: num_cols is the largest cell count of any row
+  local num_cols = 0
+  local rows = {}
+
+  -- Process table head
+  if tbl.head and tbl.head.rows then
+    for _, row in ipairs(tbl.head.rows) do
+      local cells = {}
+      for _, cell in ipairs(row.cells) do
+        table.insert(cells, { content = cell, is_header = true })
+        num_cols = math.max(num_cols, #row.cells)
+      end
+      table.insert(rows, { cells = cells, is_header_row = true })
+    end
+  end
+
+  -- Process table body
+  if tbl.bodies then
+    for _, body in ipairs(tbl.bodies) do
+      if body.body then
+        for _, row in ipairs(body.body) do
+          local cells = {}
+          for _, cell in ipairs(row.cells) do
+            table.insert(cells, { content = cell, is_header = false })
+            num_cols = math.max(num_cols, #row.cells)
+          end
+          table.insert(rows, { cells = cells, is_header_row = false })
+        end
+      end
+    end
+  end
+
+  -- Calculate table width (default 9000 twips = ~6.25 inches = full text width)
+  local full_width = 9000
+  local total_width = full_width
+
+  -- Apply width percentage if specified (e.g., "50" for half width)
+  if width_pct then
+    local pct = tonumber(width_pct)
+    if pct and pct > 0 and pct <= 100 then
+      total_width = math.floor(full_width * pct / 100)
+      debug("[table-style] Using table width: " .. pct .. "%\n")
+    end
+  end
+
+  -- Calculate column widths (twips); explicit widths win when their count matches
+  local col_widths = {}
+  local widths = parse_widths(widths_str)
+
+  if widths and #widths == num_cols then
+    -- Use specified values as shares of their own total (they need not sum to 100)
+    local total_pct = 0
+    for _, w in ipairs(widths) do
+      total_pct = total_pct + w
+    end
+    for i, w in ipairs(widths) do
+      col_widths[i] = math.floor(total_width * w / total_pct)
+    end
+    debug("[table-style] Using custom column widths: " .. widths_str .. "\n")
+  else
+    -- Auto-compute widths from cell content.
+    -- For each column: find the longest single word (minimum width to avoid
+    -- mid-word breaks), then distribute remaining space by total text volume.
+
+    -- Approximate characters that fit in the full table width
+    -- ~11 chars/inch at 10pt Calibri, scale inversely with font size
+    local base_font = font_size_pt or 10
+    local chars_per_inch = 11 * (10 / base_font)
+    local total_chars = math.floor(6.5 * chars_per_inch)
+
+    -- Collect text per column (header + all body cells)
+    local col_texts = {}
+    for i = 1, num_cols do col_texts[i] = {} end
+    for _, row in ipairs(rows) do
+      for col_idx, cell in ipairs(row.cells) do
+        if col_idx <= num_cols then
+          local text = ""
+          if cell.content then
+            text = pandoc.utils.stringify(cell.content)
+          end
+          table.insert(col_texts[col_idx], text)
+        end
+      end
+    end
+
+    local min_chars = {}
+    local volume = {}
+    for i = 1, num_cols do
+      -- Longest single word in this column (determines minimum width)
+      local max_word = 1
+      for _, text in ipairs(col_texts[i]) do
+        for word in text:gmatch("%S+") do
+          max_word = math.max(max_word, #word)
+        end
+      end
+      min_chars[i] = max_word + 1 -- +1 char padding
+
+      -- Total text volume (drives proportional allocation)
+      local vol = 0
+      for _, text in ipairs(col_texts[i]) do
+        vol = vol + #text
+      end
+      volume[i] = math.max(vol, 1)
+    end
+
+    -- Convert minimum chars to percentage of page
+    local min_pct = {}
+    local sum_min = 0
+    for i = 1, num_cols do
+      min_pct[i] = min_chars[i] / total_chars * 100
+      sum_min = sum_min + min_pct[i]
+    end
+
+    local auto_widths = {}
+    if sum_min >= 100 then
+      -- Minimums fill the page; scale proportionally
+      for i = 1, num_cols do
+        auto_widths[i] = min_pct[i] / sum_min * 100
+      end
+    else
+      -- Allocate minimums, distribute remaining space by volume
+      local remaining = 100 - sum_min
+      local total_vol = 0
+      for i = 1, num_cols do total_vol = total_vol + volume[i] end
+      for i = 1, num_cols do
+        auto_widths[i] = min_pct[i] + (volume[i] / total_vol * remaining)
+      end
+    end
+
+    -- Convert percentages to twips, adjusting for rounding
+    local sum_tw = 0
+    for i = 1, num_cols do
+      col_widths[i] = math.floor(total_width * auto_widths[i] / 100)
+      sum_tw = sum_tw + col_widths[i]
+    end
+    -- Give rounding remainder to the widest column (skipped for a table with no columns)
+    local widest = 1
+    for i = 2, num_cols do
+      if col_widths[i] > col_widths[widest] then widest = i end
+    end
+    if num_cols > 0 then col_widths[widest] = col_widths[widest] + (total_width - sum_tw) end
+
+    -- Log the computed widths
+    local pcts = {}
+    for i = 1, num_cols do
+      table.insert(pcts, tostring(math.floor(auto_widths[i] + 0.5)))
+    end
+    debug("[table-style] Auto-computed column widths: " .. table.concat(pcts, ",") .. "\n")
+  end
+
+  -- Build table properties XML
+  -- Add small bottom cell margin (~0.5 line = 120 twips) for breathing room
+  local cell_margin_xml = '<w:tblCellMar><w:bottom w:w="120" w:type="dxa"/></w:tblCellMar>'
+
+  local tblPr_parts = {
+    "<w:tblPr>",
+    '<w:tblW w:w="' .. total_width .. '" w:type="dxa"/>',
+    build_tblBorders_xml(style.borders),
+    '<w:tblLayout w:type="fixed"/>',
+    cell_margin_xml,
+    "</w:tblPr>"
+  }
+
+  -- Build grid columns (one w:gridCol per column, width in twips)
+  local grid_parts = { "<w:tblGrid>" }
+  for i = 1, num_cols do
+    table.insert(grid_parts, '<w:gridCol w:w="' .. col_widths[i] .. '"/>')
+  end
+  table.insert(grid_parts, "</w:tblGrid>")
+
+  -- Build rows
+  -- Font size in whole half-points, formatted as an integer (tostring on a float would give e.g. "18.0")
+  local half_pts = font_size_pt and string.format("%d", math.floor(font_size_pt * 2 + 0.5)) or nil
+
+  local row_parts = {}
+  for _, row in ipairs(rows) do
+    local row_xml = { "<w:tr>" }
+
+    -- Build row-level run properties (same for all cells in this row)
+    local rPr_parts = {}
+    if row.is_header_row and eff_header_bold then
+      table.insert(rPr_parts, "<w:b/>")
+    end
+    if half_pts then
+      table.insert(rPr_parts, '<w:sz w:val="' .. half_pts .. '"/>')
+      table.insert(rPr_parts, '<w:szCs w:val="' .. half_pts .. '"/>')
+    end
+    local rPr = ""
+    if #rPr_parts > 0 then
+      rPr = "<w:rPr>" .. table.concat(rPr_parts) .. "</w:rPr>"
+    end
+
+    -- Paragraph properties for single line spacing (no space after)
+    local pPr = '<w:pPr><w:spacing w:after="0" w:line="240" w:lineRule="auto"/></w:pPr>'
+
+    -- Helper: render a list of inlines to a Word paragraph (nil when nothing renders)
+    local function build_rich_para(inlines)
+      if #inlines == 0 then return nil end
+      local runs_xml = fcu.render_inlines(inlines, rPr_parts)
+      if runs_xml == "" then return nil end
+      return "<w:p>" .. pPr .. runs_xml .. "</w:p>"
+    end
+
+    for col_idx, cell in ipairs(row.cells) do
+      -- Cell properties: width first, then optional header shading
+      local tcPr_parts = {
+        "<w:tcPr>",
+        '<w:tcW w:w="' .. col_widths[col_idx] .. '" w:type="dxa"/>'
+      }
+
+      -- Add header shading if this is a header row
+      if row.is_header_row and eff_header_shading then
+        table.insert(tcPr_parts, build_shading_xml(eff_header_shading))
+      end
+
+      table.insert(tcPr_parts, "</w:tcPr>")
+
+      -- Build paragraphs for the cell using the inline renderer
+      -- This preserves bold, italic, comments, char-style spans, etc.
+      local paragraphs = {}
+
+      -- Get the cell's content blocks
+      local cell_blocks = {}
+      if cell.content and cell.content.contents then
+        cell_blocks = cell.content.contents
+      elseif cell.content then
+        cell_blocks = cell.content
+      end
+
+      -- Process each block in the cell
+      for _, block in ipairs(cell_blocks) do
+        if block.content then
+          -- Split content on LineBreak elements to create separate Word paragraphs
+          local current_line = {}
+
+          for _, inline in ipairs(block.content) do
+            if inline.t == "LineBreak" then
+              local para = build_rich_para(current_line)
+              if para then table.insert(paragraphs, para) end
+              current_line = {}
+            else
+              table.insert(current_line, inline)
+            end
+          end
+
+          -- Last line (after final LineBreak or if no LineBreak)
+          if #current_line > 0 then
+            local para = build_rich_para(current_line)
+            if para then table.insert(paragraphs, para) end
+          end
+        else
+          -- Block without inline content — stringify as fallback
+          local text = pandoc.utils.stringify(block)
+          if text ~= "" then
+            table.insert(paragraphs,
+              "<w:p>" .. pPr .. "<w:r>" .. rPr ..
+              '<w:t xml:space="preserve">' .. fcu.xml_escape(text) .. "</w:t></w:r></w:p>")
+          end
+        end
+      end
+
+      -- A w:tc must hold at least one paragraph, so add an empty one if nothing rendered
+      if #paragraphs == 0 then
+        table.insert(paragraphs, "<w:p>" .. pPr .. "<w:r>" .. rPr .. "</w:r></w:p>")
+      end
+
+      -- Build cell XML with all paragraphs
+      local cell_xml = "<w:tc>" ..
+        table.concat(tcPr_parts) ..
+        table.concat(paragraphs) ..
+        "</w:tc>"
+
+      table.insert(row_xml, cell_xml)
+    end
+
+    table.insert(row_xml, "</w:tr>")
+    table.insert(row_parts, table.concat(row_xml))
+  end
+
+  -- Assemble complete table XML: properties, grid, then rows
+  local table_xml = "<w:tbl>" ..
+    table.concat(tblPr_parts) ..
+    table.concat(grid_parts) ..
+    table.concat(row_parts) ..
+    "</w:tbl>"
+
+  return table_xml
+end
+
+-- Find table style class in div classes
+local function find_table_style(classes)
+  local known = table_styles
+  -- First class naming a known style wins; nothing matches before styles load
+  for _, candidate in ipairs(classes) do
+    if known and known[candidate] then return candidate end
+  end
+  return nil
+end
+
+-- Keys to skip when collecting div attributes for field code payload
+-- (Pandoc-internal keys that should not be serialised)
+local skip_attr_keys = { id = true, ["data-pos"] = true }
+
+-- Replace a Div that carries a known table-style class with raw OpenXML for the first Table found inside it, wrapped in ADDIN DOCSTYLE field markers
+function Div(div)
+ -- Check if this div has a table style class (nil when none of its classes is a key of table_styles)
+ local style_name = find_table_style(div.classes)
+ if not style_name then
+ return nil
+ end
+
+ -- Only process for docx output (FORMAT here is this file's local, not necessarily Pandoc's global)
+ if FORMAT ~= "openxml" then
+ return nil
+ end
+
+ -- Extract attributes if present
+ -- Usage: ::: {.table-formal widths="30,70" width="50" font-size="9"}
+ local widths_str = div.attributes["widths"] -- column widths (e.g., "30,70")
+ local width_pct = div.attributes["width"] -- table width % (e.g., "50")
+ local font_size_str = div.attributes["font-size"] -- font size in pt (e.g., "9")
+
+ -- Find Table element inside the div (depth-first through nested Divs; the first Table wins,
+ -- since Quarto wraps R code chunk output in .cell > .cell-output-display divs)
+ local function find_table(blocks)
+ for _, block in ipairs(blocks) do
+ if block.t == "Table" then
+ return block
+ elseif block.t == "Div" and block.content then
+ local found = find_table(block.content)
+ if found then return found end
+ end
+ end
+ return nil
+ end
+ local tbl = find_table(div.content)
+
+ if not tbl then
+ debug("[table-style] No table found in ." .. style_name .. " div\n")
+ return nil
+ end
+
+ -- Parse font size (points); a non-numeric attribute leaves font_size_pt nil
+ local font_size_pt = nil
+ if font_size_str then
+ font_size_pt = tonumber(font_size_str)
+ if font_size_pt then
+ debug("[table-style] Using font size: " .. font_size_pt .. "pt\n")
+ end
+ end
+
+ -- Also check for CSS-config font size if not specified as div attribute (stored in half-points, hence / 2)
+ if not font_size_pt then
+ local style = table_styles[style_name]
+ if style and style.font_size_half_pts then
+ font_size_pt = style.font_size_half_pts / 2
+ debug("[table-style] Using CSS font size: " .. font_size_pt .. "pt\n")
+ end
+ end
+
+ -- Parse per-table header overrides from div attributes
+ local overrides = {}
+ local hb = div.attributes["header-bold"]
+ if hb then
+ overrides.header_bold = (hb == "true" or hb == "1")
+ end
+ local hs = div.attributes["header-shading"]
+ if hs and hs ~= "" then
+ overrides.header_shading = hs:gsub("^#", "") -- strip leading # if present
+ end
+
+ -- Convert to styled OpenXML (nil means the style was unknown; the Div is then left untouched)
+ local table_xml = styled_table_to_openxml(tbl, style_name, widths_str, width_pct, font_size_pt, overrides)
+ if not table_xml then
+ return nil
+ end
+
+ -- Build field code payload and wrap table with ADDIN DOCSTYLE markers
+ -- Filter out Pandoc-internal keys and empty values before passing to field code builder
+ local attrs = {}
+ for key, val in pairs(div.attributes) do
+ if val and val ~= "" and not skip_attr_keys[key] then
+ attrs[key] = val
+ end
+ end
+ local field_start = fcu.build_table_field_start(style_name, attrs)
+ local field_end = fcu.build_block_field_end()
+
+ return pandoc.Blocks({
+ pandoc.RawBlock("openxml", field_start),
+ pandoc.RawBlock("openxml", table_xml),
+ pandoc.RawBlock("openxml", field_end)
+ })
+end
+
+-- Initialise format and load table styles once
+function Pandoc(doc)
+ if FORMAT == "docx" or FORMAT == "openxml" then
+ FORMAT = "openxml"
+ end
+ init_table_styles()
+ return nil
+end
+
+return {
+ { Pandoc = Pandoc },
+ { Div = Div }
+}
diff --git a/_extensions/docstyle/toc-field.lua b/_extensions/docstyle/toc-field.lua
new file mode 100644
index 0000000..12c221a
--- /dev/null
+++ b/_extensions/docstyle/toc-field.lua
@@ -0,0 +1,214 @@
+-- toc-field.lua
+-- Pandoc Lua filter that injects Word TOC field codes
+--
+-- Usage in QMD:
+-- ::: {.toc}
+-- :::
+--
+-- Configuration in _quarto.yml (under docstyle.toc):
+-- title: "Contents" # Optional heading above TOC
+-- title-level: 1 # Heading level for title (default: 1)
+-- levels: "1-3" # Which heading levels to include
+-- page-numbers: true # Show page numbers
+-- hyperlinks: true # Make entries clickable
+-- tab-leader: "dot" # dot, dash, underscore, none
+--
+-- This filter finds Div elements with class "toc" and replaces them with
+-- Word TOC field codes, enabling dynamic table of contents in Word.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+local FORMAT = (FORMAT == "docx" or FORMAT == "openxml") and "openxml" or FORMAT  -- normalise Pandoc's global FORMAT; hard-coding "openxml" injected TOC field XML for every output format
+
+-- Default configuration
+local toc_config = {
+ title = nil, -- No title by default
+ title_level = 1, -- # heading
+ levels = "1-3",
+ page_numbers = true,
+ hyperlinks = true,
+ tab_leader = "dot"
+}
+
+-- Read configuration from metadata
+function Meta(meta)
+  -- Nothing to read unless docstyle.toc is present in the metadata
+  local toc = meta.docstyle and meta.docstyle.toc
+  if not toc then
+    return nil
+  end
+
+  local stringify = pandoc.utils.stringify
+
+  -- Flags may arrive as real booleans or as text; for text, only the
+  -- literal "false" turns a flag off
+  local function as_flag(val)
+    if type(val) == "boolean" then
+      return val
+    end
+    return stringify(val) ~= "false"
+  end
+
+  -- Each key overrides its default in toc_config only when supplied
+  if toc.title then
+    toc_config.title = stringify(toc.title)
+  end
+  if toc["title-level"] then
+    toc_config.title_level = tonumber(stringify(toc["title-level"])) or 1
+  end
+  if toc.levels then
+    toc_config.levels = stringify(toc.levels)
+  end
+  if toc["page-numbers"] ~= nil then
+    toc_config.page_numbers = as_flag(toc["page-numbers"])
+  end
+  if toc.hyperlinks ~= nil then
+    toc_config.hyperlinks = as_flag(toc.hyperlinks)
+  end
+  if toc["tab-leader"] then
+    toc_config.tab_leader = stringify(toc["tab-leader"])
+  end
+
+  -- Report the effective configuration on stderr
+  io.stderr:write("[toc-field] Config: levels=" .. toc_config.levels ..
+    ", page-numbers=" .. tostring(toc_config.page_numbers) ..
+    ", hyperlinks=" .. tostring(toc_config.hyperlinks) ..
+    ", tab-leader=" .. toc_config.tab_leader .. "\n")
+
+  return nil
+end
+
+-- Normalize levels to a range format (Word requires "1-3" not just "1")
+local function normalize_levels(levels)
+  -- Plain (non-pattern) search: a value containing "-" is already a range
+  local is_range = string.find(levels, "-", 1, true) ~= nil
+  if not is_range then
+    return levels .. "-" .. levels  -- single level "n" becomes "n-n"
+  end
+  return levels
+end
+
+-- Build the TOC field instruction text
+local function build_toc_instr()
+  -- TOC field switches:
+  --   \o "1-3"  include heading levels 1-3
+  --   \h        hyperlink entries to headings
+  --   \z        hide tab leader and page numbers in Web Layout view
+  --   \u        use applied paragraph outline level
+  --   \n        suppress page numbers (if page-numbers: false)
+
+  -- Heading levels are always emitted as a range
+  local levels_range = normalize_levels(toc_config.levels)
+
+  -- Each entry is { include?, switch text }, listed in output order.
+  -- \o, \z and \u are unconditional; \h and \n follow the configuration.
+  local candidates = {
+    { true, '\\o "' .. levels_range .. '"' },
+    { toc_config.hyperlinks, "\\h" },
+    { true, "\\z" },
+    { true, "\\u" },
+    { not toc_config.page_numbers, "\\n" },
+  }
+
+  -- Join the selected switches after the field name, space-separated
+  local instr = "TOC"
+  for _, entry in ipairs(candidates) do
+    local include, switch = entry[1], entry[2]
+    if include then
+      instr = instr .. " " .. switch
+    end
+  end
+
+  return instr
+end
+
+-- Build the OpenXML for a TOC field code
+local function build_toc_field_xml()
+  -- Escape the instruction: the levels value in it comes from user config
+  local instr = fcu.xml_escape(build_toc_instr())
+  -- Pad instruction text (Word requires leading/trailing spaces)
+  instr = " " .. instr .. " "
+
+  -- Build the 5-part Word field code structure: begin, instruction, separate, placeholder result, end
+  local xml = '<w:p>' ..
+    '<w:r><w:fldChar w:fldCharType="begin"/></w:r>' ..
+    '<w:r><w:instrText xml:space="preserve">' .. instr .. '</w:instrText></w:r>' ..
+    '<w:r><w:fldChar w:fldCharType="separate"/></w:r>' ..
+    '<w:r><w:t>[Update field to generate table of contents]</w:t></w:r>' ..
+    '<w:r><w:fldChar w:fldCharType="end"/></w:r>' ..
+    '</w:p>'
+
+  return xml
+end
+
+-- Build a heading element for the TOC title
+-- Uses the configured title-level to determine the Word style
+-- title-level: 1 -> Heading1, 2 -> Heading2, etc.
+local function build_title_blocks()
+  if not toc_config.title then
+    return {}
+  end
+
+  -- Map title-level to a heading style id ("Heading1", "Heading2", ...); a level outside 1-9 falls back to 1
+  local level = math.floor(tonumber(toc_config.title_level) or 1)
+  if level < 1 or level > 9 then level = 1 end
+  local style_id = string.format("Heading%d", level)
+
+  -- The title text comes from user config, so escape it before embedding it in XML
+  local title_xml = '<w:p>' ..
+    '<w:pPr><w:pStyle w:val="' .. style_id .. '"/></w:pPr>' ..
+    '<w:r><w:t xml:space="preserve">' .. fcu.xml_escape(toc_config.title) .. '</w:t></w:r>' ..
+    '</w:p>'
+  return { pandoc.RawBlock("openxml", title_xml) }
+end
+
+-- Process Div elements looking for .toc class
+function Div(div)
+  -- Divs without the "toc" class pass through untouched
+  if not div.classes:includes("toc") then
+    return nil
+  end
+
+  -- Only process for docx output
+  if FORMAT ~= "openxml" then
+    io.stderr:write("[toc-field] Skipping TOC injection (not docx output)\n")
+    return nil
+  end
+
+  io.stderr:write("[toc-field] Found .toc div, injecting TOC field code\n")
+
+  -- Every emitted block is a raw OpenXML fragment
+  local function raw(xml)
+    return pandoc.RawBlock("openxml", xml)
+  end
+
+  -- Output order: DOCSTYLE begin marker, optional title, TOC field, DOCSTYLE end marker
+  local blocks = { raw(fcu.build_div_field_start("toc")) }
+
+  -- Title heading, present only when a title is configured
+  for _, title_block in ipairs(build_title_blocks()) do
+    blocks[#blocks + 1] = title_block
+  end
+
+  -- The TOC field code itself
+  blocks[#blocks + 1] = raw(build_toc_field_xml())
+
+  -- ADDIN DOCSTYLE field code end (using shared utility)
+  blocks[#blocks + 1] = raw(fcu.build_block_field_end())
+
+  return blocks
+end
+
+-- Check output format
+function Pandoc(doc)
+ if FORMAT == "docx" or FORMAT == "openxml" then
+ FORMAT = "openxml"
+ end
+ return nil
+end
+
+return {
+ { Meta = Meta },
+ { Div = Div }
+}
diff --git a/_extensions/docstyle/update-field-codes.R b/_extensions/docstyle/update-field-codes.R
new file mode 100755
index 0000000..0719b2d
--- /dev/null
+++ b/_extensions/docstyle/update-field-codes.R
@@ -0,0 +1,392 @@
+#!/usr/bin/env Rscript
+# Post-render hook: Inject comments, Zotero components, and update field-codes.json
+#
+# This script is called by Quarto after rendering to:
+# 1. Inject comments from comments.json into the rendered DOCX
+# 2. Inject Zotero components (ZOTERO_PREF) for full round-trip support
+# 3. Extract Zotero field codes and merge into field-codes.json (for caching)
+# 4. Validate the rendered document (optional)
+#
+# The comment-inject.lua filter creates comment markers in document.xml,
+# but the actual comments.xml must be built from the JSON sidecar file.
+#
+# Usage in _quarto.yml:
+# project:
+# post-render: _extensions/docstyle/update-field-codes.R
+#
+# Environment variables (set by Quarto):
+# QUARTO_PROJECT_OUTPUT_FILES - newline-separated list of output files
+#
+# Optional environment variables:
+# DOCSTYLE_VALIDATE=1 - Enable DOCX structure validation
+# DOCSTYLE_VALIDATE_COMMENTS=1 - Enable comment validation
+# DOCSTYLE_VALIDATE_ZOTERO=1 - Enable Zotero field code validation
+# DOCSTYLE_DEBUG=1 - Enable verbose debug output
+
+# Quarto passes the rendered outputs as a newline-separated list
+output_files_env <- Sys.getenv("QUARTO_PROJECT_OUTPUT_FILES", "")
+
+if (!nzchar(output_files_env)) {
+  # Variable unset or empty: not running as a Quarto hook, exit silently
+  quit(save = "no", status = 0)
+}
+
+# Keep only the DOCX outputs (extension matched case-insensitively)
+output_files <- unlist(strsplit(output_files_env, "\n"))
+docx_files <- grep("\\.docx$", output_files, ignore.case = TRUE, value = TRUE)
+
+if (length(docx_files) == 0) {
+  # No DOCX files rendered, nothing to do
+  quit(save = "no", status = 0)
+}
+
+# Try to load docstyle (check installed package first, then try devtools::load_all)
+docstyle_loaded <- FALSE
+
+# TRUE when `dir` contains a DESCRIPTION file whose Package field is exactly
+# "docstyle". read.dcf() parses the whole file, so the field does not have to
+# be on the first line, and an empty or unreadable DESCRIPTION gives FALSE
+# instead of stopping the hook.
+is_docstyle_source <- function(dir) {
+  desc_path <- file.path(dir, "DESCRIPTION")
+  if (!file.exists(desc_path)) {
+    return(FALSE)
+  }
+  pkg <- tryCatch(
+    read.dcf(desc_path, fields = "Package"),
+    error = function(e) NULL
+  )
+  !is.null(pkg) && nrow(pkg) > 0 &&
+    identical(unname(pkg[1, "Package"]), "docstyle")
+}
+
+if (requireNamespace("docstyle", quietly = TRUE)) {
+  docstyle_loaded <- TRUE
+} else {
+  # Try to find and load from development source
+  # First, try relative to this script (follows symlinks to find package root)
+  script_path <- tryCatch({
+    args <- commandArgs(trailingOnly = FALSE)
+    file_arg <- grep("^--file=", args, value = TRUE)
+    if (length(file_arg) > 0) {
+      # Use the first match so script_path is always a single path
+      normalizePath(sub("^--file=", "", file_arg[1]), mustWork = FALSE)
+    } else {
+      NULL
+    }
+  }, error = function(e) NULL)
+
+  # Build search paths: script location parents + project parents
+  project_dir <- Sys.getenv("QUARTO_PROJECT_DIR", getwd())
+  search_dirs <- c(
+    project_dir,
+    dirname(project_dir),
+    dirname(dirname(project_dir)),
+    dirname(dirname(dirname(project_dir)))
+  )
+
+  # Add script-relative paths (for symlinked extensions)
+  if (!is.null(script_path) && file.exists(script_path)) {
+    script_dir <- dirname(script_path)
+    # Script is in _extensions/docstyle/, package root is 2 levels up
+    search_dirs <- c(
+      dirname(dirname(script_dir)), # Package root (e.g., /path/to/docstyle)
+      search_dirs
+    )
+  }
+
+  search_dirs <- unique(search_dirs)
+
+  # Load the first candidate that is a docstyle checkout and loads cleanly;
+  # a failed load_all() moves on to the next directory.
+  for (dir in search_dirs) {
+    if (!is_docstyle_source(dir) ||
+        !requireNamespace("devtools", quietly = TRUE)) {
+      next
+    }
+    docstyle_loaded <- tryCatch({
+      devtools::load_all(dir, quiet = TRUE)
+      TRUE
+    }, error = function(e) FALSE)
+    if (docstyle_loaded) {
+      break
+    }
+  }
+}
+
+if (!docstyle_loaded) {
+  message("[post-render] docstyle package not found, skipping comment injection")
+  quit(save = "no", status = 0)
+}
+
+# Resolve project dir once — used for path normalisation and _docstyle/ lookup
+project_dir <- Sys.getenv("QUARTO_PROJECT_DIR", getwd())
+
+# Helper: locate a rendered DOCX whose path may not be usable as given.
+#
+# QUARTO_PROJECT_OUTPUT_FILES is built by Quarto as
+# relative3(projDir, outputFile), but when output-dir contains ".." (e.g.
+# output-dir: ../_site/docs in a subdirectory _quarto.yml) the resolved path
+# goes above projDir. QUARTO_DOCUMENT_PATH is always the document's
+# directory, so it is tried as a fallback base.
+#
+# Lookup order: `path` as given, then under `project_dir`, then under
+# QUARTO_DOCUMENT_PATH (when set). The first existing file wins; if none
+# exists, `path` is returned unchanged and the caller handles the missing file.
+resolve_docx_path <- function(path, project_dir) {
+  if (file.exists(path)) {
+    return(path)
+  }
+
+  # Candidate base directories in priority order
+  bases <- project_dir
+  doc_path <- Sys.getenv("QUARTO_DOCUMENT_PATH", "")
+  if (nzchar(doc_path)) {
+    bases <- c(bases, doc_path)
+  }
+
+  for (base in bases) {
+    candidate <- normalizePath(file.path(base, path), mustWork = FALSE)
+    if (file.exists(candidate)) {
+      return(candidate)
+    }
+  }
+
+  path
+}
+
+# Process each DOCX file
+for (docx_path in docx_files) {
+ docx_path <- resolve_docx_path(docx_path, project_dir)
+ if (!file.exists(docx_path)) {
+ next
+ }
+
+  # Sidecar location (comments.json / field-codes.json): the project's
+  # _docstyle/ directory when it exists, otherwise the DOCX's own directory
+  docstyle_dir <- file.path(project_dir, "_docstyle")
+  output_dir <- if (dir.exists(docstyle_dir)) docstyle_dir else dirname(docx_path)
+
+  # Per-document values reported in the summary line at the end
+  n_comments <- 0
+  zotero_pref_injected <- FALSE
+
+  # Debug mode (used throughout); enabled with DOCSTYLE_DEBUG=1
+  debug_mode <- identical(Sys.getenv("DOCSTYLE_DEBUG", "0"), "1")
+
+  # Step 1: Inject comments from comments.json (if present)
+  comments_json <- file.path(output_dir, "comments.json")
+  if (file.exists(comments_json)) {
+    tryCatch({
+      # Comment IDs used in the rendered document
+      used_ids <- docstyle::scan_used_comment_ids(docx_path)
+
+      if (length(used_ids) > 0) {
+        # Every ID used by the document must exist in comments.json before
+        # injection; this prevents a corrupt DOCX when QMD and JSON are out
+        # of sync
+        known_ids <- names(docstyle::read_comments_json(comments_json))
+        orphan_ids <- setdiff(used_ids, known_ids)
+
+        if (length(orphan_ids) == 0) {
+          docstyle::inject_comments(
+            docx_path = docx_path,
+            comments_json = comments_json,
+            used_ids = used_ids
+          )
+          n_comments <- length(used_ids)
+        } else {
+          # Critical error: QMD references comments not in JSON.
+          # Don't inject - leave document without comments rather than corrupt it
+          message("[docstyle] ERROR: Comment ID mismatch detected!")
+          message(" Document references ", length(orphan_ids), " comment ID(s) not in comments.json:")
+          message(" ", paste(orphan_ids, collapse = ", "))
+          message(" This would produce a corrupt DOCX file.")
+          message(" To fix: Run docstyle::sync_comment_ids() to re-sync IDs from source DOCX")
+          message(" Skipping comment injection to prevent corruption.")
+        }
+      }
+    }, error = function(e) {
+      message("[docstyle] Error injecting comments: ", conditionMessage(e))
+    })
+  }
+
+  # Step 1b: Fix comment-deletion nesting
+  # Due to Lua filter limitations, comments attached to deleted text end up
+  # after the deletion; this step repositions them to span the deletion.
+  # Runs only when Step 1 injected at least one comment.
+  if (n_comments > 0) {
+    tryCatch(
+      {
+        n_repositioned <- docstyle::fix_comment_deletion_nesting(
+          docx_path = docx_path,
+          verbose = debug_mode
+        )
+        # The count is reported in debug mode only
+        if (n_repositioned > 0 && debug_mode) {
+          message("[docstyle] Fixed ", n_repositioned, " comment-deletion nesting issue(s)")
+        }
+      },
+      error = function(e) {
+        message("[docstyle] Error fixing comment nesting: ", conditionMessage(e))
+      }
+    )
+  }
+
+  # Step 1c: Inject Zotero citation field codes from markers
+  # The Lua filter emits DOCSTYLE_CITE:: markers; this replaces them with
+  # real Word field code XML using data from field-codes.json.
+  n_citations_injected <- 0L
+  field_codes_json <- file.path(output_dir, "field-codes.json")
+  if (file.exists(field_codes_json)) {
+    tryCatch({
+      cite_result <- docstyle::inject_zotero_citations(
+        docx_path = docx_path,
+        field_codes_path = field_codes_json,
+        verbose = debug_mode
+      )
+      # Accept only a single non-missing count. The summary at the end of the
+      # loop tests `n_citations_injected > 0` outside any tryCatch, so a NULL
+      # or NA here would otherwise stop the whole hook.
+      n_found <- cite_result$n_injected
+      if (is.numeric(n_found) && length(n_found) == 1L && !is.na(n_found)) {
+        n_citations_injected <- n_found
+      }
+    }, error = function(e) {
+      message("[docstyle] Error injecting Zotero citations: ", conditionMessage(e))
+    })
+  }
+
+  # Step 2: Validate comments (if enabled via DOCSTYLE_VALIDATE_COMMENTS=1)
+  # Skipped when there is no comments.json to validate against.
+  validate_comments <- Sys.getenv("DOCSTYLE_VALIDATE_COMMENTS", "0")
+  if (validate_comments == "1" && file.exists(comments_json)) {
+    tryCatch(
+      {
+        comment_check <- docstyle::validate_comments(
+          docx_path = docx_path,
+          comments_json = comments_json,
+          verbose = TRUE
+        )
+        if (!comment_check$valid) {
+          message("[docstyle] Comment validation failed")
+        }
+      },
+      error = function(e) {
+        message("[docstyle] Error validating comments: ", conditionMessage(e))
+      }
+    )
+  }
+
+  # Step 2b: Validate DOCX structure (if enabled via DOCSTYLE_VALIDATE=1)
+  # Catches XML issues, malformed tracked changes, duplicate IDs, etc.
+  validate_structure <- Sys.getenv("DOCSTYLE_VALIDATE", "0")
+  if (debug_mode) {
+    message("[docstyle] DOCSTYLE_VALIDATE=", validate_structure)
+  }
+  if (validate_structure == "1") {
+    tryCatch(
+      {
+        structure_check <- docstyle::validate_docx_structure(
+          docx_path = docx_path,
+          verbose = debug_mode
+        )
+        if (!structure_check$valid) {
+          # Failures are always listed, one line per issue
+          structure_errors <- structure_check$errors
+          message("[docstyle] Structure validation: ", length(structure_errors), " issue(s) found")
+          for (err in structure_errors) {
+            message(" - ", err)
+          }
+        } else if (debug_mode) {
+          # Success is reported in debug mode only
+          message("[docstyle] Structure validation: passed all checks")
+        }
+      },
+      error = function(e) {
+        message("[docstyle] Error validating structure: ", conditionMessage(e))
+      }
+    )
+  }
+
+  # Step 3: Inject Zotero components (ZOTERO_PREF if missing)
+  # This ensures rendered documents have full Zotero functionality for round-trip editing
+  tryCatch({
+    result <- docstyle::inject_zotero_components(
+      docx_path = docx_path,
+      field_codes_json = if (file.exists(field_codes_json)) field_codes_json else NULL,
+      validate = FALSE, # Will validate separately if enabled
+      verbose = debug_mode
+    )
+    # isTRUE() treats a NULL or NA flag as "not injected". A bare
+    # `if (result$zotero_pref_injected)` would raise on such a value, and the
+    # handler below reports errors in debug mode only. This matches how
+    # Step 5 reads result$body_fixed.
+    if (isTRUE(result$zotero_pref_injected)) {
+      zotero_pref_injected <- TRUE
+      if (debug_mode) {
+        message("[docstyle] Injected ZOTERO_PREF (style: ", result$style_id, ")")
+      }
+    }
+  }, error = function(e) {
+    # Failures in this step are reported in debug mode only
+    if (debug_mode) {
+      message("[docstyle] Error injecting Zotero components: ", conditionMessage(e))
+    }
+  })
+
+  # Step 3b: Validate Zotero field codes (if enabled via DOCSTYLE_VALIDATE_ZOTERO=1)
+  validate_zotero <- Sys.getenv("DOCSTYLE_VALIDATE_ZOTERO", "0")
+  if (validate_zotero == "1") {
+    tryCatch(
+      {
+        zotero_check <- docstyle::validate_zotero(
+          docx_path = docx_path,
+          verbose = debug_mode
+        )
+        if (!zotero_check$valid) {
+          # Errors are always listed, one line each
+          zotero_errors <- zotero_check$issues$errors
+          message("[docstyle] Zotero validation: ", length(zotero_errors), " error(s)")
+          for (err in zotero_errors) {
+            message(" - ", err)
+          }
+        } else if (debug_mode) {
+          # Success is reported in debug mode only
+          message("[docstyle] Zotero validation: passed all checks")
+        }
+      },
+      error = function(e) {
+        message("[docstyle] Error validating Zotero: ", conditionMessage(e))
+      }
+    )
+  }
+
+ # Note: Step 4 (extract and merge field codes) was removed in v0.7.6.
+ # The render pipeline is read-only with respect to field-codes.json.
+ # New citations only enter via harvest (docx_to_qmd), not via render.
+ # See: https://github.com/DougManuel/docstyle/issues/38
+
+  # Step 5: Finalize section structure
+  # Post-process sectPr elements: remove leaked line numbers from body sectPr,
+  # validate opening/closing sectPr have correct properties
+  # Both values keep their defaults (0L / FALSE) if finalize_docx() fails.
+  n_sections_fixed <- 0L
+  body_sectPr_fixed <- FALSE
+  tryCatch(
+    {
+      finalized <- docstyle::finalize_docx(
+        docx_path = docx_path,
+        verbose = debug_mode
+      )
+      n_sections_fixed <- finalized$fixed
+      body_sectPr_fixed <- isTRUE(finalized$body_fixed)
+    },
+    error = function(e) {
+      message("[docstyle] Error finalizing sections: ", conditionMessage(e))
+    }
+  )
+
+  # Step 6: Prune unused styles (remove Pandoc bloat)
+  # The value returned by prune_styles_file() is kept as the pruned count;
+  # on failure the error is reported and the count stays at 0L.
+  n_styles_pruned <- tryCatch(
+    docstyle::prune_styles_file(
+      docx_path = docx_path,
+      sidecar_dir = output_dir,
+      verbose = debug_mode
+    ),
+    error = function(e) {
+      message("[docstyle] Error pruning styles: ", conditionMessage(e))
+      0L
+    }
+  )
+
+  # Step 7: Scan for unresolved citations (always runs)
+  # Any [@citekey] text remaining in the output means the citation could not
+  # be resolved to a Zotero field code. This catches both Lua-filter misses
+  # (citekey not in field-codes.json) and R-finisher fallbacks.
+  # A failed scan yields an empty result and is reported in debug mode only.
+  unresolved_cites <- tryCatch(
+    docstyle::scan_unresolved_citations(docx_path),
+    error = function(e) {
+      if (debug_mode) {
+        message("[docstyle] Error scanning for unresolved citations: ", conditionMessage(e))
+      }
+      character()
+    }
+  )
+
+  # Partition unresolved citations into staged vs unknown.
+  # Staged: metadata exists in field-codes.json citations catalog but no citationGroup
+  # (added via add_citations_from_zotero() or QMD-first drafting — expected during drafting)
+  # Unknown: no metadata at all — likely a typo or missing harvest
+  #
+  # Everything starts out as unknown and is promoted to staged only when its
+  # key is found in the catalog. If field-codes.json is absent or unreadable
+  # the keys stay unknown.
+  staged_cites <- character()
+  unknown_cites <- unresolved_cites
+  if (length(unresolved_cites) > 0 && file.exists(field_codes_json)) {
+    # names() of a missing or empty catalog is NULL, and `x %in% NULL` is all
+    # FALSE, so no `%||%` fallback is needed. That operator is only in base R
+    # from 4.4.0 and docstyle is not attached in this script.
+    known_keys <- tryCatch({
+      fc_obj <- jsonlite::fromJSON(field_codes_json, simplifyVector = FALSE)
+      names(fc_obj[["citations"]])
+    }, error = function(e) NULL)
+
+    is_staged <- unresolved_cites %in% known_keys
+    staged_cites <- unresolved_cites[is_staged]
+    unknown_cites <- unresolved_cites[!is_staged]
+  }
+
+  # Print single summary line; only items that actually happened are listed
+  plural <- function(n) if (n == 1) "" else "s"
+  parts <- c(
+    if (n_comments > 0) {
+      sprintf("%d comment%s", n_comments, plural(n_comments))
+    },
+    if (n_citations_injected > 0) {
+      sprintf("%d citation%s injected", n_citations_injected, plural(n_citations_injected))
+    },
+    if (zotero_pref_injected) "ZOTERO_PREF injected",
+    if (body_sectPr_fixed) "section structure finalized",
+    if (n_styles_pruned > 0) {
+      sprintf("%d style%s pruned", n_styles_pruned, plural(n_styles_pruned))
+    }
+  )
+  if (length(parts) > 0) {
+    message("[docstyle] Processed: ", paste(parts, collapse = ", "))
+  }
+
+  # Staged citations are informational
+  if (length(staged_cites) > 0) {
+    message(
+      "[docstyle] Info: ", length(staged_cites),
+      " staged citation(s) pending Zotero insertion in Word: ",
+      paste(staged_cites, collapse = ", ")
+    )
+  }
+
+  # Unknown citations are reported as a warning
+  if (length(unknown_cites) > 0) {
+    message(
+      "[docstyle] Warning: ", length(unknown_cites),
+      " unresolved citation(s) with no metadata — check citekeys or re-harvest: ",
+      paste(unknown_cites, collapse = ", ")
+    )
+  }
+}
diff --git a/_extensions/docstyle/validate-markup.R b/_extensions/docstyle/validate-markup.R
new file mode 100755
index 0000000..f13d016
--- /dev/null
+++ b/_extensions/docstyle/validate-markup.R
@@ -0,0 +1,144 @@
+#!/usr/bin/env Rscript
+# Pre-render hook: Validate QMD markup before rendering
+#
+# This script validates comment markers, revision spans, and other
+# docstyle-specific markup before Quarto renders the document.
+# It catches issues that would cause Word "unreadable content" errors.
+#
+# Usage in _quarto.yml:
+# project:
+# pre-render:
+# - _extensions/docstyle/validate-markup.R
+# - _extensions/docstyle/generate-reference.R
+#
+# Environment variables (set by Quarto):
+# QUARTO_PROJECT_DIR - project root directory
+# QUARTO_PROJECT_INPUT_FILES - files being rendered
+#
+# Exit codes:
+# 0 - Validation passed (or skipped)
+# 1 - Validation failed (stops render)
+
+# Inputs supplied by Quarto: project root and the files being rendered
+project_dir <- Sys.getenv("QUARTO_PROJECT_DIR", getwd())
+input_files_env <- Sys.getenv("QUARTO_PROJECT_INPUT_FILES", "")
+
+if (!nzchar(input_files_env)) {
+  # Variable unset or empty: not running as a Quarto hook, exit silently
+  quit(save = "no", status = 0)
+}
+
+# The list is newline-separated; keep only .qmd inputs (case-insensitive)
+input_files <- unlist(strsplit(input_files_env, "\n"))
+qmd_files <- grep("\\.qmd$", input_files, ignore.case = TRUE, value = TRUE)
+
+if (length(qmd_files) == 0) {
+  # No QMD files being rendered, nothing to validate
+  quit(save = "no", status = 0)
+}
+
+# Try to load docstyle: installed package first, then a development checkout
+# found in the project directory or one of its parents
+docstyle_loaded <- FALSE
+
+# TRUE when `dir` contains a DESCRIPTION file whose Package field is exactly
+# "docstyle". read.dcf() parses the whole file, so the field does not have to
+# be on the first line, and an empty or unreadable DESCRIPTION gives FALSE
+# instead of stopping the hook.
+is_docstyle_source <- function(dir) {
+  desc_path <- file.path(dir, "DESCRIPTION")
+  if (!file.exists(desc_path)) {
+    return(FALSE)
+  }
+  pkg <- tryCatch(
+    read.dcf(desc_path, fields = "Package"),
+    error = function(e) NULL
+  )
+  !is.null(pkg) && nrow(pkg) > 0 &&
+    identical(unname(pkg[1, "Package"]), "docstyle")
+}
+
+if (requireNamespace("docstyle", quietly = TRUE)) {
+  docstyle_loaded <- TRUE
+} else {
+  # Try to find and load from development source
+  search_dirs <- c(
+    project_dir,
+    dirname(project_dir),
+    dirname(dirname(project_dir)),
+    dirname(dirname(dirname(project_dir))),
+    dirname(dirname(dirname(dirname(project_dir))))
+  )
+
+  # Load the first candidate that is a docstyle checkout and loads cleanly;
+  # a failed load_all() moves on to the next directory.
+  for (dir in search_dirs) {
+    if (!is_docstyle_source(dir) ||
+        !requireNamespace("devtools", quietly = TRUE)) {
+      next
+    }
+    docstyle_loaded <- tryCatch({
+      devtools::load_all(dir, quiet = TRUE)
+      TRUE
+    }, error = function(e) FALSE)
+    if (docstyle_loaded) {
+      break
+    }
+  }
+}
+
+if (!docstyle_loaded) {
+  message("[validate-markup] docstyle package not found, skipping validation")
+  quit(save = "no", status = 0)
+}
+
+# Find sidecar directory for comments.json: the project's _docstyle/ first,
+# otherwise a _docstyle/ next to the first QMD file
+sidecar_dir <- file.path(project_dir, "_docstyle")
+if (!dir.exists(sidecar_dir)) {
+  sidecar_dir <- file.path(dirname(qmd_files[1]), "_docstyle")
+}
+
+# comments_json stays NULL unless the sidecar directory and the file both exist
+comments_json <- NULL
+if (dir.exists(sidecar_dir)) {
+  json_path <- file.path(sidecar_dir, "comments.json")
+  if (file.exists(json_path)) {
+    comments_json <- json_path
+  }
+}
+
+# Validate each QMD file, accumulating error/warning counts across files
+all_valid <- TRUE
+total_errors <- 0
+total_warnings <- 0
+
+cat("\n")
+cat("=== docstyle Markup Validation ===\n")
+
+for (qmd_path in qmd_files) {
+  # Make path absolute if needed. Absolute paths are recognised on POSIX
+  # ("/...") and on Windows (drive letter "C:..." or a leading backslash);
+  # a check for "/" alone would prefix project_dir onto Windows absolute paths.
+  is_absolute <- grepl("^(/|\\\\|[A-Za-z]:)", qmd_path)
+  if (!is_absolute) {
+    qmd_path <- file.path(project_dir, qmd_path)
+  }
+
+  # Inputs that cannot be found are skipped silently
+  if (!file.exists(qmd_path)) {
+    next
+  }
+
+  cat(sprintf("\nValidating: %s\n", basename(qmd_path)))
+
+  # An error raised by the validator counts as one validation error for the file
+  result <- tryCatch({
+    docstyle::validate_qmd(
+      qmd_path = qmd_path,
+      comments_json = comments_json,
+      verbose = TRUE
+    )
+  }, error = function(e) {
+    message("[validate-markup] Error: ", conditionMessage(e))
+    list(valid = FALSE, issues = list(errors = conditionMessage(e), warnings = character()))
+  })
+
+  # Anything other than an explicit TRUE is a failure; isTRUE() also avoids
+  # an error in `if` when the validity flag is NULL or NA.
+  if (!isTRUE(result$valid)) {
+    all_valid <- FALSE
+  }
+  total_errors <- total_errors + length(result$issues$errors)
+  total_warnings <- total_warnings + length(result$issues$warnings)
+}
+
+# Print the totals, then stop the render (exit status 1) if any file failed.
+# Note: "Files" is length(qmd_files), so it also counts inputs that the loop
+# above skipped because they did not exist.
+cat("\n")
+cat("=== Validation Summary ===\n")
+cat(sprintf("Files: %d | Errors: %d | Warnings: %d\n",
+ length(qmd_files), total_errors, total_warnings))
+
+if (!all_valid) {
+ cat("\n")
+ cat("ERROR: Validation failed. Fix errors before rendering.\n")
+ # Migration hint for the deprecated [text]{.comment id="X"} markup
+ cat("Hint: Convert deprecated [text]{.comment id=\"X\"} to:\n")
+ cat(" - Range: text\n")
+ cat(" - Point: \n")
+ cat("\n")
+ # Non-zero status is the "Validation failed (stops render)" exit code
+ quit(save = "no", status = 1)
+}
+
+cat("\n")
diff --git a/_extensions/docstyle/version-history.lua b/_extensions/docstyle/version-history.lua
new file mode 100644
index 0000000..c820a89
--- /dev/null
+++ b/_extensions/docstyle/version-history.lua
@@ -0,0 +1,326 @@
+-- version-history.lua
+-- Pandoc Lua filter that generates a version history table from YAML metadata
+--
+-- Usage in QMD:
+-- ::: version-history
+-- :::
+--
+-- Configuration in _quarto.yml (under docstyle.version-history):
+-- title: "Version history" # Heading text (or false to disable)
+-- title-level: 1 # 1-6: uses Heading1-Heading6 style
+-- widths: "15,70,15" # Column width percentages (Version, Description, Date)
+-- style: "table-grid" # Table style: table-grid (all borders) or table-formal (top/bottom)
+--
+-- Version entries in QMD YAML front matter:
+-- version-history:
+-- - version: "1.0.0"
+-- date: "2025-01-15"
+-- description: "Final release"
+--
+-- This filter finds Div elements with class "version-history" and replaces them
+-- with a Word table generated from the version-history metadata.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+local FORMAT = "openxml"
+
+-- Built-in table style definitions (matching table-style.lua)
+local table_styles = {
+ ["table-grid"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "000000" },
+ bottom = { val = "single", sz = "4", color = "000000" },
+ left = { val = "single", sz = "4", color = "000000" },
+ right = { val = "single", sz = "4", color = "000000" },
+ insideH = { val = "single", sz = "4", color = "000000" },
+ insideV = { val = "single", sz = "4", color = "000000" }
+ },
+ header_shading = nil,
+ header_bold = true
+ },
+ ["table-formal"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "7F7F7F" },
+ bottom = { val = "single", sz = "4", color = "7F7F7F" },
+ left = nil,
+ right = nil,
+ insideH = nil,
+ insideV = nil
+ },
+ header_shading = "D9D9D9",
+ header_bold = true
+ }
+}
+
+-- Store version history from metadata
+local version_history = nil
+local div_found = false
+local config = {
+ title = "Version history",
+ title_level = 1,
+ widths = {15, 70, 15}, -- Default: Version 15%, Description 70%, Date 15%
+ style = "table-grid" -- Default table style
+}
+
+-- Use shared xml_escape from field-code-utils
+local xml_escape = fcu.xml_escape
+
+-- Turn a comma-separated widths string such as "15,70,15" into {15, 70, 15}.
+-- Pieces that tonumber() cannot convert are dropped; unless exactly three
+-- numbers remain, the default {15, 70, 15} is returned instead.
+local function parse_widths(widths_str)
+  local parsed = {}
+  for token in widths_str:gmatch("([^,]+)") do
+    local value = tonumber(token)
+    if value then
+      parsed[#parsed + 1] = value
+    end
+  end
+
+  if #parsed == 3 then
+    return parsed
+  end
+  return {15, 70, 15} -- Default
+end
+
+-- Read the version entries (top-level `version-history`) and the optional
+-- settings under `docstyle.version-history` into the file-level
+-- `version_history` and `config`. The metadata itself is left unchanged.
+function Meta(meta)
+  -- Version entries
+  local entries = meta["version-history"]
+  if entries then
+    version_history = entries
+    io.stderr:write("[version-history] Found " .. #version_history .. " version entries in metadata\n")
+  end
+
+  -- Optional settings; nothing more to do when the section is absent
+  local vh_config = meta.docstyle and meta.docstyle["version-history"]
+  if not vh_config then
+    return nil
+  end
+
+  -- Title: a literal `false` disables the heading, anything else is stringified
+  local title_val = vh_config.title
+  if title_val ~= nil then
+    if title_val == false then
+      config.title = nil -- Disable title
+    else
+      config.title = pandoc.utils.stringify(title_val)
+    end
+  end
+
+  -- Title level (1-6); falls back to 1 when the value is not a number
+  local level_val = vh_config["title-level"]
+  if level_val then
+    config.title_level = tonumber(pandoc.utils.stringify(level_val)) or 1
+  end
+
+  -- Column widths
+  local widths_val = vh_config.widths
+  if widths_val then
+    config.widths = parse_widths(pandoc.utils.stringify(widths_val))
+    io.stderr:write("[version-history] Column widths: " .. table.concat(config.widths, ", ") .. "\n")
+  end
+
+  -- Table style: only names present in table_styles are accepted
+  local style_val = vh_config.style
+  if style_val then
+    local style_name = pandoc.utils.stringify(style_val)
+    if table_styles[style_name] then
+      config.style = style_name
+      io.stderr:write("[version-history] Table style: " .. style_name .. "\n")
+    else
+      io.stderr:write("[version-history] Unknown table style '" .. style_name .. "', using default\n")
+    end
+  end
+
+  return nil
+end
+
+-- Build border XML element
+-- `name` is the border side (callers pass "top", "left", "bottom", "right",
+-- "insideH", "insideV"); `border` is a table with `val`, `sz` and `color`
+-- fields. Returns "" when `border` is nil.
+local function build_border_xml(name, border)
+ if not border then return "" end
+ return string.format('',
+ name, border.val, border.sz, border.color)
+end
+
+-- Build table borders XML from style definition
+-- `borders` is the `borders` table of a table_styles entry. Only sides that
+-- are present are emitted, in the order top, left, bottom, right, insideH,
+-- insideV. Returns "" when `borders` is nil.
+local function build_tblBorders_xml(borders)
+ if not borders then return "" end
+
+ local parts = { "" }
+ if borders.top then table.insert(parts, build_border_xml("top", borders.top)) end
+ if borders.left then table.insert(parts, build_border_xml("left", borders.left)) end
+ if borders.bottom then table.insert(parts, build_border_xml("bottom", borders.bottom)) end
+ if borders.right then table.insert(parts, build_border_xml("right", borders.right)) end
+ if borders.insideH then table.insert(parts, build_border_xml("insideH", borders.insideH)) end
+ if borders.insideV then table.insert(parts, build_border_xml("insideV", borders.insideV)) end
+ table.insert(parts, "")
+
+ -- Pieces are joined with no separator
+ return table.concat(parts)
+end
+
+-- Build a table cell XML with optional width and shading
+-- text: cell text; it is passed through xml_escape before being embedded
+-- bold: truthy to set run properties for the cell text
+-- width_pct: optional column width as a percentage
+-- shading: optional shading value (callers pass a style's header_shading or nil)
+-- Returns the cell as a single string.
+local function build_cell(text, bold, width_pct, shading)
+ local rPr = ""
+ if bold then
+ rPr = ""
+ end
+
+ -- Width in fiftieths of a percent (5000 = 100%)
+ local tcPr_parts = { "" }
+ if width_pct then
+ local width_val = math.floor(width_pct * 50) -- Convert % to fiftieths
+ table.insert(tcPr_parts, '')
+ end
+ if shading then
+ table.insert(tcPr_parts, '')
+ end
+ table.insert(tcPr_parts, "")
+
+ -- Cell properties first, then the run properties and the escaped text
+ return '' ..
+ table.concat(tcPr_parts) ..
+ '' ..
+ '' .. rPr ..
+ '' .. xml_escape(text) .. '' ..
+ '' ..
+ '' ..
+ ''
+end
+
+-- Build the version history table XML
+-- Reads the file-level `version_history` (entries) and `config` (widths and
+-- style). Returns nil when there are no entries, otherwise one string made of
+-- a header row (Version / Description / Date) followed by one row per entry.
+local function build_table_xml()
+ if not version_history or #version_history == 0 then
+ return nil
+ end
+
+ local w = config.widths
+ -- Fall back to table-grid if config.style is not a known style name
+ local style = table_styles[config.style] or table_styles["table-grid"]
+
+ -- Header row with column widths (bold header if style specifies, with optional shading)
+ local header_bold = style.header_bold
+ local header_shading = style.header_shading
+ local header_row = '' ..
+ build_cell("Version", header_bold, w[1], header_shading) ..
+ build_cell("Description", header_bold, w[2], header_shading) ..
+ build_cell("Date", header_bold, w[3], header_shading) ..
+ ''
+
+ -- Data rows
+ local data_rows = {}
+ for _, entry in ipairs(version_history) do
+ -- A field missing from an entry becomes an empty cell
+ local version = ""
+ local description = ""
+ local date = ""
+
+ if entry.version then
+ version = pandoc.utils.stringify(entry.version)
+ end
+ if entry.description then
+ description = pandoc.utils.stringify(entry.description)
+ end
+ if entry.date then
+ date = pandoc.utils.stringify(entry.date)
+ end
+
+ -- Data cells are never bold or shaded
+ local row = '' ..
+ build_cell(version, false, w[1], nil) ..
+ build_cell(description, false, w[2], nil) ..
+ build_cell(date, false, w[3], nil) ..
+ ''
+ table.insert(data_rows, row)
+ end
+
+ -- Build table borders from style
+ local borders_xml = build_tblBorders_xml(style.borders)
+
+ -- Complete table with style-defined borders
+ local table_xml = '' ..
+ '' ..
+ '' .. -- 100% width
+ borders_xml ..
+ '' ..
+ header_row ..
+ table.concat(data_rows) ..
+ ''
+
+ return table_xml
+end
+
+-- Build heading for the title
+-- Returns nil when the title is disabled (config.title is nil); otherwise a
+-- string containing the title text passed through xml_escape.
+local function build_title_xml()
+ if not config.title then
+ return nil
+ end
+
+ -- Style id is "Heading" followed by the configured level, e.g. "Heading1"
+ local style_id = "Heading" .. tostring(config.title_level)
+
+ return '' ..
+ '' ..
+ '' .. xml_escape(config.title) .. '' ..
+ ''
+end
+
+-- Replace a `.version-history` div with the generated table.
+-- Other divs are returned unchanged (nil). A matching div is removed
+-- entirely when there are no version entries.
+function Div(div)
+  if not div.classes:includes("version-history") then
+    return nil
+  end
+
+  -- Remember that a placeholder exists (read later by CheckUnused)
+  div_found = true
+
+  -- The replacement is raw OpenXML, so only act when FORMAT is "openxml"
+  if FORMAT ~= "openxml" then
+    io.stderr:write("[version-history] Skipping (not docx output)\n")
+    return nil
+  end
+
+  local entry_count = version_history and #version_history or 0
+  if entry_count == 0 then
+    io.stderr:write("[version-history] No version-history metadata found\n")
+    return {} -- Remove the div entirely
+  end
+
+  io.stderr:write("[version-history] Generating table with " .. entry_count .. " entries\n")
+
+  local function raw(xml)
+    return pandoc.RawBlock("openxml", xml)
+  end
+
+  -- Output order: ADDIN DOCSTYLE field start, optional title heading,
+  -- table, ADDIN DOCSTYLE field end
+  local result = { raw(fcu.build_div_field_start("version-history")) }
+
+  local title_xml = build_title_xml()
+  if title_xml then
+    result[#result + 1] = raw(title_xml)
+  end
+
+  local table_xml = build_table_xml()
+  if table_xml then
+    result[#result + 1] = raw(table_xml)
+  end
+
+  result[#result + 1] = raw(fcu.build_block_field_end())
+
+  return result
+end
+
+-- Normalise the format name: "docx" and "openxml" both become "openxml".
+-- The document is not modified (nil is returned).
+-- NOTE(review): FORMAT here is the file-local declared near the top of this
+-- file with the value "openxml", so this condition always holds and the
+-- assignment changes nothing; Div's "not docx output" branch is therefore
+-- never taken. If pandoc's global FORMAT was meant to be consulted, the local
+-- shadows it — confirm intent.
+function Pandoc(doc)
+ if FORMAT == "docx" or FORMAT == "openxml" then
+ FORMAT = "openxml"
+ end
+ return nil
+end
+
+-- Final pass: warn when version entries were supplied in the metadata but no
+-- version-history div was seen by Div. Never modifies the document.
+local function CheckUnused(doc)
+  -- Nothing to report if a div was found or there are no entries
+  if div_found or not version_history or #version_history == 0 then
+    return nil
+  end
+
+  io.stderr:write("[version-history] Warning: " .. #version_history ..
+    " version-history entries in metadata but no ::: version-history ::: " ..
+    "div in document. Add the div where you want the table to appear.\n")
+  return nil
+end
+
+return {
+ { Meta = Meta, Pandoc = Pandoc },
+ { Div = Div },
+ { Pandoc = CheckUnused }
+}
diff --git a/_extensions/docstyle/zotero-inject.lua b/_extensions/docstyle/zotero-inject.lua
new file mode 100644
index 0000000..e24b95a
--- /dev/null
+++ b/_extensions/docstyle/zotero-inject.lua
@@ -0,0 +1,136 @@
+-- zotero-inject.lua
+-- Pandoc Lua filter that emits text markers for Zotero citation field codes.
+--
+-- The R finisher (inject_zotero_citations) replaces these markers with real
+-- Word field code XML after Pandoc has finished rendering to docx.
+--
+-- Markers:
+-- DOCSTYLE_CITE::key1;key2 – citation (single or grouped)
+-- DOCSTYLE_CITE_BIBL – bibliography placeholder
+--
+-- Usage:
+-- pandoc --lua-filter=zotero-inject.lua -M field-codes=path/to/field-codes.json ...
+
+-- Debug logging: messages go to stderr only when DOCSTYLE_DEBUG=1
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write(msg)
+end
+
+-- Citekey existence lookup (populated from field-codes.json citations section)
+local known_citekeys = {}
+
+-- True when `path` can be opened for reading; the handle is closed again
+local function file_exists(path)
+  local handle = io.open(path, "r")
+  if handle == nil then
+    return false
+  end
+  handle:close()
+  return true
+end
+
+-- Read field-codes.json at startup; populate known_citekeys
+--
+-- The path comes from the `field-codes` metadata key when present; otherwise
+-- a few common relative locations are tried. Every key of the file's
+-- `citations` object is recorded in `known_citekeys`. The metadata is never
+-- modified (nil is returned). If the file is missing, unreadable, or does not
+-- decode to a table, no citekeys are loaded, so Cite will not emit markers.
+function Meta(meta)
+  local field_codes_path = nil
+
+  -- Get path from metadata (explicit override)
+  if meta["field-codes"] then
+    field_codes_path = pandoc.utils.stringify(meta["field-codes"])
+  end
+
+  -- Auto-detect common locations if not specified
+  if not field_codes_path then
+    debug("[zotero-inject] Looking for field-codes.json...\n")
+    local search_paths = {
+      "_docstyle/field-codes.json",
+      "field-codes.json",
+      "../_docstyle/field-codes.json"
+    }
+    for _, path in ipairs(search_paths) do
+      debug("[zotero-inject] Checking: " .. path .. "\n")
+      if file_exists(path) then
+        field_codes_path = path
+        debug("[zotero-inject] Found field-codes.json: " .. path .. "\n")
+        break
+      end
+    end
+  end
+
+  if not field_codes_path then
+    debug("[zotero-inject] No field-codes.json found; markers will not be emitted\n")
+    return nil
+  end
+
+  -- Read and parse the JSON file
+  local file = io.open(field_codes_path, "r")
+  if not file then
+    debug("[zotero-inject] Could not open: " .. field_codes_path .. "\n")
+    return nil
+  end
+
+  local content = file:read("*all")
+  file:close()
+
+  local ok, parsed = pcall(pandoc.json.decode, content)
+
+  -- A file that decodes to something other than a table (e.g. `null` or a
+  -- bare number) is treated like a parse failure: indexing it below would
+  -- raise an error and abort the filter.
+  if not ok or type(parsed) ~= "table" then
+    debug("[zotero-inject] Failed to parse field-codes.json\n")
+    return nil
+  end
+
+  -- Build citekey existence set from citations catalog
+  local count = 0
+  local citations = parsed.citations
+  if type(citations) == "table" then
+    for citekey, _ in pairs(citations) do
+      known_citekeys[citekey] = true
+      count = count + 1
+    end
+  end
+  debug("[zotero-inject] Loaded " .. count .. " citekey(s) from field-codes.json\n")
+
+  return nil
+end
+
+-- Process Cite elements: emit a text marker instead of raw OpenXML.
+-- The marker is "DOCSTYLE_CITE::" followed by the citekeys joined with ";".
+-- Returns nil (Pandoc default rendering) when the Cite has no citations or
+-- when any citekey is not in known_citekeys.
+function Cite(cite)
+  local ids = {}
+  for _, citation in ipairs(cite.citations) do
+    local id = citation.id
+    -- All citekeys must be known; stop at the first unknown one
+    if not known_citekeys[id] then
+      debug("[zotero-inject] Skipping (unknown citekey): " .. id .. "\n")
+      return nil
+    end
+    ids[#ids + 1] = id
+  end
+
+  if #ids == 0 then
+    return nil
+  end
+
+  local marker = "DOCSTYLE_CITE::" .. table.concat(ids, ";")
+  debug("[zotero-inject] Emitting marker: " .. marker .. "\n")
+  return pandoc.Str(marker)
+end
+
+-- Process Div elements: a div with class "bibliography" is replaced by a
+-- paragraph holding the DOCSTYLE_CITE_BIBL marker; other divs are unchanged.
+function Div(div)
+  if not div.classes:includes("bibliography") then
+    return nil
+  end
+
+  debug("[zotero-inject] Emitting bibliography marker from div\n")
+  return pandoc.Para({ pandoc.Str("DOCSTYLE_CITE_BIBL") })
+end
+
+return {
+ { Meta = Meta },
+ { Cite = Cite },
+ { Div = Div }
+}
diff --git a/_freeze/docs/explanation/project-specifications/execute-results/html.json b/_freeze/docs/explanation/project-specifications/execute-results/html.json
new file mode 100644
index 0000000..e620a86
--- /dev/null
+++ b/_freeze/docs/explanation/project-specifications/execute-results/html.json
@@ -0,0 +1,15 @@
+{
+ "hash": "5c354dacbbc1399ea5d9c16000b4fe2c",
+ "result": {
+ "engine": "knitr",
+ "markdown": "---\ntitle: \"Canadian Smoking History Generator Model: Project specifications\"\nsubtitle: \"Scope, structure, and implementation plan\"\nauthor: \"CSHGM Development Team\"\ndate: \"Last Updated: 2025-03-06\"\nformat:\n html:\n toc: true\n toc-depth: 3\n number-sections: true\n theme: cosmo\n code-fold: show\nexecute:\n echo: true\n warning: false\n message: false\n---\n\n\n\n\n\n\n# Executive summary\n\nThis document outlines the scope, specifications, and implementation plan for the Canadian Smoking History Generator Model (CSHGM) project. The project aims to modernize and extend the existing smoking history generation methodology developed in the Ontario SHGM study (2020) and the Age-Period-Cohort modelling approach pioneered by Holford. By implementing this methodology in R, we seek to improve accessibility, transparency, and extensibility while preserving the scientific integrity of the original work.\n\nThis specification document will serve as a reference for all project contributors, helping to ensure consistency across development efforts and provide clear guidance for implementation decisions.\n\n# Project goals and scope\n\n## Primary goals\n\n1. Implement the Canadian Smoking History Generator Model in R\n2. Provide a well-documented, reproducible workflow for smoking history modelling\n3. Create flexible, modular components that can be adapted to different contexts\n4. Ensure compatibility with secure computing environments\n5. 
Follow open science principles and best practices\n\n## Project scope\n\n### In scope\n\n- R implementation of the Age-Period-Cohort (APC) modelling approach\n- Processing of Canadian Community Health Survey (CCHS) data\n- Derivation of smoking initiation and cessation probabilities\n- Generation of complete smoking histories\n- Validation against historical data\n- Documentation for users and developers\n\n### Out of scope (for initial release)\n\n- Integration with specific disease models\n- Implementation of alternative smoking behaviour models\n- Web application or interactive user interface\n- Incorporation of e-cigarette or vaping behaviours\n- Integration with non-Canadian health surveys\n\n## Legacy recognition\n\nThis project builds upon the foundational work of Dr. Ted Holford at Yale University. As Dr. Holford transitions toward retirement, this project will serve as a legacy implementation of his APC methodology applied to smoking behaviour. We will work with Dr. Holford to determine appropriate recognition and attribution.\n\n# Project structure and approach\n\n## Folder structure\n\nWe will adopt a structure that balances R package conventions with a more accessible Quarto project organization:\n\n``` \ncshgm/\n├── .github/ # GitHub Actions and templates\n├── R/ # R functions in separate files\n├── data/ # Processed data objects\n│ └── dev/ # Development data files\n├── docs/ # Documentation as Quarto files\n│ ├── reference/ # Technical reference for the codebase\n│ ├── explanation/ # Conceptual explanations\n│ ├── how-to/ # Task-oriented guides\n│ └── tutorials/ # Learning-oriented tutorials\n├── tests/ # Test files\n│ └── testthat/ # Unit tests using testthat\n├── config/ # Configuration files\n│ ├── templates/ # Template configurations\n│ ├── variables/ # Variable definitions\n│ ├── default.yml # Default configuration\n│ └── secure.yml # Template for secure environments\n├── resources/ # Project resources\n│ ├── cchs/ # CCHS documentation and 
reference materials\n│ ├── legacy-code/ # Original SAS implementation \n│ ├── variable sheets/ # Original variable definition sheets\n│ └── worksheets/ # Working files for variable harmonization\n├── logs/ # Log files directory\n├── renv/ # R package management (renv)\n├── _quarto.yml # Quarto site configuration\n├── renv.lock # Package dependency locking\n├── index.qmd # Main landing page\n├── styles.css # Website styling\n├── CONTRIBUTING.md # Contribution guidelines\n└── README.md # Project overview\n```\n\n### Resource organization\n\nThe project resources are organized as follows:\n\n- **Active resources**: Materials regularly used during development\n - `resources/cchs/`: CCHS documentation, question trees, and survey details\n - `resources/variable sheets/`: Variable definitions and mappings\n - `resources/worksheets/`: Working files for variable harmonization\n\n- **Reference materials**: Historical or legacy materials \n - `resources/legacy-code/`: Original SAS implementation and historical code\n - Documentation of previous studies and implementations\n\nThis organization centralizes all supporting materials in the resources directory while maintaining clear separation between active resources and historical references.\n\n## Documentation approach\n\nWe will follow the [Divio documentation system](https://documentation.divio.com/) with four types of documentation:\n\n1. **Tutorials**: Step-by-step lessons to get started\n2. **How-to guides**: Practical instructions for specific tasks\n3. **Explanation**: Conceptual discussions of methodology\n4. 
**Reference**: Technical details of functions and data\n\nThis approach ensures we meet the needs of different users, from beginners to advanced researchers.\n\n## Development philosophy\n\n### Tidyverse design principles\n\nWe will follow tidyverse design principles:\n\n- Create composable functions that do one thing well\n- Use consistent function naming and argument conventions\n- Embrace the pipe (`|>` or `%>%`) for readable data manipulation\n- Prioritize data frames as the primary data structure\n- Use tidy evaluation for metaprogramming when needed\n\n### Open science principles\n\nThe project will adhere to FAIR principles:\n\n- **Findable**: Clear metadata, permanent DOI\n- **Accessible**: Open source, clear installation instructions\n- **Interoperable**: Standard data formats, well-documented APIs\n- **Reusable**: Comprehensive documentation, permissive license\n\n### Modular design\n\nThe implementation will be modular with these key components:\n\n1. **Data processing**: Functions to process and harmonize CCHS data\n2. **APC modelling**: Implementation of the APC modelling methodology\n3. **Simulation**: Functions to generate smoking histories\n4. **Validation**: Tools to validate against reference data\n5. **Visualization**: Components to visualize results\n\n## Technology stack\n\n### Core technologies\n\n- **R**: Primary programming language\n- **Quarto**: Documentation and website generation\n- **GitHub**: Version control and collaboration\n- **GitHub Actions**: Continuous integration and deployment\n- **renv**: Package management and reproducibility\n\n### Package management\n\nWe use the `renv` package to manage R package dependencies. This approach ensures:\n\n1. **Reproducibility**: All users and environments use the same package versions\n2. **Isolation**: Project dependencies are isolated from other projects\n3. **Documentation**: Package dependencies are explicitly recorded\n4. 
**Portability**: Easier deployment in secure environments\n\nThe `renv.lock` file records all package dependencies and their versions. When a new contributor joins the project, they can restore the exact environment with:\n\n```r\nrenv::restore()\n```\n\nWhen adding a new package dependency, it should be properly recorded:\n\n```r\nrenv::install(\"packagename\")\nrenv::snapshot()\n```\n\n### Key R packages\n\n- **tidyverse**: Data manipulation and visualization\n- **splines**: Spline modelling functionality\n- **cchsflow**: CCHS data harmonization\n- **config**: Configuration management\n- **logger**: Logging functionality\n- **testthat**: Unit testing\n- **roxygen2**: Documentation generation\n\n# Implementation plan\n\n## Phase 1: Project setup and core infrastructure\n\n1. Set up GitHub repository with initial structure\n2. Initialize renv for package management\n3. Configure GitHub Actions for CI/CD\n4. Create initial documentation framework \n5. Implement configuration management system\n6. Establish coding standards and contributor guidelines\n\n## Phase 2: Data processing implementation\n\n1. Develop functions for CCHS data harmonization\n2. Implement smoking status variable processing\n3. Create data preprocessing workflow\n4. Write documentation and tests for data processing\n\n## Phase 3: APC model implementation\n\n1. Implement natural cubic spline generation\n2. Develop APC modelling functions\n3. Create initiation probability estimation\n4. Create cessation probability estimation\n5. Write documentation and tests for APC components\n\n## Phase 4: Simulation and validation\n\n1. Develop smoking history generation functions\n2. Implement validation methodology\n3. Create calibration procedures\n4. Generate validation datasets\n5. Write documentation and tests for simulation components\n\n## Phase 5: Integration and documentation\n\n1. Ensure component integration\n2. Finalize documentation\n3. Create example workflows\n4. 
Develop tutorials and how-to guides\n5. Prepare for initial release\n\n# Specific implementation considerations\n\n## Configuration management\n\nWe will use the `config` package with YAML files to manage environment-specific settings:\n\n``` yaml\n# Example config.yml\ndefault:\n data_path: \"data/\"\n output_path: \"output/\"\n log_level: \"INFO\"\n variables_path: \"config/variables.csv\"\n variable_details_path: \"config/variable_details.csv\"\n\nsecure:\n data_path: \"/secure/data/\"\n output_path: \"/secure/output/\"\n log_level: \"WARN\"\n variables_path: \"/secure/config/variables.csv\"\n variable_details_path: \"/secure/config/variable_details.csv\"\n```\n\nThis approach allows for seamless transitions between different computing environments.\n\n## Documentation breakdown\n\nWe propose organizing documentation into the following Quarto files:\n\n### Reference documentation\n\n- `reference-variables.qmd`: Complete reference of all variables\n- `reference-functions.qmd`: Technical documentation of all functions\n- `reference-model.qmd`: Details of the APC model implementation\n\n### Explanation documentation\n\n- `explanation-smoking-history.qmd`: Conceptual overview of smoking history modelling\n- `explanation-apc-method.qmd`: Explanation of the APC methodology\n- `explanation-validation.qmd`: Discussion of validation approaches\n\n### How-to guides\n\n- `howto-setup.qmd`: Setting up the environment\n- `howto-process-cchs.qmd`: Processing CCHS data\n- `howto-run-apc.qmd`: Running the APC model\n- `howto-generate-histories.qmd`: Generating smoking histories\n- `howto-validate.qmd`: Validating results\n\n### Tutorials\n\n- `tutorial-basic.qmd`: Step-by-step tutorial for basic usage\n- `tutorial-advanced.qmd`: Advanced tutorial for customization\n\n## Testing strategy\n\nWe will implement a comprehensive testing strategy:\n\n1. **Unit tests**: For individual functions\n2. **Integration tests**: For component interactions\n3. 
**Regression tests**: To ensure consistency with previous results\n4. **Validation tests**: Against reference data\n\nTests will be organized to mirror the structure of the code, with separate test files for each module.\n\n## Considerations for secure environments\n\nTo ensure compatibility with secure environments like Statistics Canada's Regional Data Centres:\n\n1. No hard-coded file paths\n2. Configuration-driven data access\n3. Minimal external dependencies\n4. Well-documented installation procedures\n5. Alternative output formats for environments with restrictions\n\n## International adaptability\n\nTo facilitate adoption by teams in other countries:\n\n1. Clear separation between data processing and modelling\n2. Documented variable mapping procedures\n3. Configurable parameters for country-specific variations\n4. Example workflows for adapting to different survey structures\n\n# Collaboration and contribution\n\n## Team roles and responsibilities\n\n- **Project coordination**: CSHGM Consortium\n- **Statistical methodology**: Colleagues from Statistics Canada, BCDCDC\n- **APC modelling expert**: Ted Holford\n- **R development**: To be determined\n- **Documentation**: To be determined\n- **Testing and validation**: To be determined\n\n## Contribution workflow\n\n1. Issues created for specific tasks\n2. Branches created for feature development\n3. Pull requests submitted for review\n4. Code review by at least one team member\n5. Automated testing via GitHub Actions\n6. 
Merge to main branch after approval\n\n## Communication channels\n\n- GitHub Issues for task tracking\n- Regular video meetings for team coordination\n- Email for broader announcements\n- Shared documentation for long-term knowledge preservation\n\n# Potential challenges and mitigation strategies\n\n| Challenge | Mitigation strategy |\n|-----------|---------------------|\n| Secure environment limitations | Develop with minimal dependencies, provide alternative implementation paths |\n| Compatibility across CCHS cycles | Robust variable harmonization, comprehensive testing with multiple cycles |\n| Statistical complexity | Clear documentation, code review by statistical experts |\n| Balancing accessibility vs. complexity | Modular design with multiple entry points for different user levels |\n| Long-term sustainability | Comprehensive documentation, clear contribution guidelines, active community building |\n\n# Timeline and milestones\n\n| Milestone | Target date | Deliverables |\n|-----------|-------------|--------------|\n| Project setup | Month 1 | Repository, documentation framework, contribution guidelines |\n| Data processing implementation | Month 3 | Functions for CCHS data processing, documentation |\n| APC model implementation | Month 6 | Complete APC modelling functions, tests, documentation |\n| Simulation and validation | Month 9 | History generation, validation functions, documentation |\n| Integration and documentation | Month 12 | Complete documentation, examples, initial release |\n\n# Conclusion\n\nThis project specification outlines the approach for implementing the Canadian Smoking History Generator Model in R. 
By adhering to open science principles, following tidyverse design guidelines, and creating comprehensive documentation, we aim to create a valuable resource for researchers studying smoking behaviour and its health impacts.\n\nThe modular, configurable design will ensure the codebase can be used in various environments and adapted to different contexts, preserving the scientific methodology while improving accessibility and usability.\n\n# Next steps\n\n1. Finalize project specification based on team feedback\n2. Set up initial repository structure\n3. Implement configuration management system\n4. Begin work on data processing functions\n5. Develop initial documentation framework\n\n# References\n\nOntario SHGM study (2020). Current and future mortality probabilities in Ontario, 2002 to 2032. *Health Reports*, Statistics Canada.\n\nHolford, T.R. (1991). Understanding the effects of age, period, and cohort on incidence and mortality rates. *Annual Review of Public Health*, 12, 425-457. doi:10.1146/annurev.pu.12.050191.002233\n\nWickham, H. (2015). R packages: organize, test, document, and share your code. O'Reilly Media, Inc.\n\nWilkinson, M.D. et al. (2016). The FAIR Guiding Principles for scientific data management and stewardship. *Scientific Data*, 3(1), 1-9. 
doi:10.1038/sdata.2016.18\n\n# Appendix A: Glossary of terms\n\n| Term | Definition |\n|------|------------|\n| APC | Age-Period-Cohort modelling approach |\n| CCHS | Canadian Community Health Survey |\n| CSHGM | Canadian Smoking History Generator Model |\n| FAIR | Findable, Accessible, Interoperable, Reusable |\n| RDC | Research Data Centre (Statistics Canada secure environment) |\n\n# Appendix B: Example configuration file\n\n```yaml\ndefault:\n # Data paths\n raw_data_path: \"data-raw/\"\n processed_data_path: \"data/\"\n output_path: \"output/\"\n \n # Variable definition files\n variables_path: \"config/variables.csv\"\n variable_details_path: \"config/variable_details.csv\"\n \n # Logging configuration\n log_level: \"INFO\"\n log_file: \"logs/cshgm.log\"\n \n # Model parameters\n min_age: 8\n max_age: 99\n min_cohort: 1920\n max_cohort: 2020\n \n # CCHS cycles to include\n cchs_cycles: [\"2001\", \"2003\", \"2005\", \"2007-2008\", \"2009-2010\", \n \"2011-2012\", \"2013-2014\", \"2015-2016\", \"2017-2018\"]\n \n # APC model specifications\n age_knots: [10, 15, 20, 50, 60]\n period_knots: [1940, 1950, 1960, 1970, 1980]\n cohort_knots: [1930, 1940, 1945, 1950, 1955, 1960, 1965, 1970, 1975, 1980]\n```\n\n# Appendix C: Recommended license\n\nWe recommend using the **Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)** license for this project. This license ensures:\n\n1. **Attribution** - Users must give appropriate credit to the CSHGM Consortium\n2. **Non-commercial use** - The material cannot be used for commercial purposes\n3. **Share-alike** - Adaptations must be shared under the same terms\n\nThis license is appropriate for Statistics Canada and other partners who need to maintain the project for non-profit uses, while still allowing academic and research adaptation. 
It ensures that any modified versions will maintain attribution to the original project and cannot be commercialized.\n\nThe license should attribute copyright to \"CSHGM Consortium\" rather than specific individuals, to emphasize the collaborative nature of the project.\n\nFor code specifically, we recommend the **GNU General Public License v3.0 (GPL-3.0)**, which is a strong copyleft license ensuring that derivative works also remain open source. This license is compatible with R's ecosystem, as many R packages use GPL licensing.\n\nA brief summary of the CC BY-NC-SA 4.0 license:\n\n```\nCreative Commons Attribution-NonCommercial-ShareAlike 4.0 International License\n\nCopyright (c) 2025 CSHGM Consortium\n\nThis work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 \nInternational License. To view a copy of this license, visit:\nhttp://creativecommons.org/licenses/by-nc-sa/4.0/\n\nYou are free to:\n- Share — copy and redistribute the material in any medium or format\n- Adapt — remix, transform, and build upon the material\n\nUnder the following terms:\n- Attribution — You must give appropriate credit, provide a link to the license, \n and indicate if changes were made.\n- NonCommercial — You may not use the material for commercial purposes.\n- ShareAlike — If you remix, transform, or build upon the material, you must \n distribute your contributions under the same license as the original.\n```",
+ "supporting": [],
+ "filters": [
+ "rmarkdown/pagebreak.lua"
+ ],
+ "includes": {},
+ "engineDependencies": {},
+ "preserve": {},
+ "postProcess": true
+ }
+}
\ No newline at end of file
diff --git a/_freeze/docs/how-to/data-loading-and-harmonizing/execute-results/html.json b/_freeze/docs/how-to/data-loading-and-harmonizing/execute-results/html.json
new file mode 100644
index 0000000..594a70d
--- /dev/null
+++ b/_freeze/docs/how-to/data-loading-and-harmonizing/execute-results/html.json
@@ -0,0 +1,15 @@
+{
+ "hash": "b764548ad22a07759c11c970c9d703e3",
+ "result": {
+ "engine": "knitr",
+ "markdown": "---\ntitle: \"Data loading and harmonizing\"\nsubtitle: \"Preparing CCHS data for the Smoking History Generator Model\"\nauthor: \"CSHGM Development Team\"\ndate: \"Last Updated: 2025-03-06\"\nformat:\n html:\n toc: true\n toc-depth: 3\n number-sections: true\n theme: cosmo\n code-fold: show\nexecute:\n echo: true\n warning: false\n message: false\n---\n\n\n\n\n\n\n# Introduction\n\nThis guide focuses on the initial steps of the CSHGM workflow: loading Canadian Community Health Survey (CCHS) data and harmonizing variables across survey cycles. These steps are critical for ensuring consistent analysis across different time periods.\n\n## Purpose of data harmonization\n\nVariable names, coding, and survey design have changed across CCHS cycles. Harmonization ensures that:\n\n1. Variables have consistent names across cycles\n2. Response codes are standardized\n3. Derived variables are calculated consistently\n4. Missing values are handled appropriately\n\n# Required data sources\n\n## Canadian Community Health Survey (CCHS)\n\nThe CCHS is the primary data source for the smoking history model. We typically use multiple cycles:\n\n- CCHS 2001 (Cycle 1.1)\n- CCHS 2003 (Cycle 2.1)\n- CCHS 2005 (Cycle 3.1)\n- CCHS 2007-2008\n- CCHS 2009-2010\n- CCHS 2011-2012\n- CCHS 2013-2014\n- CCHS 2015-2016\n- CCHS 2017-2018\n\n## Accessing CCHS data\n\nCCHS data can be accessed through:\n\n1. **Statistics Canada Research Data Centres (RDCs)**: For researchers who need access to the detailed microdata\n2. **Public Use Microdata Files (PUMFs)**: Available through academic institutions\n3. 
**Sample files**: For demonstration purposes, sample datasets are included with this package\n\n# Loading CCHS data\n\n## Step 1: Set up your environment\n\nFirst, set up your environment with the necessary packages:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Load required packages\nlibrary(cshgm)\nlibrary(dplyr)\nlibrary(cchsflow) # For harmonizing CCHS variables\nlibrary(haven) # For reading SAS or SPSS files\n\n# Configure paths using the config package\nlibrary(config)\ncfg <- config::get()\ndata_path <- cfg$data_path\n```\n:::\n\n\n\n\n## Step 2: Load individual CCHS cycles\n\nLoad each CCHS cycle file. The format will depend on how your data is stored:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example: Loading RData files\ncchs2001 <- readRDS(file.path(data_path, \"cchs2001_p.RData\"))\ncchs2003 <- readRDS(file.path(data_path, \"cchs2003_p.RData\"))\ncchs2005 <- readRDS(file.path(data_path, \"cchs2005_p.RData\"))\ncchs2007_2008 <- readRDS(file.path(data_path, \"cchs2007_2008_p.RData\"))\ncchs2009_2010 <- readRDS(file.path(data_path, \"cchs2009_2010_p.RData\"))\ncchs2011_2012 <- readRDS(file.path(data_path, \"cchs2012_p.RData\"))\ncchs2013_2014 <- readRDS(file.path(data_path, \"cchs2013_2014_p.RData\"))\n\n# For SAS or SPSS files, you would use:\n# cchs2001 <- haven::read_sas(file.path(data_path, \"cchs2001.sas7bdat\"))\n# or\n# cchs2001 <- haven::read_spss(file.path(data_path, \"cchs2001.sav\"))\n```\n:::\n\n\n\n\n## Step 3: Data preparation\n\nBefore harmonizing, prepare the data by adding cycle-specific identifiers:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add cycle identifier to each dataset\ncchs2001 <- cchs2001 %>% mutate(cycle = \"2001\")\ncchs2003 <- cchs2003 %>% mutate(cycle = \"2003\")\ncchs2005 <- cchs2005 %>% mutate(cycle = \"2005\")\ncchs2007_2008 <- cchs2007_2008 %>% mutate(cycle = \"2007-2008\")\ncchs2009_2010 <- cchs2009_2010 %>% mutate(cycle = \"2009-2010\")\ncchs2011_2012 <- cchs2011_2012 %>% mutate(cycle = 
\"2011-2012\")\ncchs2013_2014 <- cchs2013_2014 %>% mutate(cycle = \"2013-2014\")\n\n# Create a list of all datasets\ncchs_list <- list(\n cchs2001, cchs2003, cchs2005, cchs2007_2008, \n cchs2009_2010, cchs2011_2012, cchs2013_2014\n)\n```\n:::\n\n\n\n\n# Variable harmonization\n\n## Core smoking variables\n\nFor the CSHGM, we need to harmonize several key smoking-related variables:\n\n| Type | Variables | Description |\n|------|-----------|-------------|\n| Status | SMK_01A, SMK_202, SMKDSTY | Basic smoking status indicators |\n| Initiation | SMKG01C_cont, SMKG203_cont, SMKG207_cont | Age of smoking initiation |\n| Cessation | SMK_09A_cont, SMKG09C | When smoking cessation occurred |\n| Intensity | SMK_204, SMK_208, SMK_05B, SMK_05C | Quantity of cigarettes smoked |\n\n## Step 4: Harmonize variables using cchsflow\n\nThe `cchsflow` package provides tools for harmonizing CCHS variables across cycles:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Define the variables needed for CSHGM\nsmoking_vars <- c(\n \"SMK_01A\", # In lifetime, smoked 100 or more cigarettes\n \"SMK_202\", # Current smoking status\n \"SMKDSTY\", # Type of smoker\n \"SMKG01C_cont\", # Age smoked first cigarette\n \"SMKG203_cont\", # Age started smoking daily (daily smokers)\n \"SMKG207_cont\", # Age started smoking daily (former daily smokers)\n \"SMK_09A_cont\", # When stopped smoking daily (former daily)\n \"SMKG09C\", # Years since stopped smoking daily\n \"SMK_204\", # Number of cigarettes smoked daily (daily smokers)\n \"SMK_208\", # Number of cigarettes smoked daily (former daily smokers)\n \"SMK_05B\", # Number of cigarettes smoked daily (occasional smokers)\n \"SMK_05C\" # Number of days smoked in past month (occasional smokers)\n)\n\n# Demographic variables\ndemographic_vars <- c(\n \"DHH_SEX\", # Sex of respondent\n \"DHH_AGE\", # Age of respondent\n \"WTS_M\" # Survey weight\n)\n\n# Variables for timing\ntiming_vars <- c(\n \"SAMPLEID\" # Contains year and month of survey\n)\n\n# Combine 
all variables\nall_vars <- c(smoking_vars, demographic_vars, timing_vars)\n\n# Process each dataset with cchsflow\nharmonized_list <- lapply(cchs_list, function(dataset) {\n rec_with_table(dataset, all_vars)\n})\n\n# Combine all harmonized datasets\nharmonized_data <- bind_rows(harmonized_list)\n```\n:::\n\n\n\n\n## Step 5: Create derived variables\n\nSome additional variables need to be created for the CSHGM:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create survey date and additional variables\nharmonized_data <- harmonized_data %>%\n mutate(\n # Extract survey year and month from SAMPLEID\n survey_year = as.numeric(substr(as.character(SAMPLEID), 1, 4)),\n survey_month = as.numeric(substr(as.character(SAMPLEID), 5, 6)),\n \n # Create a date for the survey (use middle of month as approximation)\n cchsbdate = as.Date(paste(survey_year, survey_month, \"15\", sep = \"-\")),\n \n # Rename age of first cigarette for consistency with original code\n agefirst = SMKG01C_cont,\n \n # Create survey age variable (age at time of survey)\n surveyage = DHH_AGE,\n \n # Calculate birth year\n birth_year = survey_year - surveyage,\n \n # Create weight variable with consistent name\n weighting = WTS_M,\n \n # Create respondent ID\n ont_id = row_number() # Create sequential ID if none exists\n )\n```\n:::\n\n\n\n\n## Step 6: Clean and validate the data\n\nData cleaning is essential to ensure valid analyses:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Remove cases with missing key variables\ncleaned_data <- harmonized_data %>%\n filter(!is.na(DHH_SEX)) %>% # Remove cases missing sex\n filter(!is.na(surveyage)) %>% # Remove cases missing age\n filter(surveyage >= 12) %>% # Include only respondents aged 12+\n filter(!is.na(weighting)) # Remove cases missing weights\n\n# Recode sex to match CSHGM requirements (M/F)\ncleaned_data <- cleaned_data %>%\n mutate(sex = ifelse(DHH_SEX == 1, \"M\", \"F\"))\n```\n:::\n\n\n\n\n## Step 7: Check variable distributions\n\nVerify that the 
harmonization worked correctly by checking key variable distributions:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Check smoking status distribution\ncleaned_data %>%\n count(SMKDSTY) %>%\n mutate(\n label = case_when(\n SMKDSTY == 1 ~ \"Daily smoker\",\n SMKDSTY == 2 ~ \"Occasional smoker\",\n SMKDSTY == 3 ~ \"Former daily smoker\",\n SMKDSTY == 4 ~ \"Former occasional smoker\", \n SMKDSTY == 5 ~ \"Never smoker\",\n TRUE ~ \"Unknown\"\n ),\n percentage = 100 * n / sum(n)\n ) %>%\n arrange(SMKDSTY)\n\n# Check distribution by cycle\ncleaned_data %>%\n group_by(cycle) %>%\n count() %>%\n mutate(percentage = 100 * n / sum(n))\n\n# Check age distribution\ncleaned_data %>%\n mutate(age_group = cut(surveyage, \n breaks = c(12, 20, 30, 40, 50, 60, 70, 80, 999),\n labels = c(\"12-19\", \"20-29\", \"30-39\", \"40-49\", \n \"50-59\", \"60-69\", \"70-79\", \"80+\"))) %>%\n count(age_group) %>%\n mutate(percentage = 100 * n / sum(n))\n```\n:::\n\n\n\n\n# Save the processed data\n\nOnce your data is harmonized and validated, save it for future use:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Save the processed data\nsaveRDS(cleaned_data, file.path(data_path, \"harmonized_cchs_data.rds\"))\n```\n:::\n\n\n\n\n# Additional considerations\n\n## Variable mapping challenges\n\nCCHS variables may not always harmonize cleanly. Here are some common challenges and solutions:\n\n1. **Variable availability**: Not all variables are available in all cycles. Check which cycles include your variables of interest.\n\n2. **Coding differences**: Response codes may differ across cycles, even after harmonization. Verify code mappings using `cchsflow` documentation.\n\n3. **Skip patterns**: Survey skip patterns have changed over time, affecting the population asked certain questions. \n\n4. **Variable granularity**: Some cycles may have more detailed responses than others. 
Harmonization usually adopts the least granular version.\n\n## Working with secure environments\n\nWhen working in Statistics Canada Research Data Centres (RDCs) or other secure environments:\n\n1. Use relative file paths or configuration files to manage data locations\n2. Avoid storing large intermediate files if storage is limited\n3. Follow the RDC's guidelines for output review\n4. Consider batch processing if interactive sessions are limited\n\n# Next steps\n\nAfter harmonizing your CCHS data, you can proceed to:\n\n1. [Process smoking variables](processing-cchs-data.qmd) in more detail\n2. Begin [APC modeling](reproducing-manuel-study.qmd) using the harmonized data\n\n# References\n\nCanadian Community Health Survey (CCHS) - Annual Component. Statistics Canada.\n\nOnysko, J., & MacKenzie, A. (2015). The cchsflow R package: Standardizing variables across CCHS cycles. Statistics Canada.",
+ "supporting": [],
+ "filters": [
+ "rmarkdown/pagebreak.lua"
+ ],
+ "includes": {},
+ "engineDependencies": {},
+ "preserve": {},
+ "postProcess": true
+ }
+}
\ No newline at end of file
diff --git a/_freeze/docs/how-to/reproducing-manuel-study/execute-results/html.json b/_freeze/docs/how-to/reproducing-manuel-study/execute-results/html.json
new file mode 100644
index 0000000..138a788
--- /dev/null
+++ b/_freeze/docs/how-to/reproducing-manuel-study/execute-results/html.json
@@ -0,0 +1,15 @@
+{
+ "hash": "d3f17f9f4680bee45686abe6a7aa3663",
+ "result": {
+ "engine": "knitr",
+ "markdown": "---\ntitle: \"Reproducing the Canadian Smoking History Generator Model\"\nsubtitle: \"A Guide to Variable Derivation and Analysis\"\nauthor: \"CSHGM Development Team\"\ndate: \"Last Updated: 2025-03-06\"\nformat:\n html:\n toc: true\n toc-depth: 3\n number-sections: true\n theme: cosmo\n code-fold: show\nexecute:\n echo: true\n warning: false\n message: false\n---\n\n\n\n\n\n\n# Introduction\n\nThis document provides a guide to reproducing the Canadian Smoking History Generator Model as described in Manuel et al. (2002) \"Current and future mortality probabilities in Ontario, 2002 to 2032\". The model uses Age-Period-Cohort (APC) analysis to estimate smoking initiation and cessation probabilities across different birth cohorts, which are then used to generate complete smoking histories.\n\n## Purpose of This Guide\n\nThis guide aims to:\n\n1. Describe the key variables needed from the Canadian Community Health Survey (CCHS)\n2. Show how to derive the necessary variables for the model\n3. Provide R code implementations of the original SAS procedures\n4. Explain how to interpret and validate the results\n\n## Original Study Overview\n\nThe original study by Manuel et al. (2002) developed a model to:\n\n- Generate individual smoking histories for a simulated population\n- Estimate smoking prevalence across different time periods and birth cohorts\n- Model both smoking initiation and cessation probabilities\n- Account for age, period, and cohort effects in smoking behavior\n\nThe approach used data from the Canadian Community Health Survey to construct smoking histories and applied statistical modeling to estimate probabilities of starting and quitting smoking.\n\n# Required Data Sources\n\n## Canadian Community Health Survey (CCHS)\n\nThe CCHS is the primary data source for this analysis. 
Multiple cycles have been conducted:\n\n- CCHS 2001 (Cycle 1.1)\n- CCHS 2003 (Cycle 2.1)\n- CCHS 2005 (Cycle 3.1)\n- CCHS 2007-2008\n- CCHS 2009-2010\n- CCHS 2011-2012\n- CCHS 2013-2014\n- CCHS 2015-2016\n- CCHS 2017-2018\n\n## Variable Harmonization Across Cycles\n\nCCHS variable names have changed across cycles. We use the cchsflow package for variable harmonization:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of harmonizing CCHS data using cchsflow\n# library(cchsflow)\n# harmonized_data <- rec_with_table(\n# cchs_data,\n# c(\"SMK_01A\", \"SMKG01C_cont\", \"SMKG203_cont\", \"SMK_204\") \n# )\n```\n:::\n\n\n\n\n# Key Variables for Smoking History\n\n## Core Smoking Status Variables\n\nThe following variables are critical for determining smoking status:\n\n| Harmonized Variable | Description | Values |\n|---------------------|-------------|--------|\n| SMK_01A | In lifetime, smoked 100 or more cigarettes | 1=Yes, 2=No |\n| SMK_202 | Current smoking status | 1=Daily, 2=Occasional, 3=Not at all |\n| SMKDSTY | Type of smoker | 1=Daily, 2=Occasional, 3=Former daily, 4=Former occasional, 5=Never |\n\n## Smoking Initiation Variables\n\nVariables measuring when someone started smoking:\n\n| Harmonized Variable | Description | Type |\n|---------------------|-------------|------|\n| SMKG01C_cont | Age smoked first cigarette | Continuous |\n| SMKG203_cont | Age started smoking daily (daily smokers) | Continuous |\n| SMKG207_cont | Age started smoking daily (former daily smokers) | Continuous |\n\n## Smoking Cessation Variables\n\nVariables measuring when someone quit smoking:\n\n| Harmonized Variable | Description | Type |\n|---------------------|-------------|------|\n| SMK_09A_cont | When stopped smoking daily (former daily) | Continuous |\n| SMKG09C | Years since stopped smoking daily (former daily) | Categorical |\n\n## Smoking Intensity Variables\n\nVariables measuring how much someone smokes/smoked:\n\n| Harmonized Variable | Description | Type 
|\n|---------------------|-------------|------|\n| SMK_204 | Number of cigarettes smoked daily (daily smokers) | Continuous |\n| SMK_208 | Number of cigarettes smoked daily (former daily smokers) | Continuous |\n| SMK_05B | Number of cigarettes smoked daily (occasional smokers) | Continuous |\n| SMK_05C | Number of days smoked in past month (occasional smokers) | Continuous |\n\n## Demographic Variables\n\nVariables used for cohort assignment and stratification:\n\n| Harmonized Variable | Description | Type |\n|---------------------|-------------|------|\n| DHH_SEX | Sex of respondent | Categorical |\n| cchsbdate | CCHS survey date | Date |\n\n# Variable Derivation for Age-Period-Cohort Analysis\n\n## Smoking Initiation Process\n\n### Step 1: Identify Smoking Initiators\n\nThe first step is to identify individuals who have initiated smoking, defined as having smoked at least 100 cigarettes in their lifetime.\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Using our process_smoking_initiation function\ninits_male <- process_smoking_initiation(harmonized_data, sex = \"M\")\ninits_female <- process_smoking_initiation(harmonized_data, sex = \"F\")\n```\n:::\n\n\n\n\nThe function shown above performs the following steps:\n\n1. Filters respondents by sex\n2. Identifies never-smokers (SMK_01A = 2) and sets their initiation age to 101 (a flag value)\n3. Creates an initiation indicator (init=1 for smokers, init=0 for never-smokers)\n4. Filters to include only those born in 1920 or later\n5. Creates a subset containing only those who initiated smoking\n6. Applies age filter (age >= 8)\n7. Calculates period (year of initiation) as cohort + age\n8. 
Returns a dataset with ont_id, weighting, age, cohort, period, and init variables\n\n### Step 2: Create Population Denominators\n\nTo calculate initiation rates, we need the population at risk of initiation at each age, period, and cohort combination.\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example function for creating denominators (not fully implemented yet)\ncreate_initiation_denominators <- function(dataset, sex = \"M\") {\n # Function implementation would go here\n # This creates population counts by age, period, and cohort\n}\n```\n:::\n\n\n\n\n## Smoking Cessation Process\n\n### Step 1: Identify Smoking Cessation\n\nSimilarly, we identify individuals who have quit smoking and when they quit.\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example function for processing cessation data\nprocess_smoking_cessation <- function(dataset, sex = \"M\") {\n # Function implementation would follow similar steps to initiation\n # but with cessation-specific criteria\n}\n```\n:::\n\n\n\n\n## Age-Period-Cohort Modeling\n\nThe APC modeling approach uses splines to model age, period, and cohort effects:\n\n1. Natural cubic splines are constructed for age, period, and cohort\n2. These splines are used in a generalized linear model with binomial errors\n3. Survival probabilities are used to adjust for differential mortality\n4. Separate models are fit for males and females\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of creating splines for APC modeling\ncreate_age_splines <- function(ages, knots) {\n # Function to create natural cubic splines for ages\n # Implementation would go here\n}\n```\n:::\n\n\n\n\n# Applying the Model\n\n## Estimating Smoking Probabilities\n\nOnce the APC models are fitted, they can be used to predict:\n\n1. Initiation probabilities by age, period, and cohort\n2. 
Cessation probabilities by age, period, and cohort\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of predicting initiation probabilities\npredict_initiation <- function(age, period, cohort, sex) {\n # Implementation would use fitted coefficients from APC model\n}\n```\n:::\n\n\n\n\n## Generating Smoking Histories\n\nComplete smoking histories can be generated using:\n\n1. A microsimulation approach based on the estimated probabilities\n2. Monte Carlo methods to introduce appropriate stochasticity\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of generating a smoking history\ngenerate_smoking_history <- function(birth_year, sex) {\n # Simulation implementation would go here\n}\n```\n:::\n\n\n\n\n# Validation and Calibration\n\n## Comparing to Historical Data\n\nModel validation involves comparing the generated smoking histories to:\n\n1. Historical smoking prevalence data\n2. Known patterns in smoking behavior by birth cohort\n3. Other published estimates of smoking trends\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of validating model results\nvalidate_smoking_prevalence <- function(simulated_data, reference_data) {\n # Validation code would go here\n}\n```\n:::\n\n\n\n\n## Calibration Procedures\n\nIf necessary, model calibration can be performed by:\n\n1. Adjusting the baseline rates\n2. Modifying the period effects\n3. 
Fine-tuning cohort-specific parameters\n\n# Implementation Example\n\nBelow is a simplified example of processing smoking initiation data using the function we've developed:\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create simulated data for testing\ntest_data <- data.frame(\n ont_id = 1001:1010,\n sex = rep(c(\"M\", \"F\"), 5),\n SMK_01A = c(1, 2, 1, 1, 1, 2, 1, 2, 1, 1), # 1=Yes, 2=No to 100+ cigarettes\n agefirst = c(16, NA, 12, 21, 7, NA, 18, NA, 15, 22),\n cchsbdate = as.Date(c(\"2001-06-15\", \"2001-07-20\", \"2001-08-10\", \n \"2001-09-05\", \"2001-10-25\", \"2002-01-15\",\n \"2002-02-20\", \"2002-03-10\", \"2002-04-15\", \"2002-05-20\")),\n weighting = round(runif(10, 100, 300))\n)\n\n# Process the initiation data\nmale_init <- process_smoking_initiation(test_data, sex = \"M\")\nfemale_init <- process_smoking_initiation(test_data, sex = \"F\")\n\n# Examine the results\nprint(male_init)\nprint(female_init)\n```\n:::\n\n\n\n\n# Conclusion\n\nThis guide has provided an overview of how to:\n\n1. Prepare CCHS data for smoking history analysis\n2. Derive the key variables needed for APC modeling\n3. Implement the core components of the Canadian Smoking History Generator Model\n4. Apply and validate the model\n\nFurther development is ongoing to complete the full implementation of the model in R, including additional validation against historical data and extensions to incorporate more recent survey cycles.\n\n# References\n\nManuel, D.G. et al. (2002). Current and future mortality probabilities in Ontario, 2002 to 2032. *Health Reports*, Statistics Canada.\n\nHolford, T.R. (1991). Understanding the effects of age, period, and cohort on incidence and mortality rates. *Annual Review of Public Health*, 12, 425-457.\n\nCanadian Community Health Survey (CCHS) - Annual Component. Statistics Canada.\n\n# Appendix: Additional Resources\n\n## SAS to R Translation Notes\n\nThe original model was implemented in SAS. Key translation considerations include:\n\n1. 
SAS `PROC GENMOD` is equivalent to R's `glm()` function\n2. SAS natural cubic splines can be created with R's `splines::ns()`\n3. SAS data steps are translated to R's data.frame operations\n4. SAS `%macro` functions are converted to R functions\n\n## Complete Variable Crosswalk\n\nFor a complete mapping of variables across CCHS cycles, refer to the included variable sheets:\n\n- `worksheets/cshgm-variables.csv`\n- `worksheets/cshgm-variable-details.csv`",
+ "supporting": [],
+ "filters": [
+ "rmarkdown/pagebreak.lua"
+ ],
+ "includes": {},
+ "engineDependencies": {},
+ "preserve": {},
+ "postProcess": true
+ }
+}
\ No newline at end of file
diff --git a/_freeze/docs/project-specifications/execute-results/html.json b/_freeze/docs/project-specifications/execute-results/html.json
new file mode 100644
index 0000000..91e40ba
--- /dev/null
+++ b/_freeze/docs/project-specifications/execute-results/html.json
@@ -0,0 +1,17 @@
+{
+ "hash": "3fb94db4df5c708dd87e8b5814390c68",
+ "result": {
+ "engine": "knitr",
+ "markdown": "---\ntitle: \"Canadian Smoking History Generator Model: Project specifications\"\nsubtitle: \"Scope, structure, and implementation plan\"\nauthor: \"CSHGM Development Team\"\ndate: \"Last Updated: 2025-03-27\"\nformat:\n html:\n toc: true\n toc-depth: 3\n number-sections: true\n theme: cosmo\n code-fold: show\nexecute:\n echo: true\n warning: false\n message: false\n---\n\n\n\n\n\n\n\n# Executive summary\n\nThis document outlines the scope, specifications, and implementation plan for the Canadian Smoking History Generator Model (CSHGM) project. The project aims to create the CSHGM by extending the existing smoking history generation methodology developed in the Ontario SHGM study (2020) and the Age-Period-Cohort modelling approach pioneered by Holford. By implementing this methodology in R, we seek to improve accessibility, transparency, and extensibility while preserving the scientific integrity of the original work.\n\nThis specification document will serve as a reference for all project contributors, helping to ensure consistency across development efforts and provide clear guidance for implementation decisions.\n\n# Project goals and scope\n\n## Primary goals\n\n1. Implement the Canadian Smoking History Generator Model in R\n2. Provide a well-documented, reproducible workflow for smoking history modelling\n3. Create flexible, modular components that can be adapted to different contexts\n4. Ensure compatibility with secure computing environments\n5. 
Follow open science principles and best practices\n\n## Project scope\n\n### In scope\n\n- R implementation of the Age-Period-Cohort (APC) modelling approach\n- Processing of Canadian Community Health Survey (CCHS) data\n- Derivation of smoking initiation and cessation probabilities\n- Generation of complete smoking histories\n- Validation against historical data\n- Documentation for users and developers\n\n### Out of scope (for initial release)\n\n- Integration with specific disease models\n- Implementation of alternative smoking behaviour models\n- Web application or interactive user interface\n- Incorporation of e-cigarette or vaping behaviours\n- Integration with non-Canadian health surveys\n\n## Legacy recognition\n\nThis project builds upon the foundational work of Dr. Ted Holford at Yale University and CISNET. We will work with Dr. Holford to determine appropriate recognition and attribution.\n\n# Project structure and approach\n\n## Folder structure\n\nWe will adopt a structure that balances R package conventions with a more accessible Quarto project organization:\n\n``` \ncshgm/\n├── .github/ # GitHub Actions and templates\n├── R/ # R functions in separate files\n├── data-raw/ # Scripts to process raw data\n├── data/ # Processed data objects (for non-secure environments)\n├── resources/ # Background material, including historic SAS code and previously published papers.\n├── docs/ # Documentation as Quarto files\n│ ├── reference/ # Technical reference for functions, configuration files\n│ ├── explanation/ # Conceptual explanations\n│ ├── how-to/ # Task-oriented guides\n│ └── tutorials/ # Learning-oriented tutorials\n├── tests/ # Test files\n│ └── testthat/ # Unit tests using testthat\n├── config/ # Configuration files\n│ ├── default.yml # Default configuration\n│ └── secure.yml # Template for secure environments\n├── _quarto.yml # Quarto site configuration\n├── index.qmd # Main landing page\n└── README.md # Project overview\n```\n\nAdditional resources and 
background documents are stored on [OSF.io](https://osf.io/vcydu/).\n\n## Documentation approach\n\nWe will follow the [Divio documentation system](https://documentation.divio.com/) with four types of documentation:\n\n1. **Tutorials**: Step-by-step lessons to get started\n2. **How-to guides**: Practical instructions for specific tasks\n3. **Explanation**: Conceptual discussions of methodology\n4. **Reference**: Technical details of functions and data\n\nThis approach ensures we meet the needs of different users, from beginners to advanced researchers.\n\n## Development philosophy\n\nThe project will use widely adopted standards to facilitate open science.\n\n### Tidyverse design principles\n\nWe will follow tidyverse design principles:\n\n- Create composable functions that do one thing well\n- Use consistent function naming and argument conventions\n- Embrace the pipe (`|>` or `%>%`) for readable data manipulation\n- Prioritize data frames as the primary data structure\n- Use tidy evaluation for metaprogramming when needed\n\n### Open science principles\n\nThe project will adhere to FAIR principles:\n\n- **Findable**: Clear metadata, permanent DOI\n- **Accessible**: Open source, clear installation instructions\n- **Interoperable**: Standard data formats, well-documented APIs\n- **Reusable**: Comprehensive documentation, permissive license\n\n### Modular design\n\nThe implementation will be modular with these key components:\n\n1. **Data Processing**: Functions to process and harmonize CCHS data\n2. **APC Modeling**: Implementation of the APC modeling methodology\n3. **Simulation**: Functions to generate smoking histories\n4. **Validation**: Tools to validate against reference data\n5. 
**Visualization**: Components to visualize results\n\n## Technology stack\n\n### Core technologies\n\n- **R**: Primary programming language\n- **Quarto**: Documentation and website generation\n- **GitHub**: Version control and collaboration\n- **GitHub Actions**: Continuous integration and deployment\n\n### Key R packages\n\n- **tidyverse**: Data manipulation and visualization\n- **splines**: Spline modelling functionality\n- **cchsflow**: CCHS data harmonization\n- **config**: Configuration management\n- **logger**: Logging functionality\n- **testthat**: Unit testing\n- **roxygen2**: Documentation generation\n\n# Implementation plan\n\n## 1. Confirm goals, objectives, and deliverables\n\n**1.1. Confirm project scope**: Align project goals with specific, measurable objectives.\n\n**1.2. Identify deliverables**\n\n- A peer-reviewed publication, similar to previously published reports like Manuel et al 2020. Health Report?\n- Public repository to facilitate updating: documentation, and test datasets.\n\n**1.3. Document success criteria**: Establish criteria for evaluating project completion and impact.\n\n## 2. Confirm stakeholders and research partners\n\n**2.1. Confirm the current team**\n\n**2.2. Identify any stakeholders we wish to keep updated**: CISNET via ; INSPQ?; Health Canada and PHAC?\n\n## 3. Define project roles and responsibilities\n\n1. **Team member assignments**: Assign roles including project lead, and leads for different components.\n2. **Responsibility matrix**: Develop a RACI (Responsible, Accountable, Consulted, Informed) chart.\n3. **Governance structure**: Define paths for issues and schedule regular check-ins.\n\n# Establish project management approach\n\n1. **Methodology**: Use GitHub Issues and Projects for tracking progress.\n2. **Timeline and milestones**: Develop a milestone-based timeline.\n3. 
**Monitoring and reporting**: Review progress to milestones, and issues at regular meetings.\n\n# Coordinate development of the technical (analysis) plan\n\n1. **Separate technical plan**: Detail methodological aspects such as APC model implementation, data processing, and validation.\n2. **Review cycles**: Schedule peer review of technical assumptions and implementation.\n3. **Integration with broader study**: Ensure alignment between technical plan and overall study.\n\n# Define data management processes\n\n1. **Data preparation and harmonization**: Implement consistent procedures for processing CCHS data.\n2. **Data security**: Outline handling protocols for restricted environments.\n3. **Version control**: Track changes in datasets and outputs using GitHub.\n\n# Plan communication and knowledge exchange\n\n1. **Internal communications**: Use GitHub Issues, Slack, and email for coordination.\n2. **External dissemination**: Plan presentations, publications, and policy briefs.\n3. **Documentation strategy**: Maintain Divio-style documentation for accessibility.\n\n# Develop risk management strategies\n\n1. **Identify risks**: Consider data availability, funding, and methodological uncertainties.\n2. **Mitigation measures**: Define strategies to address potential risks.\n3. **Contingency planning**: Establish fallback options for major challenges.\n\n# Conduct pilot work and feasibility checks\n\n1. **Prototype testing**: Conduct small-scale trials before full implementation.\n2. **Feasibility review**: Adjust project scope based on pilot findings.\n3. **Refinement of protocols**: Incorporate lessons learned into final methodology.\n\n# Finalize study protocol\n\n1. **Integration of feedback**: Revise protocols based on pilot and review feedback.\n2. **Ethical review (if applicable)**: Secure necessary approvals.\n3. **Sign-off and baseline**: Confirm agreement among stakeholders.\n\n# Execute study and deliver final outputs\n\n1. 
**Full-scale development**: Implement smoking history model and validate results.\n2. **Ongoing quality checks**: Maintain testing and documentation integrity.\n3. **Summative evaluation**: Ensure final deliverables meet project objectives.\n\n# Dissemination and close-out\n\n1. **Knowledge translation**: Share findings through reports, presentations, and publications.\n2. **Publication or final repository**: Release code and datasets with DOIs.\n3. **Project close-out review**: Document lessons learned and plan potential expansions.\n\n# Tracking and project management\n\n1. **GitHub Issues**: Use issue tracking for task management.\n2. **GitHub Projects**: Organize and visualize progress through kanban boards.\n3. **Regular progress reviews**: Conduct check-ins to ensure alignment with milestones.\n\n## Phase 1: Project setup and core infrastructure\n\n1. Set up GitHub repository with initial structure\n2. Configure GitHub Actions for CI/CD\n3. Create initial documentation framework\n4. Implement configuration management system\n5. Establish coding standards and contributor guidelines\n\n## Phase 2: Data processing implementation\n\n1. Develop functions for CCHS data harmonization\n2. Implement smoking status variable processing\n3. Create data preprocessing workflow\n4. Write documentation and tests for data processing\n\n## Phase 3: APC model implementation\n\n1. Implement natural cubic spline generation\n2. Develop APC modelling functions\n3. Create initiation probability estimation\n4. Create cessation probability estimation\n5. Write documentation and tests for APC components\n\n## Phase 4: Simulation and validation\n\n1. Develop smoking history generation functions\n2. Implement validation methodology\n3. Create calibration procedures\n4. Generate validation datasets\n5. Write documentation and tests for simulation components\n\n## Phase 5: Integration and documentation\n\n1. Ensure component integration\n2. Finalize documentation\n3. 
Create example workflows\n4. Develop tutorials and how-to guides\n5. Prepare for initial release\n\n# Specific implementation considerations\n\n## Configuration management\n\nWe will use the `config` package with YAML files to manage environment-specific settings:\n\n``` yaml\n# Example config.yml\ndefault:\n data_path: \"data/\"\n variables: !expr file.path(getwd(), \"worksheets/variables/huiport-variables.csv\")\n variable_details: !expr file.path(getwd(), \"worksheets/variable-details/cchsflow-variable-details.csv\")\n output_path: \"output/\"\n log_level: \"INFO\"\n\nsecure:\n data_path: \"/secure/data/\"\n output_path: \"/secure/output/\"\n log_level: \"WARN\"\n```\n\nThis approach allows for seamless transitions between different computing environments.\n\n## Documentation breakdown\n\nWe propose organizing documentation into the following Quarto files:\n\n### Reference documentation\n\n- `reference-variables.qmd`: Complete reference of all variables\n- `reference-functions.qmd`: Technical documentation of all functions\n- `reference-model.qmd`: Details of the APC model implementation\n\n### Explanation documentation\n\n- `explanation-smoking-history.qmd`: Conceptual overview of smoking history modelling\n- `explanation-apc-method.qmd`: Explanation of the APC methodology\n- `explanation-validation.qmd`: Discussion of validation approaches\n\n### How-to guides\n\n- `howto-setup.qmd`: Setting up the environment\n- `howto-process-cchs.qmd`: Processing CCHS data\n- `howto-run-apc.qmd`: Running the APC model\n- `howto-generate-histories.qmd`: Generating smoking histories\n- `howto-validate.qmd`: Validating results\n\n### Tutorials\n\n- `tutorial-basic.qmd`: Step-by-step tutorial for basic usage\n- `tutorial-advanced.qmd`: Advanced tutorial for customization\n\n## Testing strategy\n\nWe will implement a comprehensive testing strategy:\n\n1. **Unit tests**: For individual functions\n2. **Integration tests**: For component interactions\n3. 
**Regression tests**: To ensure consistency with previous results\n4. **Validation tests**: Against reference data\n\nTests will be organized to mirror the structure of the code, with separate test files for each module.\n\n## Considerations for secure environments\n\nTo ensure compatibility with secure environments like Statistics Canada's Research Data Centres:\n\n1. No hard-coded file paths\n2. Configuration-driven data access\n3. Minimal external dependencies\n4. Well-documented installation procedures\n5. Alternative output formats for environments with restrictions\n6. All output meets the small cell size and other privacy requirements of the data sources and institutional partners.\n\n## International adaptability\n\nTo facilitate adoption by teams in other countries:\n\n1. Clear separation between data processing and modelling\n2. Documented variable mapping procedures\n3. Configurable parameters for country-specific variations\n4. Example workflows for adapting to different survey structures\n\n# Collaboration and contribution\n\n## Team roles and responsibilities\n\n- **Project coordination**: CSHGM Consortium\n- **Statistical methodology**: Colleagues from Statistics Canada, BCDCDC\n- **APC modelling expert**: Ted Holford\n- **R development**: To be determined\n- **Documentation**: To be determined\n- **Testing and validation**: To be determined\n\n## Contribution workflow\n\n1. Issues created for specific tasks\n2. Branches created for feature development\n3. Pull requests submitted for review\n4. Code review by at least one team member\n5. Automated testing via GitHub Actions\n6. 
Merge to main branch after approval\n\n## Communication channels\n\n- GitHub Issues for task tracking\n- Regular video meetings for team coordination\n- Email for broader announcements\n- Shared documentation for long-term knowledge preservation\n\n# Potential challenges and mitigation strategies\n\n| Challenge | Mitigation strategy |\n|--------------------------|----------------------------------------------|\n| Secure environment limitations | Develop with minimal dependencies, provide alternative implementation paths |\n| Compatibility across CCHS cycles | Robust variable harmonization, comprehensive testing with multiple cycles |\n| Statistical complexity | Clear documentation, code review by statistical experts |\n| Balancing accessibility vs. complexity | Modular design with multiple entry points for different user levels |\n| Long-term sustainability | Comprehensive documentation, clear contribution guidelines, active community building |\n\n# Timeline and milestones\n\n| Milestone | Target date | Deliverables |\n|---------------------|-------------------------|---------------------------|\n| Project setup | Month 1 | Repository, documentation framework, contribution guidelines |\n| Data processing implementation | Month 3 | Functions for CCHS data processing, documentation |\n| APC model implementation | Month 6 | Complete APC modelling functions, tests, documentation |\n| Simulation and validation | Month 9 | History generation, validation functions, documentation |\n| Integration and documentation | Month 12 | Complete documentation, examples, initial release |\n\n# Conclusion\n\nThis project specification outlines the approach for implementing the Canadian Smoking History Generator Model in R. 
By adhering to open science principles, following tidyverse design guidelines, and creating comprehensive documentation, we aim to create a valuable resource for researchers studying smoking behaviour and its health impacts.\n\nThe modular, configurable design will ensure the codebase can be used in various environments and adapted to different contexts, preserving the scientific methodology while improving accessibility and usability.\n\n# Next steps\n\n1. Finalize project specification based on team feedback\n2. Set up initial repository structure\n3. Implement configuration management system\n4. Begin work on data processing functions\n5. Develop initial documentation framework\n\n# References\n\nOntario SHGM study (2020). Current and future mortality probabilities in Ontario, 2002 to 2032. *Health Reports*, Statistics Canada.\n\nHolford, T.R. (1991). Understanding the effects of age, period, and cohort on incidence and mortality rates. *Annual Review of Public Health*, 12, 425-457. doi:10.1146/annurev.pu.12.050191.002233\n\nWickham, H. (2015). R packages: organize, test, document, and share your code. O'Reilly Media, Inc.\n\nWilkinson, M.D. et al. (2016). The FAIR Guiding Principles for scientific data management and stewardship. *Scientific Data*, 3(1), 1-9. 
doi:10.1038/sdata.2016.18\n\n# Appendix A: Glossary of terms\n\n| Term | Definition |\n|-------|-------------------------------------------------------------|\n| APC | Age-Period-Cohort modelling approach |\n| CCHS | Canadian Community Health Survey |\n| CSHGM | Canadian Smoking History Generator Model |\n| FAIR | Findable, Accessible, Interoperable, Reusable |\n| RDC | Research Data Centre (Statistics Canada secure environment) |\n\n# Appendix B: Example configuration file\n\n``` yaml\ndefault:\n # Data paths\n raw_data_path: \"data-raw/\"\n processed_data_path: \"data/\"\n output_path: \"output/\"\n \n # Logging configuration\n log_level: \"INFO\"\n log_file: \"logs/cshgm.log\"\n \n # Model parameters\n min_age: 8\n max_age: 99\n min_cohort: 1920\n max_cohort: 2020\n \n # CCHS cycles to include\n cchs_cycles: [\"2001\", \"2003\", \"2005\", \"2007-2008\", \"2009-2010\", \n \"2011-2012\", \"2013-2014\", \"2015-2016\", \"2017-2018\"]\n \n # APC model specifications\n age_knots: [10, 15, 20, 50, 60]\n period_knots: [1940, 1950, 1960, 1970, 1980]\n cohort_knots: [1930, 1940, 1945, 1950, 1955, 1960, 1965, 1970, 1975, 1980]\n```",
+ "supporting": [
+ "project-specifications_files"
+ ],
+ "filters": [
+ "rmarkdown/pagebreak.lua"
+ ],
+ "includes": {},
+ "engineDependencies": {},
+ "preserve": {},
+ "postProcess": true
+ }
+}
\ No newline at end of file
diff --git a/_freeze/docs/reproducing-Ontario-study/execute-results/html.json b/_freeze/docs/reproducing-Ontario-study/execute-results/html.json
new file mode 100644
index 0000000..e44513b
--- /dev/null
+++ b/_freeze/docs/reproducing-Ontario-study/execute-results/html.json
@@ -0,0 +1,15 @@
+{
+ "hash": "247d81f10d309a2efb63562bd9d63f23",
+ "result": {
+ "engine": "knitr",
+ "markdown": "---\ntitle: \"Reproducing the Canadian Smoking History Generator Model\"\nsubtitle: \"A Guide to Variable Derivation and Analysis\"\nauthor: \"CSHGM Development Team\"\ndate: \"Last Updated: 2025-03-07\"\nformat:\n html:\n toc: true\n toc-depth: 3\n number-sections: true\n theme: cosmo\n code-fold: show\nexecute:\n echo: true\n warning: false\n message: false\n---\n\n\n\n\n\n# Introduction\n\nThis document provides a guide to reproducing the Canadian Smoking History Generator Model as described in Manuel et al. (2002) \"Current and future mortality probabilities in Ontario, 2002 to 2032\". The model uses Age-Period-Cohort (APC) analysis to estimate smoking initiation and cessation probabilities across different birth cohorts, which are then used to generate complete smoking histories.\n\n## Purpose of This Guide\n\nThis guide aims to:\n\n1. Describe the key variables needed from the Canadian Community Health Survey (CCHS)\n2. Show how to derive the necessary variables for the model\n3. Provide R code implementations of the original SAS procedures\n4. Explain how to interpret and validate the results\n\n## Original Study Overview\n\nThe original study by Manuel et al. (2002) developed a model to:\n\n- Generate individual smoking histories for a simulated population\n- Estimate smoking prevalence across different time periods and birth cohorts\n- Model both smoking initiation and cessation probabilities\n- Account for age, period, and cohort effects in smoking behavior\n\nThe approach used data from the Canadian Community Health Survey to construct smoking histories and applied statistical modeling to estimate probabilities of starting and quitting smoking.\n\n# Required Data Sources\n\n## Canadian Community Health Survey (CCHS)\n\nThe CCHS is the primary data source for this analysis. 
Multiple cycles have been conducted:\n\n- CCHS 2001 (Cycle 1.1)\n- CCHS 2003 (Cycle 2.1)\n- CCHS 2005 (Cycle 3.1)\n- CCHS 2007-2008\n- CCHS 2009-2010\n- CCHS 2011-2012\n- CCHS 2013-2014\n- CCHS 2015-2016\n- CCHS 2017-2018\n\n## Variable Harmonization Across Cycles\n\nCCHS variable names have changed across cycles. We use the cchsflow package for variable harmonization:\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of harmonizing CCHS data using cchsflow\n# library(cchsflow)\n# harmonized_data <- rec_with_table(\n# cchs_data,\n# c(\"SMK_01A\", \"SMKG01C_cont\", \"SMKG203_cont\", \"SMK_204\") \n# )\n```\n:::\n\n\n\n# Key Variables for Smoking History\n\n## Core Smoking Status Variables\n\nThe following variables are critical for determining smoking status:\n\n| Harmonized Variable | Description | Values |\n|----------------------------------|---------------------|-----------------|\n| SMK_01A | In lifetime, smoked 100 or more cigarettes | 1=Yes, 2=No |\n| SMK_202 | Current smoking status | 1=Daily, 2=Occasional, 3=Not at all |\n| SMKDSTY | Type of smoker | 1=Daily, 2=Occasional, 3=Former daily, 4=Former occasional, 5=Never |\n\n## Smoking Initiation Variables\n\nVariables measuring when someone started smoking:\n\n| Harmonized Variable | Description | Type |\n|-----------------------------------|---------------------|----------------|\n| SMKG01C_cont | Age smoked first cigarette | Continuous |\n| SMKG203_cont | Age started smoking daily (daily smokers) | Continuous |\n| SMKG207_cont | Age started smoking daily (former daily smokers) | Continuous |\n\n## Smoking Cessation Variables\n\nVariables measuring when someone quit smoking:\n\n| Harmonized Variable | Description | Type |\n|-----------------------------------|---------------------|----------------|\n| SMK_09A_cont | When stopped smoking daily (former daily) | Continuous |\n| SMKG09C | Years since stopped smoking daily (former daily) | Categorical |\n\n## Smoking Intensity Variables\n\nVariables measuring 
how much someone smokes/smoked:\n\n| Harmonized Variable | Description | Type |\n|-----------------------------------|---------------------|----------------|\n| SMK_204 | Number of cigarettes smoked daily (daily smokers) | Continuous |\n| SMK_208 | Number of cigarettes smoked daily (former daily smokers) | Continuous |\n| SMK_05B | Number of cigarettes smoked daily (occasional smokers) | Continuous |\n| SMK_05C | Number of days smoked in past month (occasional smokers) | Continuous |\n\n## Demographic Variables\n\nVariables used for cohort assignment and stratification:\n\n| Harmonized Variable | Description | Type |\n|---------------------|-------------------|-------------|\n| DHH_SEX | Sex of respondent | Categorical |\n| cchsbdate | CCHS survey date | Date |\n\n# Variable Derivation for Age-Period-Cohort Analysis\n\n## Smoking Initiation Process\n\n### Step 1: Identify Smoking Initiators\n\nThe first step is to identify individuals who have initiated smoking, defined as having smoked at least 100 cigarettes in their lifetime.\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Using our process_smoking_initiation function\ninits_male <- process_smoking_initiation(harmonized_data, sex = \"M\")\ninits_female <- process_smoking_initiation(harmonized_data, sex = \"F\")\n```\n:::\n\n\n\nThe function shown above performs the following steps:\n\n1. Filters respondents by sex\n2. Identifies never-smokers (SMK_01A = 2) and sets their initiation age to 101 (a flag value)\n3. Creates an initiation indicator (init=1 for smokers, init=0 for never-smokers)\n4. Filters to include only those born in 1920 or later\n5. Creates a subset containing only those who initiated smoking\n6. Applies age filter (age \\>= 8)\n7. Calculates period (year of initiation) as cohort + age\n8. 
Returns a dataset with ont_id, weighting, age, cohort, period, and init variables\n\n### Step 2: Create Population Denominators\n\nTo calculate initiation rates, we need the population at risk of initiation at each age, period, and cohort combination.\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example function for creating denominators (not fully implemented yet)\ncreate_initiation_denominators <- function(dataset, sex = \"M\") {\n # Function implementation would go here\n # This creates population counts by age, period, and cohort\n}\n```\n:::\n\n\n\n## Smoking Cessation Process\n\n### Step 1: Identify Smoking Cessation\n\nSimilarly, we identify individuals who have quit smoking and when they quit.\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example function for processing cessation data\nprocess_smoking_cessation <- function(dataset, sex = \"M\") {\n # Function implementation would follow similar steps to initiation\n # but with cessation-specific criteria\n}\n```\n:::\n\n\n\n## Age-Period-Cohort Modeling\n\nThe APC modeling approach uses splines to model age, period, and cohort effects:\n\n1. Natural cubic splines are constructed for age, period, and cohort\n2. These splines are used in a generalized linear model with binomial errors\n3. Survival probabilities are used to adjust for differential mortality\n4. Separate models are fit for males and females\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of creating splines for APC modeling\ncreate_age_splines <- function(ages, knots) {\n # Function to create natural cubic splines for ages\n # Implementation would go here\n}\n```\n:::\n\n\n\n# Applying the Model\n\n## Estimating Smoking Probabilities\n\nOnce the APC models are fitted, they can be used to predict:\n\n1. Initiation probabilities by age, period, and cohort\n2. 
Cessation probabilities by age, period, and cohort\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of predicting initiation probabilities\npredict_initiation <- function(age, period, cohort, sex) {\n # Implementation would use fitted coefficients from APC model\n}\n```\n:::\n\n\n\n## Generating Smoking Histories\n\nComplete smoking histories can be generated using:\n\n1. A microsimulation approach based on the estimated probabilities\n2. Monte Carlo methods to introduce appropriate stochasticity\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of generating a smoking history\ngenerate_smoking_history <- function(birth_year, sex) {\n # Simulation implementation would go here\n}\n```\n:::\n\n\n\n# Validation and Calibration\n\n## Comparing to Historical Data\n\nModel validation involves comparing the generated smoking histories to:\n\n1. Historical smoking prevalence data\n2. Known patterns in smoking behavior by birth cohort\n3. Other published estimates of smoking trends\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example of validating model results\nvalidate_smoking_prevalence <- function(simulated_data, reference_data) {\n # Validation code would go here\n}\n```\n:::\n\n\n\n## Calibration Procedures\n\nIf necessary, model calibration can be performed by:\n\n1. Adjusting the baseline rates\n2. Modifying the period effects\n3. 
Fine-tuning cohort-specific parameters\n\n# Implementation Example\n\nBelow is a simplified example of processing smoking initiation data using the function we've developed:\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create simulated data for testing\ntest_data <- data.frame(\n ont_id = 1001:1010,\n sex = rep(c(\"M\", \"F\"), 5),\n SMK_01A = c(1, 2, 1, 1, 1, 2, 1, 2, 1, 1), # 1=Yes, 2=No to 100+ cigarettes\n agefirst = c(16, NA, 12, 21, 7, NA, 18, NA, 15, 22),\n cchsbdate = as.Date(c(\"2001-06-15\", \"2001-07-20\", \"2001-08-10\", \n \"2001-09-05\", \"2001-10-25\", \"2002-01-15\",\n \"2002-02-20\", \"2002-03-10\", \"2002-04-15\", \"2002-05-20\")),\n weighting = round(runif(10, 100, 300))\n)\n\n# Process the initiation data\nmale_init <- process_smoking_initiation(test_data, sex = \"M\")\nfemale_init <- process_smoking_initiation(test_data, sex = \"F\")\n\n# Examine the results\nprint(male_init)\nprint(female_init)\n```\n:::\n\n\n\n# Conclusion\n\nThis guide has provided an overview of how to:\n\n1. Prepare CCHS data for smoking history analysis\n2. Derive the key variables needed for APC modeling\n3. Implement the core components of the Canadian Smoking History Generator Model\n4. Apply and validate the model\n\nFurther development is ongoing to complete the full implementation of the model in R, including additional validation against historical data and extensions to incorporate more recent survey cycles.\n\n# References\n\nManuel, D.G. et al. (2002). Current and future mortality probabilities in Ontario, 2002 to 2032. *Health Reports*, Statistics Canada.\n\nHolford, T.R. (1991). Understanding the effects of age, period, and cohort on incidence and mortality rates. *Annual Review of Public Health*, 12, 425-457.\n\nCanadian Community Health Survey (CCHS) - Annual Component. Statistics Canada.\n\n# Appendix: Additional Resources\n\n## SAS to R Translation Notes\n\nThe original model was implemented in SAS. Key translation considerations include:\n\n1. 
SAS `PROC GENMOD` is equivalent to R's `glm()` function\n2. SAS natural cubic splines can be created with R's `splines::ns()`\n3. SAS data steps are translated to R's data.frame operations\n4. SAS `%macro` functions are converted to R functions\n\n## Complete Variable Crosswalk\n\nFor a complete mapping of variables across CCHS cycles, refer to the included variable sheets:\n\n- `worksheets/cshgm-variables.csv`\n- `worksheets/cshgm-variable-details.csv`",
+ "supporting": [],
+ "filters": [
+ "rmarkdown/pagebreak.lua"
+ ],
+ "includes": {},
+ "engineDependencies": {},
+ "preserve": {},
+ "postProcess": true
+ }
+}
\ No newline at end of file
diff --git a/_freeze/site_libs/clipboard/clipboard.min.js b/_freeze/site_libs/clipboard/clipboard.min.js
new file mode 100644
index 0000000..1103f81
--- /dev/null
+++ b/_freeze/site_libs/clipboard/clipboard.min.js
@@ -0,0 +1,7 @@
+/*!
+ * clipboard.js v2.0.11
+ * https://clipboardjs.com/
+ *
+ * Licensed MIT © Zeno Rocha
+ */
+!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=1= threshold triggers truncation
+ truncate_percentile: 99 # Upper percentile cap for skewed continuous vars
+
+ # Imputation parameters (MICE)
+ imputation_m: 5 # Number of multiple imputations
+ imputation_maxit: 5 # MICE iterations per imputation
+
+ # Logging
+ log_level: "INFO"
+ log_file: !expr file.path(getwd(), "logs/cshm.log")
+
+ # APC model specifications (Holford knot structure)
+ apc:
+ age_knots: [10, 15, 20, 50, 60]
+ period_knots: [1940, 1950, 1960, 1970, 1980]
+ cohort_knots: [1930, 1940, 1945, 1950, 1955, 1960, 1965, 1970, 1975, 1980]
+ # Period effect constraints (held constant beyond observed range)
+ # Men/women differ for initiation per Manuel et al. (2020)
+ period_constraints:
+ initiation_men_from: 1999
+ initiation_women_from: 2003
+ cessation_from: 2013
+ cohort_constraints:
+ initiation_prior_to: 1920
+ cessation_from: 1985
+ # Period and cohort range for denominator construction
+ # period_max is the last observed CCHS cycle year (PUMF: 2022; Master: 2023 via statscan profile)
+ # projection_max is the final projection year for rate tables and smoking histories
+ period_min: 1965
+ period_max: 2022 # statscan profile overrides to 2023
+ projection_max: 2050 # rate tables and smoking histories projected to this year
+ cohort_min: 1920
+ # Spline implementation
+ # Primary: "nsp" (natural splines via splines2::nsp())
+ # Sensitivity: "rcs" (restricted cubic splines via rms::rcs())
+ spline_library: "splines2"
+ spline_type: "nsp"
+ # Mortality adjustment method
+ # Primary: "mport" (MPoRT algorithm — see protocol §3.4.4)
+ # Sensitivity: "peto" (Peto constant risk ratio, consistent with Holford et al. 2014)
+ # NOTE: mport not yet implemented; peto used during development.
+ mortality_method: "peto"
+ # Subgroup structure for model stratification
+ # Subgroup stratification uses config keys (resolved via survey_var() at runtime)
+ subgroups:
+ national:
+ stratify_by: [sex]
+ provincial:
+ stratify_by: [province, sex]
+
+ # Sensitivity analyses (prespecified; see protocol §3.5)
+ # Each entry documents the parameter change and its protocol rationale.
+ # To run a sensitivity analysis, override the relevant apc: parameter in your
+ # R session or add a named config profile below.
+ sensitivity:
+ spline_type:
+ description: >
+ Test restricted cubic splines (rcs) versus the primary natural splines (nsp).
+ Override: cfg$apc$spline_type <- "rcs"
+ primary: "nsp"
+ alternative: "rcs"
+ mortality_method:
+ description: >
+ Test Peto constant mortality risk ratio versus the primary MPoRT adjustment.
+ Peto is consistent with the original Holford et al. (2014) US implementation.
+ Override: cfg$apc$mortality_method <- "peto"
+ primary: "mport"
+ alternative: "peto"
+ period_constraints:
+ description: >
+ Test alternative period constraint years (e.g. extending to 2003/2007 for
+ initiation, or 2015 for cessation) to assess sensitivity to the assumed
+ stabilisation point. Constraints are held constant at the most recent observed
+ values, so the choice of constraint year affects projections into the future.
+ primary: {initiation_men_from: 1999, initiation_women_from: 2003, cessation_from: 2013}
+ cchs_era:
+ description: >
+ Examination of descriptive discontinuities across CCHS design eras
+ (2001-2005, 2007-2014, 2015-2021, 2022+). Not a formal sensitivity model;
+ addressed through descriptive statistics in Table 1 and considered for
+ SIMEX-style measurement error adjustment if era effects are detected.
+ eras:
+ - label: "2001-2005"
+ cycles: [cchs2001_p, cchs2003_p, cchs2005_p]
+ note: "Biennial, three-frame design"
+ - label: "2007-2014"
+ cycles: [cchs2007_2008_p, cchs2009_2010_p, cchs2011_2012_p, cchs2013_2014_p]
+ note: "Annual continuous collection; major redesign"
+ - label: "2015-2021"
+ cycles: [cchs2015_2016_p, cchs2017_2018_p, cchs2019_2020_p]
+ note: "Major frame and content redesign"
+ - label: "2022+"
+ cycles: [cchs2022_p]
+ note: "Electronic questionnaire mode shift"
+ vaping:
+ description: >
+ Explore alternative assumptions for younger cohorts (born ~1995+) regarding
+ the relationship between vaping initiation and subsequent combustible cigarette
+ smoking: gateway (vaping increases smoking), displacement (vaping replaces
+ smoking), or common liability (shared underlying propensity).
+ note: "Applied to projection scenarios only; does not affect historical APC estimates."
+
+ # Survey year lookup: integer year per SurveyCycle code (1–11)
+ # 2-year cycles use midpoint year (e.g. 2008 for 2007-08)
+ cycle_survey_years:
+ "1": 2001
+ "2": 2003
+ "3": 2005
+ "4": 2008 # 2007-08
+ "5": 2010 # 2009-10
+ "6": 2012 # 2011-12
+ "7": 2014 # 2013-14
+ "8": 2016 # 2015-16
+ "9": 2018 # 2017-18
+ "10": 2020 # 2019-20
+ "11": 2022
+
+draft:
+ # 5% sample from cchsflow-data release files (all 11 cycles).
+ # Files named CCHS_2001.RData with internal object `table`.
+ # Usage: Sys.setenv(R_CONFIG_ACTIVE = "draft"); targets::tar_make()
+ sample_proportion: 0.05
+ log_level: "DEBUG"
+ log_file: !expr file.path(getwd(), "logs/cshm-draft.log")
+ raw_data_dir: !expr path.expand("~/github/cchsflow-data/data/sources/rdata")
+ imputation_m: 1
+ imputation_maxit: 1
+ raw_data_file_map:
+ cchs2001_p: CCHS_2001.RData
+ cchs2003_p: CCHS_2003.RData
+ cchs2005_p: CCHS_2005.RData
+ cchs2007_2008_p: CCHS_2007_2008.RData
+ cchs2009_2010_p: CCHS_2009_2010.RData
+ cchs2011_2012_p: CCHS_2011_2012.RData
+ cchs2013_2014_p: CCHS_2013_2014.RData
+ cchs2015_2016_p: CCHS_2015_2016.RData
+ cchs2017_2018_p: CCHS_2017_2018.RData
+ cchs2019_2020_p: CCHS_2019_2020.RData
+ cchs2022_p: CCHS_2022.RData
+
+dev:
+ # Fast iteration: 10% sample, debug logging, single imputation
+ sample_proportion: 0.1
+ log_level: "DEBUG"
+ log_file: !expr file.path(getwd(), "logs/cshm-dev.log")
+ imputation_m: 1
+ imputation_maxit: 1
+
prod:
- data:
- cchs2001_p: !expr file.path(getwd(), "data/prod/cchs2001_p.RData")
- cchs2003_p: !expr file.path(getwd(), "data/prod/cchs2003_p.RData")
- cchs2005_p: !expr file.path(getwd(), "data/prod/cchs2005_p.RData")
- cchs2007_2008_p: !expr file.path(getwd(), "data/prod/cchs2007_2008_p.RData")
- cchs2009_2010_p: !expr file.path(getwd(), "data/prod/cchs2009_2010_p.RData")
- cchs2012_p: !expr file.path(getwd(), "data/prod/cchs2012_p.RData")
- cchs2013_2014_p: !expr file.path(getwd(), "data/prod/cchs2013_2014_p.RData")
+ # Full PUMF run (same source as default, full sample)
+ strict_validation: true
+ sample_proportion: 1.0
+ log_level: "WARN"
+ log_file: !expr file.path(getwd(), "logs/cshm-prod.log")
+ derived_data:
+ study_data: !expr file.path(getwd(), "data/study_data.rds")
+ analysis_data: !expr file.path(getwd(), "data/analysis_data.rds")
+
+statscan:
+ # Delegates to a local config file with RDC-specific paths.
+ # config/statscan.yml is gitignored — never commit.
+ # Copy config/statscan.yml.example → config/statscan.yml and fill in paths.
+ # Usage: Sys.setenv(R_CONFIG_ACTIVE = "statscan")
+ strict_validation: true
+ local_config: !expr config::get(file = file.path(getwd(), "config/statscan.yml"))
+ apc:
+ period_max: 2023 # Master files include 2023 cycle
+ projection_max: 2050 # same projection horizon as PUMF
diff --git a/config/statscan.yml.example b/config/statscan.yml.example
new file mode 100644
index 0000000..8e9648e
--- /dev/null
+++ b/config/statscan.yml.example
@@ -0,0 +1,37 @@
+# statscan.yml.example
+#
+# Template for Statistics Canada RDC environment.
+# Copy to config/statscan.yml and fill in actual paths.
+# config/statscan.yml is gitignored — never commit real RDC paths.
+#
+# Usage: Sys.setenv(R_CONFIG_ACTIVE = "statscan")
+
+statscan:
+ # Master file paths (exact continuous variables)
+ # SMK_01C, SMK_040 instead of PUMF midpoint estimates
+ raw_data_dir: "/path/to/rdc/cchs/master/rdata"
+ data_type: ".RData"
+ sample_proportion: 1.0
+
+ # Master cycles available at RDC (2001–2023)
+ cchs_cycles:
+ - cchs2001_master
+ - cchs2003_master
+ - cchs2005_master
+ - cchs2007_2008_master
+ - cchs2009_2010_master
+ - cchs2011_2012_master
+ - cchs2013_2014_master
+ - cchs2015_2016_master
+ - cchs2017_2018_master
+ - cchs2019_2020_master
+ - cchs2022_master
+ - cchs2023_master
+
+ # Output paths within RDC vetting folder
+ derived_data:
+ study_data: "/path/to/rdc/output/study_data.rds"
+ analysis_data: "/path/to/rdc/output/analysis_data.rds"
+
+ log_level: "WARN"
+ log_file: "/path/to/rdc/logs/cshm-statscan.log"
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..0e3521a
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1,3 @@
+/.quarto/
+
+**/*.quarto_ipynb
diff --git a/docs/_extensions/docstyle/_extension.yml b/docs/_extensions/docstyle/_extension.yml
new file mode 100644
index 0000000..7c85750
--- /dev/null
+++ b/docs/_extensions/docstyle/_extension.yml
@@ -0,0 +1,28 @@
+title: docstyle
+author: POPCORN Initiative
+version: 0.1.0
+quarto-required: ">=1.4.0"
+contributes:
+ formats:
+ docx:
+ # Reference document generated by pre-render hook from CSS configuration
+ reference-doc: _docstyle/reference.docx
+ # Suppress Quarto's default title block; author-plate.lua renders it instead
+ title-block-style: none
+
+ # Lua filters applied in order
+ # page-section injects section breaks for named page styles (landscape, etc.)
+ # char-style runs at post-quarto to see expanded shortcodes
+ filters:
+ - page-section.lua
+ - toc-field.lua
+ - table-style.lua
+ - figure.lua
+ - list-style.lua
+ - version-history.lua
+ - author-plate.lua
+ - at: post-quarto
+ path: char-style.lua
+ - comment-inject.lua
+ - revisions-inject.lua
+ - zotero-inject.lua
diff --git a/docs/_extensions/docstyle/author-plate.lua b/docs/_extensions/docstyle/author-plate.lua
new file mode 100644
index 0000000..76df490
--- /dev/null
+++ b/docs/_extensions/docstyle/author-plate.lua
@@ -0,0 +1,432 @@
+-- author-plate.lua
+-- Pandoc Lua filter that generates a formatted author plate from YAML metadata
+--
+-- Usage in QMD:
+-- ::: author-plate
+-- :::
+--
+-- Configuration in _quarto.yml (under docstyle.author-plate):
+-- corresponding-marker: "*" # Symbol for corresponding author
+-- equal-marker: "†" # Symbol for equal contributors
+-- show-orcid: false # Show ORCID after author name
+-- show-email: true # Show corresponding author email
+-- affiliation-style: numbered # numbered (superscripts) or inline
+--
+-- Author metadata in QMD YAML front matter (Quarto manuscript format):
+-- author:
+-- - name:
+-- given: "First"
+-- family: "Last"
+-- orcid: "0000-0000-0000-0000"
+-- email: "author@example.com"
+-- corresponding: true
+-- equal-contributor: true
+-- affiliations:
+-- - ref: inst1
+-- affiliations:
+-- - id: inst1
+-- name: "Institution Name"
+-- department: "Department"
+-- city: "City"
+-- region: "Province"
+-- country: "Country"
+
+-- Load shared field code utilities
+-- (this file uses fcu.xml_escape, fcu.build_div_field_start and
+-- fcu.build_block_field_end)
+local fcu = require("field-code-utils")
+
+-- NOTE(review): this file-level local shadows pandoc's global FORMAT for the
+-- rest of the file, so every later `FORMAT` comparison sees "openxml"
+-- regardless of the real output format — confirm whether that is intended.
+local FORMAT = "openxml"
+
+-- Store metadata
+-- `authors` and `affiliations` are assigned by Meta() and read when the
+-- plate is built; both stay nil if the document carries no such metadata.
+local authors = nil
+local affiliations = nil
+-- Defaults for the docstyle.author-plate options; Meta() overwrites the
+-- fields for which the document supplies a value.
+local config = {
+ corresponding_marker = "*",
+ equal_marker = "†",
+ show_orcid = false,
+ show_email = true,
+ affiliation_style = "numbered"
+}
+
+-- Unicode superscript digits
+-- Keys are single-character digit strings, matching how to_superscript()
+-- looks characters up.
+local superscripts = {
+ ["0"] = "⁰", ["1"] = "¹", ["2"] = "²", ["3"] = "³", ["4"] = "⁴",
+ ["5"] = "⁵", ["6"] = "⁶", ["7"] = "⁷", ["8"] = "⁸", ["9"] = "⁹"
+}
+
+-- Use shared xml_escape from field-code-utils
+local xml_escape = fcu.xml_escape
+
+-- Render a number with Unicode superscript digits.
+-- The value is converted with tostring(); any character that has no entry in
+-- `superscripts` (for example "-" or ".") is passed through unchanged.
+local function to_superscript(num)
+  -- With a table as the replacement, gsub keeps the matched character when
+  -- the lookup yields nil. The parentheses discard gsub's match count.
+  return (tostring(num):gsub(".", superscripts))
+end
+
+-- Produce the display name for one author entry.
+-- Resolution order when the entry has a `name` field:
+--   1. name.literal, stringified
+--   2. "given family" when both are non-empty
+--   3. whichever of family / given is non-empty (family preferred)
+-- Entries without a `name` field are stringified as a whole.
+-- Always returns a string; "" when nothing usable is found.
+local function get_author_name(author)
+  local name = author["name"]
+
+  if not name then
+    -- No structured name: fall back to the whole author object
+    return pandoc.utils.stringify(author) or ""
+  end
+
+  if name["literal"] then
+    return pandoc.utils.stringify(name["literal"])
+  end
+
+  local given = ""
+  if name["given"] then
+    given = pandoc.utils.stringify(name["given"])
+  end
+
+  local family = ""
+  if name["family"] then
+    family = pandoc.utils.stringify(name["family"])
+  end
+
+  if given ~= "" and family ~= "" then
+    return given .. " " .. family
+  end
+  if family ~= "" then
+    return family
+  end
+  return given
+end
+
+-- Index affiliation metadata for numbering and lookup.
+-- Accepts raw `affiliations` entries or Quarto's by-affiliation list.
+-- Returns two tables:
+--   map     : affiliation id (string) -> entry, only for entries with an id
+--   ordered : every entry, in input order
+-- Each entry is { id, number, display }: `number` is the 1-based position in
+-- the input and `display` joins whichever of department, name, city, region
+-- and country are present, in that order, with ", ".
+-- A nil input yields two empty tables.
+local function build_affiliation_map(affs)
+  local by_id, in_order = {}, {}
+  if not affs then
+    return by_id, in_order
+  end
+
+  -- Display order: Department, Name, City, Region, Country
+  local fields = { "department", "name", "city", "region", "country" }
+
+  for position, aff in ipairs(affs) do
+    local key = nil
+    if aff["id"] then
+      key = pandoc.utils.stringify(aff["id"])
+    end
+
+    local pieces = {}
+    for _, field in ipairs(fields) do
+      if aff[field] then
+        pieces[#pieces + 1] = pandoc.utils.stringify(aff[field])
+      end
+    end
+
+    local entry = {
+      id = key,
+      number = position,
+      display = table.concat(pieces, ", ")
+    }
+
+    if key then
+      by_id[key] = entry
+    end
+    in_order[#in_order + 1] = entry
+  end
+
+  return by_id, in_order
+end
+
+-- Collect the affiliation numbers that belong to one author.
+-- Each table-valued item in author.affiliations is identified by its `id`
+-- (Quarto's resolved form) or, failing that, its `ref`. Items that are not
+-- tables, or whose identifier is missing from aff_map, are ignored.
+-- Returns a list of numbers in the order the author lists them.
+local function get_author_affiliations(author, aff_map)
+  local found = {}
+  local listed = author["affiliations"]
+  if not listed then
+    return found
+  end
+
+  for _, item in ipairs(listed) do
+    if type(item) == "table" then
+      local raw_key = item["id"] or item["ref"]
+      if raw_key then
+        local entry = aff_map[pandoc.utils.stringify(raw_key)]
+        if entry then
+          found[#found + 1] = entry.number
+        end
+      end
+    end
+  end
+
+  return found
+end
+
+-- Report whether an author carries a truthy flag such as "corresponding".
+-- The flag is looked up in author.attributes first (Quarto's normalized
+-- location) and then on the author entry itself. A boolean is returned
+-- as-is; any other value counts as set only when it stringifies to "true"
+-- or "1". A missing (or false) flag in both places yields false.
+local function has_attribute(author, attr)
+  local function is_set(val)
+    if type(val) == "boolean" then
+      return val
+    end
+    local text = pandoc.utils.stringify(val)
+    return text == "true" or text == "1"
+  end
+
+  local nested = author["attributes"]
+  local val = nested and nested[attr]
+  if val then
+    return is_set(val)
+  end
+
+  -- Top-level fallback
+  val = author[attr]
+  if val then
+    return is_set(val)
+  end
+
+  return false
+end
+
+-- Meta pass: capture authors, affiliations and author-plate options.
+--
+-- Authors come from the first source present: docstyle.authors, by-author
+-- (Quarto's normalized form), then author. Affiliations follow the same
+-- pattern: docstyle.affiliations, by-affiliation, then affiliations.
+-- The docstyle.* sources trigger a deprecation warning on stderr, which is
+-- suppressed when docstyle.author-plate.enabled is false, because
+-- docstyle.authors is the correct pattern for a disabled plate.
+-- Results are stored in the file-level `authors`, `affiliations` and
+-- `config`; the metadata itself is left unchanged (returns nil).
+function Meta(meta)
+  -- Booleans pass through; anything else is true only if it stringifies
+  -- to "true".
+  local function as_flag(val)
+    return (type(val) == "boolean" and val) or
+           (pandoc.utils.stringify(val) == "true")
+  end
+
+  local docstyle = meta.docstyle
+  local plate_opts = docstyle and docstyle["author-plate"]
+
+  -- Decide this before loading authors so the warnings below can honour it
+  local plate_enabled = true
+  if plate_opts and plate_opts["enabled"] ~= nil then
+    plate_enabled = as_flag(plate_opts["enabled"])
+  end
+
+  -- Authors
+  local legacy_authors = docstyle and docstyle["authors"]
+  if legacy_authors then
+    authors = legacy_authors
+    if plate_enabled then
+      io.stderr:write("[author-plate] Warning: docstyle.authors is deprecated. " ..
+                      "Use standard Quarto author: metadata instead. " ..
+                      "See https://quarto.org/docs/journals/authors.html\n")
+    end
+    io.stderr:write("[author-plate] Found " .. #authors .. " authors (from docstyle.authors)\n")
+  elseif meta["by-author"] then
+    authors = meta["by-author"]
+    io.stderr:write("[author-plate] Found " .. #authors .. " authors (from by-author)\n")
+  elseif meta.author then
+    authors = meta.author
+    io.stderr:write("[author-plate] Found " .. #authors .. " authors (from author - basic)\n")
+  end
+
+  -- Affiliations (same priority scheme as authors)
+  local legacy_affiliations = docstyle and docstyle["affiliations"]
+  if legacy_affiliations then
+    affiliations = legacy_affiliations
+    if plate_enabled then
+      io.stderr:write("[author-plate] Warning: docstyle.affiliations is deprecated. " ..
+                      "Use standard Quarto affiliations: metadata instead.\n")
+    end
+    io.stderr:write("[author-plate] Found " .. #affiliations .. " affiliations (from docstyle.affiliations)\n")
+  elseif meta["by-affiliation"] then
+    affiliations = meta["by-affiliation"]
+    io.stderr:write("[author-plate] Found " .. #affiliations .. " affiliations (from by-affiliation)\n")
+  elseif meta.affiliations then
+    affiliations = meta.affiliations
+    io.stderr:write("[author-plate] Found " .. #affiliations .. " affiliations\n")
+  end
+
+  -- Plate options: text options apply when present and truthy, flag options
+  -- whenever they are not nil (so an explicit false is honoured).
+  if plate_opts then
+    local options = {
+      { key = "corresponding-marker", field = "corresponding_marker", flag = false },
+      { key = "equal-marker",         field = "equal_marker",         flag = false },
+      { key = "show-orcid",           field = "show_orcid",           flag = true },
+      { key = "show-email",           field = "show_email",           flag = true },
+      { key = "affiliation-style",    field = "affiliation_style",    flag = false },
+    }
+    for _, opt in ipairs(options) do
+      local val = plate_opts[opt.key]
+      if opt.flag then
+        if val ~= nil then
+          config[opt.field] = as_flag(val)
+        end
+      elseif val then
+        config[opt.field] = pandoc.utils.stringify(val)
+      end
+    end
+  end
+
+  return nil
+end
+
+-- Build the author plate XML
+--
+-- Reads the file-level `authors`, `affiliations` and `config` set by Meta().
+-- Returns nil when there are no authors; otherwise returns one string made
+-- by concatenating, in order: the author line, a spacer, one line per
+-- affiliation, a spacer, and the optional corresponding-author and
+-- equal-contribution lines.
+--
+-- NOTE(review): in this copy the quoted fragments around the escaped text
+-- are empty strings (''), although the comments describe runs, paragraphs
+-- and named styles. The OOXML tags appear to have been stripped — confirm
+-- against the upstream docstyle source before relying on this output.
+-- NOTE(review): config.affiliation_style is never consulted here; the
+-- numbered (superscript) layout is always produced — confirm intended.
+local function build_author_plate_xml()
+ if not authors or #authors == 0 then
+ return nil
+ end
+
+ local aff_map, aff_ordered = build_affiliation_map(affiliations)
+ local blocks = {}
+
+ -- Build author line with superscript affiliations
+ local author_runs = {}
+ local corresponding_email = nil
+ local has_equal_contributors = false
+
+ for i, author in ipairs(authors) do
+ local name = get_author_name(author)
+ local aff_nums = get_author_affiliations(author, aff_map)
+ local is_corresponding = has_attribute(author, "corresponding")
+ local is_equal = has_attribute(author, "equal-contributor")
+
+ if is_equal then has_equal_contributors = true end
+
+ -- Get email for corresponding author
+ -- (overwritten on each match, so the last corresponding author with an
+ -- email is the one reported)
+ if is_corresponding and author.email then
+ corresponding_email = pandoc.utils.stringify(author.email)
+ end
+
+ -- Build superscript string
+ -- Order: affiliation numbers, then corresponding marker, then equal
+ -- marker, joined with commas
+ local superscript_parts = {}
+ for _, num in ipairs(aff_nums) do
+ table.insert(superscript_parts, to_superscript(num))
+ end
+ if is_corresponding then
+ table.insert(superscript_parts, config.corresponding_marker)
+ end
+ if is_equal then
+ table.insert(superscript_parts, config.equal_marker)
+ end
+ local superscript_str = table.concat(superscript_parts, ",")
+
+ -- Add ORCID if configured
+ local orcid_str = ""
+ if config.show_orcid and author.orcid then
+ orcid_str = " " .. pandoc.utils.stringify(author.orcid)
+ end
+
+ -- Build run XML for this author
+ local author_xml = '' .. xml_escape(name) .. ''
+
+ -- Add superscript
+ if superscript_str ~= "" then
+ author_xml = author_xml ..
+ '' ..
+ '' .. xml_escape(superscript_str) .. ''
+ end
+
+ -- Add ORCID
+ if orcid_str ~= "" then
+ author_xml = author_xml .. '' .. xml_escape(orcid_str) .. ''
+ end
+
+ -- Add separator (comma) unless last author
+ if i < #authors then
+ author_xml = author_xml .. ', '
+ end
+
+ table.insert(author_runs, author_xml)
+ end
+
+ -- Author paragraph (centered, Author style)
+ local author_para = '' ..
+ '' ..
+ table.concat(author_runs) ..
+ ''
+ table.insert(blocks, author_para)
+
+ -- Empty paragraph for spacing
+ table.insert(blocks, '')
+
+ -- Affiliation lines (using Affiliation style)
+ -- Every entry in aff_ordered is listed, whether or not an author
+ -- references it.
+ for _, aff in ipairs(aff_ordered) do
+ local aff_line = to_superscript(aff.number) .. " " .. aff.display
+ local aff_para = '' ..
+ '' ..
+ '' .. xml_escape(aff_line) .. '' ..
+ ''
+ table.insert(blocks, aff_para)
+ end
+
+ -- Empty paragraph for spacing before footnotes
+ table.insert(blocks, '')
+
+ -- Corresponding author line (using Affiliation style for consistency)
+ -- Emitted only when show_email is on and a corresponding author supplied
+ -- an email.
+ if config.show_email and corresponding_email then
+ local corr_line = config.corresponding_marker .. "Corresponding author: " .. corresponding_email
+ local corr_para = '' ..
+ '' ..
+ '' .. xml_escape(corr_line) .. '' ..
+ ''
+ table.insert(blocks, corr_para)
+ end
+
+ -- Equal contributors line (using Affiliation style for consistency)
+ if has_equal_contributors then
+ local equal_line = config.equal_marker .. "These authors contributed equally to this work"
+ local equal_para = '' ..
+ '' ..
+ '' .. xml_escape(equal_line) .. '' ..
+ ''
+ table.insert(blocks, equal_para)
+ end
+
+ return table.concat(blocks)
+end
+
+-- Replace a `.author-plate` Div with the generated author plate.
+--
+-- Returns:
+--   nil  - leave the Div untouched (no author-plate class, or the output
+--          format is not Word)
+--   {}   - drop the Div (no author metadata, or nothing could be built)
+--   list - three openxml RawBlocks: field start, plate XML, field end
+function Div(div)
+  -- Check if this div has the "author-plate" class
+  if not div.classes:includes("author-plate") then
+    return nil
+  end
+
+  -- Only process for docx output.
+  -- The file-level `local FORMAT = "openxml"` shadows pandoc's FORMAT
+  -- global, so comparing against it can never detect another target. Read
+  -- the real output format from the global table instead. If it is not
+  -- available, proceed as before rather than skip.
+  local target = _G.FORMAT
+  if target ~= nil and target ~= "docx" and target ~= "openxml" then
+    io.stderr:write("[author-plate] Skipping (not docx output)\n")
+    return nil
+  end
+
+  if not authors or #authors == 0 then
+    io.stderr:write("[author-plate] No author metadata found\n")
+    return {} -- Remove the div entirely
+  end
+
+  io.stderr:write("[author-plate] Generating author plate with " .. #authors .. " authors\n")
+
+  -- Build the author plate XML
+  local plate_xml = build_author_plate_xml()
+  if not plate_xml then
+    return {}
+  end
+
+  -- Wrap in ADDIN DOCSTYLE field code (using shared utility)
+  return {
+    pandoc.RawBlock("openxml", fcu.build_div_field_start("author-plate")),
+    pandoc.RawBlock("openxml", plate_xml),
+    pandoc.RawBlock("openxml", fcu.build_block_field_end())
+  }
+end
+
+-- Check output format
+-- NOTE(review): `FORMAT` here resolves to the file-level local (already
+-- "openxml"), so the assignment changes nothing. This function is also not
+-- part of the filter list returned below, so pandoc presumably never calls
+-- it — confirm whether it should be registered or removed.
+function Pandoc(doc)
+ if FORMAT == "docx" or FORMAT == "openxml" then
+ FORMAT = "openxml"
+ end
+ return nil
+end
+
+-- Two filters applied in sequence: Meta runs over the document first so that
+-- authors, affiliations and config are populated before Div builds the plate.
+return {
+ { Meta = Meta },
+ { Div = Div }
+}
diff --git a/docs/_extensions/docstyle/char-style.lua b/docs/_extensions/docstyle/char-style.lua
new file mode 100644
index 0000000..07c36a8
--- /dev/null
+++ b/docs/_extensions/docstyle/char-style.lua
@@ -0,0 +1,172 @@
+-- char-style.lua
+-- Pandoc Lua filter that converts spans with style classes to Word character styles
+--
+-- Usage in QMD:
+-- Date: [{{< meta version-summary.date >}}]{.date} -- Shortcode with styling
+-- Date: []{.date} -- Auto-populated from metadata
+-- Custom: [my text]{.date} -- Explicit content
+--
+-- All three syntaxes work. Empty spans auto-populate from version-summary metadata.
+-- Shortcode syntax is preferred as it's explicit and works with any metadata field.
+--
+-- This applies w:rStyle to the run, creating a character-level style in Word.
+-- The style must exist in reference.docx (generated from CSS via docstyle).
+--
+-- Round-trip support: Each styled span is wrapped in an ADDIN DOCSTYLE field code
+-- that carries the original QMD source as JSON metadata. During harvest, the field
+-- code's instrText is parsed to restore the exact QMD source (e.g., shortcodes).
+-- See development/spec-round-trip-mechanism.md for the full specification.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+-- Debug logging: enabled only when the environment has DOCSTYLE_DEBUG=1.
+-- Note that this local `debug` shadows Lua's standard debug library for the
+-- rest of this file.
+local DEBUG = (os.getenv("DOCSTYLE_DEBUG") == "1")
+
+-- Write `msg` verbatim to stderr when debugging is on; otherwise do nothing.
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write(msg)
+end
+
+-- Metadata values for auto-population (set in Meta filter).
+-- Keys are span class names: Span looks up meta_values[matched_class] when a
+-- styled span has no text. Both entries stay nil until Meta finds the
+-- corresponding version-summary field.
+local meta_values = {
+ date = nil,
+ version = nil
+}
+
+-- Resolve the Word style ID for a span class.
+-- The schema entry's `word_style` wins when present; otherwise a built-in
+-- table covers the four default classes. Returns nil for unknown classes.
+local function get_style_id(class)
+  local schema_entry = fcu.get_char_class(class)
+  local schema_style = schema_entry and schema_entry.word_style
+  if schema_style then
+    return schema_style
+  end
+
+  -- Built-in mapping used when the schema has no entry for the class.
+  local builtin_styles = {
+    date = "Date",
+    version = "Version",
+    author = "Author",
+    affiliation = "Affiliation"
+  }
+  return builtin_styles[class]
+end
+
+-- List of supported style classes (for iteration).
+-- Order matters: Span stops at the first class in this list that the span
+-- carries, so earlier entries take precedence.
+local supported_classes = {"date", "version", "author", "affiliation"}
+
+-- Convert a span carrying one of the supported style classes into a raw
+-- "openxml" inline holding an ADDIN DOCSTYLE character field code.
+--
+-- Returns nil (span left as-is) when the output is not Word, when no
+-- supported class resolves to a style, or when the span is empty and no
+-- metadata value exists to fill it.
+function Span(el)
+  local word_output = (FORMAT == "docx") or (FORMAT == "openxml")
+  if not word_output then
+    return nil
+  end
+
+  -- First supported class present on the span decides the style.
+  local matched_class, style_id
+  for i = 1, #supported_classes do
+    local candidate = supported_classes[i]
+    if el.classes:includes(candidate) then
+      matched_class = candidate
+      style_id = get_style_id(candidate)
+      break
+    end
+  end
+  if not style_id then
+    return nil
+  end
+
+  local text = fcu.inlines_to_text(el.content)
+  local has_text = (text ~= nil) and (text ~= "")
+
+  if has_text then
+    debug("[char-style] Applying style '" .. style_id .. "' to: " .. text .. "\n")
+  elseif matched_class and meta_values[matched_class] then
+    -- Empty span: fall back to the value captured by the Meta pass.
+    text = meta_values[matched_class]
+    debug("[char-style] Auto-populated '" .. style_id .. "' from metadata: " .. text .. "\n")
+  else
+    debug("[char-style] Warning: Empty span with style '" .. style_id .. "' and no metadata value\n")
+    return nil -- Return nil to keep span as-is if we can't populate it
+  end
+
+  -- Build field code XML using shared utility
+  local field_xml = fcu.build_char_field_code(style_id, text, matched_class)
+  debug("[char-style] Emitting field code for '" .. matched_class .. "'\n")
+
+  return pandoc.RawInline('openxml', field_xml)
+end
+
+-- Centre the paragraphs of a `.center` Div for Word output.
+--
+-- Each Para inside the Div is replaced by a raw "openxml" paragraph whose
+-- alignment is set to center; every other block is passed through unchanged.
+-- Returns nil when the output is not Word or the Div lacks the class.
+--
+-- NOTE(review): the OOXML literals in this function were empty in the
+-- reviewed copy; the markup below is reconstructed from the concatenation
+-- skeleton and the "inject the alignment via RawBlock" comment — confirm
+-- against upstream docstyle.
+function Div(el)
+  -- Only process for Word output
+  if FORMAT ~= "docx" and FORMAT ~= "openxml" then
+    return nil
+  end
+
+  -- Check if this div has the center class
+  if not el.classes:includes('center') then
+    return nil
+  end
+
+  debug("[char-style] Applying center alignment to div\n")
+
+  -- One plain text run; xml:space keeps leading/trailing blanks significant.
+  local function text_run(s)
+    return '<w:r><w:t xml:space="preserve">' .. fcu.xml_escape(s) .. '</w:t></w:r>'
+  end
+
+  local result = {}
+  for _, block in ipairs(el.content) do
+    if block.t == "Para" then
+      local runs = {}
+      for _, inline in ipairs(block.content) do
+        if inline.t == "RawInline" and inline.format == "openxml" then
+          -- Already OOXML (e.g. produced by the Span pass): keep verbatim.
+          runs[#runs + 1] = inline.text
+        elseif inline.t == "Str" then
+          runs[#runs + 1] = text_run(inline.text)
+        elseif inline.t == "Space" then
+          runs[#runs + 1] = text_run(" ")
+        else
+          -- Any other inline (emphasis, links, soft breaks, ...) used to be
+          -- dropped silently. Keep at least its plain text; inline
+          -- formatting is not reproduced.
+          local plain = pandoc.utils.stringify(inline)
+          if plain ~= "" then
+            runs[#runs + 1] = text_run(plain)
+          end
+        end
+      end
+
+      local para_xml = '<w:p><w:pPr><w:jc w:val="center"/></w:pPr>' ..
+        table.concat(runs) .. '</w:p>'
+      result[#result + 1] = pandoc.RawBlock('openxml', para_xml)
+    else
+      -- Keep other blocks as-is
+      result[#result + 1] = block
+    end
+  end
+
+  return result
+end
+
+-- Capture version-summary.date and version-summary.version (stringified)
+-- into meta_values so that empty styled spans can be filled later.
+-- Always returns nil: the metadata itself is not changed.
+function Meta(meta)
+  if FORMAT == "docx" or FORMAT == "openxml" then
+    debug("[char-style] Filter active for Word output\n")
+  end
+
+  local vs = meta["version-summary"]
+  if not vs then
+    return nil
+  end
+
+  if vs.date then
+    meta_values.date = pandoc.utils.stringify(vs.date)
+    debug("[char-style] Found version-summary.date: " .. meta_values.date .. "\n")
+  end
+
+  if vs.version then
+    meta_values.version = pandoc.utils.stringify(vs.version)
+    debug("[char-style] Found version-summary.version: " .. meta_values.version .. "\n")
+  end
+
+  return nil
+end
+
+-- Filter order: Meta first (to extract values), then Span (character styles), then Div (centering).
+-- Span must precede Div because Div keeps the raw openxml inlines that Span
+-- produced when it rebuilds centred paragraphs.
+return {
+ { Meta = Meta },
+ { Span = Span },
+ { Div = Div }
+}
diff --git a/docs/_extensions/docstyle/comment-inject.lua b/docs/_extensions/docstyle/comment-inject.lua
new file mode 100644
index 0000000..403770d
--- /dev/null
+++ b/docs/_extensions/docstyle/comment-inject.lua
@@ -0,0 +1,162 @@
+-- comment-inject.lua
+-- Pandoc Lua filter that converts comment markers to OpenXML comment markers
+--
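+-- Supported formats (HTML comments only; the exact marker syntax is whatever
+-- fcu.parse_comment_marker recognises):
+-- 1. Range comment: a start marker, the commented text, then an end marker
+--    carrying the same id
+-- 2. Point comment: a single self-contained marker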
+--
+-- The HTML comment format is robust because it can span complex structures
+-- like tracked changes, multiple paragraphs, and nested formatting without
+-- breaking Pandoc's parsing.
+--
+-- Fallback behavior:
+-- - Start marker without end: Converts to point comment at document end
+-- - End marker without start: Ignored (orphan end markers are harmless)
+--
+-- After rendering, R post-processing (inject_comments) adds the actual
+-- comments.xml file to the DOCX container.
+
+-- Debug logging: active only when the environment has DOCSTYLE_DEBUG=1.
+local DEBUG = (os.getenv("DOCSTYLE_DEBUG") == "1")
+
+-- Emit `msg` unchanged on stderr when debugging is enabled.
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write(msg)
+end
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+local xml_escape = fcu.xml_escape
+local parse_comment_marker = fcu.parse_comment_marker
+
+-- Track comment states for fallback handling:
+-- "started" = saw start marker, waiting for end
+-- "completed" = saw both start and end (proper range comment)
+-- "point" = point comment (no range, just a marker)
+-- Keyed by comment id. Written by RawInline as markers are encountered and
+-- read by Pandoc afterwards to find ranges that were never closed.
+local comment_states = {}
+
+-- Generate OpenXML for comment range start.
+-- @param id comment id shared with the matching end/reference markers
+-- @return a w:commentRangeStart element as a string
+-- NOTE(review): the literal was empty in the reviewed copy (so `id` was
+-- ignored); restored to the standard WordprocessingML element — confirm
+-- against upstream docstyle.
+local function comment_start_xml(id)
+  return '<w:commentRangeStart w:id="' .. xml_escape(tostring(id)) .. '"/>'
+end
+
+-- Generate OpenXML for comment range end (includes the clickable reference marker).
+-- @param id comment id used by the matching start marker
+-- @return w:commentRangeEnd followed by a run holding w:commentReference
+-- NOTE(review): both literals were empty in the reviewed copy (so `id` was
+-- ignored); restored to the standard WordprocessingML elements — confirm
+-- against upstream docstyle.
+local function comment_end_xml(id)
+  local id_attr = xml_escape(tostring(id))
+  return '<w:commentRangeEnd w:id="' .. id_attr .. '"/>' ..
+    '<w:r><w:commentReference w:id="' .. id_attr .. '"/></w:r>'
+end
+
+-- Generate OpenXML for point comment (start + end + reference together).
+-- @param id comment id
+-- @return an empty comment range immediately followed by its reference run
+-- NOTE(review): all three literals were empty in the reviewed copy (so `id`
+-- was ignored); restored to the standard WordprocessingML elements —
+-- confirm against upstream docstyle.
+local function comment_point_xml(id)
+  local id_attr = xml_escape(tostring(id))
+  return '<w:commentRangeStart w:id="' .. id_attr .. '"/>' ..
+    '<w:commentRangeEnd w:id="' .. id_attr .. '"/>' ..
+    '<w:r><w:commentReference w:id="' .. id_attr .. '"/></w:r>'
+end
+
+-- Translate HTML comment markers into raw "openxml" comment markers and
+-- record each comment's state in comment_states.
+--
+-- Returns nil (element untouched) for non-Word output, non-HTML raw inlines,
+-- and raw HTML that parse_comment_marker does not recognise.
+function RawInline(el)
+  local word_output = (FORMAT == "docx") or (FORMAT == "openxml")
+  if not word_output then
+    return nil
+  end
+
+  -- Markers only ever arrive as raw HTML.
+  if el.format ~= "html" then
+    return nil
+  end
+
+  local id, marker_type = parse_comment_marker(el.text)
+  if not id then
+    return nil -- Not a comment marker, leave as-is
+  end
+
+  if marker_type == "point" then
+    -- Self-contained marker: emit start, end and reference in one go.
+    debug("[comment-inject] Found point comment id=" .. id .. "\n")
+    comment_states[id] = "point"
+    return pandoc.RawInline('openxml', comment_point_xml(id))
+  end
+
+  if marker_type == "start" then
+    -- Only the opening marker is emitted here; the close comes from the
+    -- matching end marker or from the Pandoc pass at document end.
+    debug("[comment-inject] Found comment start marker id=" .. id .. "\n")
+    comment_states[id] = "started"
+    return pandoc.RawInline('openxml', comment_start_xml(id))
+  end
+
+  if marker_type ~= "end" then
+    return nil
+  end
+
+  local state = comment_states[id]
+  if state == "started" then
+    -- Normal case: this end closes a start seen earlier.
+    debug("[comment-inject] Found comment end marker id=" .. id .. "\n")
+    comment_states[id] = "completed"
+    return pandoc.RawInline('openxml', comment_end_xml(id))
+  end
+
+  if state == "completed" then
+    debug("[comment-inject] Warning: duplicate end marker id=" .. id .. " (ignoring)\n")
+  else
+    debug("[comment-inject] Warning: orphan end marker id=" .. id .. " (no matching start, ignoring)\n")
+  end
+  -- Duplicate and orphan end markers are replaced by an empty raw inline,
+  -- which effectively removes them.
+  return pandoc.RawInline('openxml', '')
+end
+
+-- Handle orphan start markers at document end.
+--
+-- Any comment still in the "started" state after the RawInline pass gets its
+-- end+reference markers appended to the last block of the document (or to a
+-- new Plain block when the last block is not a Para/Plain).
+-- Returns nil when nothing has to change, otherwise the modified document.
+function Pandoc(doc)
+  if FORMAT ~= "docx" and FORMAT ~= "openxml" then
+    return nil
+  end
+
+  debug("[comment-inject] Filter active for Word output\n")
+
+  -- Collect comments that were opened but never closed.
+  local orphan_ids = {}
+  for id, state in pairs(comment_states) do
+    if state == "started" then
+      orphan_ids[#orphan_ids + 1] = id
+    end
+  end
+
+  if #orphan_ids == 0 then
+    return nil -- No orphans, document unchanged
+  end
+
+  -- pairs() yields keys in an unspecified order; sort so the closing markers
+  -- are emitted in the same order on every render.
+  table.sort(orphan_ids, function(a, b)
+    return tostring(a) < tostring(b)
+  end)
+
+  debug("[comment-inject] Warning: " .. #orphan_ids .. " orphan start marker(s) found, closing at document end\n")
+  for _, id in ipairs(orphan_ids) do
+    debug("[comment-inject] - Orphan comment id=" .. id .. "\n")
+    comment_states[id] = "point"
+  end
+
+  -- Inject the end+reference markers at the very end of the document.
+  -- An empty document is returned without any injected markers.
+  if #doc.blocks > 0 then
+    local last_block = doc.blocks[#doc.blocks]
+
+    local closing_inlines = {}
+    for _, id in ipairs(orphan_ids) do
+      closing_inlines[#closing_inlines + 1] =
+        pandoc.RawInline('openxml', comment_end_xml(id))
+    end
+
+    if last_block.t == "Para" or last_block.t == "Plain" then
+      -- Inline container: append the closures to its content.
+      for _, inline in ipairs(closing_inlines) do
+        table.insert(last_block.content, inline)
+      end
+    else
+      -- For other block types, add a new Plain block with the closures
+      table.insert(doc.blocks, pandoc.Plain(closing_inlines))
+    end
+  end
+
+  return doc
+end
+
+-- Two passes, in this order; the Pandoc pass relies on comment_states having
+-- been filled by the RawInline pass.
+return {
+ -- First pass: process inline elements (populates comment_states)
+ { RawInline = RawInline },
+ -- Second pass: handle orphan comments at document level
+ { Pandoc = Pandoc }
+}
diff --git a/docs/_extensions/docstyle/default.css b/docs/_extensions/docstyle/default.css
new file mode 100644
index 0000000..320ba97
--- /dev/null
+++ b/docs/_extensions/docstyle/default.css
@@ -0,0 +1,339 @@
+/* docstyle Default Styles
+ * CIHR-compliant formatting for Canadian health research documents
+ *
+ * Based on CIHR Application Formatting Requirements:
+ * https://cihr-irsc.gc.ca/e/29300.html
+ *
+ * Requirements:
+ * - Font: minimum 12pt Times New Roman, black (can be larger)
+ * - Margins: 2cm (0.79in) minimum, strictly enforced
+ * - Line spacing: single minimum
+ * - Page size: letter (8.5" x 11")
+ */
+
+/* ==========================================================================
+ TYPOGRAPHY - CIHR Compliant
+ ========================================================================== */
+
+/* Body text - 12pt Times New Roman, single spaced */
+p, body {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+ line-height: 1;
+}
+
+/* Headings - Times New Roman, all 12pt; levels are distinguished by weight and style (h1-h2 bold, h3 bold italic, h4 italic, h5 underlined) */
+h1 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+h2 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+h3 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ font-style: italic;
+ color: #000000;
+}
+
+h4 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ font-style: italic;
+ color: #000000;
+}
+
+h5 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ text-decoration: underline;
+ color: #000000;
+}
+
+/* Links - blue for visibility */
+a {
+ color: #0000EE;
+}
+
+/* ==========================================================================
+ DOCUMENT STRUCTURE
+ ========================================================================== */
+
+/* Title - bold, slightly larger */
+.title {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 14pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+/* Subtitle */
+.subtitle {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+}
+
+/* Author and affiliation blocks */
+.author {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ text-align: center;
+}
+
+.affiliation {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-style: italic;
+ text-align: center;
+ color: #000000;
+}
+
+/* Date and version display */
+.date {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ text-align: center;
+}
+
+.version {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ text-align: center;
+}
+
+/* Header and footer */
+.header {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ color: #000000;
+}
+
+.footer {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ color: #000000;
+}
+
+/* ==========================================================================
+ TABLE OF CONTENTS
+ ========================================================================== */
+
+.toc-heading {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+.toc-1 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+ line-height: 1;
+}
+
+.toc-2 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+ margin-left: 12pt;
+ line-height: 1;
+}
+
+.toc-3 {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: normal;
+ color: #000000;
+ margin-left: 24pt;
+ line-height: 1;
+}
+
+/* ==========================================================================
+ BODY TEXT VARIANTS
+ Pandoc assigns content to BodyText/FirstParagraph/Compact — not Normal.
+ These styles inherit from Normal via basedOn chains in reference.docx.
+ Use these selectors only when you need to override the inherited values.
+ ========================================================================== */
+
+/* .body-text — inherits from Normal; override only if body paragraphs
+ need different formatting than headings or other Normal-based styles */
+/* .body-text { } */
+
+/* .first-paragraph — first paragraph after a heading (basedOn BodyText) */
+/* .first-paragraph { } */
+
+/* .compact — tight list paragraphs (basedOn BodyText) */
+/* .compact { margin-bottom: 0; } */
+
+/* blockquote — block quotations (basedOn BodyText) */
+/* blockquote { margin-left: 0.5in; margin-right: 0.5in; } */
+
+/* ==========================================================================
+ CAPTIONS AND FIGURES
+ ========================================================================== */
+
+caption, .caption {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ color: #000000;
+}
+
+.table-caption {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+.image-caption {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-style: italic;
+ color: #000000;
+}
+
+/* .figure — image container paragraph */
+/* .figure { text-align: center; } */
+
+/* .captioned-figure — image with caption, keeps image and caption together */
+/* .captioned-figure { text-align: center; } */
+
+/* ==========================================================================
+ BIBLIOGRAPHY
+ ========================================================================== */
+
+.bibliography {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ color: #000000;
+ text-indent: -0.5in;
+ padding-left: 0.5in;
+}
+
+/* ==========================================================================
+ DEFINITION LISTS
+ ========================================================================== */
+
+dt {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ font-weight: bold;
+ color: #000000;
+}
+
+dd {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ color: #000000;
+ margin-left: 0.25in;
+}
+
+/* ==========================================================================
+ FOOTNOTES
+ ========================================================================== */
+
+.footnote-text {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 10pt;
+ font-weight: normal;
+ color: #000000;
+ line-height: 1;
+}
+
+/* ==========================================================================
+ LISTS
+ ========================================================================== */
+
+ol {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+}
+
+ul {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+}
+
+/* ==========================================================================
+ TABLES
+ ========================================================================== */
+
+/* Formal table - top/bottom borders, shaded header */
+.table-formal {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ border-top: 1pt solid #000000;
+ border-bottom: 1pt solid #000000;
+ border-left: none;
+ border-right: none;
+}
+
+.table-formal th {
+ background-color: #D9D9D9;
+ font-weight: bold;
+ padding: 4pt;
+}
+
+.table-formal td {
+ padding: 4pt;
+}
+
+/* Grid table - all borders */
+.table-grid {
+ font-family: "Times New Roman", "Times", serif;
+ font-size: 12pt;
+ border: 1pt solid #000000;
+ border-collapse: collapse;
+}
+
+.table-grid th,
+.table-grid td {
+ border: 1pt solid #000000;
+ padding: 4pt;
+}
+
+.table-grid th {
+ font-weight: bold;
+}
+
+/* ==========================================================================
+ REVISION STYLES (Track Changes Preview)
+ ========================================================================== */
+
+.del {
+ background-color: #ffebe9;
+ color: #6a737d;
+ text-decoration: line-through;
+}
+
+.ins {
+ background-color: #e6ffec;
+ text-decoration: underline;
+}
+
+/* ==========================================================================
+ COMMENT STYLES (Preview)
+ ========================================================================== */
+
+.comment {
+ background-color: #fff3cd;
+ border-bottom: 2px solid #ffc107;
+}
diff --git a/docs/_extensions/docstyle/field-code-utils.lua b/docs/_extensions/docstyle/field-code-utils.lua
new file mode 100644
index 0000000..48a131d
--- /dev/null
+++ b/docs/_extensions/docstyle/field-code-utils.lua
@@ -0,0 +1,682 @@
+-- field-code-utils.lua
+-- Shared utilities for ADDIN DOCSTYLE field code generation
+--
+-- This module provides:
+-- 1. Schema loading from inst/schema/docstyle-field-codes.json
+-- 2. XML and JSON escaping functions
+-- 3. Field code XML builders for all types (char, div, list, table, figure, section)
+--
+-- All Lua filters should require this module instead of reimplementing
+-- these functions locally.
+
+-- Module table; the public functions and constants below are attached to it.
+local M = {}
+
+-- Current schema version (must match R's DOCSTYLE_SCHEMA_VERSION)
+-- v2: R-First Assembly - Lua emits text markers, R builds sectPr
+-- Written into every payload by build_payload_json as the "version" field.
+M.SCHEMA_VERSION = 2
+
+-- Debug logging: active only when the environment has DOCSTYLE_DEBUG=1.
+local DEBUG = (os.getenv("DOCSTYLE_DEBUG") == "1")
+
+-- Write `msg` to stderr with a "[field-code-utils] " prefix and a trailing
+-- newline when debugging is enabled; otherwise do nothing.
+local function log_debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write("[field-code-utils] " .. msg .. "\n")
+end
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Schema Loading
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Cached schema (loaded once per filter run).
+-- Set by M.load_schema, either to the parsed schema file or to the built-in
+-- fallback used when no schema file is found.
+local cached_schema = nil
+
+-- Locate docstyle-field-codes.json.
+--
+-- Candidates, in probing order:
+--   1. <dir of this Lua file>/../../inst/schema/  (development layout)
+--   2. <dir of this Lua file>/                     (schema copied next to the filters)
+--   3. $QUARTO_PROJECT_DIR/_extensions/docstyle/
+--   4. $QUARTO_PROJECT_DIR/inst/schema/
+-- Returns the first candidate that can be opened for reading, or nil.
+local function find_schema_path()
+  local candidates = {}
+  local function add(path)
+    candidates[#candidates + 1] = path
+  end
+
+  -- A chunk source beginning with "@" is a file name; keep everything up to
+  -- and including its last "/" as the directory of this Lua file.
+  local chunk_source = debug.getinfo(1, "S").source
+  if chunk_source:sub(1, 1) == "@" then
+    local lua_dir = chunk_source:sub(2):match("(.*/)")
+    if lua_dir then
+      add(lua_dir .. "../../inst/schema/docstyle-field-codes.json")
+      add(lua_dir .. "docstyle-field-codes.json")
+    end
+  end
+
+  local project_dir = os.getenv("QUARTO_PROJECT_DIR")
+  if project_dir then
+    add(project_dir .. "/_extensions/docstyle/docstyle-field-codes.json")
+    add(project_dir .. "/inst/schema/docstyle-field-codes.json")
+  end
+
+  -- Existence check: a candidate counts as found if it opens for reading.
+  for i = 1, #candidates do
+    local candidate = candidates[i]
+    local handle = io.open(candidate, "r")
+    if handle then
+      handle:close()
+      log_debug("Found schema at: " .. candidate)
+      return candidate
+    end
+  end
+
+  return nil
+end
+
+-- Simple JSON parser for our schema (objects, arrays, strings, numbers,
+-- booleans, null). This avoids requiring external JSON libraries.
+--
+-- @param str JSON text
+-- @return the decoded value (objects and arrays become Lua tables; null
+--         becomes nil, so null members/elements are simply absent)
+-- Raises an error "JSON parse error at position N: ..." on malformed input.
+--
+-- String escapes are decoded in a single left-to-right pass. Decoding them
+-- with successive gsub calls turned an escaped backslash followed by "n"
+-- into a backslash plus a newline.
+local function parse_json(str)
+  local pos = 1
+
+  -- Two-character escapes defined by JSON.
+  local SIMPLE_ESCAPES = {
+    ['"'] = '"', ["\\"] = "\\", ["/"] = "/",
+    b = "\b", f = "\f", n = "\n", r = "\r", t = "\t",
+  }
+
+  local function fail()
+    error("JSON parse error at position " .. pos .. ": " .. str:sub(pos, pos + 20))
+  end
+
+  local function skip_whitespace()
+    pos = str:match("^%s*()", pos)
+  end
+
+  local parse_value -- forward declaration (objects/arrays recurse)
+
+  -- pos is on the opening quote; returns the decoded string.
+  local function parse_string()
+    local buf = {}
+    pos = pos + 1
+    while pos <= #str do
+      local ch = str:sub(pos, pos)
+      if ch == '"' then
+        pos = pos + 1
+        return table.concat(buf)
+      elseif ch == "\\" then
+        local esc = str:sub(pos + 1, pos + 1)
+        local hex = (esc == "u") and str:match("^%x%x%x%x", pos + 2)
+        if hex and utf8 and utf8.char then
+          -- \uXXXX -> UTF-8. Surrogate pairs are not combined.
+          buf[#buf + 1] = utf8.char(tonumber(hex, 16))
+          pos = pos + 6
+        else
+          -- Unknown escapes are kept verbatim (backslash included).
+          buf[#buf + 1] = SIMPLE_ESCAPES[esc] or ("\\" .. esc)
+          pos = pos + 2
+        end
+      else
+        -- Copy a whole run of ordinary characters at once.
+        local run = str:match('^[^"\\]+', pos)
+        buf[#buf + 1] = run
+        pos = pos + #run
+      end
+    end
+    fail() -- unterminated string
+  end
+
+  -- pos is on "{"; returns a table keyed by member name.
+  local function parse_object()
+    pos = pos + 1
+    local obj = {}
+    skip_whitespace()
+    if str:sub(pos, pos) == "}" then
+      pos = pos + 1
+      return obj
+    end
+    while true do
+      skip_whitespace()
+      if str:sub(pos, pos) ~= '"' then fail() end
+      local key = parse_string()
+      skip_whitespace()
+      if str:sub(pos, pos) ~= ":" then fail() end
+      pos = pos + 1
+      obj[key] = parse_value()
+      skip_whitespace()
+      local sep = str:sub(pos, pos)
+      if sep == "}" then
+        pos = pos + 1
+        return obj
+      elseif sep ~= "," then
+        fail()
+      end
+      pos = pos + 1
+    end
+  end
+
+  -- pos is on "["; returns a sequence table.
+  local function parse_array()
+    pos = pos + 1
+    local arr = {}
+    skip_whitespace()
+    if str:sub(pos, pos) == "]" then
+      pos = pos + 1
+      return arr
+    end
+    while true do
+      table.insert(arr, parse_value())
+      skip_whitespace()
+      local sep = str:sub(pos, pos)
+      if sep == "]" then
+        pos = pos + 1
+        return arr
+      elseif sep ~= "," then
+        fail()
+      end
+      pos = pos + 1
+    end
+  end
+
+  parse_value = function()
+    skip_whitespace()
+    local c = str:sub(pos, pos)
+
+    if c == '"' then
+      return parse_string()
+    elseif c == "{" then
+      return parse_object()
+    elseif c == "[" then
+      return parse_array()
+    elseif str:sub(pos, pos + 3) == "true" then
+      pos = pos + 4
+      return true
+    elseif str:sub(pos, pos + 4) == "false" then
+      pos = pos + 5
+      return false
+    elseif str:sub(pos, pos + 3) == "null" then
+      pos = pos + 4
+      return nil
+    end
+
+    -- Number: optional sign, digits, optional fraction and exponent.
+    -- tonumber() rejects anything the loose pattern over-matches.
+    local num_str = str:match("^-?%d+%.?%d*[eE]?[+-]?%d*", pos)
+    local num = num_str and tonumber(num_str)
+    if num then
+      pos = pos + #num_str
+      return num
+    end
+    fail()
+  end
+
+  return parse_value()
+end
+
+-- Load and cache the schema.
+--
+-- @return the schema table. When no schema file can be located, a minimal
+--         built-in schema (empty class tables) is cached and returned.
+--         Returns nil when a file was located but could not be opened,
+--         parsed, or did not contain a JSON object; nothing is cached in
+--         that case, so the next call retries.
+function M.load_schema()
+  if cached_schema then
+    return cached_schema
+  end
+
+  local schema_path = find_schema_path()
+  if not schema_path then
+    log_debug("Schema file not found, using built-in defaults")
+    -- Return minimal built-in schema as fallback
+    cached_schema = {
+      schema_version = M.SCHEMA_VERSION,
+      char_classes = {},
+      div_types = {},
+      list_classes = {}
+    }
+    return cached_schema
+  end
+
+  local f = io.open(schema_path, "r")
+  if not f then
+    log_debug("Could not open schema file: " .. schema_path)
+    return nil
+  end
+
+  local content = f:read("*a")
+  f:close()
+
+  local ok, schema = pcall(parse_json, content)
+  if not ok then
+    log_debug("Failed to parse schema JSON: " .. tostring(schema))
+    return nil
+  end
+
+  -- A syntactically valid document whose top level is not an object (for
+  -- example `null` or a bare string) cannot serve as a schema; indexing it
+  -- below would raise instead of degrading gracefully.
+  if type(schema) ~= "table" then
+    log_debug("Schema JSON is not an object (got " .. type(schema) .. ")")
+    return nil
+  end
+
+  cached_schema = schema
+  -- tostring() keeps the log line safe for non-string version values.
+  log_debug("Loaded schema version " .. tostring(schema.schema_version or "unknown"))
+  return cached_schema
+end
+
+-- Look up a character-class definition in the schema.
+-- Returns nil when the schema or its `char_classes` table is unavailable,
+-- or when the class has no entry.
+function M.get_char_class(class)
+  local schema = M.load_schema()
+  local classes = schema and schema.char_classes
+  if not classes then
+    return nil
+  end
+  return classes[class]
+end
+
+-- Look up a div-type definition in the schema.
+-- Returns nil when the schema or its `div_types` table is unavailable,
+-- or when the name has no entry.
+function M.get_div_type(name)
+  local schema = M.load_schema()
+  local types = schema and schema.div_types
+  if not types then
+    return nil
+  end
+  return types[name]
+end
+
+-- Look up a list-class definition in the schema.
+-- Returns nil when the schema or its `list_classes` table is unavailable,
+-- or when the class has no entry.
+function M.get_list_class(class)
+  local schema = M.load_schema()
+  local classes = schema and schema.list_classes
+  if not classes then
+    return nil
+  end
+  return classes[class]
+end
+
+-- Look up a table-class definition in the schema.
+-- Returns nil when the schema or its `table_classes` table is unavailable,
+-- or when the class has no entry.
+function M.get_table_class(class)
+  local schema = M.load_schema()
+  local classes = schema and schema.table_classes
+  if not classes then
+    return nil
+  end
+  return classes[class]
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Escaping Functions
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Escape XML special characters for use in OOXML content.
+--
+-- @param text value to escape (converted with tostring); nil/false -> ""
+-- @return text with & < > " ' replaced by the predefined XML entities
+--
+-- The replacements had been reduced to the raw characters themselves, which
+-- made every substitution a no-op and left a `"""` literal that does not
+-- parse as Lua. All five characters are replaced in a single pass, so the
+-- "&" introduced by one entity is never escaped a second time.
+function M.xml_escape(text)
+  if not text then return "" end
+  local entities = {
+    ["&"] = "&amp;",
+    ["<"] = "&lt;",
+    [">"] = "&gt;",
+    ['"'] = "&quot;",
+    ["'"] = "&apos;",
+  }
+  -- Parentheses drop gsub's second return value (the match count).
+  return (tostring(text):gsub("[&<>\"']", entities))
+end
+
+-- Escape a string for use inside a JSON string value.
+--
+-- @param text value to escape (converted with tostring); nil/false -> ""
+-- @return text with backslash, double quote and control characters escaped
+--
+-- Backslash and double quote are escaped exactly as before. Control
+-- characters (newline, tab, ...) are now escaped too: JSON forbids them
+-- unescaped inside a string, so a payload containing one could not be
+-- parsed back.
+function M.json_escape(text)
+  if not text then return "" end
+  local named = {
+    ['"'] = '\\"', ['\\'] = '\\\\',
+    ['\b'] = '\\b', ['\f'] = '\\f', ['\n'] = '\\n', ['\r'] = '\\r', ['\t'] = '\\t',
+  }
+  -- Single pass over the characters that need escaping; anything without a
+  -- short form becomes a \u00XX escape.
+  local escaped = tostring(text):gsub('[%c"\\]', function(ch)
+    return named[ch] or string.format("\\u%04x", ch:byte())
+  end)
+  return escaped
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Field Code Builders
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Build JSON payload for field code instrText.
+-- @param payload_type: "char", "div", "list", "table", "figure", or "section"
+-- @param fields: table of additional fields to include (may be nil)
+-- @return JSON string (not XML-escaped)
+--
+-- "type" and "version" always come first. The remaining fields are written
+-- in sorted key order so the same input yields byte-identical output on
+-- every render (pairs() order is unspecified). Only string, number and
+-- boolean values are serialised; other value types are skipped.
+function M.build_payload_json(payload_type, fields)
+  fields = fields or {}
+
+  local parts = {
+    '"type":"' .. M.json_escape(payload_type) .. '"',
+    '"version":' .. M.SCHEMA_VERSION,
+  }
+
+  local keys = {}
+  for key in pairs(fields) do
+    keys[#keys + 1] = key
+  end
+  table.sort(keys, function(a, b)
+    return tostring(a) < tostring(b)
+  end)
+
+  for _, key in ipairs(keys) do
+    local value = fields[key]
+    local kind = type(value)
+    -- Keys are escaped as well so an unusual key cannot break the JSON.
+    local prefix = '"' .. M.json_escape(tostring(key)) .. '":'
+    if kind == "string" then
+      parts[#parts + 1] = prefix .. '"' .. M.json_escape(value) .. '"'
+    elseif kind == "number" then
+      parts[#parts + 1] = prefix .. value
+    elseif kind == "boolean" then
+      parts[#parts + 1] = prefix .. (value and "true" or "false")
+    end
+  end
+
+  return "{" .. table.concat(parts, ",") .. "}"
+end
+
+-- Build the QMD source string recorded for a char class.
+-- When the schema entry for `class` defines `source_template`, that template
+-- is returned unchanged; otherwise the explicit form "[text]{.class}" is
+-- assembled from the arguments.
+function M.build_char_source(class, text)
+  local class_def = M.get_char_class(class)
+  local template = class_def and class_def.source_template
+  if template then
+    return template
+  end
+  return "[" .. text .. "]{." .. class .. "}"
+end
+
+-- Build complete ADDIN DOCSTYLE field code XML for char type.
+-- @param style_id: Word style ID (e.g., "Date")
+-- @param text: Display text
+-- @param class: CSS class name (e.g., "date")
+-- @return OOXML string: begin / instrText / separate / styled display run / end
+--
+-- NOTE(review): the OOXML literals were empty in the reviewed copy, leaving
+-- `style_id` unused. The markup below is reconstructed from the
+-- concatenation skeleton and the standard complex-field layout — confirm
+-- against upstream docstyle.
+function M.build_char_field_code(style_id, text, class)
+  local source = M.build_char_source(class, text)
+  local json = M.build_payload_json("char", {
+    class = class,
+    source = source
+  })
+  local json_xml = M.xml_escape(json)
+
+  -- Visible result of the field: the text in the requested character style.
+  local display_run = '<w:r>' ..
+    '<w:rPr><w:rStyle w:val="' .. M.xml_escape(style_id) .. '"/></w:rPr>' ..
+    '<w:t xml:space="preserve">' .. M.xml_escape(text) .. '</w:t>' ..
+    '</w:r>'
+
+  return '<w:r><w:fldChar w:fldCharType="begin"/></w:r>' ..
+    '<w:r><w:instrText xml:space="preserve"> ADDIN DOCSTYLE ' .. json_xml .. ' </w:instrText></w:r>' ..
+    '<w:r><w:fldChar w:fldCharType="separate"/></w:r>' ..
+    display_run ..
+    '<w:r><w:fldChar w:fldCharType="end"/></w:r>'
+end
+
+-- Build field code start marker for block types (div, list, table, figure, section).
+-- @param payload_type: payload "type" value, e.g. "div", "list", or "section"
+-- @param fields: payload fields (e.g., {name = "toc"} or {class = "list-alpha"})
+-- @return OOXML paragraph string holding field begin, instrText and separate
+--
+-- NOTE(review): the OOXML literals were empty in the reviewed copy. The
+-- markup below is reconstructed from the concatenation skeleton (one
+-- paragraph with begin / instrText / separate) — confirm against upstream
+-- docstyle, in particular any paragraph properties.
+function M.build_block_field_start(payload_type, fields)
+  local json = M.build_payload_json(payload_type, fields)
+  local json_xml = M.xml_escape(json)
+
+  return '<w:p><w:r><w:fldChar w:fldCharType="begin"/></w:r>' ..
+    '<w:r><w:instrText xml:space="preserve"> ADDIN DOCSTYLE ' .. json_xml .. ' </w:instrText></w:r>' ..
+    '<w:r><w:fldChar w:fldCharType="separate"/></w:r></w:p>'
+end
+
+-- Build field code end marker for block types.
+-- @return OOXML paragraph string holding the field "end" character
+--
+-- NOTE(review): the literal was empty in the reviewed copy; reconstructed as
+-- a paragraph containing only the closing fldChar — confirm against
+-- upstream docstyle.
+function M.build_block_field_end()
+  return '<w:p><w:r><w:fldChar w:fldCharType="end"/></w:r></w:p>'
+end
+
+-- Convenience wrapper: block field start of type "div" whose payload
+-- carries the given name.
+function M.build_div_field_start(name)
+  local fields = { name = name }
+  return M.build_block_field_start("div", fields)
+end
+
+-- Convenience wrapper: block field start of type "list".
+-- `start` is included in the payload only when start_num is given and
+-- greater than 1.
+function M.build_list_field_start(class, start_num)
+  local restarts = start_num and start_num > 1
+  local fields = {
+    class = class,
+    start = restarts and start_num or nil
+  }
+  return M.build_block_field_start("list", fields)
+end
+
+-- Convenience wrapper: block field start of type "table".
+-- @param class: table class (e.g., "table-formal")
+-- @param attrs: optional attributes table (widths, width, font-size, etc.)
+-- Entries of attrs are copied over the payload, so an attrs.class entry
+-- replaces the class argument.
+function M.build_table_field_start(class, attrs)
+  local payload = { class = class }
+  for key, value in pairs(attrs or {}) do
+    payload[key] = value
+  end
+  return M.build_block_field_start("table", payload)
+end
+
+-- Convenience wrapper: block field start of type "figure".
+-- @param id: QMD figure ID (e.g. "fig-consort-flow")
+-- @param attrs: optional attributes table (docpr_id, width, align, wrap, original_path)
+-- Entries of attrs are copied over the payload, so an attrs.id entry
+-- replaces the id argument.
+function M.build_figure_field_start(id, attrs)
+  local payload = { id = id }
+  for key, value in pairs(attrs or {}) do
+    payload[key] = value
+  end
+  return M.build_block_field_start("figure", payload)
+end
+
+-- Convenience wrapper: block field start of type "section".
+-- Entries of the optional attrs table are copied over the payload, so an
+-- attrs.class entry replaces the class argument.
+function M.build_section_field_start(class, attrs)
+  local payload = { class = class }
+  for key, value in pairs(attrs or {}) do
+    payload[key] = value
+  end
+  return M.build_block_field_start("section", payload)
+end
+
+-- Build complete section field code in a SINGLE paragraph for R-First Assembly.
+-- This prevents the 3-line gap by emitting BEGIN/instrText/SEPARATE/marker/END
+-- all in one paragraph rather than three separate paragraphs.
+-- @param class: section class (e.g., "section-body")
+-- @param attrs: optional attributes table (copied over the payload)
+-- @param marker_text: the DOCSTYLE_SECTION::... marker text
+-- @return OOXML paragraph string
+--
+-- NOTE(review): the OOXML literals were empty in the reviewed copy. The
+-- markup below is reconstructed from the concatenation skeleton and the
+-- description above — confirm against upstream docstyle.
+function M.build_section_marker_para(class, attrs, marker_text)
+  local fields = {class = class}
+  if attrs then
+    for k, v in pairs(attrs) do
+      fields[k] = v
+    end
+  end
+  local json = M.build_payload_json("section", fields)
+  local json_xml = M.xml_escape(json)
+  local marker_xml = M.xml_escape(marker_text)
+
+  return '<w:p>' ..
+    '<w:r><w:fldChar w:fldCharType="begin"/></w:r>' ..
+    '<w:r><w:instrText xml:space="preserve"> ADDIN DOCSTYLE ' .. json_xml .. ' </w:instrText></w:r>' ..
+    '<w:r><w:fldChar w:fldCharType="separate"/></w:r>' ..
+    '<w:r><w:t xml:space="preserve">' .. marker_xml .. '</w:t></w:r>' ..
+    '<w:r><w:fldChar w:fldCharType="end"/></w:r></w:p>'
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Helper Functions
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Convert inline content to plain text.
+--
+-- @param inlines list of Pandoc inline elements (nil is treated as empty)
+-- @return the concatenated text
+--
+-- Str contributes its text; Space, SoftBreak and LineBreak contribute one
+-- space; Code contributes its literal text. Inlines that wrap other inlines
+-- (Span, Emph, Strong, Link, ...) are descended into. Previously only Span
+-- was descended into, so `[**2024**]{.date}` produced an empty string and
+-- the emphasised text was lost. Everything else (notes, images, math, raw
+-- inlines) contributes nothing, as before.
+function M.inlines_to_text(inlines)
+  -- Inline types whose `content` is itself a list of inlines.
+  local containers = {
+    Span = true, Emph = true, Strong = true, Underline = true,
+    Strikeout = true, Superscript = true, Subscript = true,
+    SmallCaps = true, Quoted = true, Cite = true, Link = true,
+  }
+
+  local pieces = {}
+  for _, inline in ipairs(inlines or {}) do
+    local tag = inline.t
+    if tag == "Str" or tag == "Code" then
+      pieces[#pieces + 1] = inline.text
+    elseif tag == "Space" or tag == "SoftBreak" or tag == "LineBreak" then
+      pieces[#pieces + 1] = " "
+    elseif containers[tag] then
+      pieces[#pieces + 1] = M.inlines_to_text(inline.content)
+    end
+  end
+  return table.concat(pieces)
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Page Config Loading (shared across filters)
+-- ═══════════════════════════════════════════════════════════════════════════
+
+-- Cached page config (loaded once, shared by all filters in same render)
+local cached_page_config = nil
+
+--- Load page-config.json from the _docstyle/ directory.
+-- Candidate paths are tried in order; the first one that opens and decodes to
+-- a table is cached in `cached_page_config` and returned, so later callers in
+-- the same render pay no I/O. A file that exists but cannot be decoded (or
+-- decodes to something other than a table) is reported through log_debug and
+-- skipped instead of being silently ignored. Failures are not cached, so a
+-- later call tries again.
+-- @return parsed config table, or nil if no usable file was found
+function M.load_page_config()
+  if cached_page_config then return cached_page_config end
+
+  local config_paths = {
+    "_docstyle/page-config.json",
+    "./_docstyle/page-config.json"
+  }
+
+  for _, path in ipairs(config_paths) do
+    local file = io.open(path, "r")
+    if file then
+      local content = file:read("*a")
+      file:close()
+      -- The closure keeps the pandoc.json lookup inside the protected call,
+      -- so a pandoc build without that module is handled like a parse error.
+      local ok, decoded = pcall(function()
+        return pandoc.json.decode(content)
+      end)
+      if ok and type(decoded) == "table" then
+        log_debug("Loaded page config from " .. path)
+        cached_page_config = decoded
+        return decoded
+      end
+      -- When pcall fails, `decoded` holds the error value.
+      local reason = ok and "JSON did not decode to a table" or tostring(decoded)
+      log_debug("Ignoring unusable page config " .. path .. ": " .. reason)
+    end
+  end
+
+  log_debug("No page-config.json found")
+  return nil
+end
+
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Inline Renderer: Pandoc AST → OOXML runs
+-- ═══════════════════════════════════════════════════════════════════════════
+--
+-- Converts Pandoc inline elements to OOXML run XML strings.
+-- Handles the pre-conversion AST forms present when table-style.lua runs
+-- (before char-style.lua and comment-inject.lua in the filter chain).
+--
+-- base_rPr_parts: array of XML fragments to include in the run properties of every emitted run
+-- e.g., {"<w:b/>", '<w:sz w:val="20"/>', '<w:szCs w:val="20"/>'}
+
+-- Join an array of run-property fragments into one string.
+-- @param parts: array of XML fragment strings
+-- @return "" when `parts` is empty, otherwise the concatenated fragments
+-- NOTE(review): the prefix and suffix literals around the concatenation are
+-- empty strings as written, so no enclosing element is produced -- confirm
+-- whether a run-properties wrapper element was intended here.
+local function build_rPr(parts)
+ if #parts == 0 then return "" end
+ return "" .. table.concat(parts) .. ""
+end
+
+-- Build the XML for a single text run.
+-- @param rPr_parts: array of run-property fragments, rendered via build_rPr
+-- @param text: run text; passed through M.xml_escape before being embedded
+-- @return build_rPr(rPr_parts) followed by the escaped text
+-- NOTE(review): the literals surrounding the two pieces are empty strings as
+-- written, so no run/text elements are emitted -- confirm the intended
+-- wrapper markup.
+local function build_text_run(rPr_parts, text)
+ return "" .. build_rPr(rPr_parts) ..
+ '' .. M.xml_escape(text) .. ""
+end
+
+-- Resolve a char-style class name to a Word character style ID.
+-- The schema entry returned by M.get_char_class wins when it carries a
+-- word_style; the static table below is only consulted otherwise.
+local char_style_fallback = {
+  date = "Date",
+  version = "Version",
+  author = "Author",
+  affiliation = "Affiliation",
+  sc = "SmallCaps",
+}
+local function get_char_style_id(class)
+  local schema_entry = M.get_char_class(class)
+  local schema_style = schema_entry and schema_entry.word_style
+  if schema_style then
+    return schema_style
+  end
+  return char_style_fallback[class]
+end
+
+-- Parse a comment marker from HTML text.
+-- Tries the "start", "end" and "point" patterns in that order and reports the
+-- first one that matches.
+-- @param text: raw HTML string (nil is accepted)
+-- @return id, type  where type is "start", "end" or "point"; nil, nil when
+--         text is nil or nothing matches
+-- Exported as M.parse_comment_marker for use by comment-inject.lua
+-- NOTE(review): all three patterns are empty strings as written. An empty
+-- pattern matches any string and yields "", so every non-nil input is reported
+-- as ("", "start") and the "end"/"point" branches are never reached --
+-- confirm the intended marker patterns (each presumably captures the id).
+local function parse_comment_marker(text)
+ if not text then return nil, nil end
+ local start_id = text:match('')
+ if start_id then return start_id, "start" end
+ local end_id = text:match('')
+ if end_id then return end_id, "end" end
+ local point_id = text:match('')
+ if point_id then return point_id, "point" end
+ return nil, nil
+end
+
+-- Forward declaration. recurse_with_rPr and render_inline call each other, so
+-- the name has to be a local that is already in scope when recurse_with_rPr is
+-- compiled. Without it the call below binds to an undefined global and fails
+-- with "attempt to call a nil value" on the first Strong/Emph/Link/etc.
+local render_inline
+
+-- Append every element of `source` to `target`.
+local function append_all(target, source)
+  for _, item in ipairs(source) do
+    table.insert(target, item)
+  end
+end
+
+-- Recurse into inline content with extra rPr fragment(s) appended.
+-- @param content: list of child inlines
+-- @param rPr_parts: rPr fragments inherited from the parent (not modified)
+-- @param extra_rPr: one fragment string, or an array of fragment strings
+-- @return array of XML strings
+local function recurse_with_rPr(content, rPr_parts, extra_rPr)
+  -- Copy first so siblings of this element keep the parent's properties.
+  local new_rPr = {}
+  for _, p in ipairs(rPr_parts) do table.insert(new_rPr, p) end
+  if type(extra_rPr) == "table" then
+    for _, e in ipairs(extra_rPr) do table.insert(new_rPr, e) end
+  else
+    table.insert(new_rPr, extra_rPr)
+  end
+  local results = {}
+  for _, child in ipairs(content) do
+    append_all(results, render_inline(child, new_rPr))
+  end
+  return results
+end
+
+-- Render a single Pandoc inline element to OOXML run(s).
+-- @param inline: Pandoc inline element
+-- @param rPr_parts: array of rPr fragments applying to this element
+-- @return array of XML strings (possibly empty)
+-- Assigned to the forward-declared local above (not `local function`, which
+-- would create a second, unrelated variable).
+render_inline = function(inline, rPr_parts)
+  local results = {}
+
+  if inline.t == "Str" then
+    table.insert(results, build_text_run(rPr_parts, inline.text))
+
+  elseif inline.t == "Space" or inline.t == "SoftBreak" then
+    table.insert(results, build_text_run(rPr_parts, " "))
+
+  elseif inline.t == "Strong" then
+    return recurse_with_rPr(inline.content, rPr_parts, "")
+
+  elseif inline.t == "Emph" then
+    return recurse_with_rPr(inline.content, rPr_parts, "")
+
+  elseif inline.t == "Strikeout" then
+    return recurse_with_rPr(inline.content, rPr_parts, "")
+
+  elseif inline.t == "Superscript" then
+    return recurse_with_rPr(inline.content, rPr_parts, '')
+
+  elseif inline.t == "Subscript" then
+    return recurse_with_rPr(inline.content, rPr_parts, '')
+
+  elseif inline.t == "Span" then
+    -- Check for char-style class via schema lookup; the first class that
+    -- resolves to a style ID wins.
+    local matched_class = nil
+    local matched_style_id = nil
+    for _, class in ipairs(inline.classes) do
+      local sid = get_char_style_id(class)
+      if sid then
+        matched_class = class
+        matched_style_id = sid
+        break
+      end
+    end
+
+    if matched_class then
+      -- Emit char field code (replicates char-style.lua). The span is
+      -- flattened to plain text; an empty span emits nothing.
+      local text = M.inlines_to_text(inline.content)
+      if text ~= "" then
+        local field_xml = M.build_char_field_code(
+          matched_style_id, text, matched_class)
+        table.insert(results, field_xml)
+      end
+    else
+      -- Unknown span class — recurse into children with current rPr
+      for _, child in ipairs(inline.content) do
+        append_all(results, render_inline(child, rPr_parts))
+      end
+    end
+
+  elseif inline.t == "Link" then
+    return recurse_with_rPr(inline.content, rPr_parts,
+      {'', ''})
+
+  elseif inline.t == "RawInline" then
+    if inline.format == "html" then
+      -- Check for comment markers (replicates comment-inject.lua)
+      local id, marker_type = parse_comment_marker(inline.text)
+      if id then
+        if marker_type == "start" then
+          table.insert(results,
+            '')
+        elseif marker_type == "end" then
+          table.insert(results,
+            '' ..
+            '')
+        elseif marker_type == "point" then
+          table.insert(results,
+            '' ..
+            '' ..
+            '')
+        end
+      end
+      -- Other HTML raw inlines are dropped (no meaningful OOXML equivalent)
+    elseif inline.format == "openxml" then
+      -- Already OOXML — pass through unchanged
+      table.insert(results, inline.text)
+    end
+
+  elseif inline.t == "LineBreak" then
+    -- Line break within a paragraph
+    table.insert(results, "")
+
+  elseif inline.t == "Code" then
+    table.insert(results, build_text_run(rPr_parts, inline.text))
+
+  -- Fallback: try to recurse into content, or stringify
+  elseif inline.content then
+    for _, child in ipairs(inline.content) do
+      append_all(results, render_inline(child, rPr_parts))
+    end
+  else
+    -- Leaf node we don't handle — stringify
+    local text = pandoc.utils.stringify(pandoc.Inlines({inline}))
+    if text ~= "" then
+      table.insert(results, build_text_run(rPr_parts, text))
+    end
+  end
+
+  return results
+end
+
+--- Render a list of Pandoc inlines to one OOXML string.
+-- Each inline is handed to render_inline together with the base run
+-- properties; the resulting fragments are joined in document order.
+-- @param inlines Array of Pandoc inline elements
+-- @param base_rPr_parts Array of base run property XML fragments (optional)
+-- @return Concatenated OOXML string (runs only, no paragraph wrapper)
+function M.render_inlines(inlines, base_rPr_parts)
+  local rPr = base_rPr_parts or {}
+  local buffer = {}
+  for _, element in ipairs(inlines) do
+    local runs = render_inline(element, rPr)
+    for _, run_xml in ipairs(runs) do
+      buffer[#buffer + 1] = run_xml
+    end
+  end
+  return table.concat(buffer)
+end
+
+-- Export parse_comment_marker for use by comment-inject.lua
+M.parse_comment_marker = parse_comment_marker
+
+
+return M
diff --git a/docs/_extensions/docstyle/figure.lua b/docs/_extensions/docstyle/figure.lua
new file mode 100644
index 0000000..79b9583
--- /dev/null
+++ b/docs/_extensions/docstyle/figure.lua
@@ -0,0 +1,108 @@
+-- figure.lua
+-- Pandoc Lua filter that wraps .figure divs in ADDIN DOCSTYLE field codes
+-- for round-trip harvest fidelity.
+--
+-- Usage in QMD:
+-- ::: {#fig-consort-flow .figure width="80%" align="center"}
+--   ![](figures/consort-flow.png)
+--
+-- **Figure 1.** Caption text with [@citation].
+-- :::
+--
+-- The filter:
+-- 1. Detects divs with class "figure"
+-- 2. Emits opening ADDIN DOCSTYLE field code carrying id and attributes
+-- 3. Passes through all inner blocks (image + caption paragraph) unchanged
+-- 4. Emits closing field code
+--
+-- On re-harvest, detect_docstyle_field_codes() finds these markers,
+-- handle_docstyle_figure() reconstructs the div_open with the original id,
+-- and the harvest loop emits the figure div with the correct QMD id.
+
+local fcu = require("field-code-utils")
+
+-- Normalise FORMAT: pandoc exposes the writer name through the global FORMAT
+-- (Quarto passes "docx" at runtime). Shadow it with a file-local copy that
+-- maps Word output to the canonical "openxml" and leaves every other writer
+-- name untouched. Hard-coding "openxml" here would make the FORMAT guard in
+-- Div() unreachable, so HTML/PDF renders would also have their .figure divs
+-- unwrapped (losing the div id and classes).
+local FORMAT = (function(writer)
+  writer = tostring(writer or "")
+  -- Prefix match tolerates extension suffixes such as "docx+native_numbering".
+  if writer:match("^docx") or writer:match("^openxml") then
+    return "openxml"
+  end
+  return writer
+end)(FORMAT)
+
+-- Diagnostics are opt-in: set DOCSTYLE_DEBUG=1 to get "[figure] ..." lines
+-- on stderr.
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write("[figure] " .. msg .. "\n")
+end
+
+-- Div attributes excluded from the field code payload (Pandoc-internal)
+local skip_attr_keys = { ["data-pos"] = true }
+
+-- True when `wanted` is one of the entries of `classes`.
+local function has_class(classes, wanted)
+  for _, name in ipairs(classes) do
+    if name == wanted then
+      return true
+    end
+  end
+  return false
+end
+
+-- Return the src of the first Image that sits directly inside a Para of
+-- `blocks`, or nil when there is none. Only the first Image of each Para is
+-- looked at.
+local function first_image_src(blocks)
+  for _, blk in ipairs(blocks) do
+    if blk.t == "Para" then
+      local src = nil
+      for _, el in ipairs(blk.content) do
+        if el.t == "Image" then
+          src = el.src
+          break
+        end
+      end
+      if src then
+        return src
+      end
+    end
+  end
+  return nil
+end
+
+-- Wrap a ".figure" div in ADDIN DOCSTYLE field-code markers.
+-- Divs without the class, and any div when the output format is not
+-- "openxml", are left untouched (nil). Otherwise the div is replaced by:
+-- opening marker block, the div's own blocks unchanged, closing marker block.
+function Div(div)
+  if FORMAT ~= "openxml" then
+    return nil
+  end
+  if not has_class(div.classes, "figure") then
+    return nil
+  end
+
+  -- QMD id comes from the div identifier; a placeholder is used when absent
+  local fig_id = div.identifier
+  if not fig_id or fig_id == "" then
+    fig_id = "fig-unknown"
+  end
+
+  -- Payload attributes: every non-empty div attribute except the skipped keys
+  local payload = {}
+  for name, value in pairs(div.attributes) do
+    if value and value ~= "" and not skip_attr_keys[name] then
+      payload[name] = value
+    end
+  end
+
+  -- The image path travels as original_path for re-harvest path restoration
+  local image_src = first_image_src(div.content)
+  if image_src and image_src ~= "" then
+    payload["original_path"] = image_src
+  end
+
+  debug("Processing .figure div: id=" .. fig_id)
+
+  local opening = fcu.build_figure_field_start(fig_id, payload)
+  local closing = fcu.build_block_field_end()
+
+  -- opening marker | inner blocks | closing marker
+  local wrapped = { pandoc.RawBlock("openxml", opening) }
+  for _, inner in ipairs(div.content) do
+    wrapped[#wrapped + 1] = inner
+  end
+  wrapped[#wrapped + 1] = pandoc.RawBlock("openxml", closing)
+
+  return pandoc.Blocks(wrapped)
+end
+
+-- Document-level hook: canonicalise a Word writer name ("docx" or "openxml")
+-- to "openxml". The document itself is never modified (returns nil).
+function Pandoc(_)
+  local is_word_output = (FORMAT == "docx") or (FORMAT == "openxml")
+  if is_word_output then
+    FORMAT = "openxml"
+  end
+  return nil
+end
diff --git a/docs/_extensions/docstyle/generate-reference.R b/docs/_extensions/docstyle/generate-reference.R
new file mode 100644
index 0000000..614338b
--- /dev/null
+++ b/docs/_extensions/docstyle/generate-reference.R
@@ -0,0 +1,330 @@
+#!/usr/bin/env Rscript
+# Pre-render hook: Generate reference.docx from CSS
+#
+# This script is called by Quarto before rendering to generate a reference.docx
+# from the docstyle CSS configuration. Uses hash-based caching to avoid
+# regeneration when nothing has changed.
+#
+# Usage in _quarto.yml:
+# project:
+# pre-render: _extensions/docstyle/generate-reference.R
+#
+# Behaviour:
+# - If user explicitly sets reference-doc in YAML, skip generation (use theirs)
+# - Otherwise, generate reference.docx from docstyle.css configuration
+# - Cache by hash: only regenerate when CSS or docstyle config changes
+#
+# Output:
+# - _docstyle/reference.docx (cached reference document)
+# - _docstyle/reference.docx.hash (hash of inputs for cache validation)
+
+# Null-coalescing operator: yields `y` only when `x` is NULL.
+# Defined first because the rest of the script relies on it.
+`%||%` <- function(x, y) {
+  if (!is.null(x)) {
+    return(x)
+  }
+  y
+}
+
+# Project root: delegate to docstyle::find_project_root() when the package is
+# installed; otherwise use QUARTO_PROJECT_DIR if set, else the working
+# directory.
+project_dir <- if (requireNamespace("docstyle", quietly = TRUE)) {
+  docstyle::find_project_root(getwd())
+} else {
+  env_dir <- Sys.getenv("QUARTO_PROJECT_DIR", "")
+  if (nzchar(env_dir)) env_dir else getwd()
+}
+
+# Without a _quarto.yml there is nothing to configure: exit successfully so
+# the render carries on.
+quarto_yml <- file.path(project_dir, "_quarto.yml")
+if (!file.exists(quarto_yml)) {
+  message("[generate-reference] No _quarto.yml found, skipping reference generation")
+  quit(save = "no", status = 0)
+}
+
+# A YAML file that cannot be read is reported and treated as "skip", not as a
+# render failure.
+config <- tryCatch(
+  yaml::read_yaml(quarto_yml),
+  error = function(e) {
+    message("[generate-reference] Error reading _quarto.yml: ", e$message)
+    quit(save = "no", status = 0)
+  }
+)
+
+# Decide whether the user configured their own reference-doc.
+#
+# Looks at format.docx.reference-doc, format.docstyle-docx.reference-doc and
+# the top-level reference-doc. A value that points at the generated file
+# (<sidecar_dir>/reference.docx) does not count as custom, because that file
+# still has to be generated by this script.
+#
+# @param cfg Parsed _quarto.yml (a list; may be NULL or contain atomic values
+#   where a mapping is expected, e.g. `format: docx`).
+# @param sidecar_dir Sidecar directory name or path (e.g. "_docstyle").
+# @return TRUE if a custom reference-doc is configured, otherwise FALSE.
+has_custom_reference_doc <- function(cfg, sidecar_dir) {
+  generated_path <- file.path(sidecar_dir, "reference.docx")
+
+  # Walk a chain of keys, giving up with NULL as soon as a level is not a list.
+  # `format: docx` and `format: {docx: default}` are valid Quarto YAML but
+  # parse to character values, on which `$` raises
+  # "$ operator is invalid for atomic vectors".
+  pluck_setting <- function(node, keys) {
+    for (key in keys) {
+      if (!is.list(node)) {
+        return(NULL)
+      }
+      node <- node[[key]]
+    }
+    node
+  }
+
+  is_custom <- function(path) {
+    # Only a single non-empty string can name a reference document
+    if (!is.character(path) || length(path) != 1L || is.na(path) || !nzchar(path)) {
+      return(FALSE)
+    }
+    # Same file after normalisation: it is the generated one
+    norm_path <- normalizePath(path, mustWork = FALSE)
+    norm_generated <- normalizePath(generated_path, mustWork = FALSE)
+    if (identical(norm_path, norm_generated)) {
+      return(FALSE)
+    }
+    # Same relative spelling
+    if (identical(path, generated_path)) {
+      return(FALSE)
+    }
+    # Any "<something>/<sidecar>/reference.docx" is also the generated file
+    in_sidecar <- basename(dirname(path)) == basename(sidecar_dir) &&
+      basename(path) == "reference.docx"
+    !in_sidecar
+  }
+
+  candidates <- list(
+    pluck_setting(cfg, c("format", "docx", "reference-doc")),
+    pluck_setting(cfg, c("format", "docstyle-docx", "reference-doc")),
+    pluck_setting(cfg, "reference-doc")
+  )
+  any(vapply(candidates, is_custom, logical(1)))
+}
+
+# Shared exit for the "nothing to do" cases below: say why, then stop the hook
+# with a success status so the Quarto render carries on.
+skip_generation <- function(reason) {
+  message(reason)
+  quit(save = "no", status = 0)
+}
+
+# A user-supplied reference-doc always wins over CSS generation
+sidecar_dir_name <- config$docstyle$`sidecar-dir` %||% "_docstyle"
+if (has_custom_reference_doc(config, sidecar_dir_name)) {
+  skip_generation("[generate-reference] User specified custom reference-doc, skipping CSS generation")
+}
+
+# No docstyle: section means the project does not use this hook
+if (is.null(config$docstyle)) {
+  skip_generation("[generate-reference] No docstyle: section found, skipping reference generation")
+}
+
+# Resolve CSS path(s): one path, several paths, or the extension default
+css_config <- config$docstyle$css
+
+if (!is.null(css_config)) {
+  # Paths are tried relative to the project root first, then used as given
+  css_paths <- vapply(css_config, function(p) {
+    candidate <- file.path(project_dir, p)
+    if (file.exists(candidate)) candidate else p
+  }, character(1))
+
+  missing_css <- css_paths[!file.exists(css_paths)]
+  if (length(missing_css) > 0) {
+    skip_generation(paste0(
+      "[generate-reference] CSS file(s) not found: ",
+      paste(missing_css, collapse = ", ")
+    ))
+  }
+} else {
+  # No CSS configured: fall back to default.css next to this script. The
+  # script location comes from Rscript's --file= argument; if that is not
+  # available, the conventional extension directory is assumed.
+  extension_fallback <- file.path(project_dir, "_extensions", "docstyle")
+  script_dir <- tryCatch({
+    script_path <- commandArgs(trailingOnly = FALSE)
+    file_arg <- grep("^--file=", script_path, value = TRUE)
+    if (length(file_arg) > 0) {
+      dirname(normalizePath(sub("^--file=", "", file_arg)))
+    } else {
+      extension_fallback
+    }
+  }, error = function(e) {
+    extension_fallback
+  })
+
+  default_css <- file.path(script_dir, "default.css")
+  if (!file.exists(default_css)) {
+    skip_generation("[generate-reference] No docstyle.css specified and default.css not found")
+  }
+  css_paths <- default_css
+  message("[generate-reference] Using default CIHR-compliant CSS")
+}
+
+# Output locations inside the sidecar directory (created on first use)
+sidecar_dir <- config$docstyle$`sidecar-dir` %||% "_docstyle"
+sidecar_path <- file.path(project_dir, sidecar_dir)
+if (!dir.exists(sidecar_path)) dir.create(sidecar_path, recursive = TRUE)
+
+reference_path <- file.path(sidecar_path, "reference.docx")
+hash_path <- file.path(sidecar_path, "reference.docx.hash")
+
+# Fingerprint everything that influences reference.docx.
+#
+# The hash covers, in this order: the text of every CSS file, the docstyle
+# config entries page/header/footer/sections/toc/base-doc (as JSON), and the
+# installed docstyle version ("" when the package is not installed).
+#
+# @param css_paths Character vector of CSS file paths.
+# @param docstyle_config The `docstyle:` section of _quarto.yml.
+# @return SHA-256 digest string.
+compute_input_hash <- function(css_paths, docstyle_config) {
+  slurp <- function(p) paste(readLines(p, warn = FALSE), collapse = "\n")
+  css_content <- paste(
+    vapply(css_paths, slurp, character(1)),
+    collapse = "\n---CSS-SEPARATOR---\n"
+  )
+
+  # Only these entries affect the generated document
+  # (page includes the line-numbers sub-config)
+  relevant_config <- list(
+    page = docstyle_config$page,
+    header = docstyle_config$header,
+    footer = docstyle_config$footer,
+    sections = docstyle_config$sections,
+    toc = docstyle_config$toc,
+    `base-doc` = docstyle_config$`base-doc`
+  )
+  config_json <- jsonlite::toJSON(relevant_config, auto_unbox = TRUE)
+
+  # A new package version may ship a new template, so it is part of the key
+  template_version <- if (requireNamespace("docstyle", quietly = TRUE)) {
+    as.character(utils::packageVersion("docstyle"))
+  } else {
+    ""
+  }
+
+  combined <- paste(
+    css_content, "\n---\n", config_json,
+    "\n---TEMPLATE---\n", template_version,
+    sep = ""
+  )
+  digest::digest(combined, algo = "sha256")
+}
+
+# Check if regeneration is needed: skip when the stored hash equals the hash
+# of the current inputs and the reference document is still on disk.
+current_hash <- compute_input_hash(css_paths, config$docstyle)
+cached_hash <- ""
+if (file.exists(hash_path)) {
+  # An empty or unreadable hash file yields zero lines; treat that as "no
+  # cached hash" rather than letting a zero-length value reach `if`.
+  hash_lines <- tryCatch(
+    readLines(hash_path, n = 1, warn = FALSE),
+    error = function(e) character(0)
+  )
+  if (length(hash_lines) > 0) {
+    cached_hash <- trimws(hash_lines[[1]])
+  }
+}
+
+# identical() always returns a single TRUE/FALSE, unlike `==`
+if (identical(current_hash, cached_hash) && file.exists(reference_path)) {
+  message("[generate-reference] Reference doc up to date (hash match)")
+  quit(save = "no", status = 0)
+}
+
+message("[generate-reference] Generating reference.docx from CSS...")
+
+# Make the docstyle namespace available: use the installed package if there is
+# one, otherwise look for a development checkout in the project directory or
+# up to four levels above it and load it with devtools.
+docstyle_loaded <- requireNamespace("docstyle", quietly = TRUE)
+
+if (!docstyle_loaded && requireNamespace("devtools", quietly = TRUE)) {
+  # project_dir followed by its first four ancestors (duplicates collapse once
+  # the filesystem root is reached)
+  search_dirs <- unique(Reduce(
+    function(d, step) dirname(d),
+    seq_len(4),
+    init = project_dir,
+    accumulate = TRUE
+  ))
+
+  # TRUE when `dir` holds a DESCRIPTION declaring exactly "Package: docstyle".
+  # All lines are scanned (the field need not be first), the match is anchored
+  # so "docstyleFoo" is rejected, and an empty or unreadable file is FALSE
+  # instead of a zero-length `if` condition.
+  is_docstyle_source <- function(dir) {
+    desc_path <- file.path(dir, "DESCRIPTION")
+    if (!file.exists(desc_path)) {
+      return(FALSE)
+    }
+    desc_lines <- tryCatch(
+      readLines(desc_path, warn = FALSE),
+      error = function(e) character(0)
+    )
+    any(grepl("^Package:\\s*docstyle\\s*$", desc_lines))
+  }
+
+  for (candidate in search_dirs) {
+    if (!is_docstyle_source(candidate)) next
+    docstyle_loaded <- tryCatch({
+      devtools::load_all(candidate, quiet = TRUE)
+      TRUE
+    }, error = function(e) {
+      # Report why this checkout was unusable, then keep searching
+      message("[generate-reference] Could not load docstyle from ", candidate,
+              ": ", conditionMessage(e))
+      FALSE
+    })
+    if (docstyle_loaded) break
+  }
+}
+
+if (!docstyle_loaded) {
+  message("[generate-reference] docstyle package not found, skipping reference generation")
+  quit(save = "no", status = 0)
+}
+
+# Build reference.docx and record the input hash next to it. The hash is only
+# written after generation succeeded; any error ends the hook with status 1.
+report_generation_failure <- function(e) {
+  message("[generate-reference] Error generating reference.docx: ", e$message)
+  quit(save = "no", status = 1)
+}
+
+tryCatch(
+  {
+    docstyle::generate_reference_doc(
+      config_path = quarto_yml,
+      output_path = reference_path
+    )
+
+    writeLines(current_hash, hash_path)
+
+    message("[generate-reference] Generated: ", reference_path)
+    message("[generate-reference] Hash: ", substring(current_hash, 1, 12), "...")
+  },
+  error = report_generation_failure
+)
+
+# Write page-config.json for the Lua filters and the R finisher.
+# Starts from the "page" attribute of the parsed CSS and adds header/footer
+# settings, per-section style overrides (each with a pre-computed rPr_xml) and
+# the table styles found in the CSS. Failure here is reported but not fatal.
+tryCatch({
+  page_config_path <- file.path(sidecar_path, "page-config.json")
+
+  css_styles <- docstyle::read_css(css_paths)
+  page_config <- attr(css_styles, "page") %||% list()
+
+  # rPr XML for the CSS rule ".<style_name>", or "" when there is no such rule
+  resolve_rPr <- function(style_name) {
+    if (is.null(style_name) || is.null(css_styles)) return("")
+    rule <- css_styles[[paste0(".", style_name)]]
+    if (is.null(rule)) return("")
+    docstyle::build_rPr_xml(docstyle::css_to_rPr(rule))
+  }
+
+  # Export shape shared by header and footer; `content` is accepted as a
+  # fallback for `center`
+  export_band <- function(band) {
+    list(
+      enabled = TRUE,
+      first_page = band$`first-page` %||% TRUE,
+      style = band$style %||% NULL,
+      rPr_xml = resolve_rPr(band$style),
+      left = band$left %||% "",
+      center = band$center %||% band$content %||% "",
+      right = band$right %||% ""
+    )
+  }
+
+  ds <- config$docstyle
+  if (!is.null(ds$footer) && isTRUE(ds$footer$enabled)) {
+    page_config$footer <- export_band(ds$footer)
+  }
+  if (!is.null(ds$header) && isTRUE(ds$header$enabled)) {
+    page_config$header <- export_band(ds$header)
+  }
+
+  # Per-section header/footer style overrides
+  if (!is.null(ds$sections)) {
+    section_exports <- list()
+    for (sec_name in names(ds$sections)) {
+      sec <- ds$sections[[sec_name]]
+      footer_style <- sec$`footer-style`
+      header_style <- sec$`header-style`
+      section_exports[[sec_name]] <- list(
+        footer_style = footer_style %||% NULL,
+        footer_rPr_xml = resolve_rPr(footer_style),
+        header_style = header_style %||% NULL,
+        header_rPr_xml = resolve_rPr(header_style)
+      )
+    }
+    page_config$sections <- section_exports
+  }
+
+  # Table styles declared in the CSS (e.g., .table-formal, .table-grid)
+  table_styles <- docstyle::extract_table_styles(css_styles)
+  if (length(table_styles) > 0) {
+    page_config$table_styles <- table_styles
+    message("[generate-reference] Table styles: ", paste(names(table_styles), collapse = ", "))
+  }
+
+  jsonlite::write_json(
+    page_config,
+    page_config_path,
+    auto_unbox = TRUE,
+    pretty = TRUE
+  )
+
+  # Tell the user which named page styles are available
+  if (!is.null(page_config$named)) {
+    named_styles <- names(page_config$named)
+    if (length(named_styles) > 0) {
+      message("[generate-reference] Named page styles: ", paste(named_styles, collapse = ", "))
+    }
+  }
+}, error = function(e) {
+  # Non-fatal: page-config.json is optional
+  message("[generate-reference] Note: Could not generate page-config.json: ", e$message)
+})
diff --git a/docs/_extensions/docstyle/list-style.lua b/docs/_extensions/docstyle/list-style.lua
new file mode 100644
index 0000000..41838f7
--- /dev/null
+++ b/docs/_extensions/docstyle/list-style.lua
@@ -0,0 +1,166 @@
+-- list-style.lua
+-- Pandoc Lua filter for CSS-defined list styles in Word output
+--
+-- Approach: AST rewriting + ADDIN DOCSTYLE field code markers
+-- 1. Converts BulletList → OrderedList with correct ListNumberStyle
+-- (Pandoc's docx writer generates proper numbering.xml definitions)
+-- 2. Wraps styled lists in ADDIN DOCSTYLE field codes
+-- (harvest detects field codes to recover CSS class on round-trip)
+--
+-- Usage in QMD:
+-- ::: {.list-alpha}
+-- - First item (renders as a.)
+-- - Second item (renders as b.)
+-- :::
+--
+-- Supported list classes:
+-- .list-bullet - Bullet list (explicit)
+-- .list-decimal - Numbered 1. 2. 3. at all levels
+-- .list-alpha - Lettered a. b. c. at all levels
+-- .list-roman - Roman i. ii. iii. at all levels
+-- .list-formal - Hierarchical: 1. / a. / i. per level
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+-- Diagnostics are opt-in: set DOCSTYLE_DEBUG=1 to get "[list-style] ..."
+-- lines on stderr.
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write("[list-style] " .. msg .. "\n")
+end
+
+-- File-local view of pandoc's global FORMAT: Word output ("docx", or
+-- "openxml") is canonicalised to "openxml"; every other writer name is kept.
+-- Hard-coding "openxml" here would make the FORMAT guard in Div() unreachable,
+-- so list divs would also be rewritten and unwrapped for HTML/PDF output.
+local FORMAT = (function(writer)
+  writer = tostring(writer or "")
+  -- Prefix match tolerates extension suffixes such as "docx+native_numbering".
+  if writer:match("^docx") or writer:match("^openxml") then
+    return "openxml"
+  end
+  return writer
+end)(FORMAT)
+
+-- Map CSS class → Pandoc ListNumberStyle per indent level (0, 1, 2)
+-- Pandoc styles: DefaultStyle, Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman
+
+-- Same number style at every indent level.
+local function uniform_levels(number_style)
+  return { [0] = number_style, [1] = number_style, [2] = number_style }
+end
+
+local list_styles = {
+  -- "list-bullet" deliberately has no entry: such lists stay BulletLists
+  ["list-decimal"] = uniform_levels("Decimal"),
+  ["list-alpha"] = uniform_levels("LowerAlpha"),
+  ["list-roman"] = uniform_levels("LowerRoman"),
+  ["list-formal"] = {
+    [0] = "Decimal",
+    [1] = "LowerAlpha",
+    [2] = "LowerRoman",
+  },
+}
+
+-- Pick the list style class from a div's class list.
+-- A class with a numbering definition in list_styles wins regardless of its
+-- position; "list-bullet" is only reported when no such class is present.
+-- @return the class name, or nil when the div carries no list style class
+local function find_list_style(classes)
+  local function first_where(accepts)
+    for _, name in ipairs(classes) do
+      if accepts(name) then
+        return name
+      end
+    end
+    return nil
+  end
+
+  return first_where(function(name) return list_styles[name] ~= nil end)
+    or first_where(function(name) return name == "list-bullet" end)
+end
+
+-- Convert a BulletList or OrderedList to an OrderedList with the numbering
+-- style that `style_name` defines for the current nesting level.
+-- Nested lists are converted recursively with level + 1; levels without an
+-- explicit style fall back to the level-0 style.
+-- @param block: BulletList or OrderedList block
+-- @param style_name: key into list_styles; a name without a definition
+--                    (e.g. "list-bullet") leaves the block unchanged
+-- @param level: nesting depth, 0 for the outermost list (default 0)
+-- @param div_start: optional start value from the div attribute
+--                   (applied at level 0 only)
+-- @return the converted OrderedList, or `block` itself when unchanged
+local function convert_list(block, style_name, level, div_start)
+  level = level or 0
+  local style_def = list_styles[style_name]
+
+  -- list-bullet: keep as-is
+  if not style_def then
+    return block
+  end
+
+  local pandoc_style = style_def[level] or style_def[0]
+
+  -- Process items, converting nested lists (nested lists don't inherit div_start)
+  local new_items = {}
+  for _, item in ipairs(block.content) do
+    local new_blocks = {}
+    for _, b in ipairs(item) do
+      if b.t == "BulletList" or b.t == "OrderedList" then
+        table.insert(new_blocks, convert_list(b, style_name, level + 1, nil))
+      else
+        table.insert(new_blocks, b)
+      end
+    end
+    table.insert(new_items, new_blocks)
+  end
+
+  -- Determine start number: div_start at level 0, then block's own start, then 1
+  local start_num = 1
+  if div_start and level == 0 then
+    start_num = div_start
+  elseif block.t == "OrderedList" then
+    local list_attrs = block.listAttributes
+    -- `.start` is the documented field of ListAttributes. Positional access
+    -- is kept only as a fallback for ListAttributes values that are plain
+    -- tables; relying on it alone can yield nil and silently reset a list
+    -- that was meant to continue at, say, 5 back to 1.
+    start_num = (list_attrs and (list_attrs.start or list_attrs[1])) or 1
+  end
+
+  return pandoc.OrderedList(new_items, pandoc.ListAttributes(start_num, pandoc_style, "Period"))
+end
+
+-- Rewrite the lists inside a list-style div and wrap them in field-code
+-- markers. Returns nil (leave the div alone) when the output format is not
+-- "openxml", when the div has no list style class, or when it contains no
+-- top-level list.
+function Div(div)
+  if FORMAT ~= "openxml" then
+    return nil
+  end
+
+  local style_name = find_list_style(div.classes)
+  if not style_name then
+    return nil
+  end
+
+  -- Optional start attribute for list continuation (nil when absent or not
+  -- numeric)
+  local div_start = tonumber(div.attributes.start)
+
+  debug("Found ." .. style_name .. " div" ..
+    (div_start and (" start=" .. div_start) or ""))
+
+  -- Convert the top-level lists; every other block is carried over unchanged
+  local body = {}
+  local found_list = false
+  for _, blk in ipairs(div.content) do
+    local is_list = blk.t == "BulletList" or blk.t == "OrderedList"
+    if is_list then
+      found_list = true
+      body[#body + 1] = convert_list(blk, style_name, 0, div_start)
+    else
+      body[#body + 1] = blk
+    end
+  end
+
+  if not found_list then
+    debug("No lists found in ." .. style_name .. " div")
+    return nil
+  end
+
+  -- opening marker | converted blocks | closing marker
+  local wrapped = {
+    pandoc.RawBlock("openxml", fcu.build_list_field_start(style_name, div_start))
+  }
+  for _, blk in ipairs(body) do
+    wrapped[#wrapped + 1] = blk
+  end
+  wrapped[#wrapped + 1] = pandoc.RawBlock("openxml", fcu.build_block_field_end())
+
+  debug("Converted lists in ." .. style_name .. " div (field code marker added)")
+
+  return wrapped
+end
+
+-- Document-level hook: canonicalise a Word writer name ("docx" or "openxml")
+-- to "openxml". The document itself is never modified (returns nil).
+function Pandoc(doc)
+  local is_word_output = (FORMAT == "docx") or (FORMAT == "openxml")
+  if is_word_output then
+    FORMAT = "openxml"
+  end
+  return nil
+end
+
+return {
+ { Pandoc = Pandoc },
+ { Div = Div }
+}
diff --git a/docs/_extensions/docstyle/page-section.lua b/docs/_extensions/docstyle/page-section.lua
new file mode 100644
index 0000000..03d85b0
--- /dev/null
+++ b/docs/_extensions/docstyle/page-section.lua
@@ -0,0 +1,643 @@
+-- page-section.lua
+-- Pandoc Lua filter that injects Word section breaks for named page styles
+--
+-- Usage in QMD (use .section-* class prefix):
+-- ::: {.section-landscape}
+-- | Wide | Table | Here |
+-- :::
+--
+-- ::: {.section-body page-break="true" line-numbers="continuous"}
+-- :::
+--
+-- Attributes:
+-- page-break="true" Start section on new page (default: false)
+-- line-numbers="page" Line numbers restart each page
+-- line-numbers="section" Line numbers restart each section
+-- line-numbers="continuous" Line numbers never restart
+-- line-numbers="false" Disable line numbers for this section
+--
+-- CSS configuration (optional - attributes can override):
+-- @page landscape {
+-- size: letter landscape;
+-- margin: 0.5in;
+-- }
+-- @page body {
+-- --docstyle-line-numbers: every 1;
+-- --docstyle-line-numbers-restart: page;
+-- }
+--
+-- The filter reads named page rules from a JSON file generated by the pre-render
+-- hook. If no @page rule exists for a section name, the filter falls back to
+-- default page properties (allowing sections to work without explicit CSS).
+--
+-- Section breaks and Pandoc's docx writer
+-- ----------------------------------------
+-- Word's section model: sectPr defines properties for the section that ENDS at
+-- that point. The document's final sectPr (in w:body) defines the last section.
+-- Pandoc's docx writer always emits this final sectPr from the reference document.
+--
+-- This filter inserts mid-document sectPr elements to create section breaks. The
+-- final section's properties (including line numbers) come from the reference
+-- document, generated by generate-reference.R with settings from page-config.json.
+--
+-- IMPORTANT: Page breaks use an explicit page-break paragraph (w:br type="page") followed
+-- by a continuous sectPr, NOT the nextPage section type. The nextPage type alone
+-- is unreliable when Pandoc's docx writer inserts bookmark elements around the
+-- break. The explicit break + continuous sectPr pattern matches how Word itself
+-- creates page breaks in natively-authored documents.
+--
+-- IMPORTANT: Do NOT insert an additional sectPr at document end (in the Pandoc()
+-- function). For empty marker divs this creates three sections instead of two,
+-- and Word silently drops the earlier page break. Line numbers for the final
+-- section must be configured in the reference document, not via an extra sectPr.
+--
+-- Wrapping divs (non-empty) DO insert a closing sectPr after their content.
+-- This creates three sections (before | wrapped content | after), which is the
+-- correct structure for scoping line numbers to the wrapped content only.
+--
+-- Round-trip support
+-- ------------------
+-- Section divs are wrapped in ADDIN DOCSTYLE field codes for harvest round-trip.
+-- The JSON payload preserves the class name, page-break, and line-numbers
+-- attributes so they can be reconstructed during docx_to_qmd() harvest.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+-- Debug logging (set DOCSTYLE_DEBUG=1 to enable)
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+-- Write a "[page-section]"-tagged line to stderr; silent unless DEBUG is set.
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write("[page-section] " .. msg .. "\n")
+end
+
+local FORMAT = "openxml"
+
+-- Recursively duplicate a table so that nested values (such as a line-numbers
+-- sub-table) are not shared between the copy and the source.
+-- Non-table values are returned as they are. Keys are reused, not copied.
+local function deep_copy(t)
+  if type(t) == "table" then
+    local clone = {}
+    for key, value in pairs(t) do
+      clone[key] = deep_copy(value)
+    end
+    return clone
+  end
+  return t
+end
+
+-- Page configuration from JSON
+local page_config = nil
+local default_page = nil
+local final_section_style = nil -- Track the last .section-* style for final sectPr
+local current_section_props = nil -- Track current section for correct sectPr emission
+
+-- Unit conversion: CSS length to twips (1/20th of a point).
+-- 1 inch = 1440 twips, 1 pt = 20 twips, 1 cm = 567 twips, 1 mm = 56.7 twips,
+-- 1 px = 15 twips (CSS standard: 96px = 1in).
+--
+-- value: a string such as "0.5in", "12pt", "2cm", "10mm", "96px", or a bare
+--        number (string or numeric), which is interpreted as inches.
+-- Returns an integer number of twips, or nil when value is nil/false, has an
+-- unknown unit, or cannot be parsed.
+--
+-- The result is rounded to the NEAREST twip. Truncating instead loses a twip
+-- whenever the binary floating-point product lands just below an integer
+-- (e.g. 0.7 * 1440 evaluates to 1007.9999999999999).
+local function css_to_twips(value)
+  if not value then return nil end
+
+  local twips_per_unit = {
+    ["in"] = 1440,
+    pt = 20,
+    cm = 567,
+    mm = 56.7,
+    px = 15
+  }
+
+  local function to_twips(amount, factor)
+    return math.floor(amount * factor + 0.5)
+  end
+
+  -- Numeric input (e.g. straight from decoded JSON): treat as inches
+  if type(value) == "number" then
+    return to_twips(value, twips_per_unit["in"])
+  end
+  if type(value) ~= "string" then
+    return nil
+  end
+
+  -- Tolerate surrounding whitespace
+  local text = value:match("^%s*(.-)%s*$")
+
+  local num, unit = string.match(text, "^([%d%.]+)(%a+)$")
+  if not num then
+    -- Try just a number (assume inches)
+    num = tonumber(text)
+    if num then return to_twips(num, twips_per_unit["in"]) end
+    return nil
+  end
+
+  num = tonumber(num)
+  if not num then return nil end
+
+  local factor = twips_per_unit[string.lower(unit)]
+  if not factor then return nil end
+
+  return to_twips(num, factor)
+end
+
+-- Look up page width and height in twips for a named paper size.
+-- size:        "letter", "a4" or "legal"; any other value falls back to letter
+-- orientation: "landscape" swaps width and height; anything else is portrait
+-- Returns two values: width, height.
+local function get_page_size(size, orientation)
+  -- Portrait dimensions in twips
+  local portrait_dims = {
+    letter = { w = 12240, h = 15840 },  -- 8.5" x 11"
+    a4     = { w = 11906, h = 16838 },  -- 210mm x 297mm
+    legal  = { w = 12240, h = 20160 }   -- 8.5" x 14"
+  }
+
+  local chosen = portrait_dims[size]
+  if chosen == nil then
+    chosen = portrait_dims.letter
+  end
+
+  local width, height = chosen.w, chosen.h
+  if orientation == "landscape" then
+    width, height = height, width
+  end
+  return width, height
+end
+
+-- Build Word section properties XML
+--
+-- page_props: table read for `size`, `orientation`, `margins`
+--             (top/bottom/left/right/gutter, CSS lengths) and `line-numbers`
+--             (enabled, count-by, restart, distance).
+-- sect_type:  section break type; defaults to "nextPage".
+-- Returns the collected fragments joined into one string.
+--
+-- NOTE(review): every string literal holding XML markup in this function is
+-- empty in this copy of the file (the format strings contain no markup and no
+-- format specifiers), so as written the function concatenates empty strings
+-- and `sect_type` is never used after defaulting. Presumably the markup was
+-- stripped in transit -- confirm against the original source before editing.
+local function build_sect_pr(page_props, sect_type)
+ sect_type = sect_type or "nextPage" -- nextPage, continuous, evenPage, oddPage
+
+ local size = page_props.size or "letter"
+ local orientation = page_props.orientation or "portrait"
+ local margins = page_props.margins or {}
+
+ -- Width/height already swapped for landscape by get_page_size
+ local w, h = get_page_size(size, orientation)
+
+ -- Default margins (1 inch = 1440 twips)
+ local margin_top = css_to_twips(margins.top) or 1440
+ local margin_bottom = css_to_twips(margins.bottom) or 1440
+ local margin_left = css_to_twips(margins.left) or 1440
+ local margin_right = css_to_twips(margins.right) or 1440
+ local gutter = css_to_twips(margins.gutter) or 0
+
+ -- Build the sectPr XML
+ local xml_parts = {
+ ''
+ }
+
+ -- Section type
+ table.insert(xml_parts, '')
+
+ -- Page size with orientation
+ local orient_attr = ""
+ if orientation == "landscape" then
+ orient_attr = ' w:orient="landscape"'
+ end
+ table.insert(xml_parts, string.format(
+ '',
+ w, h, orient_attr
+ ))
+
+ -- Page margins (arguments are passed in the order top, right, bottom, left, gutter)
+ table.insert(xml_parts, string.format(
+ '',
+ margin_top, margin_right, margin_bottom, margin_left, gutter
+ ))
+
+ -- Line numbers if configured
+ if page_props["line-numbers"] and page_props["line-numbers"].enabled then
+ local ln = page_props["line-numbers"]
+ local count_by = ln["count-by"] or 1
+ local restart = ln.restart or "newPage"
+ local distance = css_to_twips(ln.distance) or 360 -- Default 0.25in
+
+ -- Logged before mapping, so this shows the configured value, not the Word value
+ debug(" build_sect_pr: adding line numbers with restart=" .. tostring(restart))
+
+ -- Map restart values to Word
+ local restart_map = {
+ page = "newPage",
+ section = "newSection",
+ continuous = "continuous"
+ }
+ -- Any value that is not a key of restart_map (including the "newPage"
+ -- default above) resolves to "newPage"
+ restart = restart_map[restart] or "newPage"
+
+ table.insert(xml_parts, string.format(
+ '',
+ count_by, restart, distance
+ ))
+ else
+ debug(" build_sect_pr: no line numbers (page_props['line-numbers']=" ..
+ tostring(page_props["line-numbers"]) .. ")")
+ end
+
+ table.insert(xml_parts, '')
+
+ return table.concat(xml_parts)
+end
+
+-- Build section break paragraphs as a list of XML strings.
+-- Returns a table of XML strings, each to be emitted as a separate RawBlock:
+-- two entries when sect_type is "nextPage", one entry otherwise.
+--
+-- IMPORTANT: Each XML string must be a separate RawBlock. When concatenated
+-- into a single RawBlock, Pandoc's docx writer may not process the page break
+-- correctly (the break is silently dropped). Separate RawBlocks match how
+-- raw openxml blocks in markdown are processed.
+--
+-- For page breaks: Word reliably renders an explicit page-break paragraph
+-- followed by a continuous sectPr. The nextPage sect type alone is unreliable
+-- in some contexts (e.g., when Pandoc bookmark elements surround the break).
+-- This matches the pattern Word itself uses in natively-authored documents.
+--
+-- NOTE(review): the paragraph markup literals below are empty in this copy of
+-- the file; presumably stripped in transit -- confirm against the original.
+local function build_section_break_paras(page_props, sect_type)
+ -- The sectPr itself is always built as "continuous", whatever sect_type is
+ local sect_pr = build_sect_pr(page_props, "continuous")
+ if sect_type == "nextPage" then
+ -- Two separate paragraphs: page break + continuous sectPr
+ return {
+ '',
+ '' .. sect_pr .. ''
+ }
+ else
+ return {
+ '' .. sect_pr .. ''
+ }
+ end
+end
+
+-- Read page configuration from JSON file (delegates to shared loader)
+-- Returns whatever fcu.load_page_config() returns; Meta() treats a falsy
+-- result as "no configuration available".
+local function load_page_config()
+ return fcu.load_page_config()
+end
+
+-- Meta handler: establish the page configuration used by the later handlers.
+-- Sets the module-level page_config, default_page and current_section_props.
+-- default_page is taken from, in order of preference:
+--   1. document metadata (docstyle.page) - inline config, useful for testing
+--   2. the loaded page config
+--   3. built-in letter/portrait values with 1in margins
+-- The metadata itself is left unchanged (returns nil).
+function Meta(meta)
+  -- Text of a metadata field, or `fallback` when the field is absent
+  local function meta_text(field, fallback)
+    if field then
+      return pandoc.utils.stringify(field)
+    end
+    return fallback
+  end
+
+  -- Start from the shared page-config loader
+  page_config = load_page_config()
+
+  -- Inline page settings in the document metadata
+  local inline_page = meta.docstyle and meta.docstyle.page
+  if inline_page then
+    if not page_config then
+      page_config = { named = {} }
+    end
+
+    local inline_margins = {}
+    if inline_page.margins then
+      for side, length in pairs(inline_page.margins) do
+        inline_margins[side] = pandoc.utils.stringify(length)
+      end
+    end
+
+    default_page = {
+      size = meta_text(inline_page.size, "letter"),
+      orientation = meta_text(inline_page.orientation, "portrait"),
+      margins = inline_margins
+    }
+  end
+
+  if not default_page then
+    if page_config then
+      -- Defaults from the loaded config
+      default_page = {
+        size = page_config.size or "letter",
+        orientation = page_config.orientation or "portrait",
+        margins = page_config.margins or {},
+        ["line-numbers"] = page_config["line-numbers"]
+      }
+    else
+      -- Nothing configured anywhere: built-in defaults
+      default_page = {
+        size = "letter",
+        orientation = "portrait",
+        margins = { top = "1in", bottom = "1in", left = "1in", right = "1in" }
+      }
+    end
+  end
+
+  current_section_props = deep_copy(default_page)
+
+  local size_label = default_page.size or "letter"
+  local orientation_label = default_page.orientation or "portrait"
+  debug("Default page: " .. size_label .. " " .. orientation_label)
+
+  local named_styles = page_config and page_config.named
+  if named_styles then
+    for style_name in pairs(named_styles) do
+      debug("Named page style available: " .. style_name)
+    end
+  end
+
+  return nil
+end
+
+-- Process Div elements looking for .section-* classes
+--
+-- Word Section Model:
+-- - A section runs from one section break to the next (or document end)
+-- - Section properties are defined by the sectPr that ENDS the section
+-- - The document's final sectPr defines properties for the LAST section
+--
+-- Usage (empty div as marker - recommended):
+-- ::: section-body
+-- :::
+--
+-- # 1. Introduction
+-- Content here is in the body section...
+--
+-- Usage (div wrapping content - also supported):
+-- ::: {.section-body}
+-- # 1. Introduction
+-- Content here is in the body section...
+-- :::
+--
+-- The section marker inserts a section break that ENDS the previous section.
+-- Everything after the marker is in the NEW section until the next marker or document end.
+--
+-- Add page-break="true" to start the new section on a new page:
+-- ::: {.section-body page-break="true"}
+-- :::
+
+-- Div handler for section markers and standalone page breaks.
+--
+-- Returns nil (div unchanged) when the output is not openxml or the div has
+-- neither a section-* class nor the page-break class. Otherwise returns a
+-- list of blocks that replaces the div:
+--   .page-break  -> one raw openxml block followed by the div's content
+--   .section-X   -> optional page-break block, an opening marker paragraph,
+--                   and, for non-empty divs, the content plus a closing
+--                   marker paragraph
+--
+-- Attributes read: line-numbers, page-break, page-break-after. All other
+-- attributes are copied into the field-code payload.
+--
+-- NOTE(review): the raw openxml string literals passed to pandoc.RawBlock for
+-- page breaks are empty in this copy of the file; presumably the markup was
+-- stripped in transit -- confirm against the original source.
+function Div(div)
+ -- Only process for docx output
+ if FORMAT ~= "openxml" then
+ return nil
+ end
+
+ -- Look for section-* classes (the first matching class wins)
+ local section_style = nil
+ for _, class in ipairs(div.classes) do
+ local name = string.match(class, "^section%-(.+)$")
+ if name then
+ section_style = name
+ break
+ end
+ end
+
+ -- Handle standalone page break: ::: {.page-break} :::
+ -- Emits an explicit page-break block as raw openxml, followed by the div's
+ -- own content.
+ -- Use this instead of \newpage which Pandoc drops near headings/bookmarks.
+ -- Harvest detects the page break and restores ::: {.page-break} :::
+ if not section_style then
+ for _, class in ipairs(div.classes) do
+ if class == "page-break" then
+ debug("Found .page-break div")
+ local blocks = {}
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ ''))
+ -- Pass through any content inside the div
+ for _, block in ipairs(div.content) do
+ table.insert(blocks, block)
+ end
+ return blocks
+ end
+ end
+ return nil
+ end
+
+ debug("Found .section-" .. section_style .. " div")
+
+ -- Get the named page properties for the NEW section
+ -- IMPORTANT: Use deep_copy to avoid aliasing issues when multiple markers
+ -- use the same named style. Without copy, modifying new_section_props
+ -- (e.g., setting line-numbers) would corrupt the shared config table.
+ --
+ -- NOTE(review): within this function new_section_props is built and its
+ -- line-numbers entry is set, but it is not read again afterwards -- the
+ -- markers below are built from section_style, sect_type, ln_attr and
+ -- div.attributes only. Confirm whether it is still needed.
+ local new_section_props = nil
+ if page_config and page_config.named and page_config.named[section_style] then
+ new_section_props = deep_copy(page_config.named[section_style])
+ debug(" Using @page " .. section_style .. " properties")
+ else
+ -- No named @page rule found - fall back to default with style-specific inference
+ debug(" No @page " .. section_style .. " rule found, using defaults")
+
+ if section_style == "landscape" then
+ -- Landscape: swap orientation
+ new_section_props = {
+ size = default_page.size or "letter",
+ orientation = "landscape",
+ margins = default_page.margins or {}
+ }
+ else
+ -- Other styles (body, appendix, etc.): inherit from default_page
+ -- This allows sections to work even without explicit @page rules
+ new_section_props = {
+ size = default_page.size or "letter",
+ orientation = default_page.orientation or "portrait",
+ margins = default_page.margins or {},
+ ["line-numbers"] = default_page["line-numbers"]
+ }
+ end
+ end
+
+ -- Line numbers: opt-in only via div attribute.
+ -- Default is NO line numbers. The @page CSS config provides page geometry
+ -- (size, margins, orientation) but line numbers require explicit opt-in:
+ -- ::: {.section-body line-numbers="continuous"}
+ -- This matches user expectation: "open it, start writing" = no line numbers.
+ -- Supports: line-numbers="true", "false", "page", "section", "continuous"
+ local ln_attr = div.attributes["line-numbers"]
+ if ln_attr then
+ ln_attr = pandoc.utils.stringify(ln_attr)
+ end
+
+ -- Clear any inherited line numbers from CSS config; only div attribute controls
+ new_section_props["line-numbers"] = nil
+
+ -- "false" and "none" both mean off; a value outside the supported list
+ -- enters this branch but sets nothing on new_section_props
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ if ln_attr == "true" or ln_attr == "page" then
+ new_section_props["line-numbers"] = {
+ enabled = true,
+ ["count-by"] = 1,
+ restart = "page"
+ }
+ debug(" Line numbers enabled (restart per page) via attribute")
+ elseif ln_attr == "section" then
+ new_section_props["line-numbers"] = {
+ enabled = true,
+ ["count-by"] = 1,
+ restart = "section"
+ }
+ debug(" Line numbers enabled (restart per section) via attribute")
+ elseif ln_attr == "continuous" then
+ new_section_props["line-numbers"] = {
+ enabled = true,
+ ["count-by"] = 1,
+ restart = "continuous"
+ }
+ debug(" Line numbers enabled (continuous) via attribute")
+ end
+ else
+ debug(" Line numbers: off (default, no attribute)")
+ end
+
+ -- In Word, sectPr defines properties for the section that ENDS at that point.
+ -- To start a new section:
+ -- 1. Insert sectPr with PREVIOUS section's properties (ends the previous section)
+ -- 2. Content after that sectPr is in the new section
+ -- 3. New section's properties come from the NEXT sectPr (or document's final sectPr)
+
+ -- Check for page-break attribute: ::: {.section-body page-break="true"}
+ local sect_type = "continuous"
+ if pandoc.utils.stringify(div.attributes["page-break"] or "") == "true" then
+ sect_type = "nextPage"
+ debug(" Using nextPage section break")
+ end
+
+ -- Build result blocks
+ local blocks = {}
+
+ -- For page breaks: emit the explicit page-break block BEFORE the field code
+ -- and sectPr. This prevents the page break from being sandwiched between two
+ -- consecutive sectPr elements (e.g., a closing sectPr from a previous wrapping
+ -- div and this opening sectPr), which causes Word to silently drop the break.
+ if sect_type == "nextPage" then
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ ''))
+ debug(" Page break emitted before section break")
+ end
+
+ -- === R-FIRST ASSEMBLY (v2) ===
+ -- Instead of emitting complex sectPr XML that Pandoc wraps in container
+ -- paragraphs (causing the 3-line gap), emit a simple text marker that
+ -- the R finisher will find and process.
+ --
+ -- The R finisher will:
+ -- 1. Find this marker
+ -- 2. Build sectPr using page-config.json
+ -- 3. Attach sectPr to the PRECEDING paragraph
+ -- 4. Delete the marker paragraph (eliminates the gap)
+
+ -- Determine line-numbers for marker
+ local ln_for_marker = "none"
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ ln_for_marker = ln_attr
+ end
+
+ -- Build text marker: DOCSTYLE_SECTION::{class}::{page-break}::{line-numbers}
+ local marker_text = string.format("DOCSTYLE_SECTION::section-%s::%s::%s",
+ section_style,
+ sect_type == "nextPage" and "true" or "false",
+ ln_for_marker
+ )
+ debug(" Emitting marker: " .. marker_text)
+
+ -- Build attributes for field code
+ local field_attrs = {}
+ if sect_type == "nextPage" then
+ field_attrs["page-break"] = true
+ end
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ field_attrs["line-numbers"] = ln_attr
+ end
+ -- Pass through all remaining div attributes (footer-left, header-left,
+ -- page-start, etc.) so the R finisher can read them from the JSON payload
+ local skip_attrs = {["line-numbers"]=true, ["page-break"]=true, ["page-break-after"]=true}
+ for k, v in pairs(div.attributes) do
+ if not field_attrs[k] and not skip_attrs[k] then
+ field_attrs[k] = pandoc.utils.stringify(v)
+ end
+ end
+
+ -- Emit field code + marker in a SINGLE paragraph to prevent 3-line gap
+ -- Uses build_section_marker_para which combines BEGIN/instrText/SEPARATE/marker/END
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ fcu.build_section_marker_para("section-" .. section_style, field_attrs, marker_text)))
+
+ -- For wrapping divs (non-empty): emit content, then closing marker.
+ -- The opening marker already contains complete field code (BEGIN/SEPARATE/marker/END).
+ -- For empty marker divs: field code is already complete, nothing more needed.
+ if #div.content > 0 then
+ -- Div content
+ for _, block in ipairs(div.content) do
+ table.insert(blocks, block)
+ end
+
+ -- Closing marker: R finisher will attach sectPr to last content paragraph
+ local close_page_break = pandoc.utils.stringify(div.attributes["page-break-after"] or "") == "true"
+ local close_marker = string.format("DOCSTYLE_SECTION_END::section-%s::%s::%s",
+ section_style,
+ close_page_break and "true" or "false",
+ ln_for_marker
+ )
+ debug(" Emitting closing marker: " .. close_marker)
+
+ -- Page break after content if requested
+ if close_page_break then
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ ''))
+ end
+
+ -- Closing marker in single paragraph with field code wrapper
+ -- Strip footer/header/page-start attributes from closing markers — the
+ -- finisher reads these from opening markers only (via payload shift)
+ local close_skip = {}
+ for k, v in pairs(skip_attrs) do close_skip[k] = v end
+ close_skip["footer-left"]=true; close_skip["footer-center"]=true; close_skip["footer-right"]=true
+ close_skip["header-left"]=true; close_skip["header-center"]=true; close_skip["header-right"]=true
+ close_skip["footer"]=true; close_skip["header"]=true
+ close_skip["page-start"]=true
+ local close_attrs = {}
+ if close_page_break then
+ close_attrs["page-break"] = true
+ end
+ if ln_attr and ln_attr ~= "false" and ln_attr ~= "none" then
+ close_attrs["line-numbers"] = ln_attr
+ end
+ for k, v in pairs(div.attributes) do
+ if not close_attrs[k] and not close_skip[k] then
+ close_attrs[k] = pandoc.utils.stringify(v)
+ end
+ end
+ table.insert(blocks, pandoc.RawBlock("openxml",
+ fcu.build_section_marker_para("section-" .. section_style .. "-end", close_attrs, close_marker)))
+
+ debug("Wrapping div: section '" .. section_style .. "' opened and closed")
+ else
+ -- Empty marker div: field code already complete in single paragraph
+ debug("Empty marker: section '" .. section_style .. "' started")
+ end
+
+ return blocks
+end
+
+-- Normalise the output format name and log the tracked final section style.
+-- The document is never modified here (always returns nil).
+--
+-- NOTE: no extra sectPr is appended at the end of the document for line
+-- numbers. An earlier version did that, which produced THREE sections:
+--   1. the mid-document sectPr from Div() - ends front matter
+--   2. the filter's own final sectPr      - ends body section
+--   3. Pandoc's body sectPr               - ends document
+-- With that layout Word silently ignored the explicit page break placed
+-- before sectPr #1.
+--
+-- Line numbers for the final section belong in the reference document
+-- (generate-reference.R sets them on Pandoc's final body sectPr). That keeps
+-- the document at two sections (front matter + body), keeps page breaks
+-- reliable, and still applies line numbers to body content.
+--
+-- See: R/page_layout.R apply_line_numbers() function
+function Pandoc(doc)
+  local targets_word = (FORMAT == "docx") or (FORMAT == "openxml")
+  if targets_word then
+    FORMAT = "openxml"
+  end
+
+  if final_section_style and FORMAT == "openxml" then
+    local ln_state = "no line numbers"
+    if final_section_style["line-numbers"] then
+      ln_state = "with line numbers"
+    end
+    debug("Final section style tracked: " .. ln_state)
+    debug("Line numbers should be configured in reference doc, not via extra sectPr")
+  end
+
+  return nil
+end
+
+-- Consume stray fence paragraphs that appear when users add standalone ":::"
+-- markers in QMD (which Pandoc parses as literal text, not fenced div closers).
+-- This ensures users only need to mark section STARTS - no closing markers needed.
+--
+-- A paragraph is removed only when it consists of exactly one Str whose text
+-- is three or more colons, since a div fence needs at least three. A two-colon
+-- paragraph ("::") is ordinary text and is kept.
+--
+-- Returns {} to delete the paragraph, nil to leave it unchanged.
+function Para(para)
+  -- Only process for docx output
+  if FORMAT ~= "openxml" then
+    return nil
+  end
+
+  local inlines = para.content
+  if #inlines == 1 then
+    local only = inlines[1]
+    if only.t == "Str" and only.text:match("^:::+$") then
+      debug("Consuming stray ':::' paragraph")
+      return {}  -- Remove the paragraph
+    end
+  end
+
+  return nil  -- Keep unchanged
+end
+
+-- Filter execution order:
+-- 1. Meta - load configuration
+-- 2. Div - process .section-* and .page-break divs
+-- 3. Para - consume stray ::: paragraphs
+-- 4. Pandoc - normalise FORMAT and log the tracked final section style
+--    (no sectPr is added at document end)
+return {
+ { Meta = Meta },
+ { Div = Div },
+ { Para = Para },
+ { Pandoc = Pandoc }
+}
diff --git a/docs/_extensions/docstyle/reference.docx b/docs/_extensions/docstyle/reference.docx
new file mode 100644
index 0000000..590ff7d
Binary files /dev/null and b/docs/_extensions/docstyle/reference.docx differ
diff --git a/docs/_extensions/docstyle/revisions-inject.lua b/docs/_extensions/docstyle/revisions-inject.lua
new file mode 100644
index 0000000..94e6f33
--- /dev/null
+++ b/docs/_extensions/docstyle/revisions-inject.lua
@@ -0,0 +1,315 @@
+-- revisions-inject.lua
+-- Pandoc Lua filter that converts revision spans to OpenXML track changes
+--
+-- Usage in QMD:
+-- Insertions: [inserted text]{.ins id="rev_101"}
+-- Deletions: [~~deleted text~~]{.del id="rev_102"}
+--
+-- The filter reads revision metadata from a sidecar JSON file (revisions.json)
+-- which contains author, date, and other metadata for each revision.
+--
+-- Metadata loading (in priority order):
+-- 1. -M revisions-file:path/to/revisions.json (explicit path)
+-- 2. Auto-detect _docstyle/revisions.json (convention-based)
+--
+-- Note: Deletions use strikethrough syntax with a .del class wrapper.
+-- This is achieved via a Span around the Strikeout:
+-- [~~deleted~~]{.del id="x"}
+
+-- Debug logging (set DOCSTYLE_DEBUG=1 to enable)
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+-- Write msg to stderr verbatim (callers supply their own prefix and newline);
+-- silent unless DEBUG is set.
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write(msg)
+end
+
+-- Metadata storage for revisions (loaded from revisions.json)
+local revisions_meta = {}
+local revisions_loaded = false
+
+-- Escape the five XML special characters (& < > " ') so that text can be
+-- placed safely inside element content or attribute values.
+-- text: value to escape; nil/false yields "", other non-strings are passed
+--       through tostring() first.
+-- Returns the escaped string.
+local function xml_escape(text)
+  if not text then return "" end
+
+  local entities = {
+    ["&"] = "&amp;",
+    ["<"] = "&lt;",
+    [">"] = "&gt;",
+    ['"'] = "&quot;",
+    ["'"] = "&apos;"
+  }
+
+  -- Single pass, so the "&" introduced by one entity is never escaped again.
+  -- Parentheses drop gsub's second return value (the substitution count).
+  return (tostring(text):gsub("[&<>\"']", entities))
+end
+
+-- Decide whether a text run needs whitespace preservation.
+-- Returns ' xml:space="preserve"' when text starts or ends with whitespace or
+-- contains two consecutive whitespace characters; otherwise "" (also for nil).
+local function get_space_attr(text)
+  if not text then
+    return ""
+  end
+
+  local significant = text:find("^%s") or text:find("%s$") or text:find("%s%s")
+  if significant then
+    return ' xml:space="preserve"'
+  end
+  return ""
+end
+
+-- Derive the numeric w:id string for a revision ID (Word requires numeric ids).
+--   "rev_9" -> "9"   (digits following "rev_")
+--   "42"    -> "42"  (already all digits)
+--   nil or anything else -> "0"
+local function get_numeric_id(id)
+  if not id then
+    return "0"
+  end
+
+  local digits = id:match("rev_(%d+)")
+  if not digits and id:match("^%d+$") then
+    digits = id
+  end
+  return digits or "0"
+end
+
+-- Look up the metadata record for a revision ID in revisions_meta.
+-- Unknown IDs get a fresh placeholder record (author "Unknown", fixed date).
+local function get_revision(id)
+  local known = revisions_meta[id]
+  if known then
+    return known
+  end
+
+  return {
+    author = "Unknown",
+    date = "2025-01-01T00:00:00Z"
+  }
+end
+
+-- Flatten a list of inlines into plain text while setting aside OpenXML
+-- RawInlines (e.g. comment markers) so they can be emitted outside a deletion.
+--
+-- Returns a table with:
+--   text       - concatenated text (Str text; Space/SoftBreak as " ";
+--                LineBreak as "\n")
+--   raw_before - openxml RawInlines met before any text was seen
+--   raw_after  - openxml RawInlines met after text had started
+-- Any other inline that has a `content` field (Strikeout, Emph, ...) is
+-- descended into; inlines without one contribute nothing.
+local function extract_deletion_content(inlines)
+  local pieces = {}
+  local leading_raw, trailing_raw = {}, {}
+  local text_started = false
+
+  -- Literal text contributed by each whitespace-like inline type
+  local whitespace_text = { Space = " ", SoftBreak = " ", LineBreak = "\n" }
+
+  local function emit(str)
+    pieces[#pieces + 1] = str
+    text_started = true
+  end
+
+  local function walk(nodes)
+    for _, node in ipairs(nodes) do
+      local tag = node.t
+      if tag == "Str" then
+        emit(node.text)
+      elseif whitespace_text[tag] then
+        emit(whitespace_text[tag])
+      elseif tag == "RawInline" and node.format == "openxml" then
+        -- Preserve OpenXML RawInlines (e.g., comment markers from comment-inject.lua)
+        local bucket = text_started and trailing_raw or leading_raw
+        bucket[#bucket + 1] = node
+      elseif tag == "Strikeout" or node.content then
+        walk(node.content)
+      end
+    end
+  end
+
+  walk(inlines)
+
+  return {
+    text = table.concat(pieces),
+    raw_before = leading_raw,
+    raw_after = trailing_raw
+  }
+end
+
+-- Legacy helper kept for backward compatibility: plain text of a list of
+-- inlines, i.e. the `text` field of extract_deletion_content().
+local function stringify_inlines(inlines)
+  return extract_deletion_content(inlines).text
+end
+
+-- Process Span elements with .ins class (insertions) or .del class (deletions)
+--
+-- Returns nil (span unchanged) for non-Word output or when the span has
+-- neither class. For .ins: a list of inlines made of a raw openxml opener,
+-- the span's original content, and a raw openxml closer. For .del: a single
+-- raw openxml inline built from the span's flattened text, surrounded by any
+-- openxml RawInlines that were found inside the span.
+--
+-- NOTE(review): the string.format templates and closing-tag literals in this
+-- function are empty (or reduced to a bare '%s') in this copy of the file, so
+-- most of the arguments passed to string.format are unused as shown.
+-- Presumably the XML markup was stripped in transit -- confirm against the
+-- original source before editing.
+function Span(el)
+ -- Only process for Word output
+ if FORMAT ~= "docx" and FORMAT ~= "openxml" then
+ return nil
+ end
+
+ -- Handle insertions (.ins class)
+ if el.classes:includes('ins') then
+ -- Pandoc parses {.ins id="x"} with "id" as the identifier, not an attribute
+ local id = el.identifier
+ if (not id or id == "") then
+ id = el.attributes['id'] or "0"
+ end
+ -- Falls back to placeholder author/date when the id has no metadata entry
+ local rev = get_revision(id)
+
+ debug("[revisions-inject] Processing insertion id=" .. id .. "\n")
+
+ -- Build w:ins wrapper (use numeric ID for Word compatibility)
+ local numeric_id = get_numeric_id(id)
+ local start_xml = string.format(
+ '',
+ xml_escape(numeric_id),
+ xml_escape(rev.author),
+ xml_escape(rev.date)
+ )
+ local end_xml = ''
+
+ local result = { pandoc.RawInline('openxml', start_xml) }
+
+ -- Add content (inserted inlines are passed through unmodified)
+ for _, item in ipairs(el.content) do
+ table.insert(result, item)
+ end
+
+ table.insert(result, pandoc.RawInline('openxml', end_xml))
+ return result
+ end
+
+ -- Handle deletions (.del class wrapping strikethrough)
+ -- Pattern: [~~deleted text~~]{.del id="x"}
+ if el.classes:includes('del') then
+ -- Pandoc parses {.del id="x"} with "id" as the identifier, not an attribute
+ local id = el.identifier
+ if (not id or id == "") then
+ id = el.attributes['id'] or "0"
+ end
+ local rev = get_revision(id)
+
+ debug("[revisions-inject] Processing deletion id=" .. id .. "\n")
+
+ -- Extract text and any RawInlines (like comment markers) from content
+ -- (formatting inside the deleted span is flattened to plain text)
+ local content = extract_deletion_content(el.content)
+ local del_text = content.text
+
+ -- Remove any remaining strikethrough markers (~~ ) that may have leaked through
+ del_text = del_text:gsub("~~", "")
+
+ -- Build w:del with w:delText (use numeric ID for Word compatibility)
+ -- Include xml:space="preserve" if text has significant whitespace
+ local numeric_id = get_numeric_id(id)
+ local space_attr = get_space_attr(del_text)
+ local del_xml = string.format(
+ '' ..
+ '%s' ..
+ '',
+ xml_escape(numeric_id),
+ xml_escape(rev.author),
+ xml_escape(rev.date),
+ space_attr,
+ xml_escape(del_text)
+ )
+
+ -- Build result: RawInlines before + deletion + RawInlines after
+ -- This preserves comment markers that were inside the deletion
+ local result = {}
+
+ -- Add any RawInlines that appeared before text (e.g., comment start markers)
+ for _, raw in ipairs(content.raw_before) do
+ table.insert(result, raw)
+ debug("[revisions-inject] Preserving RawInline before deletion\n")
+ end
+
+ -- Add the deletion itself
+ table.insert(result, pandoc.RawInline('openxml', del_xml))
+
+ -- Add any RawInlines that appeared after text started (e.g., comment end markers)
+ for _, raw in ipairs(content.raw_after) do
+ table.insert(result, raw)
+ debug("[revisions-inject] Preserving RawInline after deletion\n")
+ end
+
+ -- Return single element or list depending on whether we have RawInlines
+ if #result == 1 then
+ return result[1]
+ else
+ return result
+ end
+ end
+
+ return nil
+end
+
+-- Populate revisions_meta from the text of a revisions JSON file.
+-- Pattern-based rather than a real JSON parser: it looks for "rev_*" keys
+-- followed by a brace-balanced object and reads the "author", "date" and
+-- "type" string fields from it, in whatever order they appear.
+-- Entries without an author are skipped; a missing date defaults to the
+-- current UTC time.
+--
+-- content:     file contents
+-- source_path: path used only for the debug message
+-- Returns the number of revisions stored. Sets revisions_loaded when > 0.
+local function parse_revisions_json(content, source_path)
+  local loaded = 0
+
+  -- Value of a string-valued field inside one revision object, or nil
+  local function field(object_text, key)
+    return object_text:match('"' .. key .. '":%s*"([^"]*)"')
+  end
+
+  for id, object_text in content:gmatch('"(rev_[^"]+)":%s*(%b{})') do
+    local author = field(object_text, "author")
+    local date = field(object_text, "date")
+    local rev_type = field(object_text, "type")
+
+    if author then
+      local entry = { author = author, type = rev_type }
+      entry.date = date or os.date("!%Y-%m-%dT%H:%M:%SZ")
+      revisions_meta[id] = entry
+      loaded = loaded + 1
+      debug("[revisions-inject] Loaded revision: " .. id .. " by " .. author .. "\n")
+    end
+  end
+
+  if loaded > 0 then
+    debug("[revisions-inject] Loaded " .. loaded .. " revisions from: " .. source_path .. "\n")
+    revisions_loaded = true
+  end
+
+  return loaded
+end
+
+-- Read a revisions file and parse it.
+-- Returns the number of revisions loaded; 0 when the file cannot be opened.
+local function try_load_revisions(path)
+  local handle = io.open(path, "r")
+  if not handle then
+    return 0
+  end
+
+  local json_text = handle:read("*all")
+  handle:close()
+  return parse_revisions_json(json_text, path)
+end
+
+-- Meta handler: load revision metadata once per run.
+-- Sources are tried in priority order and the first that yields at least one
+-- revision wins:
+--   1. -M revisions-file:path      (explicit)
+--   2. _docstyle/revisions.json    (convention)
+-- The metadata itself is never modified (always returns nil).
+function Meta(meta)
+  -- Nothing to do when an earlier call already loaded revisions
+  if revisions_loaded then
+    return nil
+  end
+
+  local loaded = false
+
+  -- Priority 1: explicit path via metadata
+  local explicit = meta['revisions-file']
+  if explicit then
+    local path = pandoc.utils.stringify(explicit)
+    debug("[revisions-inject] Trying explicit path: " .. path .. "\n")
+    loaded = try_load_revisions(path) > 0
+    if not loaded then
+      debug("[revisions-inject] Warning: Could not open revisions file: " .. path .. "\n")
+    end
+  end
+
+  -- Priority 2: conventional location
+  if not loaded then
+    local auto_path = "_docstyle/revisions.json"
+    debug("[revisions-inject] Trying auto-detect: " .. auto_path .. "\n")
+    loaded = try_load_revisions(auto_path) > 0
+    if not loaded then
+      debug("[revisions-inject] No revisions.json found (checked _docstyle/revisions.json)\n")
+    end
+  end
+
+  return nil
+end
+
+-- Log (in debug mode) that the filter is active when writing Word output.
+-- The document is left unchanged.
+function Pandoc(doc)
+  local word_output = (FORMAT == "docx") or (FORMAT == "openxml")
+  if word_output then
+    debug("[revisions-inject] Filter active for Word output\n")
+  end
+  return nil
+end
+
+-- Filter list returned to Pandoc: Meta (load revision metadata) is listed
+-- first, then Pandoc (format log), then Span (insertions and deletions), so
+-- metadata loading is listed ahead of the Span handler that reads it.
+return {
+ { Meta = Meta },
+ { Pandoc = Pandoc },
+ { Span = Span }
+}
diff --git a/docs/_extensions/docstyle/strip-docstyle.lua b/docs/_extensions/docstyle/strip-docstyle.lua
new file mode 100644
index 0000000..2ab390f
--- /dev/null
+++ b/docs/_extensions/docstyle/strip-docstyle.lua
@@ -0,0 +1,132 @@
+--- strip-docstyle.lua
+--- Lua filter for preprint-typst rendering from docstyle QMD sources.
+--- Extracts body-level abstract to metadata and strips docstyle-specific
+--- div wrappers so the same QMD renders cleanly via quarto-preprint.
+
+--- Pass 1: Move the body-level abstract (and its keywords) into metadata.
+--- The abstract is everything from the level-1 "Abstract" heading up to the
+--- next level-1 heading (or the end of the document). Level-2 headings inside
+--- it become bold paragraphs, except "Keywords", whose paragraph is split on
+--- commas into meta.keywords. A #refs div is also inserted after every
+--- level-1 heading whose text contains "References"/"references".
+function Pandoc(doc)
+  local body = doc.blocks
+  local stringify = pandoc.utils.stringify
+
+  local abstract_content = pandoc.List()
+  local keyword_inlines = nil
+  local first_idx, last_idx = nil, nil
+  local inside_abstract, inside_keywords = false, false
+
+  for idx = 1, #body do
+    local blk = body[idx]
+    local is_header = blk.t == "Header"
+
+    if is_header and blk.level == 1 then
+      if blk.content and stringify(blk.content) == "Abstract" then
+        inside_abstract, inside_keywords = true, false
+        first_idx = idx
+      elseif inside_abstract then
+        -- Next level-1 heading closes the abstract section
+        last_idx = idx - 1
+        inside_abstract, inside_keywords = false, false
+        break
+      end
+    elseif inside_abstract then
+      if is_header and blk.level == 2 then
+        inside_keywords = stringify(blk.content) == "Keywords"
+        if not inside_keywords then
+          -- Abstract subsection headings are rendered as bold paragraphs
+          abstract_content:insert(pandoc.Para({pandoc.Strong(blk.content)}))
+        end
+      elseif inside_keywords then
+        if blk.t == "Para" then
+          keyword_inlines = blk.content
+        end
+      else
+        abstract_content:insert(blk)
+      end
+    end
+  end
+
+  -- Abstract ran to the end of the document without a closing heading
+  if inside_abstract and not last_idx then
+    last_idx = #body
+  end
+
+  if first_idx and last_idx and #abstract_content > 0 then
+    doc.meta.abstract = pandoc.MetaBlocks(abstract_content)
+
+    if keyword_inlines then
+      -- Comma-separated keywords become a MetaList; a trailing period and
+      -- surrounding whitespace are trimmed from each entry
+      local keyword_list = pandoc.List()
+      for raw_kw in stringify(keyword_inlines):gmatch("[^,]+") do
+        local cleaned = raw_kw:match("^%s*(.-)%.?%s*$")
+        if cleaned and cleaned ~= "" then
+          keyword_list:insert(pandoc.MetaInlines({pandoc.Str(cleaned)}))
+        end
+      end
+      if #keyword_list > 0 then
+        doc.meta.keywords = pandoc.MetaList(keyword_list)
+      end
+    end
+
+    -- Delete back-to-front so the remaining indices stay valid
+    for idx = last_idx, first_idx, -1 do
+      body:remove(idx)
+    end
+  end
+
+  -- Place a #refs div directly after the References heading so citeproc
+  -- puts the bibliography there instead of at the end of the document
+  local with_refs = pandoc.List()
+  for _, blk in ipairs(body) do
+    with_refs:insert(blk)
+    local is_top_heading = blk.t == "Header" and blk.level == 1
+    if is_top_heading and stringify(blk.content):match("[Rr]eferences") then
+      with_refs:insert(pandoc.Div({}, pandoc.Attr("refs")))
+    end
+  end
+
+  doc.blocks = with_refs
+  return doc
+end
+
+--- Pass 2: Handle docstyle divs and date/version paragraphs
+
+--- Translate docstyle divs for Typst output: page breaks become a raw
+--- #pagebreak(), filter-generated divs are dropped, section-* wrappers are
+--- unwrapped, and every other div is left as is.
+function Div(el)
+  local cls_list = el.classes
+
+  -- Page breaks → Typst pagebreak
+  if cls_list:includes("page-break") then
+    return pandoc.RawBlock("typst", "#pagebreak()")
+  end
+
+  -- Divs that docstyle Lua filters would fill in are removed entirely
+  for _, generated in ipairs({ "version-history", "author-plate", "toc" }) do
+    if cls_list:includes(generated) then
+      return {}
+    end
+  end
+
+  -- Section wrappers: discard the div, keep what it contains
+  for _, name in ipairs(cls_list) do
+    if name:match("^section%-") then
+      return el.content
+    end
+  end
+
+  return nil
+end
+
+--- Remove the docstyle date/version line (e.g. "2026-02-24 | Version: 0.2.5").
+--- Any other paragraph is left untouched.
+function Para(el)
+  local is_version_line = pandoc.utils.stringify(el):match("^%d%d%d%d%-%d%d%-%d%d%s*|%s*Version:")
+  if not is_version_line then
+    return nil
+  end
+  return {}
+end
diff --git a/docs/_extensions/docstyle/table-style.lua b/docs/_extensions/docstyle/table-style.lua
new file mode 100644
index 0000000..70aae5f
--- /dev/null
+++ b/docs/_extensions/docstyle/table-style.lua
@@ -0,0 +1,598 @@
+-- table-style.lua
+-- Pandoc Lua filter that applies CSS-defined table styles to Word output
+--
+-- Usage in QMD:
+-- ::: {.table-formal}
+-- | Column 1 | Column 2 |
+-- |----------|----------|
+-- | Data | Data |
+-- :::
+--
+-- Supported table classes:
+-- .table-formal - Top/bottom borders, shaded header row
+-- .table-grid - Full grid borders on all cells
+--
+-- Table styles are loaded from page-config.json (CSS-derived) at runtime.
+-- Built-in defaults are used as fallback when no CSS config is available.
+
+-- Load field-code-utils for ADDIN DOCSTYLE field code emission
+local fcu = require("field-code-utils")
+
+-- Debug logging to stderr; active only when DOCSTYLE_DEBUG=1
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+  if not DEBUG then
+    return
+  end
+  io.stderr:write(msg)
+end
+
+-- Normalise pandoc's global FORMAT into a file-local value. The right-hand
+-- side still refers to the global, because a Lua local only comes into scope
+-- after its own declaration. Previously this was hard-coded to "openxml",
+-- which shadowed the real output format and made the "only process for docx
+-- output" check in Div impossible to trigger.
+local FORMAT = (FORMAT == "docx" or FORMAT == "openxml") and "openxml" or FORMAT
+
+-- Built-in fallback table style definitions (used when CSS config not available).
+-- Each entry is keyed by the div class that selects it.
+--   borders        - maps an edge name to a { val, sz, color } spec; edges set
+--                    to nil are skipped when the borders XML is built
+--   header_shading - shading colour read for header rows (nil = none)
+--   header_bold    - when truthy, header rows get the bold run property
+local builtin_table_styles = {
+ ["table-formal"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "7F7F7F" },
+ bottom = { val = "single", sz = "4", color = "7F7F7F" },
+ left = nil,
+ right = nil,
+ insideH = nil,
+ insideV = nil
+ },
+ header_shading = "D9D9D9"
+ },
+ ["table-grid"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "000000" },
+ bottom = { val = "single", sz = "4", color = "000000" },
+ left = { val = "single", sz = "4", color = "000000" },
+ right = { val = "single", sz = "4", color = "000000" },
+ insideH = { val = "single", sz = "4", color = "000000" },
+ insideV = { val = "single", sz = "4", color = "000000" }
+ },
+ header_shading = nil,
+ header_bold = true
+ }
+}
+
+-- Active table styles (populated from CSS config or fallback)
+local table_styles = nil
+
+-- Fetch CSS-derived table styles from page-config.json via the shared loader.
+-- Returns the config's table_styles value, or nil when the config or that key
+-- is absent.
+local function load_table_styles()
+  local page_config = fcu.load_page_config()
+  local css_tables = page_config and page_config.table_styles
+  if not css_tables then
+    debug("[table-style] No CSS table config found, using built-in defaults\n")
+    return nil
+  end
+  debug("[table-style] Loaded CSS table styles from page-config.json\n")
+  return css_tables
+end
+
+-- Initialise table_styles once: built-in defaults overlaid with any
+-- CSS-derived styles. Without a CSS config the built-in table is used as is.
+local function init_table_styles()
+  if table_styles then return end
+
+  local css_styles = load_table_styles()
+  if not css_styles then
+    table_styles = builtin_table_styles
+    return
+  end
+
+  -- Copy a style so that writing into its nested tables (e.g. borders)
+  -- never touches builtin_table_styles
+  local function copy_style(style)
+    local copy = {}
+    for key, value in pairs(style) do
+      if type(value) == "table" then
+        local nested = {}
+        for sub_key, sub_value in pairs(value) do
+          nested[sub_key] = sub_value
+        end
+        copy[key] = nested
+      else
+        copy[key] = value
+      end
+    end
+    return copy
+  end
+
+  -- Overlay CSS fields onto `target`. Nested tables present on both sides
+  -- are merged key by key, so built-in fields the CSS does not mention
+  -- (e.g. header_shading when CSS only defines borders) are preserved
+  local function merge_style(target, css_style)
+    for key, value in pairs(css_style) do
+      local existing = target[key]
+      if type(value) == "table" and type(existing) == "table" then
+        for sub_key, sub_value in pairs(value) do
+          existing[sub_key] = sub_value
+        end
+      else
+        target[key] = value
+      end
+    end
+  end
+
+  table_styles = {}
+  for name, style in pairs(builtin_table_styles) do
+    table_styles[name] = copy_style(style)
+  end
+
+  for name, css_style in pairs(css_styles) do
+    local target = table_styles[name]
+    if not target then
+      target = {}
+      table_styles[name] = target
+    end
+    merge_style(target, css_style)
+  end
+end
+
+-- Format one border element from its name and the border's val/sz/color
+-- fields. A missing border yields the empty string.
+local function build_border_xml(name, border)
+  if not border then
+    return ""
+  end
+  local border_fmt = ''
+  return string.format(border_fmt, name, border.val, border.sz, border.color)
+end
+
+-- Build the table borders XML by concatenating one border element per
+-- defined edge, in the fixed order top, left, bottom, right, insideH,
+-- insideV. Returns "" when no borders table is given.
+local function build_tblBorders_xml(borders)
+  if not borders then return "" end
+
+  local edge_order = { "top", "left", "bottom", "right", "insideH", "insideV" }
+  local pieces = { "" }
+  for _, edge in ipairs(edge_order) do
+    local spec = borders[edge]
+    if spec then
+      pieces[#pieces + 1] = build_border_xml(edge, spec)
+    end
+  end
+  pieces[#pieces + 1] = ""
+
+  return table.concat(pieces)
+end
+
+-- Format the cell shading element for `color`; "" when no colour is given.
+local function build_shading_xml(color)
+  if color then
+    return string.format('', color)
+  end
+  return ""
+end
+
+
+-- Parse a comma-separated widths attribute (e.g., "30,70" or "25,50,25").
+-- Entries that tonumber() rejects are ignored. Returns an array of numbers,
+-- or nil when the attribute is missing, empty, or yields no numbers.
+local function parse_widths(widths_str)
+  if not widths_str or widths_str == "" then
+    return nil
+  end
+
+  local parsed = {}
+  for piece in string.gmatch(widths_str, "([^,]+)") do
+    local value = tonumber(piece)
+    if value then
+      parsed[#parsed + 1] = value
+    end
+  end
+
+  if #parsed == 0 then
+    return nil
+  end
+  return parsed
+end
+
+-- Convert a Pandoc table to an OpenXML string with custom styling.
+-- Parameters:
+-- tbl: Pandoc Table element; its head rows and body rows are read
+-- style_name: key into table_styles
+-- widths_str: optional comma-separated percentages (e.g., "30,70")
+-- width_pct: optional table width as percentage of page (e.g., "50" for half width)
+-- font_size_pt: optional font size in points (e.g., 9)
+-- overrides: optional table of per-table overrides (header_bold, header_shading)
+-- Returns the assembled table XML string, or nil when style_name is unknown.
+local function styled_table_to_openxml(tbl, style_name, widths_str, width_pct, font_size_pt, overrides)
+ local style = table_styles[style_name]
+ if not style then
+ debug("[table-style] Unknown table style: " .. style_name .. "\n")
+ return nil
+ end
+
+ -- Apply per-table overrides from div attributes (header-bold, header-shading)
+ -- header_bold is compared against nil so an explicit false can switch bold off;
+ -- header_shading only overrides when it is truthy
+ overrides = overrides or {}
+ local eff_header_bold = style.header_bold
+ local eff_header_shading = style.header_shading
+ if overrides.header_bold ~= nil then
+ eff_header_bold = overrides.header_bold
+ end
+ if overrides.header_shading then
+ eff_header_shading = overrides.header_shading
+ end
+
+ debug("[table-style] Applying style '" .. style_name .. "' to table\n")
+
+ -- Get table dimensions
+ -- rows collects { cells = {...}, is_header_row = bool } for head rows first,
+ -- then body rows; num_cols ends up as the largest cell count of any row
+ local num_cols = 0
+ local rows = {}
+
+ -- Process table head
+ if tbl.head and tbl.head.rows then
+ for _, row in ipairs(tbl.head.rows) do
+ local cells = {}
+ for _, cell in ipairs(row.cells) do
+ table.insert(cells, { content = cell, is_header = true })
+ -- NOTE(review): #row.cells is constant for the row, so this could be
+ -- evaluated once per row; cell spans are not considered here
+ num_cols = math.max(num_cols, #row.cells)
+ end
+ table.insert(rows, { cells = cells, is_header_row = true })
+ end
+ end
+
+ -- Process table body
+ if tbl.bodies then
+ for _, body in ipairs(tbl.bodies) do
+ if body.body then
+ for _, row in ipairs(body.body) do
+ local cells = {}
+ for _, cell in ipairs(row.cells) do
+ table.insert(cells, { content = cell, is_header = false })
+ num_cols = math.max(num_cols, #row.cells)
+ end
+ table.insert(rows, { cells = cells, is_header_row = false })
+ end
+ end
+ end
+ end
+
+ -- Calculate table width (default 9000 twips = ~6.25 inches = full text width)
+ local full_width = 9000
+ local total_width = full_width
+
+ -- Apply width percentage if specified (e.g., "50" for half width)
+ -- Values that are not numbers in (0, 100] are ignored and the full width is kept
+ if width_pct then
+ local pct = tonumber(width_pct)
+ if pct and pct > 0 and pct <= 100 then
+ total_width = math.floor(full_width * pct / 100)
+ debug("[table-style] Using table width: " .. pct .. "%\n")
+ end
+ end
+
+ -- Calculate column widths
+ local col_widths = {}
+ local widths = parse_widths(widths_str)
+
+ -- Explicit widths are honoured only when their count matches num_cols;
+ -- otherwise the auto-computation below is used
+ if widths and #widths == num_cols then
+ -- Use specified percentages (normalised by their sum, so they need not add up to 100)
+ local total_pct = 0
+ for _, w in ipairs(widths) do
+ total_pct = total_pct + w
+ end
+ -- NOTE(review): widths summing to 0 make total_pct 0 and the division
+ -- below non-finite — confirm whether that input needs a guard
+ for i, w in ipairs(widths) do
+ col_widths[i] = math.floor(total_width * w / total_pct)
+ end
+ debug("[table-style] Using custom column widths: " .. widths_str .. "\n")
+ else
+ -- Auto-compute widths from cell content.
+ -- For each column: find the longest single word (minimum width to avoid
+ -- mid-word breaks), then distribute remaining space by total text volume.
+
+ -- Approximate characters that fit in the full table width
+ -- ~11 chars/inch at 10pt Calibri, scale inversely with font size
+ local base_font = font_size_pt or 10
+ local chars_per_inch = 11 * (10 / base_font)
+ local total_chars = math.floor(6.5 * chars_per_inch)
+
+ -- Collect text per column (header + all body cells)
+ local col_texts = {}
+ for i = 1, num_cols do col_texts[i] = {} end
+ for _, row in ipairs(rows) do
+ for col_idx, cell in ipairs(row.cells) do
+ if col_idx <= num_cols then
+ local text = ""
+ if cell.content then
+ text = pandoc.utils.stringify(cell.content)
+ end
+ table.insert(col_texts[col_idx], text)
+ end
+ end
+ end
+
+ local min_chars = {}
+ local volume = {}
+ for i = 1, num_cols do
+ -- Longest single word in this column (determines minimum width)
+ -- Lengths use #, i.e. bytes rather than characters
+ local max_word = 1
+ for _, text in ipairs(col_texts[i]) do
+ for word in text:gmatch("%S+") do
+ max_word = math.max(max_word, #word)
+ end
+ end
+ min_chars[i] = max_word + 1 -- +1 char padding
+
+ -- Total text volume (drives proportional allocation)
+ local vol = 0
+ for _, text in ipairs(col_texts[i]) do
+ vol = vol + #text
+ end
+ -- Floor at 1 so an empty column still takes part in the distribution
+ volume[i] = math.max(vol, 1)
+ end
+
+ -- Convert minimum chars to percentage of page
+ local min_pct = {}
+ local sum_min = 0
+ for i = 1, num_cols do
+ min_pct[i] = min_chars[i] / total_chars * 100
+ sum_min = sum_min + min_pct[i]
+ end
+
+ local auto_widths = {}
+ if sum_min >= 100 then
+ -- Minimums fill the page; scale proportionally
+ for i = 1, num_cols do
+ auto_widths[i] = min_pct[i] / sum_min * 100
+ end
+ else
+ -- Allocate minimums, distribute remaining space by volume
+ local remaining = 100 - sum_min
+ local total_vol = 0
+ for i = 1, num_cols do total_vol = total_vol + volume[i] end
+ for i = 1, num_cols do
+ auto_widths[i] = min_pct[i] + (volume[i] / total_vol * remaining)
+ end
+ end
+
+ -- Convert percentages to twips, adjusting for rounding
+ local sum_tw = 0
+ for i = 1, num_cols do
+ col_widths[i] = math.floor(total_width * auto_widths[i] / 100)
+ sum_tw = sum_tw + col_widths[i]
+ end
+ -- Give rounding remainder to the widest column
+ -- NOTE(review): with num_cols == 0, col_widths[1] is nil and the addition
+ -- below would fail — presumably tables always have a column; verify
+ local widest = 1
+ for i = 2, num_cols do
+ if col_widths[i] > col_widths[widest] then widest = i end
+ end
+ col_widths[widest] = col_widths[widest] + (total_width - sum_tw)
+
+ -- Log the computed widths
+ local pcts = {}
+ for i = 1, num_cols do
+ table.insert(pcts, tostring(math.floor(auto_widths[i] + 0.5)))
+ end
+ debug("[table-style] Auto-computed column widths: " .. table.concat(pcts, ",") .. "\n")
+ end
+
+ -- Build table properties XML
+ -- Add small bottom cell margin (~0.5 line = 120 twips) for breathing room
+ local cell_margin_xml = ''
+
+ local tblPr_parts = {
+ "",
+ '',
+ build_tblBorders_xml(style.borders),
+ '',
+ cell_margin_xml,
+ ""
+ }
+
+ -- Build grid columns
+ local grid_parts = { "" }
+ for i = 1, num_cols do
+ table.insert(grid_parts, '')
+ end
+ table.insert(grid_parts, "")
+
+ -- Build rows
+ -- Pre-compute font size string once (table-level constant)
+ -- NOTE(review): in Lua 5.3+ tostring() of a float product keeps a fractional
+ -- part (e.g. "18.0"); font_size_pt is a float whenever the caller derives it
+ -- with `/` — confirm the consumer of half_pts accepts that form
+ local half_pts = font_size_pt and tostring(font_size_pt * 2) or nil
+
+ local row_parts = {}
+ for _, row in ipairs(rows) do
+ local row_xml = { "" }
+
+ -- Build row-level run properties (same for all cells in this row)
+ local rPr_parts = {}
+ if row.is_header_row and eff_header_bold then
+ table.insert(rPr_parts, "")
+ end
+ if half_pts then
+ table.insert(rPr_parts, '')
+ table.insert(rPr_parts, '')
+ end
+ local rPr = ""
+ if #rPr_parts > 0 then
+ rPr = "" .. table.concat(rPr_parts) .. ""
+ end
+
+ -- Paragraph properties for single line spacing (no space after)
+ local pPr = ''
+
+ -- Helper: render a list of inlines to a Word paragraph
+ -- Returns nil for an empty list or when the renderer produces no runs
+ local function build_rich_para(inlines)
+ if #inlines == 0 then return nil end
+ local runs_xml = fcu.render_inlines(inlines, rPr_parts)
+ if runs_xml == "" then return nil end
+ return "" .. pPr .. runs_xml .. ""
+ end
+
+ for col_idx, cell in ipairs(row.cells) do
+ -- Cell properties
+ local tcPr_parts = {
+ "",
+ ''
+ }
+
+ -- Add header shading if this is a header row
+ if row.is_header_row and eff_header_shading then
+ table.insert(tcPr_parts, build_shading_xml(eff_header_shading))
+ end
+
+ table.insert(tcPr_parts, "")
+
+ -- Build paragraphs for the cell using the inline renderer
+ -- This preserves bold, italic, comments, char-style spans, etc.
+ local paragraphs = {}
+
+ -- Get the cell's content blocks
+ -- cell.content is the Pandoc cell stored while collecting rows; its blocks
+ -- are taken from .contents when present, otherwise the value is used directly
+ local cell_blocks = {}
+ if cell.content and cell.content.contents then
+ cell_blocks = cell.content.contents
+ elseif cell.content then
+ cell_blocks = cell.content
+ end
+
+ -- Process each block in the cell
+ for _, block in ipairs(cell_blocks) do
+ if block.content then
+ -- Split content on LineBreak elements to create separate Word paragraphs
+ local current_line = {}
+
+ for _, inline in ipairs(block.content) do
+ if inline.t == "LineBreak" then
+ local para = build_rich_para(current_line)
+ if para then table.insert(paragraphs, para) end
+ current_line = {}
+ else
+ table.insert(current_line, inline)
+ end
+ end
+
+ -- Last line (after final LineBreak or if no LineBreak)
+ if #current_line > 0 then
+ local para = build_rich_para(current_line)
+ if para then table.insert(paragraphs, para) end
+ end
+ else
+ -- Block without inline content — stringify as fallback
+ local text = pandoc.utils.stringify(block)
+ if text ~= "" then
+ table.insert(paragraphs,
+ "" .. pPr .. "" .. rPr ..
+ '' .. fcu.xml_escape(text) .. "")
+ end
+ end
+ end
+
+ -- If no paragraphs found (shouldn't happen), add empty paragraph
+ if #paragraphs == 0 then
+ table.insert(paragraphs, "" .. pPr .. "" .. rPr .. "")
+ end
+
+ -- Build cell XML with all paragraphs
+ local cell_xml = "" ..
+ table.concat(tcPr_parts) ..
+ table.concat(paragraphs) ..
+ ""
+
+ table.insert(row_xml, cell_xml)
+ end
+
+ table.insert(row_xml, "")
+ table.insert(row_parts, table.concat(row_xml))
+ end
+
+ -- Assemble complete table XML: properties, grid, then rows
+ local table_xml = "" ..
+ table.concat(tblPr_parts) ..
+ table.concat(grid_parts) ..
+ table.concat(row_parts) ..
+ ""
+
+ return table_xml
+end
+
+-- Return the first class in `classes` that names a known table style, or
+-- nil when none matches (or styles have not been initialised yet).
+local function find_table_style(classes)
+  for _, candidate in ipairs(classes) do
+    local known = table_styles and table_styles[candidate]
+    if known then
+      return candidate
+    end
+  end
+  return nil
+end
+
+-- Keys to skip when collecting div attributes for field code payload
+-- (Pandoc-internal keys that should not be serialised)
+local skip_attr_keys = { id = true, ["data-pos"] = true }
+
+-- Replace a div carrying a table style class with styled OpenXML for the
+-- table it contains, wrapped in ADDIN DOCSTYLE field markers.
+-- Returns nil (div left untouched) when no style class matches, the output
+-- is not openxml, no Table is found, or the conversion yields nothing.
+function Div(div)
+  local style_name = find_table_style(div.classes)
+  if not style_name then
+    return nil
+  end
+
+  -- Only process for docx output
+  if FORMAT ~= "openxml" then
+    return nil
+  end
+
+  -- Usage: ::: {.table-formal widths="30,70" width="50" font-size="9"}
+  local attr = div.attributes
+  local widths_str = attr["widths"]        -- column widths (e.g., "30,70")
+  local width_pct = attr["width"]          -- table width % (e.g., "50")
+  local font_size_str = attr["font-size"]  -- font size in pt (e.g., "9")
+
+  -- Depth-first search for the first Table, descending through nested Divs
+  -- (Quarto wraps R code chunk output in .cell > .cell-output-display divs)
+  local function find_table(blocks)
+    for _, blk in ipairs(blocks) do
+      if blk.t == "Table" then
+        return blk
+      end
+      if blk.t == "Div" and blk.content then
+        local nested = find_table(blk.content)
+        if nested then return nested end
+      end
+    end
+    return nil
+  end
+
+  local tbl = find_table(div.content)
+  if not tbl then
+    debug("[table-style] No table found in ." .. style_name .. " div\n")
+    return nil
+  end
+
+  -- Font size: the div attribute wins; otherwise fall back to the style's
+  -- CSS-derived font_size_half_pts
+  local font_size_pt = font_size_str and tonumber(font_size_str) or nil
+  if font_size_pt then
+    debug("[table-style] Using font size: " .. font_size_pt .. "pt\n")
+  else
+    local style = table_styles[style_name]
+    if style and style.font_size_half_pts then
+      font_size_pt = style.font_size_half_pts / 2
+      debug("[table-style] Using CSS font size: " .. font_size_pt .. "pt\n")
+    end
+  end
+
+  -- Per-table header overrides from div attributes
+  local overrides = {}
+  local bold_attr = attr["header-bold"]
+  if bold_attr then
+    overrides.header_bold = (bold_attr == "true" or bold_attr == "1")
+  end
+  local shade_attr = attr["header-shading"]
+  if shade_attr and shade_attr ~= "" then
+    overrides.header_shading = shade_attr:gsub("^#", "") -- strip leading # if present
+  end
+
+  local table_xml = styled_table_to_openxml(tbl, style_name, widths_str, width_pct, font_size_pt, overrides)
+  if not table_xml then
+    return nil
+  end
+
+  -- Field code payload: every non-empty attribute except Pandoc-internal keys
+  local attrs = {}
+  for key, val in pairs(attr) do
+    local keep = val and val ~= "" and not skip_attr_keys[key]
+    if keep then
+      attrs[key] = val
+    end
+  end
+  local field_start = fcu.build_table_field_start(style_name, attrs)
+  local field_end = fcu.build_block_field_end()
+
+  return pandoc.Blocks({
+    pandoc.RawBlock("openxml", field_start),
+    pandoc.RawBlock("openxml", table_xml),
+    pandoc.RawBlock("openxml", field_end)
+  })
+end
+
+-- Normalise FORMAT for Word output and make sure table styles are loaded.
+-- Returns nil, so the document itself is unchanged.
+function Pandoc(doc)
+  local word_output = (FORMAT == "docx") or (FORMAT == "openxml")
+  if word_output then
+    FORMAT = "openxml"
+  end
+  init_table_styles()
+  return nil
+end
+
+return {
+ { Pandoc = Pandoc },
+ { Div = Div }
+}
diff --git a/docs/_extensions/docstyle/toc-field.lua b/docs/_extensions/docstyle/toc-field.lua
new file mode 100644
index 0000000..12c221a
--- /dev/null
+++ b/docs/_extensions/docstyle/toc-field.lua
@@ -0,0 +1,214 @@
+-- toc-field.lua
+-- Pandoc Lua filter that injects Word TOC field codes
+--
+-- Usage in QMD:
+-- ::: {.toc}
+-- :::
+--
+-- Configuration in _quarto.yml (under docstyle.toc):
+-- title: "Contents" # Optional heading above TOC
+-- title-level: 1 # Heading level for title (default: 1)
+-- levels: "1-3" # Which heading levels to include
+-- page-numbers: true # Show page numbers
+-- hyperlinks: true # Make entries clickable
+-- tab-leader: "dot" # dot, dash, underscore, none
+--
+-- This filter finds Div elements with class "toc" and replaces them with
+-- Word TOC field codes, enabling dynamic table of contents in Word.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+-- Normalise pandoc's global FORMAT into a file-local value. The right-hand
+-- side still refers to the global, because a Lua local only comes into scope
+-- after its own declaration. Previously this was hard-coded to "openxml",
+-- so the "not docx output" branch in Div could never be taken.
+local FORMAT = (FORMAT == "docx" or FORMAT == "openxml") and "openxml" or FORMAT
+
+-- Default configuration
+local toc_config = {
+ title = nil, -- No title by default
+ title_level = 1, -- # heading
+ levels = "1-3",
+ page_numbers = true,
+ hyperlinks = true,
+ tab_leader = "dot"
+}
+
+-- Read TOC settings from docstyle.toc in the document metadata into
+-- toc_config. Keys that are absent keep their defaults.
+-- Always returns nil (the metadata is not modified).
+function Meta(meta)
+  local toc = meta.docstyle and meta.docstyle.toc
+  if not toc then
+    return nil
+  end
+
+  local stringify = pandoc.utils.stringify
+
+  -- Lua booleans are used directly; any other value counts as true unless
+  -- it stringifies to "false"
+  local function as_flag(val)
+    if type(val) == "boolean" then
+      return val
+    end
+    return stringify(val) ~= "false"
+  end
+
+  if toc.title then
+    toc_config.title = stringify(toc.title)
+  end
+
+  if toc["title-level"] then
+    -- A non-numeric value falls back to level 1
+    toc_config.title_level = tonumber(stringify(toc["title-level"])) or 1
+  end
+
+  if toc.levels then
+    toc_config.levels = stringify(toc.levels)
+  end
+
+  if toc["page-numbers"] ~= nil then
+    toc_config.page_numbers = as_flag(toc["page-numbers"])
+  end
+
+  if toc.hyperlinks ~= nil then
+    toc_config.hyperlinks = as_flag(toc.hyperlinks)
+  end
+
+  if toc["tab-leader"] then
+    toc_config.tab_leader = stringify(toc["tab-leader"])
+  end
+
+  io.stderr:write("[toc-field] Config: levels=" .. toc_config.levels ..
+    ", page-numbers=" .. tostring(toc_config.page_numbers) ..
+    ", hyperlinks=" .. tostring(toc_config.hyperlinks) ..
+    ", tab-leader=" .. toc_config.tab_leader .. "\n")
+
+  return nil
+end
+
+-- Normalize levels to a range format (Word requires "1-3" not just "1").
+-- A value that already contains "-" is returned unchanged; a single level
+-- "n" becomes "n-n".
+local function normalize_levels(levels)
+  local has_dash = string.find(levels, "-", 1, true) ~= nil
+  if has_dash then
+    return levels
+  end
+  return levels .. "-" .. levels
+end
+
+-- Build the TOC field instruction text from toc_config.
+-- Switches emitted, in this order:
+--   \o "1-3"  heading levels to include (always, normalised to a range)
+--   \h        hyperlink entries to headings (when hyperlinks is enabled)
+--   \z        hide tab leader and page numbers in Web Layout view (always)
+--   \u        use applied paragraph outline level (always)
+--   \n        suppress page numbers (when page-numbers is disabled)
+local function build_toc_instr()
+  local parts = {
+    "TOC",
+    '\\o "' .. normalize_levels(toc_config.levels) .. '"'
+  }
+
+  if toc_config.hyperlinks then
+    parts[#parts + 1] = "\\h"
+  end
+
+  parts[#parts + 1] = "\\z"
+  parts[#parts + 1] = "\\u"
+
+  if not toc_config.page_numbers then
+    parts[#parts + 1] = "\\n"
+  end
+
+  return table.concat(parts, " ")
+end
+
+-- Build the OpenXML for a TOC field code around the instruction text from
+-- build_toc_instr, including the placeholder shown until the field is updated.
+local function build_toc_field_xml()
+  -- Pad instruction text (Word requires leading/trailing spaces)
+  local padded_instr = " " .. build_toc_instr() .. " "
+
+  -- Pieces of the 5-part Word field code structure, joined in order
+  local pieces = {
+    '',
+    '',
+    '', padded_instr, '',
+    '',
+    '[Update field to generate table of contents]',
+    '',
+    ''
+  }
+
+  return table.concat(pieces)
+end
+
+-- Build the heading block for the TOC title.
+-- Returns an empty table when no title is configured; otherwise a table
+-- holding one raw OpenXML paragraph. The configured title-level selects the
+-- Word style id: 1 -> Heading1, 2 -> Heading2, etc.
+local function build_title_blocks()
+  if not toc_config.title then
+    return {}
+  end
+
+  -- Map title-level to Word heading style
+  local style_id = "Heading" .. tostring(toc_config.title_level)
+
+  -- The title is user-supplied text placed inside raw OpenXML, so escape it:
+  -- an unescaped & or < would make the paragraph malformed XML
+  local title_text = fcu.xml_escape(toc_config.title)
+
+  -- Build OpenXML paragraph with the appropriate Heading style
+  local title_xml = '' ..
+    '' ..
+    '' .. title_text .. '' ..
+    ''
+
+  return { pandoc.RawBlock("openxml", title_xml) }
+end
+
+-- Replace a .toc div with: ADDIN DOCSTYLE field start, optional title
+-- heading, the TOC field code, and the field end marker.
+-- Divs without the "toc" class, and any div when the output is not openxml,
+-- are returned unchanged (nil).
+function Div(div)
+  if not div.classes:includes("toc") then
+    return nil
+  end
+
+  -- Only process for docx output (a nil FORMAT also lands here)
+  if FORMAT ~= "openxml" then
+    io.stderr:write("[toc-field] Skipping TOC injection (not docx output)\n")
+    return nil
+  end
+
+  io.stderr:write("[toc-field] Found .toc div, injecting TOC field code\n")
+
+  local function raw(xml)
+    return pandoc.RawBlock("openxml", xml)
+  end
+
+  -- ADDIN DOCSTYLE field code begin (using shared utility)
+  local out = { raw(fcu.build_div_field_start("toc")) }
+
+  -- Title heading, when one is configured
+  for _, title_block in ipairs(build_title_blocks()) do
+    out[#out + 1] = title_block
+  end
+
+  -- The TOC field code itself
+  out[#out + 1] = raw(build_toc_field_xml())
+
+  -- ADDIN DOCSTYLE field code end (using shared utility)
+  out[#out + 1] = raw(fcu.build_block_field_end())
+
+  return out
+end
+
+-- Normalise FORMAT to "openxml" for Word output; the document is unchanged.
+-- NOTE(review): this handler is not listed in the filter table returned at
+-- the end of this file — confirm whether it is meant to run.
+function Pandoc(doc)
+  local word_output = (FORMAT == "docx") or (FORMAT == "openxml")
+  if word_output then
+    FORMAT = "openxml"
+  end
+  return nil
+end
+
+return {
+ { Meta = Meta },
+ { Div = Div }
+}
diff --git a/docs/_extensions/docstyle/update-field-codes.R b/docs/_extensions/docstyle/update-field-codes.R
new file mode 100755
index 0000000..006dfd3
--- /dev/null
+++ b/docs/_extensions/docstyle/update-field-codes.R
@@ -0,0 +1,405 @@
+#!/usr/bin/env Rscript
+# Post-render hook: Inject comments, Zotero components, and update field-codes.json
+#
+# This script is called by Quarto after rendering to:
+# 1. Inject comments from comments.json into the rendered DOCX
+# 2. Inject Zotero components (ZOTERO_PREF) for full round-trip support
+# 3. Extract Zotero field codes and merge into field-codes.json (for caching)
+# 4. Validate the rendered document (optional)
+#
+# The comment-inject.lua filter creates comment markers in document.xml,
+# but the actual comments.xml must be built from the JSON sidecar file.
+#
+# Usage in _quarto.yml:
+# project:
+# post-render: _extensions/docstyle/update-field-codes.R
+#
+# Environment variables (set by Quarto):
+# QUARTO_PROJECT_OUTPUT_FILES - newline-separated list of output files
+#
+# Optional environment variables:
+# DOCSTYLE_VALIDATE=1 - Enable DOCX structure validation
+# DOCSTYLE_VALIDATE_COMMENTS=1 - Enable comment validation
+# DOCSTYLE_VALIDATE_ZOTERO=1 - Enable Zotero field code validation
+# DOCSTYLE_DEBUG=1 - Enable verbose debug output
+
+# Get output files from Quarto
+output_files_env <- Sys.getenv("QUARTO_PROJECT_OUTPUT_FILES", unset = "")
+
+# An empty/unset variable means the script was not launched as a Quarto
+# post-render hook; leave quietly with a success status.
+if (!nzchar(output_files_env)) {
+  quit(save = "no", status = 0)
+}
+
+# One output path per line; keep only the Word documents.
+output_files <- strsplit(output_files_env, "\n")[[1]]
+docx_files <- grep("\\.docx$", output_files, ignore.case = TRUE, value = TRUE)
+
+if (length(docx_files) == 0) {
+  quit(save = "no", status = 0) # nothing rendered to DOCX, nothing to do
+}
+
+# Try to load docstyle (check installed package first, then try devtools::load_all)
+docstyle_loaded <- FALSE
+
+if (requireNamespace("docstyle", quietly = TRUE)) {
+  docstyle_loaded <- TRUE
+} else {
+  # Not installed: look for a development checkout to load via devtools.
+  # Script path from the first --file= argument (length one), else NULL.
+  script_path <- tryCatch({
+    args <- commandArgs(trailingOnly = FALSE)
+    file_arg <- grep("^--file=", args, value = TRUE)
+    if (length(file_arg) > 0) {
+      normalizePath(sub("^--file=", "", file_arg[1]), mustWork = FALSE)
+    } else {
+      NULL
+    }
+  }, error = function(e) NULL)
+
+  # Build search paths: script location parents + project parents
+  project_dir <- Sys.getenv("QUARTO_PROJECT_DIR", getwd())
+  search_dirs <- c(
+    project_dir,
+    dirname(project_dir),
+    dirname(dirname(project_dir)),
+    dirname(dirname(dirname(project_dir)))
+  )
+
+  # Script lives in _extensions/docstyle/, so the package root is 2 levels up
+  if (!is.null(script_path) && file.exists(script_path)) {
+    script_dir <- dirname(script_path)
+    search_dirs <- c(
+      dirname(dirname(script_dir)), # Package root (e.g., /path/to/docstyle)
+      search_dirs
+    )
+  }
+
+  search_dirs <- unique(search_dirs)
+
+  for (dir in search_dirs) {
+    desc_path <- file.path(dir, "DESCRIPTION")
+    if (file.exists(desc_path)) {
+      # Scan all lines; anchored so only the exact name "docstyle" matches.
+      desc_content <- readLines(desc_path, warn = FALSE)
+      if (any(grepl("^Package:\\s*docstyle\\s*$", desc_content))) {
+        if (requireNamespace("devtools", quietly = TRUE)) {
+          tryCatch({
+            devtools::load_all(dir, quiet = TRUE)
+            docstyle_loaded <- TRUE
+            break
+          }, error = function(e) NULL)
+        }
+      }
+    }
+  }
+}
+
+if (!docstyle_loaded) {
+  message("[post-render] docstyle package not found, skipping comment injection")
+  quit(save = "no", status = 0)
+}
+
+# Resolve project root — uses docstyle::find_project_root() which prefers
+# QUARTO_PROJECT_DIR, then walks upward for _quarto.yml/project: or .git.
+project_dir <- docstyle::find_project_root(getwd())
+
+# Optional docstyle.zotero settings from the project's _quarto.yml; stays NULL
+# when the file is absent, cannot be parsed, or has no such entry.
+quarto_yml_path <- file.path(project_dir, "_quarto.yml")
+zotero_config <- if (file.exists(quarto_yml_path)) {
+  cfg <- tryCatch(yaml::read_yaml(quarto_yml_path), error = function(e) NULL)
+  cfg$docstyle$zotero
+} else {
+  NULL
+}
+
+# Helper: resolve a docx path that may be relative to the document directory
+# rather than the project root (happens when output-dir: ../_site/docs in a
+# subdirectory _quarto.yml). QUARTO_PROJECT_OUTPUT_FILES is built by Quarto
+# as relative3(projDir, outputFile) but when output-dir contains ".." the
+# resolved path goes above projDir. QUARTO_DOCUMENT_PATH is always the
+# document's directory — try that as a fallback base.
+resolve_docx_path <- function(path, project_dir) {
+  # Returns the first existing location of `path`: as given, under
+  # `project_dir`, then under QUARTO_DOCUMENT_PATH; else `path` unchanged.
+  if (file.exists(path)) {
+    return(path)
+  }
+  doc_dir <- Sys.getenv("QUARTO_DOCUMENT_PATH", "")
+  for (base in c(project_dir, if (nzchar(doc_dir)) doc_dir)) {
+    candidate <- normalizePath(file.path(base, path), mustWork = FALSE)
+    if (file.exists(candidate)) return(candidate)
+  }
+  path
+}
+
+# Process each DOCX file
+for (docx_path in docx_files) {
+ docx_path <- resolve_docx_path(docx_path, project_dir)
+ if (!file.exists(docx_path)) {
+ next
+ }
+
+ # Determine output directory for field-codes.json
+ # Use _docstyle/ in project root if it exists, otherwise same dir as DOCX
+ docstyle_dir <- file.path(project_dir, "_docstyle")
+
+ if (dir.exists(docstyle_dir)) {
+ output_dir <- docstyle_dir
+ } else {
+ output_dir <- dirname(docx_path)
+ }
+
+ # Collect summary info
+ n_comments <- 0
+ zotero_pref_injected <- FALSE
+
+ # Debug mode (used throughout)
+ debug_mode <- Sys.getenv("DOCSTYLE_DEBUG", "0") == "1"
+
+ # Step 1: Inject comments from comments.json (if present)
+ comments_json <- file.path(output_dir, "comments.json")
+ if (file.exists(comments_json)) {
+ tryCatch({
+ # Scan for comment IDs used in the rendered document
+ used_ids <- docstyle::scan_used_comment_ids(docx_path)
+
+ if (length(used_ids) > 0) {
+ # Validate that used IDs exist in comments.json before injection
+ # This prevents corrupt DOCX when QMD and JSON are out of sync
+ comments <- docstyle::read_comments_json(comments_json)
+ json_ids <- names(comments)
+ missing_ids <- setdiff(used_ids, json_ids)
+
+ if (length(missing_ids) > 0) {
+ # Critical error: QMD references comments not in JSON
+ message("[docstyle] ERROR: Comment ID mismatch detected!")
+ message(" Document references ", length(missing_ids), " comment ID(s) not in comments.json:")
+ message(" ", paste(missing_ids, collapse = ", "))
+ message(" This would produce a corrupt DOCX file.")
+ message(" To fix: Run docstyle::sync_comment_ids() to re-sync IDs from source DOCX")
+ message(" Skipping comment injection to prevent corruption.")
+ # Don't inject - leave document without comments rather than corrupt it
+ } else {
+ docstyle::inject_comments(
+ docx_path = docx_path,
+ comments_json = comments_json,
+ used_ids = used_ids
+ )
+ n_comments <- length(used_ids)
+ }
+ }
+ }, error = function(e) {
+ message("[docstyle] Error injecting comments: ", conditionMessage(e))
+ })
+ }
+
+ # Step 1b: Fix comment-deletion nesting
+ # Comments attached to deleted text end up after the deletion due to Lua filter
+ # limitations. This repositions them to span the deletion properly.
+ if (n_comments > 0) {
+ tryCatch({
+ n_fixed <- docstyle::fix_comment_deletion_nesting(
+ docx_path = docx_path,
+ verbose = debug_mode
+ )
+ if (n_fixed > 0 && debug_mode) {
+ message("[docstyle] Fixed ", n_fixed, " comment-deletion nesting issue(s)")
+ }
+ }, error = function(e) {
+ message("[docstyle] Error fixing comment nesting: ", conditionMessage(e))
+ })
+ }
+
+ # Step 1c: Inject Zotero citation field codes from markers
+ # The Lua filter emits DOCSTYLE_CITE:: markers; this replaces them with
+ # real Word field code XML using data from field-codes.json.
+ n_citations_injected <- 0L
+ field_codes_json <- file.path(output_dir, "field-codes.json")
+ if (file.exists(field_codes_json)) {
+ tryCatch({
+ cite_result <- docstyle::inject_zotero_citations(
+ docx_path = docx_path,
+ field_codes_path = field_codes_json,
+ verbose = debug_mode
+ )
+ n_citations_injected <- cite_result$n_injected
+ }, error = function(e) {
+ message("[docstyle] Error injecting Zotero citations: ", conditionMessage(e))
+ })
+ }
+
+ # Step 2: Validate comments (if enabled via DOCSTYLE_VALIDATE_COMMENTS=1)
+ validate_comments <- Sys.getenv("DOCSTYLE_VALIDATE_COMMENTS", "0")
+ if (validate_comments == "1" && file.exists(comments_json)) {
+ tryCatch({
+ result <- docstyle::validate_comments(
+ docx_path = docx_path,
+ comments_json = comments_json,
+ verbose = TRUE
+ )
+ if (!result$valid) {
+ message("[docstyle] Comment validation failed")
+ }
+ }, error = function(e) {
+ message("[docstyle] Error validating comments: ", conditionMessage(e))
+ })
+ }
+
+ # Step 2b: Validate DOCX structure (if enabled via DOCSTYLE_VALIDATE=1)
+ # Catches XML issues, malformed tracked changes, duplicate IDs, etc.
+ validate_structure <- Sys.getenv("DOCSTYLE_VALIDATE", "0")
+ if (debug_mode) {
+ message("[docstyle] DOCSTYLE_VALIDATE=", validate_structure)
+ }
+ if (validate_structure == "1") {
+ tryCatch({
+ result <- docstyle::validate_docx_structure(
+ docx_path = docx_path,
+ verbose = debug_mode
+ )
+ if (!result$valid) {
+ message("[docstyle] Structure validation: ", length(result$errors), " issue(s) found")
+ for (err in result$errors) {
+ message(" - ", err)
+ }
+ } else if (debug_mode) {
+ message("[docstyle] Structure validation: passed all checks")
+ }
+ }, error = function(e) {
+ message("[docstyle] Error validating structure: ", conditionMessage(e))
+ })
+ }
+
+ # Step 3: Inject Zotero components (ZOTERO_PREF if missing)
+ # This ensures rendered documents have full Zotero functionality for round-trip editing
+ tryCatch({
+ result <- docstyle::inject_zotero_components(
+ docx_path = docx_path,
+ field_codes_json = if (file.exists(field_codes_json)) field_codes_json else NULL,
+ zotero_config = zotero_config,
+ validate = FALSE, # Will validate separately if enabled
+ verbose = debug_mode
+ )
+ if (result$zotero_pref_injected) {
+ zotero_pref_injected <- TRUE
+ if (debug_mode) {
+ message("[docstyle] Injected ZOTERO_PREF (style: ", result$style_id, ")")
+ }
+ }
+ }, error = function(e) {
+ if (debug_mode) {
+ message("[docstyle] Error injecting Zotero components: ", conditionMessage(e))
+ }
+ })
+
+ # Step 3b: Validate Zotero field codes (if enabled via DOCSTYLE_VALIDATE_ZOTERO=1)
+ validate_zotero <- Sys.getenv("DOCSTYLE_VALIDATE_ZOTERO", "0")
+ if (validate_zotero == "1") {
+ tryCatch({
+ result <- docstyle::validate_zotero(
+ docx_path = docx_path,
+ verbose = debug_mode
+ )
+ if (!result$valid) {
+ message("[docstyle] Zotero validation: ", length(result$issues$errors), " error(s)")
+ for (err in result$issues$errors) {
+ message(" - ", err)
+ }
+ } else if (debug_mode) {
+ message("[docstyle] Zotero validation: passed all checks")
+ }
+ }, error = function(e) {
+ message("[docstyle] Error validating Zotero: ", conditionMessage(e))
+ })
+ }
+
+ # Note: Step 4 (extract and merge field codes) was removed in v0.7.6.
+ # The render pipeline is read-only with respect to field-codes.json.
+ # New citations only enter via harvest (docx_to_qmd), not via render.
+ # See: https://github.com/DougManuel/docstyle/issues/38
+
+ # Step 5: Finalize section structure
+ # Post-process sectPr elements: remove leaked line numbers from body sectPr,
+ # validate opening/closing sectPr have correct properties
+ n_sections_fixed <- 0L
+ body_sectPr_fixed <- FALSE
+ tryCatch({
+ result <- docstyle::finalize_docx(
+ docx_path = docx_path,
+ sidecar_path = output_dir,
+ verbose = debug_mode
+ )
+ n_sections_fixed <- result$fixed
+ body_sectPr_fixed <- isTRUE(result$body_fixed)
+ }, error = function(e) {
+ message("[docstyle] Error finalizing sections: ", conditionMessage(e))
+ })
+
+ # Step 6: Prune unused styles (remove Pandoc bloat)
+ n_styles_pruned <- 0L
+ tryCatch({
+ n_styles_pruned <- docstyle::prune_styles_file(
+ docx_path = docx_path,
+ sidecar_dir = output_dir,
+ verbose = debug_mode
+ )
+ }, error = function(e) {
+ message("[docstyle] Error pruning styles: ", conditionMessage(e))
+ })
+
+ # Step 7: Scan for unresolved citations (always runs)
+ # Any [@citekey] text remaining in the output means the citation could not
+ # be resolved to a Zotero field code. This catches both Lua-filter misses
+ # (citekey not in field-codes.json) and R-finisher fallbacks.
+ unresolved_cites <- character()
+ tryCatch({
+ unresolved_cites <- docstyle::scan_unresolved_citations(docx_path)
+ }, error = function(e) {
+ if (debug_mode) {
+ message("[docstyle] Error scanning for unresolved citations: ", conditionMessage(e))
+ }
+ })
+
+  # Partition unresolved citations into staged vs unknown.
+  # Staged: metadata exists in field-codes.json citations catalog but no citationGroup
+  # (added via add_citations_from_zotero() or QMD-first drafting — expected during drafting)
+  # Unknown: no metadata at all — likely a typo or missing harvest
+  staged_cites <- character()
+  unknown_cites <- unresolved_cites
+  if (length(unresolved_cites) > 0 && file.exists(field_codes_json)) {
+    tryCatch({
+      fc_obj <- jsonlite::fromJSON(field_codes_json, simplifyVector = FALSE)
+      # names() of a missing/empty catalog is NULL and `%in% NULL` is all
+      # FALSE, so no `%||%` is needed (base R only provides it from 4.4.0).
+      is_staged <- unresolved_cites %in% names(fc_obj$citations)
+      staged_cites <- unresolved_cites[is_staged]
+      unknown_cites <- unresolved_cites[!is_staged]
+    }, error = function(e) {
+      if (debug_mode) message("[docstyle] Error reading citation catalog: ", conditionMessage(e))
+    })
+  }
+
+  # Print single summary line (only the steps that actually did something)
+  parts <- c(
+    if (n_comments > 0) sprintf("%d comment%s", n_comments, if (n_comments == 1) "" else "s"),
+    if (n_citations_injected > 0) sprintf("%d citation%s injected", n_citations_injected, if (n_citations_injected == 1) "" else "s"),
+    if (zotero_pref_injected) "ZOTERO_PREF injected",
+    if (body_sectPr_fixed) "section structure finalized",
+    if (n_styles_pruned > 0) sprintf("%d style%s pruned", n_styles_pruned, if (n_styles_pruned == 1) "" else "s")
+  )
+  if (length(parts) > 0) message("[docstyle] Processed: ", paste(parts, collapse = ", "))
+  # Unresolved citations: staged ones are informational, unknown ones a warning
+  if (length(staged_cites) > 0) {
+    message("[docstyle] Info: ", length(staged_cites),
+            " staged citation(s) pending Zotero insertion in Word: ",
+            paste(staged_cites, collapse = ", "))
+  }
+  if (length(unknown_cites) > 0) {
+    message("[docstyle] Warning: ", length(unknown_cites),
+            " unresolved citation(s) with no metadata — check citekeys or re-harvest: ",
+            paste(unknown_cites, collapse = ", "))
+  }
+}
diff --git a/docs/_extensions/docstyle/validate-markup.R b/docs/_extensions/docstyle/validate-markup.R
new file mode 100755
index 0000000..f13d016
--- /dev/null
+++ b/docs/_extensions/docstyle/validate-markup.R
@@ -0,0 +1,144 @@
+#!/usr/bin/env Rscript
+# Pre-render hook: Validate QMD markup before rendering
+#
+# This script validates comment markers, revision spans, and other
+# docstyle-specific markup before Quarto renders the document.
+# It catches issues that would cause Word "unreadable content" errors.
+#
+# Usage in _quarto.yml:
+# project:
+# pre-render:
+# - _extensions/docstyle/validate-markup.R
+# - _extensions/docstyle/generate-reference.R
+#
+# Environment variables (set by Quarto):
+# QUARTO_PROJECT_DIR - project root directory
+# QUARTO_PROJECT_INPUT_FILES - files being rendered
+#
+# Exit codes:
+# 0 - Validation passed (or skipped)
+# 1 - Validation failed (stops render)
+
+# Get input files from Quarto
+input_files_env <- Sys.getenv("QUARTO_PROJECT_INPUT_FILES", "")
+project_dir <- Sys.getenv("QUARTO_PROJECT_DIR", getwd())
+
+if (nchar(input_files_env) == 0) {
+ # Not running as Quarto hook, exit silently
+ quit(save = "no", status = 0)
+}
+
+# Parse input files (newline-separated)
+input_files <- strsplit(input_files_env, "\n")[[1]]
+qmd_files <- input_files[grepl("\\.qmd$", input_files, ignore.case = TRUE)]
+
+if (length(qmd_files) == 0) {
+ # No QMD files being rendered, nothing to validate
+ quit(save = "no", status = 0)
+}
+
+# Try to load docstyle
+docstyle_loaded <- FALSE
+
+if (requireNamespace("docstyle", quietly = TRUE)) {
+  docstyle_loaded <- TRUE
+} else {
+  # Not installed: try a development checkout (project dir or an ancestor).
+  search_dirs <- unique(c(
+    project_dir,
+    dirname(project_dir),
+    dirname(dirname(project_dir)),
+    dirname(dirname(dirname(project_dir))),
+    dirname(dirname(dirname(dirname(project_dir))))
+  ))
+  for (dir in search_dirs) {
+    desc_path <- file.path(dir, "DESCRIPTION")
+    if (file.exists(desc_path)) {
+      # Scan all lines (Package: need not come first); the anchored pattern
+      # only accepts the exact package name "docstyle".
+      desc_content <- readLines(desc_path, warn = FALSE)
+      is_docstyle <- any(grepl("^Package:\\s*docstyle\\s*$", desc_content))
+      if (is_docstyle && requireNamespace("devtools", quietly = TRUE)) {
+        tryCatch({
+          devtools::load_all(dir, quiet = TRUE)
+          docstyle_loaded <- TRUE
+          break
+        }, error = function(e) NULL)
+      }
+    }
+  }
+}
+
+if (!docstyle_loaded) {
+  message("[validate-markup] docstyle package not found, skipping validation")
+  quit(save = "no", status = 0)
+}
+
+# Find sidecar directory for comments.json
+sidecar_dir <- file.path(project_dir, "_docstyle")
+if (!dir.exists(sidecar_dir)) {
+  # Fall back to a _docstyle folder beside the first QMD being rendered
+  sidecar_dir <- file.path(dirname(qmd_files[1]), "_docstyle")
+}
+
+# comments.json is optional: NULL when the sidecar dir or the file is missing
+comments_json <- NULL
+if (dir.exists(sidecar_dir)) {
+  json_path <- file.path(sidecar_dir, "comments.json")
+  if (file.exists(json_path)) comments_json <- json_path
+}
+
+# Validate each QMD file
+all_valid <- TRUE
+total_errors <- 0
+total_warnings <- 0
+
+cat("\n")
+cat("=== docstyle Markup Validation ===\n")
+
+for (qmd_path in qmd_files) {
+  # Resolve relative paths against the project root. Absolute means a leading
+  # "/" or "~", a Windows drive prefix ("C:/" or "C:\"), or a UNC prefix.
+  if (!grepl("^(/|~|[A-Za-z]:[/\\\\]|\\\\\\\\)", qmd_path)) {
+    qmd_path <- file.path(project_dir, qmd_path)
+  }
+  if (!file.exists(qmd_path)) {
+    next
+  }
+
+  cat(sprintf("\nValidating: %s\n", basename(qmd_path)))
+
+  # A crash inside the validator is reported and counted as one error
+  result <- tryCatch({
+    docstyle::validate_qmd(
+      qmd_path = qmd_path,
+      comments_json = comments_json,
+      verbose = TRUE
+    )
+  }, error = function(e) {
+    message("[validate-markup] Error: ", conditionMessage(e))
+    list(valid = FALSE, issues = list(errors = conditionMessage(e), warnings = character()))
+  })
+  if (!result$valid) {
+    all_valid <- FALSE
+  }
+  total_errors <- total_errors + length(result$issues$errors)
+  total_warnings <- total_warnings + length(result$issues$warnings)
+}
+
+cat("\n")
+cat("=== Validation Summary ===\n")
+cat(sprintf("Files: %d | Errors: %d | Warnings: %d\n",
+ length(qmd_files), total_errors, total_warnings))
+
+if (!all_valid) {
+ cat("\n")
+ cat("ERROR: Validation failed. Fix errors before rendering.\n")
+ cat("Hint: Convert deprecated [text]{.comment id=\"X\"} to:\n")
+ cat(" - Range: text\n")
+ cat(" - Point: \n")
+ cat("\n")
+ quit(save = "no", status = 1)
+}
+
+cat("\n")
diff --git a/docs/_extensions/docstyle/version-history.lua b/docs/_extensions/docstyle/version-history.lua
new file mode 100644
index 0000000..c820a89
--- /dev/null
+++ b/docs/_extensions/docstyle/version-history.lua
@@ -0,0 +1,326 @@
+-- version-history.lua
+-- Pandoc Lua filter that generates a version history table from YAML metadata
+--
+-- Usage in QMD:
+-- ::: version-history
+-- :::
+--
+-- Configuration in _quarto.yml (under docstyle.version-history):
+-- title: "Version history" # Heading text (or false to disable)
+-- title-level: 1 # 1-6: uses Heading1-Heading6 style
+-- widths: "15,70,15" # Column width percentages (Version, Description, Date)
+-- style: "table-grid" # Table style: table-grid (all borders) or table-formal (top/bottom)
+--
+-- Version entries in QMD YAML front matter:
+-- version-history:
+-- - version: "1.0.0"
+-- date: "2025-01-15"
+-- description: "Final release"
+--
+-- This filter finds Div elements with class "version-history" and replaces them
+-- with a Word table generated from the version-history metadata.
+
+-- Load shared field code utilities
+local fcu = require("field-code-utils")
+
+local FORMAT = "openxml"
+
+-- Built-in table style definitions (matching table-style.lua)
+local table_styles = {
+ ["table-grid"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "000000" },
+ bottom = { val = "single", sz = "4", color = "000000" },
+ left = { val = "single", sz = "4", color = "000000" },
+ right = { val = "single", sz = "4", color = "000000" },
+ insideH = { val = "single", sz = "4", color = "000000" },
+ insideV = { val = "single", sz = "4", color = "000000" }
+ },
+ header_shading = nil,
+ header_bold = true
+ },
+ ["table-formal"] = {
+ borders = {
+ top = { val = "single", sz = "4", color = "7F7F7F" },
+ bottom = { val = "single", sz = "4", color = "7F7F7F" },
+ left = nil,
+ right = nil,
+ insideH = nil,
+ insideV = nil
+ },
+ header_shading = "D9D9D9",
+ header_bold = true
+ }
+}
+
+-- Store version history from metadata
+local version_history = nil
+local div_found = false
+local config = {
+ title = "Version history",
+ title_level = 1,
+ widths = {15, 70, 15}, -- Default: Version 15%, Description 70%, Date 15%
+ style = "table-grid" -- Default table style
+}
+
+-- Use shared xml_escape from field-code-utils
+local xml_escape = fcu.xml_escape
+
+-- Parse widths string "15,70,15" into table {15, 70, 15}
+local function parse_widths(widths_str)
+  -- Collect every comma-separated piece that converts to a number;
+  -- pieces that do not convert are dropped.
+  local parsed = {}
+  for piece in string.gmatch(widths_str, "([^,]+)") do
+    local value = tonumber(piece)
+    if value then parsed[#parsed + 1] = value end
+  end
+  -- Anything other than exactly three numbers falls back to the defaults
+  if #parsed == 3 then
+    return parsed
+  end
+  return {15, 70, 15}
+end
+
+-- Meta pass: capture version-history entries and docstyle.version-history options
+function Meta(meta)
+  -- Get version history entries
+  if meta["version-history"] then
+    version_history = meta["version-history"]
+    io.stderr:write("[version-history] Found " .. #version_history .. " version entries in metadata\n")
+  end
+
+  -- Get optional config from docstyle.version-history
+  if meta.docstyle and meta.docstyle["version-history"] then
+    local vh_config = meta.docstyle["version-history"]
+
+    -- Title: `false` disables the heading, any other value is stringified
+    local title_val = vh_config.title
+    if title_val == false then
+      config.title = nil
+    elseif title_val ~= nil then
+      config.title = pandoc.utils.stringify(title_val)
+    end
+
+    -- Title level: whole numbers 1-6 only, else 1; floor avoids "Heading2.0"
+    if vh_config["title-level"] then
+      local level = tonumber(pandoc.utils.stringify(vh_config["title-level"]))
+      local valid = level and level % 1 == 0 and level >= 1 and level <= 6
+      config.title_level = valid and math.floor(level) or 1
+    end
+
+    -- Column widths
+    if vh_config.widths then
+      local widths_str = pandoc.utils.stringify(vh_config.widths)
+      config.widths = parse_widths(widths_str)
+      io.stderr:write("[version-history] Column widths: " .. table.concat(config.widths, ", ") .. "\n")
+    end
+
+    -- Table style
+    if vh_config.style then
+      local style_name = pandoc.utils.stringify(vh_config.style)
+      if table_styles[style_name] then
+        config.style = style_name
+        io.stderr:write("[version-history] Table style: " .. style_name .. "\n")
+      else
+        io.stderr:write("[version-history] Unknown table style '" .. style_name .. "', using default\n")
+      end
+    end
+  end
+
+  return nil
+end
+
+-- Build border XML element
+local function build_border_xml(name, border)
+ if not border then return "" end
+ return string.format('',
+ name, border.val, border.sz, border.color)
+end
+
+-- Build table borders XML from style definition
+local function build_tblBorders_xml(borders)
+ if not borders then return "" end
+
+ local parts = { "" }
+ if borders.top then table.insert(parts, build_border_xml("top", borders.top)) end
+ if borders.left then table.insert(parts, build_border_xml("left", borders.left)) end
+ if borders.bottom then table.insert(parts, build_border_xml("bottom", borders.bottom)) end
+ if borders.right then table.insert(parts, build_border_xml("right", borders.right)) end
+ if borders.insideH then table.insert(parts, build_border_xml("insideH", borders.insideH)) end
+ if borders.insideV then table.insert(parts, build_border_xml("insideV", borders.insideV)) end
+ table.insert(parts, "")
+
+ return table.concat(parts)
+end
+
+-- Build a table cell XML with optional width and shading
+local function build_cell(text, bold, width_pct, shading)
+ local rPr = ""
+ if bold then
+ rPr = ""
+ end
+
+ -- Width in fiftieths of a percent (5000 = 100%)
+ local tcPr_parts = { "" }
+ if width_pct then
+ local width_val = math.floor(width_pct * 50) -- Convert % to fiftieths
+ table.insert(tcPr_parts, '')
+ end
+ if shading then
+ table.insert(tcPr_parts, '')
+ end
+ table.insert(tcPr_parts, "")
+
+ return '' ..
+ table.concat(tcPr_parts) ..
+ '' ..
+ '' .. rPr ..
+ '' .. xml_escape(text) .. '' ..
+ '' ..
+ '' ..
+ ''
+end
+
+-- Build the version history table XML
+local function build_table_xml()
+ if not version_history or #version_history == 0 then
+ return nil
+ end
+
+ local w = config.widths
+ local style = table_styles[config.style] or table_styles["table-grid"]
+
+ -- Header row with column widths (bold header if style specifies, with optional shading)
+ local header_bold = style.header_bold
+ local header_shading = style.header_shading
+ local header_row = '' ..
+ build_cell("Version", header_bold, w[1], header_shading) ..
+ build_cell("Description", header_bold, w[2], header_shading) ..
+ build_cell("Date", header_bold, w[3], header_shading) ..
+ ''
+
+ -- Data rows
+ local data_rows = {}
+ for _, entry in ipairs(version_history) do
+ local version = ""
+ local description = ""
+ local date = ""
+
+ if entry.version then
+ version = pandoc.utils.stringify(entry.version)
+ end
+ if entry.description then
+ description = pandoc.utils.stringify(entry.description)
+ end
+ if entry.date then
+ date = pandoc.utils.stringify(entry.date)
+ end
+
+ local row = '' ..
+ build_cell(version, false, w[1], nil) ..
+ build_cell(description, false, w[2], nil) ..
+ build_cell(date, false, w[3], nil) ..
+ ''
+ table.insert(data_rows, row)
+ end
+
+ -- Build table borders from style
+ local borders_xml = build_tblBorders_xml(style.borders)
+
+ -- Complete table with style-defined borders
+ local table_xml = '' ..
+ '' ..
+ '' .. -- 100% width
+ borders_xml ..
+ '' ..
+ header_row ..
+ table.concat(data_rows) ..
+ ''
+
+ return table_xml
+end
+
+-- Build heading for the title
+local function build_title_xml()
+ if not config.title then
+ return nil
+ end
+
+ local style_id = "Heading" .. tostring(config.title_level)
+
+ return '' ..
+ '' ..
+ '' .. xml_escape(config.title) .. '' ..
+ ''
+end
+
+-- Process Div elements looking for .version-history class
+function Div(div)
+ -- Check if this div has the "version-history" class
+ if not div.classes:includes("version-history") then
+ return nil
+ end
+
+ div_found = true
+
+ -- Only process for docx output
+ if FORMAT ~= "openxml" then
+ io.stderr:write("[version-history] Skipping (not docx output)\n")
+ return nil
+ end
+
+ if not version_history or #version_history == 0 then
+ io.stderr:write("[version-history] No version-history metadata found\n")
+ return {} -- Remove the div entirely
+ end
+
+ io.stderr:write("[version-history] Generating table with " .. #version_history .. " entries\n")
+
+ -- Build the result blocks
+ local blocks = {}
+
+ -- ADDIN DOCSTYLE field code begin (using shared utility)
+ table.insert(blocks, pandoc.RawBlock("openxml", fcu.build_div_field_start("version-history")))
+
+ -- Add title heading
+ local title_xml = build_title_xml()
+ if title_xml then
+ table.insert(blocks, pandoc.RawBlock("openxml", title_xml))
+ end
+
+ -- Add the table
+ local table_xml = build_table_xml()
+ if table_xml then
+ table.insert(blocks, pandoc.RawBlock("openxml", table_xml))
+ end
+
+ -- ADDIN DOCSTYLE field code end (using shared utility)
+ table.insert(blocks, pandoc.RawBlock("openxml", fcu.build_block_field_end()))
+
+ return blocks
+end
+
+-- Check output format: map "docx"/"openxml" to "openxml"; the document is returned unchanged
+function Pandoc(doc)
+ if FORMAT == "docx" or FORMAT == "openxml" then
+ FORMAT = "openxml"
+ end
+ return nil -- NOTE(review): FORMAT here is the file-level local initialised to "openxml", so this never changes it and the docx-only guard in Div cannot skip — confirm intent
+end
+
+-- Warn if version-history metadata exists but no div was found
+local function CheckUnused(doc)
+ if version_history and #version_history > 0 and not div_found then
+ io.stderr:write("[version-history] Warning: " .. #version_history ..
+ " version-history entries in metadata but no ::: version-history ::: " ..
+ "div in document. Add the div where you want the table to appear.\n")
+ end
+ return nil
+end
+
+return {
+ { Meta = Meta, Pandoc = Pandoc },
+ { Div = Div },
+ { Pandoc = CheckUnused }
+}
diff --git a/docs/_extensions/docstyle/zotero-inject.lua b/docs/_extensions/docstyle/zotero-inject.lua
new file mode 100644
index 0000000..e24b95a
--- /dev/null
+++ b/docs/_extensions/docstyle/zotero-inject.lua
@@ -0,0 +1,136 @@
+-- zotero-inject.lua
+-- Pandoc Lua filter that emits text markers for Zotero citation field codes.
+--
+-- The R finisher (inject_zotero_citations) replaces these markers with real
+-- Word field code XML after Pandoc has finished rendering to docx.
+--
+-- Markers:
+-- DOCSTYLE_CITE::key1;key2 – citation (single or grouped)
+-- DOCSTYLE_CITE_BIBL – bibliography placeholder
+--
+-- Usage:
+-- pandoc --lua-filter=zotero-inject.lua -M field-codes=path/to/field-codes.json ...
+
+-- Debug logging (set DOCSTYLE_DEBUG=1 to enable)
+local DEBUG = os.getenv("DOCSTYLE_DEBUG") == "1"
+local function debug(msg)
+ if DEBUG then
+ io.stderr:write(msg)
+ end
+end
+
+-- Citekey existence lookup (populated from field-codes.json citations section)
+local known_citekeys = {}
+
+-- Report whether `path` can be opened for reading (the probe handle is closed)
+local function file_exists(path)
+  local handle = io.open(path, "r")
+  if handle == nil then
+    return false
+  end
+  handle:close()
+  return true
+end
+
+-- Meta pass: locate field-codes.json and record its citation keys in known_citekeys
+function Meta(meta)
+  local field_codes_path = nil
+
+  -- Get path from metadata (explicit override)
+  if meta["field-codes"] then
+    field_codes_path = pandoc.utils.stringify(meta["field-codes"])
+  end
+
+  -- Auto-detect common locations if not specified
+  if not field_codes_path then
+    debug("[zotero-inject] Looking for field-codes.json...\n")
+    local search_paths = {
+      "_docstyle/field-codes.json",
+      "field-codes.json",
+      "../_docstyle/field-codes.json"
+    }
+    for _, path in ipairs(search_paths) do
+      debug("[zotero-inject] Checking: " .. path .. "\n")
+      if file_exists(path) then
+        field_codes_path = path
+        debug("[zotero-inject] Found field-codes.json: " .. path .. "\n")
+        break
+      end
+    end
+  end
+
+  if not field_codes_path then
+    debug("[zotero-inject] No field-codes.json found; markers will not be emitted\n")
+    return nil
+  end
+
+  -- Read the whole file; an unreadable path leaves known_citekeys empty
+  local file = io.open(field_codes_path, "r")
+  if not file then
+    debug("[zotero-inject] Could not open: " .. field_codes_path .. "\n")
+    return nil
+  end
+
+  local content = file:read("*all")
+  file:close()
+
+  local ok, parsed = pcall(function()
+    return pandoc.json.decode(content)
+  end)
+
+  -- Non-object JSON such as null or a number is rejected: indexing it would error
+  if not ok or type(parsed) ~= "table" then
+    debug("[zotero-inject] Failed to parse field-codes.json\n")
+    return nil
+  end
+
+  local count = 0
+  if type(parsed.citations) == "table" then
+    for citekey, _ in pairs(parsed.citations) do
+      known_citekeys[citekey] = true
+      count = count + 1
+    end
+  end
+  debug("[zotero-inject] Loaded " .. count .. " citekey(s) from field-codes.json\n")
+
+  return nil
+end
+
+-- Process Cite elements: emit text markers instead of raw OpenXML
+function Cite(cite)
+  -- Gather the ids in one pass; the first id missing from known_citekeys
+  -- aborts the rewrite so Pandoc renders the citation as usual.
+  local ids = {}
+  for index, citation in ipairs(cite.citations) do
+    local id = citation.id
+    if not known_citekeys[id] then
+      debug("[zotero-inject] Skipping (unknown citekey): " .. id .. "\n")
+      return nil
+    end
+    ids[index] = id
+  end
+
+  -- A Cite without any citations is left untouched
+  if #ids == 0 then
+    return nil
+  end
+
+  local marker = "DOCSTYLE_CITE::" .. table.concat(ids, ";")
+  debug("[zotero-inject] Emitting marker: " .. marker .. "\n")
+  return pandoc.Str(marker)
+end
+
+-- Process Div elements: emit bibliography marker for bibliography placeholder
+function Div(div)
+ if div.classes:includes("bibliography") then
+ debug("[zotero-inject] Emitting bibliography marker from div\n")
+ return pandoc.Para({ pandoc.Str("DOCSTYLE_CITE_BIBL") })
+ end
+ return nil
+end
+
+return {
+ { Meta = Meta },
+ { Cite = Cite },
+ { Div = Div }
+}
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
new file mode 100644
index 0000000..f6503ed
--- /dev/null
+++ b/docs/_quarto.yml
@@ -0,0 +1,87 @@
+project:
+ type: default
+ pre-render: _extensions/docstyle/generate-reference.R
+ post-render: _extensions/docstyle/update-field-codes.R
+ output-dir: ../_site/docs
+
+bibliography: ../references.bib
+
+format:
+ docstyle-docx:
+ toc: false
+ number-sections: false
+ reference-doc: ../_docstyle/reference.docx
+
+docstyle:
+ css:
+ - ../popcorn-base.css
+ - ../pop-draft-manuscript.css
+ sidecar-dir: ../_docstyle
+ header:
+ enabled: true
+ left: "CSHM Study Protocol"
+ first-page: false
+ style: header
+ footer:
+ enabled: true
+ left: "DRAFT"
+ right: "Page {page} of {pages}"
+ first-page: false
+ style: footer
+ toc:
+ title: "Table of contents"
+ levels: "1-3"
+ author-plate:
+ enabled: false
+ version-history:
+ enabled: true
+ title: "Version history"
+ style: "table-formal"
+ authors:
+ - name:
+ given: "Douglas"
+ family: "Manuel"
+ email: "dmanuel@ohri.ca"
+ orcid: "0000-0003-0912-0845"
+ corresponding: true
+ affiliations:
+ - ref: ohri
+ - name:
+ given: "Rafael"
+ family: "Meza"
+ orcid: "0000-0002-1076-5037"
+ affiliations:
+ - ref: bccancer
+ - ref: ubc
+ - name:
+ given: "Rochelle E."
+ family: "Garner"
+ email: "rochelle.garner@statcan.gc.ca"
+ affiliations:
+ - ref: statscan
+ - name:
+ given: "Maikol"
+ family: "Diasparra"
+ email: "maikol.diasparra@statcan.gc.ca"
+ affiliations:
+ - ref: statscan
+ affiliations:
+ - id: ohri
+ name: "Ottawa Hospital Research Institute"
+ city: "Ottawa"
+ country: "Canada"
+ - id: bccancer
+ name: "BC Cancer Research Institute"
+ city: "Vancouver"
+ country: "Canada"
+ - id: ubc
+ name: "University of British Columbia"
+ department: "School of Population and Public Health"
+ city: "Vancouver"
+ country: "Canada"
+ - id: statscan
+ name: "Statistics Canada"
+ department: "Health Analysis Division"
+ city: "Ottawa"
+ country: "Canada"
+
diff --git a/docs/explanation/apc-method.qmd b/docs/explanation/apc-method.qmd
new file mode 100644
index 0000000..0786c67
--- /dev/null
+++ b/docs/explanation/apc-method.qmd
@@ -0,0 +1,50 @@
+---
+title: "Age-Period-Cohort methodology"
+subtitle: "Understanding the statistical approach behind CSHM"
+---
+
+# Age-Period-Cohort methodology
+
+This document explains the Age-Period-Cohort (APC) methodology used in the CSHM.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+The Age-Period-Cohort (APC) model is a statistical approach used to separate the effects of three time-related variables on an outcome of interest.
+
+## The three temporal dimensions
+
+The three key dimensions in an APC model are:
+
+1. **Age effects**: How behavior changes as individuals age
+2. **Period effects**: How behavior changes across calendar time (affecting all age groups)
+3. **Cohort effects**: How behavior differs across birth cohorts (groups born in the same time period)
+
+## The identification problem
+
+A fundamental challenge in APC models is the linear dependency between the three dimensions:
+
+```
+Cohort = Period - Age
+```
+
+This creates an identification problem that requires constraints or additional modeling approaches to solve.
+
+## APC in CSHM
+
+The CSHM implements APC modeling using:
+
+1. Natural cubic splines to model age, period, and cohort effects
+2. Strategic constraints based on empirical research
+3. Separate models for smoking initiation and cessation
+4. Sex-specific modeling to capture gender differences
+
+## Statistical implementation
+
+The statistical approach involves:
+
+1. Using generalized linear models with a logit link function
+2. Incorporating natural cubic splines for flexible effect estimation
+3. Applying constraints to address the identification problem
+4. Accounting for complex survey design and weighting
\ No newline at end of file
diff --git a/docs/explanation/index.qmd b/docs/explanation/index.qmd
new file mode 100644
index 0000000..092452e
--- /dev/null
+++ b/docs/explanation/index.qmd
@@ -0,0 +1,12 @@
+---
+title: "Explanation"
+---
+
+This section provides conceptual explanations of the methodology and approach used in CSHM:
+
+- [Smoking history modeling](smoking-history.qmd): Conceptual overview of smoking history modeling
+- [APC methodology](apc-method.qmd): Explanation of the Age-Period-Cohort methodology
+- [Validation approaches](validation.qmd): Discussion of validation approaches and considerations
+- [Research applications](research-applications.qmd): Use cases in population health research
+
+Use this section when you want to understand the concepts and theory behind the project.
\ No newline at end of file
diff --git a/docs/explanation/research-applications.qmd b/docs/explanation/research-applications.qmd
new file mode 100644
index 0000000..02aee5d
--- /dev/null
+++ b/docs/explanation/research-applications.qmd
@@ -0,0 +1,42 @@
+---
+title: "Research applications"
+subtitle: "Using CSHM in population health research"
+---
+
+# Research applications
+
+This document discusses ways the Canadian Smoking Histories Model can be applied in research.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+The CSHM has applications in various fields of research, policy analysis, and public health planning.
+
+## Population health research
+
+Applications in population health research include:
+
+1. **Smoking-attributable disease burden**: Estimating the burden of disease attributable to smoking
+2. **Health inequality assessment**: Analyzing disparities in smoking behavior across population groups
+3. **Temporal trend analysis**: Understanding how smoking patterns have changed over time
+
+## Health policy research
+
+Applications in health policy include:
+
+1. **Policy evaluation**: Assessing the impact of past tobacco control policies
+2. **Policy simulation**: Modeling the potential effects of proposed policies
+3. **Economic analysis**: Estimating costs associated with smoking and benefits of interventions
+
+## Future research directions
+
+Promising areas for future research using the CSHM include:
+
+1. Integration with e-cigarette and vaping behavior models
+2. Incorporation of social network effects on smoking behavior
+3. Analysis of interactions between smoking and other health behaviors
+
+## Case studies
+
+*This section will include examples of published research using the CSHM or similar approaches.*
\ No newline at end of file
diff --git a/docs/explanation/smoking-history.qmd b/docs/explanation/smoking-history.qmd
new file mode 100644
index 0000000..0faaac2
--- /dev/null
+++ b/docs/explanation/smoking-history.qmd
@@ -0,0 +1,41 @@
+---
+title: "Smoking history modeling"
+subtitle: "Conceptual overview of smoking behavior modeling approaches"
+---
+
+# Smoking history modeling
+
+This document provides a conceptual overview of smoking history modeling approaches.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+Smoking history modeling is a method to understand and predict patterns of smoking behavior in populations over time.
+
+## Key concepts
+
+Important concepts in smoking history modeling include:
+
+1. **Smoking states**: Different states individuals can occupy (never, current, former smoker)
+2. **Transition probabilities**: The likelihood of moving between smoking states
+3. **Temporal effects**: How age, time period, and birth cohort influence smoking behavior
+4. **Population heterogeneity**: Different patterns across demographic groups
+
+## Modeling approaches
+
+Different approaches to smoking history modeling include:
+
+1. **Descriptive approaches**: Using survey data to directly estimate prevalence
+2. **Statistical approaches**: Using regression models to identify patterns and trends
+3. **Microsimulation approaches**: Simulating individual smoking histories
+4. **Age-Period-Cohort models**: Separating three temporal effects on smoking behavior
+
+## Applications
+
+Smoking history models have various applications:
+
+1. Projecting future smoking prevalence
+2. Estimating disease burden attributable to smoking
+3. Evaluating tobacco control policies
+4. Understanding smoking behavior dynamics
\ No newline at end of file
diff --git a/docs/explanation/validation.qmd b/docs/explanation/validation.qmd
new file mode 100644
index 0000000..9acef2f
--- /dev/null
+++ b/docs/explanation/validation.qmd
@@ -0,0 +1,51 @@
+---
+title: "Validation approaches"
+subtitle: "Methods for assessing model accuracy and reliability"
+---
+
+# Validation approaches
+
+This document explains the validation approaches used in the CSHM.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+Validation is a critical step in ensuring that the CSHM produces accurate and reliable results that reflect real-world smoking patterns.
+
+## Validation goals
+
+The primary goals of validation are to ensure:
+
+1. **Internal validity**: The model produces results that are consistent with the data used to build it
+2. **External validity**: The model can generalize to new data and scenarios
+3. **Face validity**: The results align with expert knowledge and historical trends
+4. **Predictive validity**: The model can make accurate predictions about future smoking patterns
+
+## Validation methods
+
+The CSHM employs several validation methods:
+
+1. **Historical comparison**: Comparing model-generated prevalence with historical survey data
+2. **Cross-validation**: Testing the model on different subsets of data
+3. **Sensitivity analysis**: Examining how results change when model parameters are varied
+4. **Expert review**: Engaging subject matter experts to review the results
+
+## Validation metrics
+
+Key metrics used for validation include:
+
+1. Smoking prevalence by age, sex, and calendar year
+2. Age-specific initiation rates
+3. Age-specific cessation rates
+4. Duration of smoking distributions
+5. Cigarettes per day distributions
+
+## Validation challenges
+
+Common challenges in validating smoking history models include:
+
+1. Limited historical data for some time periods
+2. Changes in smoking behavior definitions over time
+3. Recall bias in self-reported smoking histories
+4. Underreporting of smoking in surveys
\ No newline at end of file
diff --git a/docs/how-to/custom-models.qmd b/docs/how-to/custom-models.qmd
new file mode 100644
index 0000000..f5f7a7c
--- /dev/null
+++ b/docs/how-to/custom-models.qmd
@@ -0,0 +1,41 @@
+---
+title: "Creating custom models"
+subtitle: "Extending the CSHM for specialized analyses"
+---
+
+# Creating custom models
+
+This guide explains how to create custom models that extend the core functionality of the CSHM.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+The CSHM can be extended in various ways to address specific research questions or incorporate additional factors.
+
+## Customization approaches
+
+Common customization approaches include:
+
+1. **Adding covariates**: Including additional variables like education or income
+2. **Alternative spline specifications**: Using different knot placements or spline types
+3. **Interaction terms**: Modeling interactions between temporal components
+4. **Stratified analyses**: Creating separate models for specific subpopulations
+
+## Example extensions
+
+```r
+# Example custom model code (placeholder)
+# custom_apc_model <- function(data, covariates = NULL) {
+# # Custom model implementation would go here
+# }
+```
+
+## Best practices
+
+When creating custom models:
+
+1. Document model specifications clearly
+2. Validate custom models against baseline models
+3. Perform sensitivity analyses
+4. Consider computational efficiency for complex models
\ No newline at end of file
diff --git a/docs/how-to/data-loading-and-harmonizing.qmd b/docs/how-to/data-loading-and-harmonizing.qmd
new file mode 100644
index 0000000..adbb97c
--- /dev/null
+++ b/docs/how-to/data-loading-and-harmonizing.qmd
@@ -0,0 +1,312 @@
+---
+title: "Data loading and harmonizing"
+subtitle: "Preparing CCHS data for the Smoking Histories Model"
+author: "CSHM Development Team"
+date: "Last Updated: `r Sys.Date()`"
+format:
+ html:
+ toc: true
+ toc-depth: 3
+ number-sections: true
+ theme: cosmo
+ code-fold: show
+execute:
+ echo: true
+ warning: false
+ message: false
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+# Load the packages used by this document's setup
+library(dplyr)
+library(ggplot2)
+# If you have installed the cshm package, uncomment this:
+# library(cshm)
+```
+
+# Introduction
+
+This guide focuses on the initial steps of the CSHM workflow: loading Canadian Community Health Survey (CCHS) data and harmonizing variables across survey cycles. These steps are critical for ensuring consistent analysis across different time periods.
+
+## Purpose of data harmonization
+
+Variable names, coding, and survey design have changed across CCHS cycles. Harmonization ensures that:
+
+1. Variables have consistent names across cycles
+2. Response codes are standardized
+3. Derived variables are calculated consistently
+4. Missing values are handled appropriately
+
+# Required data sources
+
+## Canadian Community Health Survey (CCHS)
+
+The CCHS is the primary data source for the smoking history model. We typically use multiple cycles:
+
+- CCHS 2001 (Cycle 1.1)
+- CCHS 2003 (Cycle 2.1)
+- CCHS 2005 (Cycle 3.1)
+- CCHS 2007-2008
+- CCHS 2009-2010
+- CCHS 2011-2012
+- CCHS 2013-2014
+- CCHS 2015-2016
+- CCHS 2017-2018
+
+## Accessing CCHS data
+
+CCHS data can be accessed through:
+
+1. **Statistics Canada Research Data Centres (RDCs)**: For researchers who need access to the detailed microdata
+2. **Public Use Microdata Files (PUMFs)**: Available through academic institutions
+3. **Sample files**: For demonstration purposes, sample datasets are included with this package
+
+# Loading CCHS data
+
+## Step 1: Set up your environment
+
+First, set up your environment with the necessary packages:
+
+```{r load-packages, eval=FALSE}
+# Load required packages
+library(cshm)
+library(dplyr)
+library(cchsflow) # For harmonizing CCHS variables
+library(haven) # For reading SAS or SPSS files
+
+# Read paths from the project's config file. config::get() is called with its
+# namespace instead of attaching config, which would mask base::get().
+cfg <- config::get()
+data_path <- cfg$data_path
+```
+
+## Step 2: Load individual CCHS cycles
+
+Load each CCHS cycle file. The format will depend on how your data is stored:
+
+```{r load-cycles, eval=FALSE}
+# Example: loading .RData files (written by save(), so read with load())
+load_cycle <- function(name) {
+  env <- new.env() # Keep the loaded object out of the global environment
+  env[[load(file.path(data_path, paste0(name, ".RData")), envir = env)[1]]]
+}
+cchs2001 <- load_cycle("cchs2001_p")
+cchs2003 <- load_cycle("cchs2003_p")
+cchs2005 <- load_cycle("cchs2005_p")
+cchs2007_2008 <- load_cycle("cchs2007_2008_p")
+cchs2009_2010 <- load_cycle("cchs2009_2010_p")
+cchs2011_2012 <- load_cycle("cchs2011_2012_p")
+cchs2013_2014 <- load_cycle("cchs2013_2014_p")
+# For SAS or SPSS files, use haven::read_sas() or haven::read_spss() instead
+```
+
+## Step 3: Data preparation
+
+Before harmonizing, prepare the data by adding cycle-specific identifiers:
+
+```{r prepare-data, eval=FALSE}
+# Tag every dataset with the survey cycle it came from
+cchs2001 <- mutate(cchs2001, cycle = "2001")
+cchs2003 <- mutate(cchs2003, cycle = "2003")
+cchs2005 <- mutate(cchs2005, cycle = "2005")
+cchs2007_2008 <- mutate(cchs2007_2008, cycle = "2007-2008")
+cchs2009_2010 <- mutate(cchs2009_2010, cycle = "2009-2010")
+cchs2011_2012 <- mutate(cchs2011_2012, cycle = "2011-2012")
+cchs2013_2014 <- mutate(cchs2013_2014, cycle = "2013-2014")
+
+# Collect the tagged datasets in one list, oldest cycle first
+cchs_list <- list(
+  cchs2001, cchs2003, cchs2005, cchs2007_2008,
+  cchs2009_2010, cchs2011_2012, cchs2013_2014
+)
+```
+
+# Variable harmonization
+
+## Core smoking variables
+
+For the CSHM, we need to harmonize several key smoking-related variables:
+
+| Type | Variables | Description |
+|------|-----------|-------------|
+| Status | SMK_01A, SMK_202, SMKDSTY | Basic smoking status indicators |
+| Initiation | SMKG01C_cont, SMKG203_cont, SMKG207_cont | Age of smoking initiation |
+| Cessation | SMK_09A_cont, SMKG09C | When smoking cessation occurred |
+| Intensity | SMK_204, SMK_208, SMK_05B, SMK_05C | Quantity of cigarettes smoked |
+
+## Step 4: Harmonize variables using cchsflow
+
+The `cchsflow` package provides tools for harmonizing CCHS variables across cycles:
+
+```{r harmonize, eval=FALSE}
+# Define the variables needed for CSHM
+smoking_vars <- c(
+  "SMK_01A", # In lifetime, smoked 100 or more cigarettes
+  "SMK_202", # Current smoking status
+  "SMKDSTY", # Type of smoker
+  "SMKG01C_cont", # Age smoked first cigarette
+  "SMKG203_cont", # Age started smoking daily (daily smokers)
+  "SMKG207_cont", # Age started smoking daily (former daily smokers)
+  "SMK_09A_cont", # When stopped smoking daily (former daily)
+  "SMKG09C", # Years since stopped smoking daily
+  "SMK_204", # Number of cigarettes smoked daily (daily smokers)
+  "SMK_208", # Number of cigarettes smoked daily (former daily smokers)
+  "SMK_05B", # Number of cigarettes smoked daily (occasional smokers)
+  "SMK_05C" # Number of days smoked in past month (occasional smokers)
+)
+
+# Demographic variables
+demographic_vars <- c(
+  "DHH_SEX", # Sex of respondent
+  "DHH_AGE", # Age of respondent
+  "WTS_M" # Survey weight
+)
+
+# Variables for timing (SAMPLEID contains year and month of survey)
+timing_vars <- "SAMPLEID"
+
+all_vars <- c(smoking_vars, demographic_vars, timing_vars) # Combine all
+
+# Process each dataset with cchsflow. rec_with_table() picks recoding rules by
+# database name, which it cannot infer inside a function, so pass it per cycle.
+database_names <- paste0("cchs", c(
+  "2001", "2003", "2005", "2007_2008", "2009_2010", "2011_2012", "2013_2014"
+), "_p") # Same order as cchs_list
+harmonized_list <- Map(
+  function(dataset, name) rec_with_table(dataset, all_vars, database_name = name),
+  cchs_list, database_names
+)
+harmonized_data <- bind_rows(harmonized_list) # Combine all harmonized cycles
+```
+
+## Step 5: Create derived variables
+
+Some additional variables need to be created for the CSHM:
+
+```{r derived-vars, eval=FALSE}
+# Create survey date and additional variables
+harmonized_data <- harmonized_data %>%
+  mutate(
+    # Extract survey year and month from SAMPLEID
+    survey_year = as.numeric(substr(as.character(SAMPLEID), 1, 4)),
+    survey_month = as.numeric(substr(as.character(SAMPLEID), 5, 6)),
+    # Create a date for the survey (mid-month approximation). The explicit
+    # format turns unparseable input into NA instead of raising an error.
+    cchsbdate = as.Date(paste(survey_year, survey_month, "15", sep = "-"),
+      format = "%Y-%m-%d"),
+    # Rename age of first cigarette for consistency with original code
+    agefirst = SMKG01C_cont,
+
+    # Create survey age variable (age at time of survey)
+    surveyage = DHH_AGE,
+
+    # Calculate birth year
+    birth_year = survey_year - surveyage,
+
+    # Create weight variable with consistent name
+    weighting = WTS_M,
+
+    # Create respondent ID
+    ont_id = row_number() # Create sequential ID if none exists
+  )
+```
+
+## Step 6: Clean and validate the data
+
+Data cleaning is essential to ensure valid analyses:
+
+```{r cleaning, eval=FALSE}
+# Keep respondents aged 12+ whose sex, age, and survey weight are recorded
+cleaned_data <- filter(
+  harmonized_data,
+  !is.na(DHH_SEX), !is.na(surveyage), # Sex and age must be known
+  surveyage >= 12, # Respondents aged 12+ only
+  !is.na(weighting) # Survey weight must be present
+)
+
+# Add the M/F sex coding that CSHM requires
+cleaned_data <- mutate(cleaned_data, sex = ifelse(DHH_SEX == 1, "M", "F"))
+```
+
+## Step 7: Check variable distributions
+
+Verify that the harmonization worked correctly by checking key variable distributions:
+
+```{r check-distributions, eval=FALSE}
+# Check smoking status distribution
+cleaned_data %>%
+  count(SMKDSTY) %>%
+  mutate(
+    label = case_when(
+      SMKDSTY == 1 ~ "Daily smoker",
+      SMKDSTY == 2 ~ "Occasional smoker",
+      SMKDSTY == 3 ~ "Former daily smoker",
+      SMKDSTY == 4 ~ "Former occasional smoker",
+      SMKDSTY == 5 ~ "Never smoker",
+      TRUE ~ "Unknown"
+    ),
+    percentage = 100 * n / sum(n)
+  ) %>%
+  arrange(SMKDSTY)
+
+# Check distribution by cycle. count(cycle) returns an ungrouped table, so
+# sum(n) is the overall total rather than each cycle's own count.
+cleaned_data %>%
+  count(cycle) %>%
+  mutate(percentage = 100 * n / sum(n))
+
+# Check age distribution; right = FALSE gives bins [12, 20), [20, 30), ...
+cleaned_data %>%
+  mutate(age_group = cut(surveyage,
+    breaks = c(12, 20, 30, 40, 50, 60, 70, 80, 999), right = FALSE,
+    labels = c("12-19", "20-29", "30-39", "40-49",
+      "50-59", "60-69", "70-79", "80+"))) %>%
+  count(age_group) %>%
+  mutate(percentage = 100 * n / sum(n))
+```
+
+# Save the processed data
+
+Once your data is harmonized and validated, save it for future use:
+
+```{r save-data, eval=FALSE}
+# Write the cleaned dataset to an .rds file under the configured data path
+saveRDS(object = cleaned_data, file = file.path(data_path, "harmonized_cchs_data.rds"))
+```
+
+# Additional considerations
+
+## Variable mapping challenges
+
+CCHS variables may not always harmonize cleanly. Here are some common challenges and solutions:
+
+1. **Variable availability**: Not all variables are available in all cycles. Check which cycles include your variables of interest.
+
+2. **Coding differences**: Response codes may differ across cycles, even after harmonization. Verify code mappings using `cchsflow` documentation.
+
+3. **Skip patterns**: Survey skip patterns have changed over time, affecting the population asked certain questions.
+
+4. **Variable granularity**: Some cycles may have more detailed responses than others. Harmonization usually adopts the least granular version.
+
+## Working with secure environments
+
+When working in Statistics Canada Research Data Centres (RDCs) or other secure environments:
+
+1. Use relative file paths or configuration files to manage data locations
+2. Avoid storing large intermediate files if storage is limited
+3. Follow the RDC's guidelines for output review
+4. Consider batch processing if interactive sessions are limited
+
+# Next steps
+
+After harmonizing your CCHS data, you can proceed to:
+
+1. [Process smoking variables](processing-cchs-data.qmd) in more detail
+2. Begin [APC modeling](reproducing-ontario-study.qmd) using the harmonized data
+
+# References
+
+Canadian Community Health Survey (CCHS) - Annual Component. Statistics Canada.
+
+Manuel, D., Yusuf, W., Vyuha, R., Bennett, C., et al. cchsflow: Transforming and harmonizing CCHS variables. R package. <https://cran.r-project.org/package=cchsflow>
\ No newline at end of file
diff --git a/docs/how-to/generating-histories.qmd b/docs/how-to/generating-histories.qmd
new file mode 100644
index 0000000..cdb06b5
--- /dev/null
+++ b/docs/how-to/generating-histories.qmd
@@ -0,0 +1,40 @@
+---
+title: "Generating smoking histories"
+subtitle: "Creating synthetic smoking histories from APC model results"
+---
+
+# Generating smoking histories
+
+This guide explains how to generate synthetic smoking histories using the results from the Age-Period-Cohort model.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+After fitting the APC model, you can generate complete smoking histories for simulated individuals. These histories can then be used in microsimulation models of smoking-related diseases.
+
+## Simulation process
+
+The simulation process involves:
+
+1. Creating a population with defined birth cohorts
+2. Simulating smoking initiation based on age-specific probabilities
+3. Simulating cessation for those who initiate
+4. Recording complete lifetime smoking histories
+
+## Example code
+
+```r
+# Example code (placeholder)
+# generate_smoking_histories <- function(n_people = 10000, birth_cohort_range = 1920:2000) {
+# # Simulation code would go here
+# }
+```
+
+## Validation
+
+It's important to validate the generated smoking histories against:
+
+1. Historical smoking prevalence data
+2. Age-specific initiation patterns
+3. Duration of smoking patterns
\ No newline at end of file
diff --git a/docs/how-to/index.qmd b/docs/how-to/index.qmd
new file mode 100644
index 0000000..68c86b7
--- /dev/null
+++ b/docs/how-to/index.qmd
@@ -0,0 +1,12 @@
+---
+title: "How-to guides"
+---
+
+This section provides practical instructions for specific tasks with CSHM:
+
+- [Processing CCHS data](processing-cchs-data.qmd): How to process and prepare CCHS data
+- [Running the APC model](running-apc-model.qmd): How to set up and run the APC model
+- [Generating smoking histories](generating-histories.qmd): How to generate complete smoking histories
+- [Validating results](validating-results.qmd): How to validate model results against reference data
+
+Use this section when you need to accomplish specific tasks with CSHM.
\ No newline at end of file
diff --git a/docs/how-to/integration.qmd b/docs/how-to/integration.qmd
new file mode 100644
index 0000000..17551ee
--- /dev/null
+++ b/docs/how-to/integration.qmd
@@ -0,0 +1,36 @@
+---
+title: "Integrating with other models"
+subtitle: "Using CSHM outputs in broader health and policy models"
+---
+
+# Integrating with other models
+
+This guide explains how to integrate the CSHM with other health and policy models.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+The CSHM can serve as an input to various health policy and disease models, providing realistic smoking histories for population simulations.
+
+## Integration scenarios
+
+Common integration scenarios include:
+
+1. **Disease risk models**: Using smoking histories to predict cancer, cardiovascular, and respiratory disease risks
+2. **Health economic evaluations**: Assessing the cost-effectiveness of tobacco control policies
+3. **Population health forecasting**: Projecting future health outcomes based on smoking trends
+4. **Policy simulations**: Modeling the impact of tobacco control interventions
+
+## Data exchange formats
+
+```r
+# Example integration code (placeholder)
+# export_smoking_histories <- function(histories, format = "csv") {
+# # Export code would go here
+# }
+```
+
+## Case studies
+
+*This section will include examples of successful integration with other models.*
\ No newline at end of file
diff --git a/docs/how-to/interpreting-results.qmd b/docs/how-to/interpreting-results.qmd
new file mode 100644
index 0000000..8e0a4a3
--- /dev/null
+++ b/docs/how-to/interpreting-results.qmd
@@ -0,0 +1,44 @@
+---
+title: "Interpreting APC model results"
+subtitle: "Understanding the output of Age-Period-Cohort models"
+---
+
+# Interpreting APC model results
+
+This guide explains how to interpret the results from the Age-Period-Cohort model.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+After fitting the APC model, you'll need to interpret various coefficients and visualize the results to understand smoking patterns.
+
+## Key metrics
+
+The key metrics from the APC model include:
+
+1. Age effects - how smoking behavior varies with age
+2. Period effects - how smoking behavior changes over time
+3. Cohort effects - how birth cohorts differ in smoking patterns
+
+## Visualizing results
+
+Creating visualizations can help with interpretation:
+
+```r
+# Example plot (placeholder)
+# ggplot(apc_results, aes(x = age, y = age_effect)) +
+# geom_line() +
+# theme_minimal() +
+# labs(title = "Age Effect on Smoking Initiation",
+# x = "Age",
+# y = "Log-odds of Initiation")
+```
+
+## Common interpretation challenges
+
+Some challenges in interpreting APC models include:
+
+1. The identification problem (linear dependency)
+2. Distinguishing between the three temporal effects
+3. Interpreting interaction effects
\ No newline at end of file
diff --git a/docs/how-to/processing-cchs-data.qmd b/docs/how-to/processing-cchs-data.qmd
new file mode 100644
index 0000000..6851253
--- /dev/null
+++ b/docs/how-to/processing-cchs-data.qmd
@@ -0,0 +1,201 @@
+---
+title: "Processing CCHS data"
+---
+
+This guide explains how to process Canadian Community Health Survey (CCHS) data for use with the CSHM.
+
+## Overview
+
+The Canadian Community Health Survey (CCHS) is conducted by Statistics Canada and collects information on health status, health care utilization, and health determinants for the Canadian population. For CSHM, we're particularly interested in the smoking-related variables, which need to be harmonized across different survey cycles.
+
+## Obtaining CCHS data
+
+CCHS data can be accessed through:
+
+1. **Statistics Canada Research Data Centres (RDCs)**: For researchers who need access to the detailed microdata
+2. **Public Use Microdata Files (PUMFs)**: Available through academic institutions
+3. **Sample files**: For demonstration purposes, sample datasets are included with this package
+
+For this guide, we'll assume you have access to CCHS data files for the following cycles:
+
+- CCHS 2001 (Cycle 1.1)
+- CCHS 2003 (Cycle 2.1)
+- CCHS 2005 (Cycle 3.1)
+- CCHS 2007-2008
+- CCHS 2009-2010
+- CCHS 2011-2012
+- CCHS 2013-2014
+- And optionally newer cycles
+
+## Step 1: Loading CCHS data
+
+First, load the CCHS data files:
+
+```r
+# Load required packages
+library(cshm)
+library(dplyr)
+library(cchsflow) # For harmonizing CCHS variables
+
+# Path to your data files (modify as needed)
+cchs_files <- c(
+  "path/to/cchs2001.rds",
+  "path/to/cchs2003.rds",
+  "path/to/cchs2005.rds",
+  "path/to/cchs2007_2008.rds",
+  "path/to/cchs2009_2010.rds",
+  "path/to/cchs2011_2012.rds",
+  "path/to/cchs2013_2014.rds"
+)
+
+# Load the CCHS datasets into a list with one element per cycle file.
+# lapply() builds the whole list in a single call, so there is no need to
+# create an empty list and grow it inside a for loop. readRDS() reads the
+# single object stored in each .rds file.
+cchs_data <- lapply(cchs_files, readRDS)
+
+# Combine datasets (assuming they have compatible structures)
+# In practice, you may need more complex data preparation
+combined_cchs <- bind_rows(cchs_data)
+```
+
+## Step 2: Harmonizing variables across cycles
+
+CCHS variable names have changed across cycles. We use the `cchsflow` package to harmonize them:
+
+```r
+# List of smoking-related variables needed for CSHM
+smoking_vars <- c(
+  "SMK_01A", # In lifetime, smoked 100 or more cigarettes
+  "SMK_202", # Current smoking status
+  "SMKDSTY", # Type of smoker
+  "SMKG01C_cont", # Age smoked first cigarette
+  "SMKG203_cont", # Age started smoking daily (daily smokers)
+  "SMKG207_cont", # Age started smoking daily (former daily smokers)
+  "SMK_09A_cont", # When stopped smoking daily (former daily)
+  "SMKG09C", # Years since stopped smoking daily
+  "SMK_204", # Number of cigarettes smoked daily (daily smokers)
+  "SMK_208", # Number of cigarettes smoked daily (former daily smokers)
+  "SMK_05B", # Number of cigarettes smoked daily (occasional smokers)
+  "SMK_05C" # Number of days smoked in past month (occasional smokers)
+)
+
+# Demographic variables
+demographic_vars <- c(
+  "DHH_SEX", # Sex of respondent
+  "DHH_AGE", # Age of respondent
+  "WTS_M" # Survey weight
+)
+
+# Combine all variables. SAMPLEID (survey year and month) is included because
+# Step 3 derives the survey date from it.
+all_vars <- c(smoking_vars, demographic_vars, "SAMPLEID")
+
+# Harmonize variables using cchsflow
+# NOTE(review): without database_name, rec_with_table() is understood to use
+# the name of the data argument ("combined_cchs") -- confirm this resolves.
+harmonized_data <- rec_with_table(combined_cchs, variables = all_vars)
+```
+
+## Step 3: Creating derived variables
+
+Some variables need to be derived for the CSHM:
+
+```r
+# Create survey date
+harmonized_data <- harmonized_data %>%
+  mutate(
+    # Convert survey year and month to a date (mid-month approximation); the
+    # explicit format turns unparseable input into NA instead of an error
+    survey_year = as.numeric(substr(as.character(SAMPLEID), 1, 4)),
+    survey_month = as.numeric(substr(as.character(SAMPLEID), 5, 6)),
+    cchsbdate = as.Date(paste(survey_year, survey_month, "15", sep = "-"),
+      format = "%Y-%m-%d")
+  )
+
+# Calculate additional derived variables
+harmonized_data <- harmonized_data %>%
+  mutate(
+    # Age of first cigarette (can be NA for non-smokers)
+    agefirst = SMKG01C_cont,
+    # Survey age (age at time of survey)
+    surveyage = DHH_AGE,
+
+    # Birth cohort (year); uses survey_year directly, so no date helper
+    # package is needed
+    birth_year = survey_year - surveyage,
+
+    # Create weighting variable
+    weighting = WTS_M,
+    # Respondent ID -- NOTE(review): confirm PERSONID is kept by Step 2
+    ont_id = PERSONID
+  )
+```
+
+## Step 4: Data cleaning and validation
+
+Check for data quality issues and prepare the final dataset:
+
+```r
+# Keep respondents aged 12+ whose sex, age, and survey weight are recorded
+cleaned_data <- filter(
+  harmonized_data,
+  !is.na(DHH_SEX), !is.na(surveyage), # Sex and age must be known
+  surveyage >= 12, # Respondents aged 12+ only
+  !is.na(weighting) # Survey weight must be present
+)
+
+# Add the M/F sex coding that CSHM requires
+cleaned_data <- mutate(cleaned_data, sex = ifelse(DHH_SEX == 1, "M", "F"))
+
+# Write the cleaned dataset to the working directory
+saveRDS(object = cleaned_data, file = "harmonized_cchs_data.rds")
+```
+
+## Step 5: Verify variable distributions
+
+It's important to check the distributions of key variables to ensure the harmonization worked as expected:
+
+```r
+# Check smoking status distribution
+smoking_distribution <- cleaned_data %>%
+  count(SMKDSTY) %>%
+  mutate(
+    label = case_when(
+      SMKDSTY == 1 ~ "Daily smoker",
+      SMKDSTY == 2 ~ "Occasional smoker",
+      SMKDSTY == 3 ~ "Former daily smoker",
+      SMKDSTY == 4 ~ "Former occasional smoker",
+      SMKDSTY == 5 ~ "Never smoker",
+      TRUE ~ "Unknown"
+    ),
+    percentage = 100 * n / sum(n)
+  )
+
+print(smoking_distribution)
+
+# Check age distribution; right = FALSE gives bins [12, 20), [20, 30), ...
+age_distribution <- cleaned_data %>%
+  mutate(age_group = cut(surveyage,
+    breaks = c(12, 20, 30, 40, 50, 60, 70, 80, 999), right = FALSE,
+    labels = c("12-19", "20-29", "30-39", "40-49",
+      "50-59", "60-69", "70-79", "80+"))) %>%
+  count(age_group) %>%
+  mutate(percentage = 100 * n / sum(n))
+
+print(age_distribution)
+```
+
+## Next steps
+
+After processing the CCHS data, you can proceed to:
+
+1. [Analyze smoking initiation patterns](../tutorials/basic-usage.qmd)
+2. [Run the Age-Period-Cohort model](running-apc-model.qmd)
+
+## Common issues and solutions
+
+**Missing data in key variables**: If you encounter high levels of missing data in key smoking variables, check whether the variables were included in all survey cycles. Some variables may be unavailable in certain cycles.
+
+**Inconsistent variable coding**: Despite harmonization, some variables may have inconsistent coding across cycles. Always check the value distributions before proceeding with analysis.
+
+**Survey design effects**: CCHS uses a complex survey design. For proper population estimates, use the survey weights provided and consider the design effect in variance estimation.
\ No newline at end of file
diff --git a/docs/how-to/reproducing-ontario-study.qmd b/docs/how-to/reproducing-ontario-study.qmd
new file mode 100644
index 0000000..ec375e9
--- /dev/null
+++ b/docs/how-to/reproducing-ontario-study.qmd
@@ -0,0 +1,303 @@
+---
+title: "Reproducing the Canadian Smoking Histories Model"
+subtitle: "A Guide to Variable Derivation and Analysis"
+author: "CSHM Development Team"
+date: "Last Updated: `r Sys.Date()`"
+format:
+ html:
+ toc: true
+ toc-depth: 3
+ number-sections: true
+ theme: cosmo
+ code-fold: show
+execute:
+ echo: true
+ warning: false
+ message: false
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+# Load required packages
+library(dplyr)
+library(ggplot2)
+# If you have installed the cshm package, uncomment this:
+# library(cshm)
+```
+
+# Introduction
+
+This document provides a guide to reproducing the Canadian Smoking Histories Model as described in @Manuel_HR_2020. The model uses Age-Period-Cohort (APC) analysis to estimate smoking initiation and cessation probabilities across different birth cohorts, which are then used to generate complete smoking histories.
+
+## Purpose of This Guide
+
+This guide aims to:
+
+1. Describe the key variables needed from the Canadian Community Health Survey (CCHS)
+2. Show how to derive the necessary variables for the model
+3. Provide R code implementations of the original SAS procedures
+4. Explain how to interpret and validate the results
+
+## Original Study Overview
+
+The original study [@Manuel_HR_2020] developed a model to:
+
+- Generate individual smoking histories for a simulated population
+- Estimate smoking prevalence across different time periods and birth cohorts
+- Model both smoking initiation and cessation probabilities
+- Account for age, period, and cohort effects in smoking behavior
+
+The approach used data from the Canadian Community Health Survey to construct smoking histories and applied statistical modeling to estimate probabilities of starting and quitting smoking.
+
+# Required Data Sources
+
+## Canadian Community Health Survey (CCHS)
+
+The CCHS is the primary data source for this analysis. Multiple cycles have been conducted:
+
+- CCHS 2001 (Cycle 1.1)
+- CCHS 2003 (Cycle 2.1)
+- CCHS 2005 (Cycle 3.1)
+- CCHS 2007-2008
+- CCHS 2009-2010
+- CCHS 2011-2012
+- CCHS 2013-2014
+- CCHS 2015-2016
+- CCHS 2017-2018
+
+## Variable Harmonization Across Cycles
+
+CCHS variable names have changed across cycles. We use the cchsflow package for variable harmonization:
+
+```{r harmonization-example, eval=FALSE}
+# Example of harmonizing CCHS data using cchsflow
+# library(cchsflow)
+# harmonized_data <- rec_with_table(
+# cchs_data,
+# c("SMK_01A", "SMKG01C_cont", "SMKG203_cont", "SMK_204")
+# )
+```
+
+# Key Variables for Smoking History
+
+## Core Smoking Status Variables
+
+The following variables are critical for determining smoking status:
+
+| Harmonized Variable | Description | Values |
+|----------------------------------|---------------------|-----------------|
+| SMK_01A | In lifetime, smoked 100 or more cigarettes | 1=Yes, 2=No |
+| SMK_202 | Current smoking status | 1=Daily, 2=Occasional, 3=Not at all |
+| SMKDSTY | Type of smoker | 1=Daily, 2=Occasional, 3=Former daily, 4=Former occasional, 5=Never |
+
+## Smoking Initiation Variables
+
+Variables measuring when someone started smoking:
+
+| Harmonized Variable | Description | Type |
+|-----------------------------------|---------------------|----------------|
+| SMKG01C_cont | Age smoked first cigarette | Continuous |
+| SMKG203_cont | Age started smoking daily (daily smokers) | Continuous |
+| SMKG207_cont | Age started smoking daily (former daily smokers) | Continuous |
+
+## Smoking Cessation Variables
+
+Variables measuring when someone quit smoking:
+
+| Harmonized Variable | Description | Type |
+|-----------------------------------|---------------------|----------------|
+| SMK_09A_cont | When stopped smoking daily (former daily) | Continuous |
+| SMKG09C | Years since stopped smoking daily (former daily) | Categorical |
+
+## Smoking Intensity Variables
+
+Variables measuring how much someone smokes/smoked:
+
+| Harmonized Variable | Description | Type |
+|-----------------------------------|---------------------|----------------|
+| SMK_204 | Number of cigarettes smoked daily (daily smokers) | Continuous |
+| SMK_208 | Number of cigarettes smoked daily (former daily smokers) | Continuous |
+| SMK_05B | Number of cigarettes smoked daily (occasional smokers) | Continuous |
+| SMK_05C | Number of days smoked in past month (occasional smokers) | Continuous |
+
+## Demographic Variables
+
+Variables used for cohort assignment and stratification:
+
+| Harmonized Variable | Description | Type |
+|---------------------|-------------------|-------------|
+| DHH_SEX | Sex of respondent | Categorical |
+| cchsbdate | CCHS survey date | Date |
+
+# Variable Derivation for Age-Period-Cohort Analysis
+
+## Smoking Initiation Process
+
+### Step 1: Identify Smoking Initiators
+
+The first step is to identify individuals who have initiated smoking, defined as having smoked at least 100 cigarettes in their lifetime.
+
+```{r smoking-initiators, eval=FALSE}
+# Using our process_smoking_initiation function
+inits_male <- process_smoking_initiation(harmonized_data, sex = "M")
+inits_female <- process_smoking_initiation(harmonized_data, sex = "F")
+```
+
+The function shown above performs the following steps:
+
+1. Filters respondents by sex
+2. Identifies never-smokers (SMK_01A = 2) and sets their initiation age to 101 (a flag value)
+3. Creates an initiation indicator (init=1 for smokers, init=0 for never-smokers)
+4. Filters to include only those born in 1920 or later
+5. Creates a subset containing only those who initiated smoking
+6. Applies age filter (age \>= 8)
+7. Calculates period (year of initiation) as cohort + age
+8. Returns a dataset with ont_id, weighting, age, cohort, period, and init variables
+
+### Step 2: Create Population Denominators
+
+To calculate initiation rates, we need the population at risk of initiation at each age, period, and cohort combination.
+
+```{r denominator-example, eval=FALSE}
+# Example function for creating denominators (not fully implemented yet)
+create_initiation_denominators <- function(dataset, sex = "M") {
+ # Function implementation would go here
+ # This creates population counts by age, period, and cohort
+}
+```
+
+## Smoking Cessation Process
+
+### Step 1: Identify Smoking Cessation
+
+Similarly, we identify individuals who have quit smoking and when they quit.
+
+```{r cessation-example, eval=FALSE}
+# Example function for processing cessation data
+process_smoking_cessation <- function(dataset, sex = "M") {
+ # Function implementation would follow similar steps to initiation
+ # but with cessation-specific criteria
+}
+```
+
+## Age-Period-Cohort Modeling
+
+The APC modeling approach uses splines to model age, period, and cohort effects:
+
+1. Natural cubic splines are constructed for age, period, and cohort
+2. These splines are used in a generalized linear model with binomial errors
+3. Survival probabilities are used to adjust for differential mortality
+4. Separate models are fit for males and females
+
+```{r apc-splines, eval=FALSE}
+# Example of creating splines for APC modeling
+create_age_splines <- function(ages, knots) {
+ # Function to create natural cubic splines for ages
+ # Implementation would go here
+}
+```
+
+# Applying the Model
+
+## Estimating Smoking Probabilities
+
+Once the APC models are fitted, they can be used to predict:
+
+1. Initiation probabilities by age, period, and cohort
+2. Cessation probabilities by age, period, and cohort
+
+```{r predict-example, eval=FALSE}
+# Example of predicting initiation probabilities
+predict_initiation <- function(age, period, cohort, sex) {
+ # Implementation would use fitted coefficients from APC model
+}
+```
+
+## Generating Smoking Histories
+
+Complete smoking histories can be generated using:
+
+1. A microsimulation approach based on the estimated probabilities
+2. Monte Carlo methods to introduce appropriate stochasticity
+
+```{r history-example, eval=FALSE}
+# Example of generating a smoking history
+generate_smoking_history <- function(birth_year, sex) {
+ # Simulation implementation would go here
+}
+```
+
+# Validation and Calibration
+
+## Comparing to Historical Data
+
+Model validation involves comparing the generated smoking histories to:
+
+1. Historical smoking prevalence data
+2. Known patterns in smoking behavior by birth cohort
+3. Other published estimates of smoking trends
+
+```{r validation-example, eval=FALSE}
+# Example of validating model results
+validate_smoking_prevalence <- function(simulated_data, reference_data) {
+ # Validation code would go here
+}
+```
+
+## Calibration Procedures
+
+If necessary, model calibration can be performed by:
+
+1. Adjusting the baseline rates
+2. Modifying the period effects
+3. Fine-tuning cohort-specific parameters
+
+# Implementation Example
+
+Below is a simplified example of processing smoking initiation data using the function we've developed:
+
+```{r implementation-example, eval=FALSE}
+# Create simulated data for testing (seeded so weights are reproducible)
+set.seed(2020)
+test_data <- data.frame(
+  ont_id = 1001:1010, sex = rep(c("M", "F"), 5),
+  SMK_01A = c(1, 2, 1, 1, 1, 2, 1, 2, 1, 1), # 1=Yes, 2=No to 100+ cigarettes
+  agefirst = c(16, NA, 12, 21, 7, NA, 18, NA, 15, 22),
+  cchsbdate = as.Date(c("2001-06-15", "2001-07-20", "2001-08-10",
+                        "2001-09-05", "2001-10-25", "2002-01-15",
+                        "2002-02-20", "2002-03-10", "2002-04-15", "2002-05-20")),
+  weighting = round(runif(10, 100, 300)) # random weights between 100 and 300
+)
+
+# Process the initiation data
+male_init <- process_smoking_initiation(test_data, sex = "M")
+female_init <- process_smoking_initiation(test_data, sex = "F")
+
+# Examine the results
+print(male_init)
+print(female_init)
+```
+
+# Conclusion
+
+This guide has provided an overview of how to:
+
+1. Prepare CCHS data for smoking history analysis
+2. Derive the key variables needed for APC modeling
+3. Implement the core components of the Canadian Smoking Histories Model
+4. Apply and validate the model
+
+Further development is ongoing to complete the full implementation of the model in R, including additional validation against historical data and extensions to incorporate more recent survey cycles.
+
+# References
+
+::: {#refs}
+:::
+
+# Appendix: Additional Resources
+
+
+For a complete mapping of variables across CCHS cycles, refer to the included variable sheets:
+
+- `worksheets/cshm-variables.csv`
+- `worksheets/cshm-variable-details.csv`
\ No newline at end of file
diff --git a/docs/how-to/running-apc-model.qmd b/docs/how-to/running-apc-model.qmd
new file mode 100644
index 0000000..b1b1cfb
--- /dev/null
+++ b/docs/how-to/running-apc-model.qmd
@@ -0,0 +1,36 @@
+---
+title: "Running the APC model"
+subtitle: "Applying Age-Period-Cohort modeling to smoking data"
+---
+
+# Running the APC model
+
+This guide explains how to run the Age-Period-Cohort model on processed CCHS data.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+The Age-Period-Cohort (APC) model is a statistical approach used to disentangle the effects of age, time period, and birth cohort on health behaviors like smoking.
+
+## Prerequisites
+
+Before running the APC model, make sure you have:
+
+1. Harmonized and processed CCHS data
+2. Derived all necessary variables for smoking initiation and cessation
+
+## Implementation steps
+
+The basic workflow includes:
+
+1. Preparing spline terms for age, period, and cohort
+2. Fitting generalized linear models
+3. Predicting probabilities for all age-period-cohort combinations
+
+## Next steps
+
+For more information on interpreting and using the model results, see:
+
+- [Interpreting APC results](interpreting-results.qmd)
+- [Generating smoking histories](generating-histories.qmd)
\ No newline at end of file
diff --git a/docs/how-to/validating-results.qmd b/docs/how-to/validating-results.qmd
new file mode 100644
index 0000000..bcc3e0d
--- /dev/null
+++ b/docs/how-to/validating-results.qmd
@@ -0,0 +1,41 @@
+---
+title: "Validating model results"
+subtitle: "Assessing the accuracy of CSHM predictions"
+---
+
+# Validating model results
+
+This guide explains how to validate the results from the Canadian Smoking Histories Model.
+
+*This page is under development. Content coming soon.*
+
+## Overview
+
+Validation is a critical step in ensuring that the CSHM accurately reflects historical smoking patterns and can generate plausible future scenarios.
+
+## Validation approaches
+
+Several approaches can be used for validation:
+
+1. **Historical comparison**: Comparing model-generated prevalence with historical survey data
+2. **Out-of-sample testing**: Reserving some data for validation
+3. **Cross-validation**: Testing the model on different subsets of data
+4. **Expert review**: Having subject matter experts review the results
+
+## Example validation
+
+```r
+# Example validation code (placeholder)
+# compare_prevalence <- function(simulated_data, reference_data) {
+# # Validation code would go here
+# }
+```
+
+## Interpreting validation results
+
+When interpreting validation results, consider:
+
+1. Absolute and relative differences between model and reference data
+2. Systematic patterns in discrepancies
+3. Sensitivity to key parameters
+4. Validation across different demographic groups
\ No newline at end of file
diff --git a/docs/images/workflow-diagram-ascii.txt b/docs/images/workflow-diagram-ascii.txt
new file mode 100644
index 0000000..0f57459
--- /dev/null
+++ b/docs/images/workflow-diagram-ascii.txt
@@ -0,0 +1,14 @@
++--------------------+ +--------------------+ +--------------------+
+| | | | | |
+| Data Loading |--->| Data Harmonization|--->| Variable Derivation|
+| (CCHS Cycles) | | (cchsflow) | | (Age, Cohort, etc) |
+| | | | | |
++--------------------+ +--------------------+ +--------------------+
+ |
+ v
++--------------------+ +--------------------+ +--------------------+
+| | | | | |
+| Model Validation |<---| Smoking History |<---| APC Modeling |
+| (Calibration) | | Generation | | (Splines) |
+| | | | | |
++--------------------+ +--------------------+ +--------------------+
\ No newline at end of file
diff --git a/docs/images/workflow-diagram.png b/docs/images/workflow-diagram.png
new file mode 100644
index 0000000..2d691c7
--- /dev/null
+++ b/docs/images/workflow-diagram.png
@@ -0,0 +1,15 @@
+iVBORw0KGgoAAAANSUhEUgAAAlgAAAGQAQMAAABI+4zbAAAABlBMVEX///8AAABVwtN+AAADr0lEQVR42u2czWsTQRjGn2yT
+ZrNJurVppcVWKK2i+FG0iCiKglA/QBD8ogjeRFARby14UTzowYNHL3pQb+JB0YNe/Ae8Ka3U+lFrk822abvbZpPd8bA7u7NJ
+NtN9Ztg52PfUzPKS95nf7rz7vi8JYBiGYRiGYRiGYRiGYRiGYRiGYRiGYRiGYRiGYRiGYRiGYRiGYRiGYRhm25KOHen5+Pj2
+pNcOKWd2ydCFmXRB2I6QFGkqSoVUUZRKnkrD/6pUf1arB5/eGQMXjmTCuy7WRhZrNhZTxn9rSulK9d7HzwMPTtT/XUK20i+E
+kGSjLevZyvWPj/oPf1AHBu87o9Pp9KcQJbm9ZTvbP7s0jx9M5Q/fd0YdKwxBo+y2c3jJMlvcQKbhXL2/5+6Fy4N3JhzKjpJS
+lEW+uHmzp0vCXPlDVx98ufiufOmcNTDqTCGmSMlc0+yO2sNKdx0YOJ5IT/QvO5RiHcGCWFGLDZtdOjJeTgUXHcnxLtbQKhuG
+rTG7OsK9q4dv29mJZAMXRxC+tlvrzUYeXJ1/9XrKfjgTDKBU7V3k+5r/+MOlxa/2tseCAJQs/VEfJWs/7G0vBgAowPVNGWQc
+xfBqu9HhAFylbH30m5adRALYfql+0/KJgLl/d7HoEoDfLFu6sbGq+H8t2bwWx7+LLVrvVdQPl7Gy+Y/ELRf1w6UE/nblIhUO
+ApeJKlTWxwEY39S2qQOomk0OJTJNd2JVRbG+y6Vvg3CtpKNcWkdLc2mhyxJVjV6WkLIuXJ0sFZ3svixR0ujLkuF+LZlRCJal
+Gq1QojjK1otWxnG57lWKLrpCSOZ+5+Wy9bqVdLsVk1zfrDDplcwzxQwh+S2FPfvcMkp5k47jXC5nHH89z2UcFrPVG6K0Fc9a
+G5m6zOVcL5erzzO1YpSMGWc1wk9Xw1zqeiZhzTu1shSk1FPJlDOPk9qSh1LPlrNJrpvLlY3OZcLTOCzbXJ5v9cbyPJccPpdt
+83nBzpfn5YL7PO9K1e2OmuSp5A1JXkq+UchHSZcuVKv5S+mq6VfJSC3y40j7VSrhtFQCl+Oo9U7i9KukeCl5Q5KXkn+lqBVC
+Hkp6QeaYK/SG20dJRKSoiJTPRaYERzm9KqpH5ZdXF8KE4cDZYnbcC2dLcbI82SIk2RKnuXCLkNwiJOcI/yVnqxzn0ufnKGdL
+rZTTwdkCfZgSBThtAj/ICZqnDcMwDMMwDMMwDMMwDMMwDMMwDMMwDMMwDMMwDMMwDMMwDMMwDMMwDPOf+gUBomcNnZ8AbAAA
+AABJRU5ErkJggg==
diff --git a/docs/index.qmd b/docs/index.qmd
new file mode 100644
index 0000000..96d3a67
--- /dev/null
+++ b/docs/index.qmd
@@ -0,0 +1,61 @@
+---
+title: "Canadian Smoking Histories Model"
+subtitle: "An R implementation of Age-Period-Cohort smoking behavior analysis"
+---
+
+# Canadian Smoking Histories Model
+
+This website documents the Canadian Smoking Histories Model (CSHM) and explains how to use and extend it for smoking behavior analysis and simulation.
+
+## About CSHM
+
+The Canadian Smoking Histories Model is an R implementation of the Age-Period-Cohort (APC) modeling approach to smoking behavior. It builds upon the work of Holford and extends the methodology developed in the Ontario SHGM study (2020).
+
+The model analyzes smoking initiation and cessation patterns from Canadian Community Health Survey (CCHS) data, accounting for age, period, and cohort effects. With this foundation, it can generate synthetic smoking histories for population health modeling.
+
+## Key features
+
+- Processes and harmonizes data from multiple CCHS cycles
+- Implements APC modeling using natural cubic splines
+- Estimates smoking initiation and cessation probabilities
+- Generates complete smoking histories for simulated populations
+- Provides validation against historical data
+- Works in various computing environments, including secure settings
+
+## Getting started
+
+New users should start with these resources:
+
+- [Getting started guide](tutorials/getting-started.qmd) - Introduction to the package
+- [Basic usage tutorial](tutorials/basic-usage.qmd) - Common analysis tasks
+- [Processing CCHS data](how-to/processing-cchs-data.qmd) - Working with survey data
+
+## Documentation structure
+
+This documentation is organized into four main sections:
+
+1. **Tutorials** - Step-by-step lessons to get started
+2. **How-to guides** - Practical instructions for specific tasks
+3. **Explanation** - Conceptual discussions of methodology
+4. **Reference** - Technical details of functions and data
+
+## Project status
+
+The CSHM is currently under active development. Contributions, bug reports, and feature requests are welcome through the [GitHub repository](https://github.com/Big-Life-Lab/cshm-dev).
+
+## Citation
+
+If you use the CSHM in your research, please cite:
+
+```
+CSHM Development Team (2024). Canadian Smoking Histories Model:
+An R implementation of Age-Period-Cohort smoking behavior analysis.
+```
+
+## Licence
+
+The code in this repository is licensed under the [MIT License](../LICENSE).
+
+## Acknowledgments
+
+This project builds upon the foundational work of Dr. Ted Holford at Yale University and the Ontario SHGM study team at Statistics Canada.
\ No newline at end of file
diff --git a/docs/protocol/full-protocol.qmd b/docs/protocol/full-protocol.qmd
new file mode 100644
index 0000000..d1d0e7f
--- /dev/null
+++ b/docs/protocol/full-protocol.qmd
@@ -0,0 +1,359 @@
+---
+title: "A Canadian Smoking Histories Model: a study protocol to generate smoking cohorts from 1940 and project to 2050"
+status: "Draft"
+version-summary:
+ date: "2026-02-24"
+ version: "0.2.5"
+version-history:
+ - version: "0.2.0"
+ date: "2026-03-09"
+ description: "Update to include reporting guideline placeholders. Added potential measurement error and sensitivity analyses. Equity subgroup analysis removed from primary analysis. Added total cigarettes smoked in Canada by year as an expected output. Added sensitivity analyses for spline type (NSP vs RCS) and mortality adjustment method (MPoRT vs Peto)."
+ - version: "0.1.1"
+ date: "2025-05-22"
+ description: "Detailed protocol update including APC knots, MPoRT, and equity strategy."
+ - version: "0.1.0"
+ date: "2025-03-27"
+ description: "Initial 1-page outline."
+---
+
+[{{< meta version-summary.date >}}]{.date} \| Version: [{{< meta version-summary.version >}}]{.version}
+
+# Abstract
+
+> STROBE: Clear summary of study components; ISPOR-SMDM: Overview of model purpose
+
+**Background**
+
+Understanding historical and projected smoking behaviour across Canadian populations informs the evaluation of tobacco control policies and smoking-attributable disease burden. While the U.S. Smoking History Generator (SHG) supports cancer risk modelling, a pan-Canadian equivalent does not yet exist.
+
+**Objectives**
+
+This study aims to develop a Canadian Smoking Histories Model (CSHM) that reconstructs smoking patterns by birth cohort, sex, and province from 1965 to 2023, with projections to 2050. Specifically, we will: (1) estimate historical and current smoking initiation, cessation, and intensity; (2) assess regional and temporal variations across all provinces and territories; (3) project future trends under status quo conditions; and (4) provide an open-access, reproducible model for population health modelling.
+
+**Methods**
+
+Using harmonized data from over one million respondents in the Canadian Community Health Survey (2001--2023), we will apply age-period-cohort models to estimate smoking initiation, cessation, and intensity. Mortality-adjusted estimates using the MPoRT algorithm will address survival bias. Estimates will be produced for each province and territory, and projections will extend to 2050.
+
+**Expected Outcomes**
+
+The model will be publicly accessible and used for policy evaluation and disease modelling in Canada, including projecting future smoking patterns and evaluating historic or proposed tobacco policy.
+
+:::: {.section-body line-numbers="continuous"}
+::: page-break
+:::
+
+# 1. Background
+
+Smoking behaviour varies across birth cohorts in ways that affect projections of tobacco-attributable disease and the evaluation of tobacco control policies in Canada. While smoking prevalence has declined since the mid-20th century, tobacco remains the leading cause of preventable disease and death in Canada, killing approximately 48,000 Canadians annually and costing over $11 billion per year in healthcare, lost productivity, and other costs [@HealthCanada_SmokingMortality_2024; @CSUCH_2023]. The smoking health burden is concentrated among lower-income and other equity-deserving populations, with lung cancer incidence nearly three times higher among Canadians with the lowest educational attainment [@mitra2015]. National trends mask heterogeneity across birth cohorts and provinces, driven by evolving social norms and regional tobacco control measures.
+
+The Smoking History Generator (SHG) framework, developed by the National Cancer Institute (NCI) CISNET Smoking Working Group, characterizes these patterns in the United States using Age-Period-Cohort (APC) models [@Holford_AJPM_2014]. This approach was adapted for Ontario [@Manuel_HR_2020], demonstrating that Canadian Community Health Survey (CCHS) data can be used to reconstruct birth-cohort-specific smoking histories from 1965 to 2013. A pan-Canadian equivalent covering all provinces and territories has not yet been developed.
+
+Three gaps motivate the CSHM. First, tobacco control in Canada operates at both the national and regional levels, with provincial variation in taxation, smoke-free legislation, and cessation support. Second, Canadian smoking initiation and cessation patterns differ from those in other countries, particularly in the timing of peak prevalence and the rate of decline among younger cohorts. Third, Canadian health policy models (e.g., OncoSim, POHEM, SimSmoke) rely on behaviour transition parameters from the 1994--2004 National Population Health Survey (NPHS), now over 20 years old, with no planned replacement [@hennessy2015; @gauvreau2017; @chaiton2021]; these parameters no longer reflect contemporary patterns, particularly among younger cohorts, immigrant populations, and lower-income groups. The CCHS [@beland2002], with over 1.4 million respondents and annual data collection since 2001, provides an unparalleled opportunity to address this gap. The methods and infrastructure developed here will also expand knowledge for many countries facing the same challenge: they have large cross-sectional surveys but lack population-based panel data [@kopasker2023; @vasquezlavin2022]. This study addresses these gaps with a unified, open-source framework for smoking history generation across all Canadian jurisdictions.
+
+# 2. Objectives
+
+This study will develop a Canadian Smoking Histories Model (CSHM) that describes smoking patterns by birth cohort, sex, and province from 1965 to 2023, with projections to 2050. Specifically, this study will:
+
+> Note: the starting date (1965) may need revision based on model performance.
+
+1. **Estimate historical and current smoking parameters:** Reconstruct rates of smoking initiation, cessation, and intensity (cigarettes per day) by birth cohort (1890--2030) for each province and territory.
+2. **Examine regional and temporal variations:** Analyse how smoking patterns have evolved across Canadian jurisdictions in response to differing policy environments and social trends.
+3. **Project future smoking prevalence:** Model future trends in smoking prevalence and related parameters through 2050 under status quo policy conditions.
+4. **Develop an open-access model:** Create a publicly-accessible, reproducible R-based model that can be used and updated by researchers, policy analysts, and health system planners.
+
+The model will provide demographic and geographic estimates (by single year of age and birth cohort) to interface with existing population health models. Historical reconstruction will cover the period 1965--2023, with projections extending from 2024 to 2050. Analysis will be conducted at the provincial level, with national estimates derived from aggregated provincial data.
+
+# 3. Methods
+
+## 3.1 Overall study design
+
+This study uses a retrospective population-based simulation modelling approach to reconstruct and project smoking life-course trajectories for the Canadian population. We use pooled cross-sectional data from the Canadian Community Health Survey (CCHS) to estimate transition probabilities within an Age-Period-Cohort (APC) framework.
+
+> STROBE: Study design; ISPOR-SMDM: Model type
+
+The conceptual framework for the CSHM is structured as a discrete-time Markov state-transition model. The population is divided into three primary compartments:
+
+- **Never Smoker** (individuals who have smoked fewer than 100 cigarettes)
+- **Current Smoker** (daily or occasional use)
+- **Former Smoker** (quit for $\ge$ 1 year).
+
+Individuals transition between these states based on annual probabilities of **initiation** (Never to Current) and **cessation** (Current to Former). To ensure precision in tobacco "dose" estimation, the Current Smoker compartment is further characterized by smoking intensity (cigarettes per day).
+
+> ISPOR-SMDM: Model structure; STRESS: Conceptualization
+
+The APC framework is implemented to separate temporal trends into three distinct components: age effects (biological and developmental influences), period effects (influences such as policy shifts or social norms), and cohort effects (generational differences). This framework is used for three reasons:
+
+1. **Identifiability:** It addresses the fundamental APC identity ($cohort = period - age$) through constrained natural cubic splines, enabling the estimation of unique generational trends.
+2. **Historical Reconstruction:** It allows for the back-calculation of smoking rates for birth cohorts, effectively using current survivors to understand historical patterns.
+3. **Survival Bias Mitigation:** It provides a method to correct for differential mortality (the "healthy survivor" effect), ensuring that estimated historical initiation and cessation rates reflect the original population rather than only those who survived to be surveyed.
+
+> ISPOR-SMDM: Analytical approach; GATHER: Methods overview
+
+## 3.2 Data sources
+
+The primary data source for the CSHM is the Canadian Community Health Survey (CCHS), a national cross-sectional survey conducted by Statistics Canada. We will use all available survey cycles from 2001 to 2023. The CCHS employs a multistage stratified cluster design to provide representative estimates for the Canadian population aged 12 years and older, excluding individuals living on reserves, full-time members of the Canadian Forces, and institutionalized populations [@beland2002].
+
+The study will use two distinct computing environments and data formats:
+
+1. **Development Environment (Public Use Microdata Files - PUMF):** Initial model development, coding, and internal validation will be performed using PUMF data (cycles 2001--2022). These files contain about 1 million respondents and are used for the open-source implementation of the model. Continuous variables in PUMF data (e.g., age at initiation) are often grouped; we will use midpoint-estimated pseudo-continuous values for these parameters.
+2. **Production Environment (Master Files):** The final model parameters will be estimated using CCHS Master Files (2001--2023) available within Statistics Canada’s secure Research Data Centres (RDCs). Master files provide exact continuous values for key parameters for all survey years, including age, age at smoking initiation, and age at smoking cessation.
+
+Secondary data sources for model calibration include:
+
+3. **Canadian Human Mortality Database (CHMD):** Age- and sex-specific mortality rates and life tables for Canada and all provinces, used to apply the MPoRT mortality adjustment and construct survival weights. The CHMD is the Canadian component of the Human Mortality Database and is maintained by the Département de démographie, Université de Montréal, in collaboration with Statistics Canada. Data are freely accessible at [www.bdlc.umontreal.ca/CHMD](http://www.bdlc.umontreal.ca/CHMD). National and provincial life tables will be extracted for the period 1921 to the most recent available year.
+
+See [Appendix C: Data availability and computational environment] for data access procedures for each data source used in this study.
+
+## 3.3 Exposure ascertainment
+
+We will use the `cchsflow` R package (version 3.0) to harmonize smoking-related variables across all CCHS cycles. This library provides a standardized metadata-driven approach to recoding survey responses into unified variables, ensuring consistency despite changes in survey questions and coding over time.
+
+Compared to the US National Health Interview Survey (NHIS) used in existing CISNET models, the CCHS provides more granular data on non-daily smoking. While the NHIS categorizes current smokers as "every day" or "some days," the CCHS captures more detailed information about occasional smokers. The NHIS has fewer annual respondents (approximately 27,000 for the NHIS versus 65,000 for the CCHS) but an earlier start date for tobacco questions (1965 for the NHIS versus 2001 for the CCHS) [@backinger2008]. The likely consequences of these differences are more statistically stable estimates for earlier birth cohorts from the NHIS and greater statistical power for regional estimates from the CCHS.
+
+Key smoking parameters to be extracted and harmonized include:
+
+- **Smoking status:** Classified into six categories: daily, occasional (former-daily), occasional (never-daily), former-daily, former-occasional, and never smoker. The distinction between former-daily and never-daily occasional smokers allows for more precise characterization of the cessation process and tobacco "dose" over the life course. For the APC initiation and cessation models, these will be collapsed into Never, Current, and Former smokers.
+- **Age at initiation:** The age at which the respondent first smoked a whole cigarette (`age_first_cigarette`) and the age at which they started smoking daily (`age_start_smoking`). In PUMF data, these are derived from categorical variables using midpoint imputation.
+- **Time since cessation:** The number of years since the respondent last smoked (`time_quit_smoking`), used to calculate the age at cessation.
+- **Smoking intensity:** The number of cigarettes smoked per day (CPD). Unlike the NHIS, the CCHS allows us to distinguish the intensity profiles of occasional smokers who were previously daily smokers from those who have always smoked occasionally.
+
+The following sociodemographic covariates will also be extracted for use in imputation models, descriptive analyses, and mortality adjustment:
+
+- **Age:** Continuous age at survey (`DHHGAGE_cont`), derived from grouped categorical responses in PUMF using midpoint imputation; exact continuous age available in Master files.
+- **Sex:** Binary (`DHH_SEX`), available in all cycles.
+- **Ethnicity:** In Master files, detailed ethnicity categories are available. In PUMF files, the analysis will use a binary white/non-white classification consistent with Manuel et al. (2020) [@Manuel_HR_2020].
+- **Immigration status:** Born in Canada vs. immigrant (`SDCFIMM`). Following Manuel et al. (2020), pre-immigration periods will be excluded from smoking history reconstruction for immigrants. In PUMF files, a binary Canadian-born/born outside Canada classification will be used; detailed immigration year is available in Master files only.
+- **Education:** Three-category classification (less than high school, high school graduate, postsecondary graduate), used in imputation models. Note that education variables differ across cycles and between PUMF and Master files; harmonization will follow the `cchsflow` approach using `EDUDR03`.
+- **Province:** Used for provincial stratification and regional imputation models (`GEOGPRV`).
+
+Inconsistent or biologically implausible responses (e.g., age at initiation greater than current age) will be identified and handled during the data cleaning stage. Missing data on smoking history parameters will be addressed using multiple imputation by chained equations (MICE), incorporating the sociodemographic and health-related predictors listed above.
+
+> STROBE: Statistical methods; GATHER: Data processing
+
+## 3.4 Analytical approach
+
+The CSHM uses a two-stage analytical framework. In the first stage, we estimate population-level smoking initiation and cessation rates from harmonized CCHS data using Age-Period-Cohort (APC) models. In the second stage, these estimated rates are used to simulate individual-level smoking histories for synthetic Canadian populations.
+
+### 3.4.1 Data preparation and imputation
+
+Analysis-ready datasets will be produced through a standardized pipeline.
+
+1. **Cleaning:** Distribution checks and truncation of extreme values for continuous variables (e.g., smoking intensity).
+2. **Imputation:** Missing data on smoking status, initiation age, and cessation age will be addressed using Multiple Imputation by Chained Equations (MICE). The imputation model will include sociodemographic predictors (age, sex, province, education, income) and health status indicators.
+3. **Descriptive statistics:** Baseline characteristics will be reported pre-imputation (Table 1a) and post-imputation (Table 1b). Both tables will be stratified by sex and survey year. Descriptive statistics will be examined for potential discontinuities across CCHS design eras (2001–2005, 2007–2014, 2015–2021, 2022+), which reflect major changes to the survey frame, recruitment, and collection mode [@gagne2017; @backinger2008]. Although smoking variables can be harmonized across cycles, changes to the sampling frame and mode may introduce measurement differences that are not fully correctable through harmonization alone [@chen2020joinpoint].
+
+### 3.4.2 Age-period-cohort modelling
+
+Three separate APC models will be fitted by sex (men and women) to characterize smoking life-course trajectories: one for initiation, one for cessation, and one for smoking intensity (cigarettes per day). The primary analysis produces national estimates stratified by sex, as well as separate estimates for each province and territory.
+
+All models use constrained natural cubic splines to decompose temporal trends. Following the methodology of Holford et al. (2014), the fundamental APC identity is `cohort = period − age`. To address the identifiability problem, we apply constraints to the period effects, holding them constant beyond the observed data range. Period effects for initiation are held constant at the most recent observed values; period effects for cessation are similarly constrained. Cohort effects are assumed constant prior to 1920 and constant from 1985 forward, consistent with Manuel et al. (2020).
+
+Knot placement for the splines follows established standards:
+
+- **Age knots:** [10, 15, 20, 50, 60]
+- **Period knots:** [1940, 1950, 1960, 1970, 1980]
+- **Cohort knots:** [1930, 1940, 1945, 1950, 1955, 1960, 1965, 1970, 1975, 1980]
+
+### 3.4.3 Smoking intensity analysis
+
+We will use a dual approach to characterize smoking intensity (cigarettes per day; CPD):
+
+1. **Simple descriptives:** We will first calculate mean CPD and intensity distributions by age, sex, and survey year. This descriptive analysis will identify broad temporal shifts and assess whether intensity has remained stagnant across cohorts, as observed in other jurisdictions such as Brazil [@Tam_AJPM_2023].
+2. **APC intensity model:** We will fit an APC model to estimate the expected CPD by birth cohort. These parameters will serve as the "tobacco dose" input for the simulation stage, allowing for the reconstruction of pack-year histories. For occasional smokers, we will apply a standardized CPD adjustment based on their reported frequency of use.
+
+### 3.4.4 Mortality adjustment
+
+To address survival bias (ever-smokers having lower survival to survey date than never-smokers), we will apply mortality adjustments using the Mortality Population Risk Tool (MPoRT) [@Manuel_HR_2020]. MPoRT weights are adjusted for age, sex, smoking status, years since quitting, and immigration status. For each respondent, the one-year probability of death for each historical year up to the survey date is calculated; the survival bias weight is the proportion of ever-smokers who would have died prior to the survey date. This adjustment ensures that the reconstructed historical prevalence reflects the original population rather than only the survivors, following the approach of Manuel et al. (2020) [@Manuel_HR_2020].
+
+A sensitivity analysis will be conducted using the Peto approach — a constant mortality risk ratio by smoking status — consistent with the original Holford et al. (2014) US implementation [@Holford_AJPM_2014]. This quantifies the influence of the mortality adjustment method on historical prevalence estimates.
+
+> ISPOR-SMDM: Model structure; GATHER: Statistical methods
+
+CCHS sampling weights (`WTS_M` in Master files; equivalent PUMF weights) are applied throughout the analysis. In descriptive analyses, weights are used with the `{survey}` package to produce design-based estimates with bootstrap variance estimation. In the APC models, sampling weights serve as regression weights in the binomial logistic regression, following the pseudo-likelihood approach of Manuel et al. (2020) [@Manuel_HR_2020]: weighted events (numerator) and weighted person-years (denominator) are aggregated within each age-period-cohort cell before model fitting. The MPoRT survival correction adjusts respondent weights to account for differential mortality; these adjusted weights are then used as the APC regression weights, so the two reweighting steps are applied sequentially rather than independently.
+
+### 3.4.5 Projections to 2050
+
+Estimated initiation, cessation, and intensity rates will be extrapolated to 2050. The projection engine will support the following scenarios:
+
+- **Status quo scenario:** Period effects are held constant at the most recent observed values. This scenario can be interpreted as... with the assumptions...
+- **Period-effect trend continuation scenario:** Period effects are extrapolated based on the historical trend. This scenario can be interpreted as... with the assumptions...
+
+### 3.4.6 Provincial and territorial estimates
+
+Separate APC models will be estimated for each province and territory, stratified by sex. Provinces and territories with limited sample sizes may require pooled CCHS cycles or adjusted model constraints to ensure stable estimates. The simulation engine runs on individual CCHS respondents, enabling regional projections that are consistent with national aggregates.
+
+> GATHER: Statistical methods; ISPOR-SMDM: Subgroup analysis
+
+## 3.5 Model validation and uncertainty analysis
+
+The model will be validated through:
+
+1. **Internal validation:** Comparing model-estimated prevalence against the input CCHS data.
+
+> ISPOR-SMDM: Internal validation; STRESS: Validation - internal
+
+2. **External validation:** No external validation will be performed.
+
+> ISPOR-SMDM: External validation; STRESS: Validation - external
+
+3. **Uncertainty analysis:** Quantifying parameter uncertainty through bootstrapping and probabilistic sensitivity analysis (PSA) to propagate APC model error through to the final population-level projections.
+
+> ISPOR-SMDM: Uncertainty analysis; STRESS: Sensitivity analysis
+
+4. **Sensitivity testing and measurement error:**
+ - **Self-report bias:** Assessing the impact of misclassification using Simulation Extrapolation (SIMEX) informed by CHMS biomarker (cotinine) data. Corrected parameters will be propagated through the simulation engine to quantify the impact on final projections. Consideration will also be given to sensitivity testing for measurement differences attributable to CCHS survey design changes across eras.
+
+> STROBE: Potential bias
+
+ - **Recall bias and relapse:** Quantifying the impact of CCHS’s single-event history capture (missing relapse cycles) by comparing net transition rates against observed longitudinal dynamics in the National Population Health Survey (NPHS) panel.
+ - **Back-casting validation:** Validating the APC model by reconstructing smoking prevalence for the 1994--2000 period and benchmarking against observed NPHS data.
+ - **Model specification:** Testing the sensitivity of results to spline knot placement and alternative constraints for the period effects in the APC models. We will also compare two spline implementations: natural splines (`splines2::nsp()`, primary analysis) versus restricted cubic splines (`splines2::rcs()`), which may better characterize the boundary behaviour of period and cohort effects.
+ - **Mortality adjustment method:** The primary analysis uses the MPoRT algorithm for mortality adjustment. We will conduct a sensitivity analysis using the Peto approach (a constant mortality risk ratio by smoking status), consistent with the original Holford et al. (2014) US implementation, to quantify the influence of the mortality adjustment method on historical prevalence estimates.
+ - **Vaping transitions:** Exploring alternative assumptions for younger cohorts regarding the relationship between vaping initiation and subsequent smoking (gateway, displacement, or common liability scenarios).
+
+> STRESS: Sensitivity analysis
+
+## 3.6 Computational methods
+
+The CSHM is implemented in R (version 4.2+) using a reproducible pipeline architecture. See [Appendix C: Data availability and computational environment] for full details of the software environment, package dependencies, and code availability.
+
+> STRESS: Implementation details; ISPOR-SMDM: Computational methods
+
+# 4. Expected outcomes
+
+The primary output of this study is the CSHM: a validated, open-source tool for population health modelling.
+
+Specific outputs include:
+
+1. **Annual rate tables:** A complete set of estimated rates for smoking initiation, cessation, and intensity by single year of age, birth cohort (1890--2030), sex, and province. These will cover the historical period (1965--2023) and future projections (2024--2050).
+2. **Total smoking burden estimates:** Annual estimates of the total number of cigarettes smoked in Canada by calendar year, derived by combining smoking prevalence, intensity (cigarettes per day), and population counts. This aggregate measure provides a direct indicator of national tobacco exposure over time and can be used to contextualize tobacco policy impacts and project future cigarette consumption.
+
+3. **Individual-level smoking histories:** Synthetic birth-cohort-specific smoking histories will be generated directly from the estimated APC rate tables, following the approach of Manuel et al. (2020) [@Manuel_HR_2020] and Holford et al. (2014) [@Holford_AJPM_2014]. For each birth cohort, the probability of being a current, former, or never smoker at each age is derived by combining the initiation and cessation rate tables with the mortality adjustment. These histories provide the inputs needed for microsimulation models of tobacco-attributable disease burden in Canada.
+4. **Open-source repository:** All R code, metadata worksheets, and documentation will be made publicly available on GitHub to ensure full reproducibility and allow for future updates as new CCHS cycles are released.
+
+> GATHER: Access to data and analytics; STRESS: Implementation details
+
+5. **Interactive online platform:** A web-based dashboard (built using R Shiny) will allow users to visualize smoking trends by province and cohort, download rate tables, and explore different projection scenarios.
+
+> GATHER: Results publication; ISPOR-SMDM: Reporting
+
+The CSHM will provide inputs for established Canadian health policy models such as OncoSim (cancer) and POHEM (chronic disease), supporting the evaluation of tobacco control policies and estimation of smoking-attributable disease burden (e.g., cancer, cardiovascular disease, and dementia) at national and provincial levels. This follows the approach to model-based policy evaluation used in other jurisdictions [@Meza_J_2021].
+
+> ISPOR-SMDM: Model application; CHEERS: Model-based evaluation
+
+# 5. Timeline and resources
+
+## Governance and collaboration
+
+The CSHM is led by the Ottawa Hospital Research Institute in collaboration with the Canadian Population Attributable Causes (CPAC) consortium, the BC Centre for Disease Control and Prevention (BCCDC), Statistics Canada's Health Analysis and Modelling Division, and academic partners. The core modelling team is responsible for analytical decisions. Partner representatives will review key milestone outputs and the final model prior to dissemination. Roles and responsibilities are documented in the project repository.
+
+> ISPOR-SMDM: Project management; STRESS: Project planning
+
+## Prespecification
+
+All analyses described in this protocol are prespecified. Any deviations from the prespecified plan will be documented in the repository changelog and reported in the methods of any resulting publications.
+
+## Key milestones
+
+| Phase | Activities | Environment |
+|---|---|---|
+| Data preparation | CCHS harmonization, variable QC, descriptive tables | PUMF |
+| Model development | APC model fitting, spline tuning, sensitivity analyses | PUMF |
+| RDC analysis | Master File replication; exact continuous-variable estimates | RDC (Statistics Canada) |
+| Validation | Internal validation; back-casting against NPHS; SIMEX | PUMF + RDC |
+| Dissemination | Open-source release; manuscript submission; Shiny dashboard | Public |
+
+## Resource and access considerations
+
+Model development uses publicly available PUMF data in an open R environment hosted on GitHub. The codebase is designed to run on both PUMF and Master File data without modification, controlled by a configuration profile. Production estimates using CCHS Master Files will be produced within a Statistics Canada RDC. RDC analysis requires institutional affiliation, an approved project proposal, and compliance with Statistics Canada disclosure control protocols. The lead analyst will apply for RDC access upon protocol approval.
+
+> ISPOR-SMDM: Project management; STRESS: Implementation details
+
+# 6. Ethical considerations
+
+This study involves the secondary analysis of de-identified survey data. Analysis of CCHS Master Files will be conducted within the secure environment of Statistics Canada’s Research Data Centres, following all protocols for data privacy and disclosure control. No individual-level data that could identify respondents will be released. The use of PUMF data for model development follows the Statistics Canada Open Licence agreement.
+
+> GATHER: Ethics; STROBE: Ethics
+
+# 7. Dissemination plan
+
+Results will be disseminated to researchers, policy analysts, and public health decision-makers through:
+
+1. **Peer-reviewed publications:** Results will be submitted to journals in population health, epidemiology, and tobacco control.
+2. **Open-source release:** The CSHM R package and associated documentation will be released on GitHub.
+
+> ISPOR-SMDM: Reporting; GATHER: Technical assistance
+
+::::
+
+
+:::: {.section-body page-break="true"}
+
+# Appendices
+
+## Appendix A: Variable definitions and harmonization details
+
+Detailed documentation of the `cchsflow` harmonization rules and variable mappings used in this study will be maintained in the project repository.
+
+> GATHER: Data processing; STRESS: Input parameters
+
+## Appendix B: Mathematical model specification
+
+A full mathematical description of the APC models, spline basis functions, and mortality adjustment algorithms will be provided as supplementary material.
+
+> ISPOR-SMDM: Model specification; STRESS: Mathematical formulation
+
+## Appendix C: Data availability and computational environment {#appendix-c-data-availability}
+
+Three data sources are used in this study, each with different access procedures.
+
+### Canadian Community Health Survey — Public Use Microdata Files (PUMF)
+
+The CCHS PUMF files are publicly accessible and available without special permissions or restricted access. PUMF files can be downloaded from the [Ontario Data Documentation, Extraction Service and Infrastructure (ODESI)](https://odesi.ca) and directly from [Statistics Canada](https://www150.statcan.gc.ca/n1/en/catalogue/82M0013X). The PUMF files contain de-identified data and are released under the Statistics Canada Open Licence. These files are used for model development and the open-source implementation of the CSHM.
+
+### Canadian Community Health Survey — Master Files
+
+The CCHS Master Files, used for the production model estimates, are available at Statistics Canada's Research Data Centres (RDCs). Access is restricted to researchers affiliated with academic, government, or other recognized institutions. Researchers must apply for access through one of the RDCs across Canada; these secure facilities provide access to confidential microdata under the confidentiality provisions of the *Statistics Act*. Master files provide exact continuous values for key variables not available in the PUMF. Further information about access procedures, eligibility, and application requirements is available on the [Statistics Canada website](https://www.statcan.gc.ca/en/microdata/data-centres).
+
+### US National Health Interview Survey (NHIS)
+
+The NHIS is used for international validation of the CSHM and is conducted by the National Center for Health Statistics (NCHS). Public-use NHIS data files are freely available without special permissions and can be downloaded directly from the [NCHS website](https://www.cdc.gov/nchs/nhis/data-questionnaires-documentation.htm). The public-use files contain de-identified data. For researchers requiring access to more detailed data not available in the public-use files, the NCHS Research Data Center offers access to restricted-use files subject to an application process and strict confidentiality protocols. Further information is available on the [NCHS Research Data Center website](https://www.cdc.gov/rdc/).
+
+> *Note: The NHIS validation analyses described above are planned. The text above is retained as a placeholder pending integration of NHIS data into the study.*
+
+> GATHER: Data access; STROBE: Ethics
+
+### Computational environment
+
+The CSHM is implemented in the R programming environment (version 4.2+). The project architecture uses a reproducible data science workflow:
+
+- **Pipeline management:** The `{targets}` R package manages the dependency graph and ensures that only outdated targets are recomputed when inputs change.
+- **Environment management:** The `{renv}` package records the exact versions of all R packages and ensures a consistent software environment across development, production, and the Statistics Canada RDC environment.
+- **Harmonization:** The `{cchsflow}` package (version 3.0) handles survey data harmonization across CCHS cycles.
+
+All code will be hosted on GitHub under the MIT licence. The repository will include full documentation, metadata worksheets, and instructions for replication on both PUMF and Master file environments.
+
+> STRESS: Implementation details; GATHER: Access to data and analytics
+
+::::
+
+:::: {.section-body page-break="true"}
+
+> STROBE: References; GATHER: References
+
+# References
+
+::: bibliography
+:::
+
+::::
+
+:::: {.section-body page-break="true"}
+
+# Version history
+
+::: version-history
+:::
+
+::::
+
diff --git a/docs/protocol/study-summary.qmd b/docs/protocol/study-summary.qmd
new file mode 100644
index 0000000..987a1d9
--- /dev/null
+++ b/docs/protocol/study-summary.qmd
@@ -0,0 +1,40 @@
+# A Canadian Smoking Histories Model: a study protocol
+
+Version 0.1.0
+
+Date: 2025-03-27
+
+## Background
+
+Understanding the patterns of smoking behaviour across different birth cohorts is important for evaluating tobacco control policies and predicting smoking-related health outcomes in Canada. The Smoking History Generator (SHG) developed by the National Cancer Institute in the US has been instrumental in cancer risk modelling. The approach was replicated in Ontario, but a comprehensive Canadian equivalent covering all provinces has not yet been developed. Given Canada's distinct tobacco control policies and smoking patterns, developing a Canadian Smoking Histories Model (CSHM) will help inform future public health planning, evaluate past interventions, and project the smoking-attributable disease burden nationwide.
+
+## Objectives
+
+This study will develop a Canadian Smoking Histories Model that characterizes smoking patterns by birth cohort, sex, and province from 1965 to 2023, with projections to 2050. Specifically, this study will:
+
+1) Estimate historical and current smoking prevalence, initiation, cessation, and intensity by birth cohort for each province and territory;
+2) Examine regional and temporal differences in smoking patterns across Canada;
+3) Project future smoking prevalence and related parameters under status quo conditions; and
+4) Create a publicly accessible open model that policy actors, researchers, and others can use and update.
+
+## Methods
+
+### Data Sources
+
+The primary data source will be the Canadian Community Health Survey (CCHS) conducted from 2001 to 2023, which provides nationally representative cross-sectional information on smoking behaviours. We will use all available cycles of the CCHS, with a combined sample size of approximately 1.4 million respondents. From these surveys, we will ascertain current smoking status, age at smoking initiation, age at cessation, and smoking intensity (cigarettes per day).
+
+### Exposure ascertainment
+
+The `cchsflow` R package will harmonize smoking variables in the CCHS across all survey cycles. Appendix X has harmonization details (variables and variable details).
+
+### Analytical approach
+
+Following the approach developed by Holford et al.[ref] and adapted in Canada by Manuel et al.[ref], we will use an age-period-cohort modelling framework to generate smoking histories. The model will consist of two primary components: a smoking initiation model (for people who had not already started smoking) and a smoking cessation model (for those who are currently smoking). We will first construct retrospective smoking histories for each respondent up to the survey date based on their reported age of initiation and cessation. To address survival bias (whereby smokers are less likely to survive to be interviewed), we will apply mortality adjustments using the Mortality Population Risk Tool (MPoRT) \[tba\], which accounts for differential mortality by smoking status. Age-period-cohort models with constrained natural splines will be fitted to estimate smoking initiation and cessation probabilities by birth cohort, age, sex, and province. Additional models will estimate smoking intensity distribution. The models will be calibrated against historical survey estimates of smoking prevalence and then used to project future smoking prevalence through 2050 under status quo conditions.
+
+## Expected Outcomes
+
+The primary outcome will be a Canadian Smoking Histories Model capable of producing detailed smoking histories by birth cohort, age, sex, and region. The model will provide annual estimates of smoking prevalence, initiation rates, cessation rates, and smoking intensity, both historically (1965-2023) and projected forward (2024-2050). This model will be made available through an online interactive platform and open-source code repository, enabling researchers and policymakers to access detailed smoking parameters for health economic modelling, policy evaluation, and public health planning. The CSHM will support various policy planning models, such as OncoSim and POHEM, designed to evaluate the impact of tobacco control policies and predict smoking-attributable disease burden, including cancer, cardiovascular disease, and dementia. By providing Canada-specific smoking parameters at both national and provincial levels, the CSHM will enable more precise estimation of the health and economic impacts in the Canadian context and support evidence-based tobacco control strategies.
+
+## Change log
+
+Version 0.1.0. Date: 2025-03-27. Initial 1-page outline.
\ No newline at end of file
diff --git a/docs/reference/faq.qmd b/docs/reference/faq.qmd
new file mode 100644
index 0000000..84fa254
--- /dev/null
+++ b/docs/reference/faq.qmd
@@ -0,0 +1,86 @@
+---
+title: "Frequently asked questions"
+---
+
+## General questions
+
+### What is the Canadian Smoking Histories Model?
+
+The Canadian Smoking Histories Model (CSHM) is an R implementation of an Age-Period-Cohort model for analyzing smoking behavior patterns across different generations and time periods. It builds on the original work developed for the Ontario SHGM study (2020) and the methodological approach pioneered by Holford.
+
+### Who should use CSHM?
+
+CSHM is primarily designed for:
+
+- Epidemiologists studying smoking trends
+- Health policy researchers
+- Public health analysts
+- Population health modelers
+- Researchers interested in age-period-cohort effects
+
+### Is CSHM specific to Canadian data?
+
+While CSHM was designed for use with Canadian Community Health Survey data, the methodology can be adapted for use with other survey data sources that contain similar smoking history information.
+
+## Technical questions
+
+### What R version is required?
+
+CSHM requires R version 4.1.0 or higher.
+
+### Can I use CSHM in secure environments?
+
+Yes, CSHM is designed to work in secure computing environments like Statistics Canada's Research Data Centres (RDCs). The package uses configuration files to manage paths and dependencies, making it adaptable to different environments.
+
+### How does CSHM handle missing data?
+
+CSHM has built-in strategies for handling missing data in smoking variables, including:
+
+- Filtering out cases with missing key variables
+- Imputation options for less critical variables
+- Clear documentation of data limitations
+
+### Can I extend CSHM with my own models?
+
+Yes, CSHM is designed to be modular. You can:
+
+- Modify the spline specifications for the APC model
+- Add additional covariates to the models
+- Implement alternative statistical approaches
+- Extend the generated smoking histories with additional variables
+
+## Methodological questions
+
+### What is Age-Period-Cohort (APC) analysis?
+
+APC analysis is a statistical approach that attempts to disentangle three time-related effects:
+
+- **Age effects**: How behavior changes as individuals age
+- **Period effects**: How external factors affect everyone at a specific point in time
+- **Cohort effects**: How being born in a specific generation influences behavior
+
+### How does CSHM handle the identification problem in APC models?
+
+The APC identification problem arises because age, period, and cohort have a linear relationship (period = age + cohort). CSHM addresses this by:
+
+1. Using natural cubic splines to model non-linear effects
+2. Applying constraints to the spline coefficients
+3. Following the approach developed by Holford for identifiability
+
+### How accurate are the generated smoking histories?
+
+The generated smoking histories are based on statistical models fitted to survey data. Their accuracy depends on:
+
+1. The quality and representativeness of the input survey data
+2. The appropriateness of the APC modeling approach
+3. The validity of assumptions about smoking behavior
+
+CSHM includes validation procedures to compare the generated histories against historical data and reference studies.
+
+## Getting help
+
+If your question isn't answered here, please:
+
+1. Check the [documentation](../index.qmd)
+2. Submit an issue on [GitHub](https://github.com/Big-Life-Lab/cshm-dev/issues)
+3. Contact the project maintainers directly
\ No newline at end of file
diff --git a/docs/reference/functions.qmd b/docs/reference/functions.qmd
new file mode 100644
index 0000000..0373d78
--- /dev/null
+++ b/docs/reference/functions.qmd
@@ -0,0 +1,46 @@
+---
+title: "Function reference"
+---
+
+This page provides detailed technical documentation for the functions in the CSHM package.
+
+## Smoking initiation functions
+
+### process_smoking_initiation
+
+```r
+process_smoking_initiation(dataset, sex = "M")
+```
+
+Processes data for smoking initiation based on the Canadian Community Health Survey (CCHS) data that has been harmonized with the cchsflow package. It identifies individuals who have initiated smoking and creates a dataset with key variables needed for age-period-cohort (APC) modeling.
+
+#### Parameters
+
+- `dataset`: A data frame containing harmonized CCHS smoking history data
+- `sex`: Character string indicating sex ("M" or "F") for filtering the data
+
+#### Returns
+
+A data frame with the variables needed for smoking initiation modeling:
+- `ont_id`: Ontario resident identifier
+- `weighting`: Survey weighting factor
+- `age`: Age of smoking initiation
+- `cohort`: Birth cohort (year)
+- `period`: Period (year) when smoking initiated
+- `init`: Binary indicator of smoking initiation (1=initiated, 0=not initiated)
+
+#### Details
+
+This function replicates the SAS code used in the Ontario Smoking History Generator Model (SHGM). It filters data by sex, processes the harmonized smoking status variables, and creates a dataset that serves as the numerator for age-period-cohort rates in smoking initiation modeling.
+
+#### Example
+
+```r
+# Assuming 'cchs_data' is your harmonized CCHS dataset from cchsflow
+inits_male <- process_smoking_initiation(cchs_data, sex = "M")
+inits_female <- process_smoking_initiation(cchs_data, sex = "F")
+```
+
+## Utility functions
+
+Additional function documentation will be added as the implementation progresses.
\ No newline at end of file
diff --git a/docs/reference/index.qmd b/docs/reference/index.qmd
new file mode 100644
index 0000000..f20506e
--- /dev/null
+++ b/docs/reference/index.qmd
@@ -0,0 +1,11 @@
+---
+title: "Reference documentation"
+---
+
+This section provides technical reference information for the CSHM project:
+
+- [Variables](variables.qmd): Detailed information on all variables used in the project
+- [Functions](functions.qmd): Complete API reference for all functions
+- [Model](model.qmd): Technical details of the APC model implementation
+
+Use this section when you need precise information about specific components of the CSHM.
\ No newline at end of file
diff --git a/docs/reference/model.qmd b/docs/reference/model.qmd
new file mode 100644
index 0000000..8b56661
--- /dev/null
+++ b/docs/reference/model.qmd
@@ -0,0 +1,81 @@
+---
+title: "APC model reference"
+---
+
+This page provides detailed technical documentation for the Age-Period-Cohort (APC) model implementation in the CSHM package.
+
+## Overview
+
+The APC model is used to separate the effects of age, time period, and birth cohort on smoking initiation and cessation rates. This implementation follows the approach developed by Holford, using natural cubic splines to model these effects, as applied to Canadian smoking patterns in @Manuel_HR_2020.
+
+## Mathematical formulation
+
+The basic APC model can be expressed as:
+
+$$\log(\lambda_{apc}) = \mu + \alpha_a + \pi_p + \gamma_c$$
+
+where:
+- $\lambda_{apc}$ is the rate for age $a$, period $p$, and cohort $c$
+- $\mu$ is the overall mean
+- $\alpha_a$ is the age effect for age $a$
+- $\pi_p$ is the period effect for period $p$
+- $\gamma_c$ is the cohort effect for cohort $c = p - a$
+
+Due to the linear dependency between age, period, and cohort, constraints are needed to identify the model. This implementation uses natural cubic splines with constraints that make the first and last coefficients of each effect equal to zero.
+
+## Spline implementation
+
+The APC model uses natural cubic splines to model the age, period, and cohort effects. The spline functions are defined as:
+
+$$S(x) = \sum_{j=1}^{k} \beta_j B_j(x)$$
+
+where:
+- $B_j(x)$ are the basis functions for the natural cubic spline
+- $\beta_j$ are the coefficients to be estimated
+- $k$ is the number of basis functions (determined by the number of knots)
+
+## Knot placement
+
+The default knot placements for the APC model are:
+
+### Age knots
+```r
+age_knots <- c(10, 15, 20, 50, 60)
+```
+
+### Period knots
+```r
+period_knots <- c(1940, 1950, 1960, 1970, 1980)
+```
+
+### Cohort knots
+```r
+cohort_knots <- c(1930, 1940, 1945, 1950, 1955, 1960, 1965, 1970, 1975, 1980)
+```
+
+## Survival probability adjustments
+
+To account for differential mortality between smokers and non-smokers, the model includes adjustments for survival probabilities:
+
+$$\text{adj\_pred} = \frac{\text{pred} / \text{surv}_1}{\text{pred} / \text{surv}_1 + (1 - \text{pred}) / \text{surv}_0}$$
+
+where:
+- $\text{pred}$ is the predicted probability from the APC model
+- $\text{surv}_1$ is the survival probability for smokers
+- $\text{surv}_0$ is the survival probability for non-smokers
+
+## Implementation details
+
+The APC model is implemented using the following steps:
+
+1. Natural cubic splines are constructed for age, period, and cohort
+2. These splines are used in a generalized linear model with binomial errors
+3. Survival probabilities are used to adjust for differential mortality
+4. Separate models are fit for males and females
+
+Additional technical documentation will be added as the implementation progresses.
+
+## References
+
+::: {#refs}
+:::
\ No newline at end of file
diff --git a/docs/reference/variables.qmd b/docs/reference/variables.qmd
new file mode 100644
index 0000000..5e46c7c
--- /dev/null
+++ b/docs/reference/variables.qmd
@@ -0,0 +1,520 @@
+---
+title: "Data dictionary"
+---
+
+This page documents all variables used in the Canadian Smoking Histories Model (CSHM)
+pipeline, the synthetic cohort population, and the model output rate tables. It is generated
+from the machine-readable schemas in the [`schemas/`](https://github.com/big-life-lab/cshm/tree/main/schemas)
+directory.
+
+Three schemas cover distinct layers of the analysis:
+
+| Schema | What it covers | Pipeline stage |
+|--------|---------------|---------------|
+| [CCHS harmonization variables](#cchs-harmonization-variables) | Variables drawn from the CCHS and harmonized via cchsflow | Stages 1–7 |
+| [Synthetic cohort](#synthetic-cohort) | Synthetic Canadian population used in simulation | Stage 9 |
+| [Rate tables](#rate-tables) | APC model output; input to shg-rcpp | Stages 8–9 |
+
+---
+
+## CCHS harmonization variables
+
+Variables in `worksheets/cshm-variables.csv`. All CCHS variables are harmonized across
+survey cycles using the [cchsflow](https://github.com/Big-Life-Lab/cchsflow) R package.
+
+Each variable carries two CSHM-specific fields not present in cchsflow:
+
+- **Role** — how the variable is used in the pipeline (see [role definitions](#variable-roles))
+- **Source** — whether the variable is available in PUMF files, Master files, or both
+
+### Smoking status
+
+The primary smoking status classification uses three variables in combination.
+
+#### `SMKDSTY` — smoking status (6-category)
+
+**Label:** Type of smoker derived — 6-category (D)\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** Both (PUMF and Master)\
+**Cycles:** 2003–2022 (all except 2001)
+
+Statistics Canada derived variable. The six categories are:
+
+| Code | Label |
+|------|-------|
+| 1 | Daily smoker |
+| 2 | Occasional smoker |
+| 3 | Former daily smoker |
+| 4 | Former occasional smoker |
+| 5 | Experimental smoker (tried but <100 cigarettes) |
+| 6 | Never smoker (abstainer) |
+
+Used as the primary smoking classification in CSHM. The initiation denominator uses
+codes 1–5 (ever exposed); the cessation denominator uses codes 1–4 (ever daily or
+occasional smoker). Never-smokers (code 6) are excluded from both numerators.
+
+---
+
+#### `SMK_01A` — smoked 100+ cigarettes lifetime
+
+**Label:** Ever smoked 100 or more cigarettes in lifetime\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2001–2022
+
+| Code | Label |
+|------|-------|
+| 1 | Yes |
+| 2 | No |
+
+Gate question for ever-smoker definition. Combined with `SMK_01B` and `SMK_202` to
+derive the three-category never/current/former smoking status used in cchsflow.
+
+---
+
+#### `SMK_01B` — ever smoked a whole cigarette
+
+**Label:** Ever smoked a whole cigarette\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** PUMF\
+**Cycles:** 2001–2022
+
+| Code | Label |
+|------|-------|
+| 1 | Yes |
+| 2 | No |
+
+Second gate for never-smoker definition. A respondent is classified as a never smoker
+if they have smoked fewer than 100 cigarettes lifetime AND have never smoked a whole
+cigarette.
+
+---
+
+#### `SMK_202` — current smoking frequency
+
+**Label:** Type of smoker presently (daily/occasional/not at all)\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2001–2022 (cchsflow harmonized from `SMK_005` in 2015–16)
+
+| Code | Label |
+|------|-------|
+| 1 | Daily |
+| 2 | Occasional |
+| 3 | Not at all |
+
+---
+
+### Smoking initiation
+
+#### `age_first_cigarette` — age smoked first whole cigarette (unified)
+
+**Label:** Age smoked first whole cigarette — unified (cchsflow v3)\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2001–2022\
+**Units:** Years\
+**Requires:** cchsflow v3 (PR #163)
+
+Primary input for constructing the initiation numerator. The initiation age defines
+the age-period-cohort cell in which a respondent transitions from never smoker to
+current smoker.
+
+- **Master files:** exact age (`SMK_01C`)
+- **PUMF files:** midpoint-estimated from grouped response (`SMKG01C_cont`)
+
+Derived from `SMKG01C_cont` (intermediate; see [derived inputs](#derived-inputs)).
+
+---
+
+#### `age_start_smoking` — age started smoking daily (unified)
+
+**Label:** Age started smoking daily — unified (cchsflow v3)\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2001–2022\
+**Units:** Years\
+**Requires:** cchsflow v3 (PR #163)
+
+Age at which a daily smoker (or former daily smoker) began smoking daily. Used
+alongside `age_first_cigarette` when distinguishing initiation age (first cigarette)
+from daily onset.
+
+- **Master files:** exact age (`SMK_040`)
+- **PUMF files:** midpoint-estimated (`SMKG040_cont`, ±3 years)
+
+---
+
+### Smoking cessation
+
+#### `time_quit_smoking` — years since quit (unified)
+
+**Label:** Years since quit smoking — unified (cchsflow v3)\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2001–2022\
+**Units:** Years\
+**Requires:** cchsflow v3 (PR #163)
+
+Primary input for the cessation numerator. Used to compute cessation age:
+`age_cessation = age − time_quit_smoking`.
+
+- **Master files:** exact years (`SMK_09C`)
+- **PUMF files:** midpoint-estimated from grouped response (`SMK_09A_cont`)
+
+Covers former daily smokers only. Former occasional smokers use `SMK_06A_cont`.
+
+---
+
+#### `SMK_09A_cont` — years quit daily (PUMF)
+
+**Label:** Years since stopped smoking daily — former daily (PUMF continuous)\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** PUMF\
+**Cycles:** 2001 to 2013–14\
+**Units:** Years
+
+PUMF fallback for cessation timing before cchsflow v3. Superseded by
+`time_quit_smoking` once PR #163 merges. Retained for pre-v3 compatibility.
+
+---
+
+#### `SMKDGSTP_cont` — years since quit (all former smokers)
+
+**Label:** Years since quit smoking completely — all former smokers (continuous)\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2009–10 to 2022\
+**Units:** Years
+
+Statistics Canada derived continuous variable covering both former daily and former
+occasional smokers. Preferred over `SMK_09A_cont` where available (2009+).
+
+---
+
+### Smoking intensity
+
+#### `SMK_204` — cigarettes per day (current daily smokers)
+
+**Label:** Number of cigarettes smoked daily — current daily smokers\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2001–2022\
+**Units:** Cigarettes per day
+
+---
+
+#### `SMK_208` — cigarettes per day (former daily smokers)
+
+**Label:** Number of cigarettes smoked daily — former daily smokers\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** Both\
+**Cycles:** 2001–2022\
+**Units:** Cigarettes per day
+
+Peak cigarettes per day while smoking. Used as a smoking intensity input alongside
+cessation timing.
+
+---
+
+### Demographics
+
+#### `DHH_SEX` — sex
+
+**Label:** Sex\
+**Type:** Categorical\
+**Role:** Model stratifier\
+**Source:** PUMF (extended to 2019–20 and 2022 in `cshm-variable-details.csv`)\
+**Cycles:** 2001–2022
+
+| Code | Label |
+|------|-------|
+| 1 | Male |
+| 2 | Female |
+
+APC models are fitted separately for men and women because period effect constraint
+years differ by sex (initiation women: from 1999; initiation men: from 2003).
+
+---
+
+#### `DHHGAGE_cont` — age (continuous)
+
+**Label:** Age — continuous\
+**Type:** Continuous\
+**Role:** Predictor\
+**Source:** PUMF\
+**Cycles:** 2001–2022\
+**Units:** Years
+
+Preferred continuous age variable for the APC model. In PUMF files, derived as a
+midpoint estimate from grouped age categories (e.g., 5-year bands in 2019–20 and
+2022, 16-category scale in earlier cycles). Master files provide exact age.
+
+---
+
+#### `DHHGAGE_A` — age (grouped)
+
+**Label:** Age (grouped)\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** PUMF\
+**Cycles:** 2001–2022\
+**Units:** Years
+
+Grouped age category. Used in data cleaning for age-based exclusion filters
+(e.g., excluding respondents aged 12–17, category 1).
+
+---
+
+#### `GEOGPRV` — province of residence
+
+**Label:** Province of residence\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** PUMF\
+**Cycles:** 2001–2022
+
+Statistics Canada province/territory codes. Used for provincial APC stratification.
+Territories are pooled due to small sample sizes. See [rate tables schema](#rate-tables)
+for province code definitions.
+
+---
+
+#### `WTS_M` — survey weight
+
+**Label:** Master survey weight\
+**Type:** Continuous\
+**Role:** Design\
+**Source:** PUMF\
+**Cycles:** 2001–2022
+
+CCHS sampling and post-stratification weight. Applied during APC model fitting.
+See protocol §3.4.4 for the weighting method used (Peto / case weight in logistic
+regression vs. `svyglm()`).
+
+---
+
+#### `SurveyCycle` — survey cycle
+
+**Label:** Survey cycle\
+**Type:** Categorical\
+**Role:** Design\
+**Source:** Both\
+**Cycles:** 2001–2022
+
+| Code | Survey | Midpoint year used |
+|------|--------|--------------------|
+| 1 | CCHS 2001 | 2002 |
+| 2 | CCHS 2003 | 2003 |
+| 3 | CCHS 2005 | 2005 |
+| 4 | CCHS 2007–08 | 2008 |
+| 5 | CCHS 2009–10 | 2010 |
+| 6 | CCHS 2011–12 | 2012 |
+| 7 | CCHS 2013–14 | 2014 |
+| 8 | CCHS 2015–16 | 2016 |
+| 9 | CCHS 2017–18 | 2018 |
+| 10 | CCHS 2019–20 | 2020 |
+| 11 | CCHS 2022 | 2022 |
+
+Derived from the dataset name at harmonization time. Used to compute `survey_year`,
+which in turn defines `period` for the APC model.
+
+---
+
+#### `SDCFIMM` — immigrant status
+
+**Label:** Immigrant status (D)\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** PUMF\
+**Cycles:** 2007–08 to 2013–14 only
+
+| Code | Label |
+|------|-------|
+| 1 | Non-immigrant |
+| 2 | Immigrant |
+| 3 | Non-permanent resident |
+
+Used in the MPoRT mortality correction. Not available in 2001–2005 or 2015–2022
+PUMF files (coded as `NA(c)` — not asked this cycle).
+
+---
+
+#### `EDUDR03` — education (3-category)
+
+**Label:** Highest education level — 3 categories\
+**Type:** Categorical\
+**Role:** Predictor\
+**Source:** PUMF\
+**Cycles:** 2001 to 2017–18 (not available 2019–20 or 2022)
+
+| Code | Label |
+|------|-------|
+| 1 | Less than high school |
+| 2 | High school graduate |
+| 3 | Post-secondary |
+
+cchsflow harmonized variable. Used for subgroup analyses, not a primary APC model
+covariate.
+
+---
+
+### Derived inputs
+
+These variables are intermediate outputs of cchsflow harmonization. They are not used
+directly in the APC model but are required inputs for the unified smoking variables above.
+
+| Variable | Role | Description |
+|----------|------|-------------|
+| `SMKG01C_cont` | Derived input | Age smoked first cigarette — PUMF grouped continuous (midpoint estimate for `age_first_cigarette`) |
+| `SMKG040_cont` | Derived input | Age started smoking daily — PUMF grouped continuous (midpoint estimate for `age_start_smoking`) |
+| `SMKDVSTP` | Derived input | Time since quit — Master exact value (exact input for `time_quit_smoking` in RDC environment) |
+| `SMK_06A_cont` | Predictor | Years since quit — former occasional smokers (PUMF, 2001 to 2013–14) |
+
+---
+
+### Variable roles
+
+| Role | Meaning |
+|------|---------|
+| `predictor` | Covariate used as a model input |
+| `outcome` | Dependent variable (initiation or cessation event) |
+| `derived-input` | Intermediate variable; input to a unified variable but not directly in the model |
+| `weight` | Survey sampling weight |
+| `design` | Survey design variable (cycle, weight) used to set up the analysis |
+| `model-stratifier` | Variable on which separate models are fitted (sex) |
+| `apc-internal` | Variable computed during APC preparation with no counterpart in the source survey |
+
+---
+
+## Synthetic cohort
+
+Variables in the synthetic Canadian population used for simulation (Stage 9, shg-rcpp).
+These records are generated from Statistics Canada demographic projections, not from the
+CCHS. See `schemas/cshm-cohort.yaml` for the full schema.
+
+### Person-level attributes
+
+| Variable | Type | Description |
+|----------|------|-------------|
+| `person_id` | Integer | Unique identifier for the synthetic individual within a simulation run |
+| `birth_year` | Integer (1900–2023) | Year of birth; defines the birth cohort (`cohort = period − age`) |
+| `sex` | Categorical (1/2) | Biological sex: 1 = male, 2 = female |
+| `province` | Categorical | Province/territory of residence (Statistics Canada GEOGPRV codes; territories pooled as 60) |
+| `weight` | Float (≥ 0) | Population weight from demographic projections; used to scale simulation results to population estimates |
+| `simulation_run` | Integer | Monte Carlo run identifier; multiple runs quantify uncertainty |
+
+### Smoking history record
+
+One record per person per age in the simulation. A complete smoking history is a
+sequence of these records from birth to the projection end year.
+
+| Variable | Type | Description |
+|----------|------|-------------|
+| `person_id` | Integer | Links to person-level record |
+| `age` | Integer (0–110) | Age at this time point |
+| `period` | Integer | Calendar year: `birth_year + age` |
+| `smoking_status` | Categorical | `never` / `current` / `former` |
+| `age_initiation` | Integer | Age first smoked a whole cigarette (null if never smoker) |
+| `age_cessation` | Integer | Age of most recent quit (null if never or current smoker) |
+
+### Smoking status definitions
+
+| Status | Definition |
+|--------|-----------|
+| Never smoker | Fewer than 100 lifetime cigarettes AND never smoked a whole cigarette |
+| Current smoker | ≥ 100 lifetime cigarettes AND currently smoking (daily or occasionally) |
+| Former smoker | ≥ 100 lifetime cigarettes AND not currently smoking |
+
+These definitions match the CCHS-derived classification in the APC model.
+
+### Simulation provenance
+
+Each simulation run carries metadata for reproducibility and traceability.
+
+| Field | Description |
+|-------|-------------|
+| `run_id` | Unique run identifier (e.g., `cshm-prod-20260301-001`) |
+| `run_date` | Date the simulation was executed |
+| `config_profile` | `{config}` profile used (`prod`, `statscan`, etc.) |
+| `rate_table_version` | Version of the rate tables used as input |
+| `random_seed` | Random seed for reproducibility |
+
+---
+
+## Rate tables
+
+Model output from Stage 8 (APC model fitting), formatted as input to the
+[shg-rcpp](https://github.com/NCI-CISNET/shg-rcpp) Smoking History Generator.
+See `schemas/cshm-rate-tables.yaml` for the full schema.
+
+A complete Canadian shg-rcpp run requires four rate tables:
+initiation × {male, female} and cessation × {male, female}.
+
+### Rate table columns
+
+| Variable | Type | Description |
+|----------|------|-------------|
+| `model_type` | Categorical | `initiation` or `cessation` |
+| `sex` | Categorical (1/2) | 1 = male, 2 = female |
+| `province` | Categorical | `national` or Statistics Canada GEOGPRV code (territories pooled as 60) |
+| `age` | Integer | Age in years (0–110) |
+| `period` | Integer | Calendar year |
+| `cohort` | Integer | Birth year: `period − age`; included for shg-rcpp compatibility |
+| `rate` | Float (0–1) | Conditional probability of the smoking transition |
+| `rate_lower` | Float (0–1) | Lower bound of 95% confidence interval |
+| `rate_upper` | Float (0–1) | Upper bound of 95% confidence interval |
+| `mortality_correction` | String | Method applied: `peto` (default) or `mport` (not yet implemented) |
+
+### Rate definitions
+
+**Initiation rate:** P(initiate at age *a* | never smoker at age *a*−1).
+Zero for ages below `survey_bound(cfg, "age_first_cigarette", "min")` (PUMF: 13, Master: 8).
+
+**Cessation rate:** P(quit at age *a* | current smoker at age *a*−1).
+Also zero for ages below `survey_bound(cfg, "age_first_cigarette", "min")`, the same lower age bound used for initiation.
+
+### Period constraint years
+
+Period effects are held constant beyond the observed data range. Rates at ages/periods
+beyond these constraints use the last estimated period effect.
+
+| Model | Sex | Constant from |
+|-------|-----|--------------|
+| Initiation | Women | 1999 |
+| Initiation | Men | 2003 |
+| Cessation | Both | 2013 |
+| Cohort (initiation) | Both | Prior to 1920 |
+| Cohort (cessation) | Both | 1985 onward |
+
+See Manuel et al. (2020) and Holford et al. (2014) for the methodological basis.
+
+### Rate table provenance
+
+Each rate table file includes a metadata block for traceability.
+
+| Field | Description |
+|-------|-------------|
+| `table_id` | Unique identifier encoding stratum (e.g., `cshm-initiation-male-national-v0.1.0`) |
+| `cshm_version` | CSHM software version |
+| `cchs_cycles` | CCHS cycles used to fit the model |
+| `data_source` | `pumf` or `master` |
+| `date_produced` | Date the table was generated |
+| `config_profile` | `{config}` profile used |
+| `shgrcpp_compatible` | Whether the table has been validated against the shg-rcpp input specification |
+
+PUMF-derived national tables (`province = national`, `data_source = pumf`) are the
+primary shareable artifact. They can be distributed under the
+[Statistics Canada Open Licence](https://www.statcan.gc.ca/eng/reference/licence)
+and used directly with shg-rcpp without requiring access to restricted Master files.
diff --git a/docs/results/table-1.qmd b/docs/results/table-1.qmd
new file mode 100644
index 0000000..6005d62
--- /dev/null
+++ b/docs/results/table-1.qmd
@@ -0,0 +1,79 @@
+---
+title: "Table 1 — Study population characteristics"
+author: "CSHM Development Team"
+date: today
+format:
+  html:
+    toc: true
+    self-contained: true
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+
+cfg <- config::get(file = "config.yml")
+```
+
+# Table 1a — Study population before imputation
+
+Characteristics of CCHS respondents included in CSHM, prior to multiple imputation.
+Columns show males and females separately; overall column shows the combined sample.
+
+```{r table-1a}
+source("R/variables-sheet-utils.R")
+source("R/variable-details-sheet-utils.R")
+source("R/get-descriptive-data.R")
+source("R/create-descriptive-tables.R")
+source("R/descriptive-data.R")
+
+variables_sheet <- targets::tar_read("variables_sheet")
+variable_details_sheet <- targets::tar_read("variable_details_sheet")
+table_1a_data <- targets::tar_read("table_1a_data")
+
+predictor_vars <- select_vars_by_role("predictor", variables_sheet)
+
+create_descriptive_table(
+ descriptive_data = table_1a_data,
+ variables_sheet = variables_sheet,
+ variable_details_sheet = variable_details_sheet,
+ variables = predictor_vars,
+ column_stratifier = survey_var(cfg, "sex"),
+ sections_order = c("Sociodemographics", "Health behaviour")
+)
+```
+
+# Table 1b — Study population after imputation
+
+```{r table-1b}
+table_1b_data <- targets::tar_read("table_1b_data")
+
+create_descriptive_table(
+ descriptive_data = table_1b_data,
+ variables_sheet = variables_sheet,
+ variable_details_sheet = variable_details_sheet,
+ variables = predictor_vars,
+ column_stratifier = survey_var(cfg, "sex"),
+ sections_order = c("Sociodemographics", "Health behaviour")
+)
+```
+
+# Appendix — Characteristics by survey cycle (pre-imputation)
+
+Each cycle shows sex-stratified results. Used to assess temporal trends in
+sample composition and smoking variable availability across cycles.
+
+```{r table-cycle}
+cleaned_data <- targets::tar_read("cleaned_data")
+
+create_cycle_specific_descriptive_table(
+ study_data = cleaned_data,
+ variables_sheet = variables_sheet,
+ variable_details_sheet = variable_details_sheet,
+ variables = predictor_vars,
+ cycle_col = survey_var(cfg, "cycle"),
+ cycle_labels = cfg$cycle_labels,
+ column_stratifier = survey_var(cfg, "sex"),
+ sections_order = c("Sociodemographics", "Health behaviour")
+)
+```
diff --git a/docs/smoking-ascertainment.qmd b/docs/smoking-ascertainment.qmd
deleted file mode 100644
index f5e30bf..0000000
--- a/docs/smoking-ascertainment.qmd
+++ /dev/null
@@ -1,28 +0,0 @@
-# Define smoking status
-
-Each Canadian Community Health Survey respondent is classified into three groups: never smoker, current smoker, and former smoker based on smoking intensity variables (2.1: smkc_01a, smkc_202, smkc_204, smkc_05d, smkc_06a, smkc_208).
-
-
-
-Age at initiation, age at daily smoking start, age at daily smoking stop, and age at final cessation (where appropriate), along with smoking intensity responses, are taken from the survey responses (2.1: smkc_01b, smkc_203, smkc_06a-c, smkc_10a-c, smkc_204, smkc_208), and used to create life-histories of smoking for all survey respondents. Any missing ages are imputed through multiple imputation using birth year, sex, race, immigration status, and prior age measures as covariates (2.1: dhhc_yob, dhhc_sex, sdccdrac, sdccfimm). The MPoRT model is applied to the life-histories to calculate one-year mortality probabilities. These mortality probabilities were scaled to published Statistics Canada mortality probabilities, and the product of the scaled mortality probabilities become cumulative survival probabilities from each age to survey age.
-
-
-1) review smoking variables (above)
-2) Label which variables are included and which are not.
-3) create a R code to list the variables in this QMD.
-4) connect with Caitlin re: ICES variables. Where are they? Was only up to 2018 harmonized?
-5) ask Rafiel to check the variables.
-6) add folks to this repo.
-
-
-smkc_01a 'In lifetime, smoked 100 or more cigarettes'
-smkc_01b 'Ever smoked whole cigarette' - missing in cchsflow
-smkc_203
-smkc_06a-c
-smkc_10a-c
-smkc_204
-smkc_208
-dhhc_yob
-dhhc_sex
-sdccdrac
-sdccfimm
\ No newline at end of file
diff --git a/docs/tutorials/advanced-usage.qmd b/docs/tutorials/advanced-usage.qmd
new file mode 100644
index 0000000..3052e54
--- /dev/null
+++ b/docs/tutorials/advanced-usage.qmd
@@ -0,0 +1,189 @@
+---
+title: "Advanced usage of CSHM"
+---
+> The following is a template example of a tutorial. To be developed.
+
+This tutorial covers advanced usage scenarios for the Canadian Smoking Histories Model (CSHM).
+
+## Prerequisites
+
+Before proceeding with this tutorial, you should:
+
+- Have completed the [Getting Started](getting-started.qmd) guide
+- Be familiar with the [basic usage](basic-usage.qmd) of CSHM
+- Have a good understanding of R programming
+- Be familiar with statistical concepts related to Age-Period-Cohort models
+
+## Customizing APC model specifications
+
+The default APC model specifications in CSHM are based on the original Canadian SHM study, following the Holford/CISNET approach. However, you can customize these specifications to better fit your specific research needs.
+
+### Modifying spline knots
+
+The placement of knots in the spline functions can impact the model's ability to capture temporal trends:
+
+```r
+# Custom knot placement
+custom_config <- list(
+ age_knots = c(10, 15, 20, 30, 40, 50, 60), # More knots for age
+ period_knots = c(1940, 1960, 1980, 2000, 2020), # Different period knots
+ cohort_knots = c(1920, 1940, 1960, 1980, 2000) # Simplified cohort knots
+)
+
+# Run APC model with custom knots
+custom_model <- run_apc_model(
+ data = harmonized_data,
+ sex = "M",
+ knots = custom_config
+)
+```
+
+### Adding covariates
+
+You can extend the basic APC model to include additional covariates:
+
+```r
+# APC model with additional covariates
+covariate_model <- run_apc_model(
+ data = harmonized_data,
+ sex = "M",
+ formula = d/pop ~ age_s + period_s + cohort_s +
+ age_spl1 + age_spl2 + age_spl3 +
+ period_spl1 + period_spl2 + period_spl3 +
+ cohort_spl1 + cohort_spl2 + cohort_spl3 +
+ education + urban # Additional covariates
+)
+```
+
+## Advanced data preprocessing
+
+### Handling complex survey designs
+
+To obtain valid population estimates from the example data (the Canadian Community Health Survey), you need to account for the survey's complex sampling design:
+
+```r
+library(survey)
+# ... (survey design specification to be added)
+```
+
+### Custom variable derivation
+
+You can create custom derived variables for specific research questions:
+
+```r
+library(dplyr)
+
+# Create a variable for time-to-event analysis
+analysis_data <- harmonized_data %>%
+ mutate(
+ # Years between first cigarette and daily smoking
+ time_to_daily = ifelse(!is.na(SMKG203_cont) & !is.na(SMKG01C_cont),
+ SMKG203_cont - SMKG01C_cont,
+ NA),
+
+ # Smoking duration for former smokers
+ smoking_duration = ifelse(!is.na(SMKG207_cont) & !is.na(SMK_09A_cont),
+ SMK_09A_cont - SMKG207_cont,
+ NA)
+ )
+```
+
+## Generating synthetic populations
+
+For microsimulation models, you may need to generate a synthetic population:
+
+> replace with the CISNET Smoking History Generator Model when available.
+```r
+# Generate a synthetic population
+synthetic_pop <- generate_synthetic_population(
+ n = 100000, # Population size
+ start_year = 1900, # Earliest birth year
+ end_year = 2020, # Latest birth year
+ age_distribution = "canada", # Use Canadian age distribution
+ sex_ratio = 0.49 # Proportion male
+)
+
+# Apply smoking histories to the synthetic population
+pop_with_smoking <- generate_smoking_histories(
+ population = synthetic_pop,
+ model = apc_model_results
+)
+```
+
+## Model validation and calibration
+
+### Comparing with historical data
+
+You can validate your model against historical smoking prevalence data:
+
+```r
+# Load historical smoking prevalence data
+historical_data <- read.csv("path/to/historical_prevalence.csv")
+
+# Compare model predictions with historical data
+validation_plot <- compare_with_historical(
+ model_results = apc_model_results,
+ historical_data = historical_data,
+ by = "year" # Compare by year
+)
+
+# Display validation plot
+print(validation_plot)
+```
+
+### Calibrating the model
+
+If needed, you can calibrate the model to better match external data sources:
+TBA...
+```r
+# Calibrate the model
+calibrated_model <- calibrate_model(
+ model = apc_model_results,
+ target_data = target_prevalence,
+ calibration_method = "multiplicative"
+)
+```
+
+## Working with secure environments
+
+When working in secure environments like Statistics Canada's RDCs, you may need to adapt your workflow:
+
+```r
+# Load secure environment configuration
+config <- config::get(file = "path/to/secure_config.yml", config = "secure")
+
+# Use configuration to access data
+secure_data_path <- config$data_path
+```
+
+## Performance optimization
+
+For large datasets or complex analyses, you may need to optimize performance:
+
+```r
+# Parallel processing for bootstrap samples
+library(future)        # plan(), multisession
+library(future.apply)  # future_lapply() lives here, not in future or parallel
+
+# Set up parallel processing
+plan(multisession, workers = 4)
+
+# Run bootstrap in parallel; future.seed = TRUE gives sound parallel RNG for sample()
+bootstrap_results <- future_lapply(seq_len(100), function(i) {
+  # Bootstrap sample
+  sample_idx <- sample(nrow(data), replace = TRUE)
+  bootstrap_data <- data[sample_idx, ]
+
+  # Run model on bootstrap sample
+  model <- run_apc_model(bootstrap_data)
+
+  # Return results
+  model$coefficients
+}, future.seed = TRUE)
+```
+
+## Next steps
+
+- Learn about [implementing custom models](../how-to/custom-models.qmd)
+- Explore [integration with other health models](../how-to/integration.qmd)
+- See examples of [research applications](../explanation/research-applications.qmd)
\ No newline at end of file
diff --git a/docs/tutorials/basic-usage.qmd b/docs/tutorials/basic-usage.qmd
new file mode 100644
index 0000000..8ea5564
--- /dev/null
+++ b/docs/tutorials/basic-usage.qmd
@@ -0,0 +1,72 @@
+---
+title: "Basic usage of CSHM"
+---
+
+This tutorial provides basic usage examples for the Canadian Smoking Histories Model (CSHM).
+
+## Prerequisites
+
+Before you begin, make sure you have:
+
+- Completed the [Getting Started](getting-started.qmd) guide
+- Prepared your CCHS data as described in [Processing CCHS Data](../how-to/processing-cchs-data.qmd)
+
+## Basic workflow
+
+The typical CSHM workflow consists of four main steps:
+
+1. **Data preparation**: Load and harmonize CCHS data
+2. **Smoking behavior analysis**: Extract smoking initiation and cessation data
+3. **APC modeling**: Apply the Age-Period-Cohort model
+4. **Generating smoking histories**: Create complete smoking histories
+
+## Example: Analyzing smoking initiation
+
+```r
+# Load the required packages
+library(cshm)
+library(dplyr)
+library(ggplot2)
+
+# Load preprocessed CCHS data (see "Processing CCHS Data" guide)
+data <- readRDS("path/to/harmonized_cchs_data.rds")
+
+# Process smoking initiation data by sex
+init_male <- process_smoking_initiation(data, sex = "M")
+init_female <- process_smoking_initiation(data, sex = "F")
+
+# Examine the results
+summary(init_male)
+summary(init_female)
+
+# Visualize initiation ages by birth cohort
+ggplot(init_male, aes(x = cohort, y = age)) +
+ geom_point(alpha = 0.3) +
+ geom_smooth(method = "loess") +
+ labs(
+ title = "Age of Smoking Initiation by Birth Cohort (Males)",
+ x = "Birth Year",
+ y = "Age of Initiation"
+ ) +
+ theme_minimal()
+```
+
+## Example: Running the full model
+
+```r
+# Full model example (to be implemented)
+# model_results <- run_apc_model(data)
+#
+# # Generate smoking histories for a simulated population
+# simulated_histories <- generate_smoking_histories(
+# n = 10000, # Number of individuals to simulate
+# birth_years = 1920:2000, # Birth years to include
+# model = model_results # APC model results
+# )
+```
+
+## Next steps
+
+- Learn how to interpret the results in [Understanding CSHM Output](../how-to/interpreting-results.qmd)
+- Explore [advanced usage options](advanced-usage.qmd) for customizing the model
+- See how to [validate your results](../how-to/validating-results.qmd) against reference data
\ No newline at end of file
diff --git a/docs/tutorials/getting-started.qmd b/docs/tutorials/getting-started.qmd
new file mode 100644
index 0000000..3d2f8d0
--- /dev/null
+++ b/docs/tutorials/getting-started.qmd
@@ -0,0 +1,132 @@
+---
+title: "Getting started with CSHM"
+subtitle: "A step-by-step guide to the Canadian Smoking Histories Model"
+---
+
+## Introduction
+
+This tutorial will help you get started with the Canadian Smoking Histories Model (CSHM). The CSHM is an R implementation of an Age-Period-Cohort (APC) model for analyzing smoking behavior patterns across different generations and time periods.
+
+## What is CSHM?
+
+The Canadian Smoking Histories Model:
+
+1. Analyzes smoking initiation and cessation patterns from survey data
+2. Incorporates age, period, and cohort effects on smoking behavior
+3. Generates synthetic smoking histories for population health modeling
+4. Harmonizes data across multiple cycles of the Canadian Community Health Survey
+
+## Prerequisites
+
+Before you begin, make sure you have:
+
+- R (version 4.1.0 or higher)
+- RStudio (recommended)
+- Basic knowledge of R programming
+- Access to CCHS data (or the sample datasets provided)
+
+## Installation
+
+### Development version
+
+To install the development version from GitHub:
+
+```r
+# Install remotes package if you don't have it
+install.packages("remotes")
+
+# Install CSHM from GitHub
+remotes::install_github("cshgm/cshgm")
+```
+
+### Working with the source code
+
+If you want to work with the source code directly:
+
+```bash
+# Clone the GitHub repository
+git clone https://github.com/Big-Life-Lab/cshm-dev.git
+cd cshm-dev
+```
+
+```r
+# Install the renv package if you don't have it, then set up the project environment
+install.packages("renv")
+renv::restore()
+```
+
+## CSHM workflow
+
+The CSHM follows a sequential workflow:
+
+1. **Accessing and loading CCHS data**: Obtaining CCHS data from appropriate sources and loading it into R
+2. **Creating a working file**: Selecting required variables and preparing data for analysis
+3. **Harmonizing across CCHS cycles**: Standardizing variables and values across different survey cycles
+4. **Cleaning and deriving variables**: Processing smoking-related variables for consistency
+5. **Data validation and consistency checks**: Creating descriptive tables and verification of data quality
+6. **APC modeling**: Applying the Age-Period-Cohort model to estimate smoking behaviors
+7. **Generating smoking histories**: Creating synthetic smoking histories based on the model
+8. **Model validation**: Comparing model results with historical data
+
+
+
+## Configuration
+
+CSHM uses configuration files to manage paths and parameters:
+
+```r
+# Load the default configuration
+config <- config::get()
+
+# Access configuration parameters
+data_path <- config$data_path
+```
+
+You can create custom configurations for different environments by modifying the `config.yml` file.
+
+## Quick start example
+
+Here's a simple example to get you started:
+
+```r
+# Load required packages
+library(cshm)
+library(dplyr)
+library(ggplot2)
+
+# Load sample data included with the package
+data("sample_cchs_data")
+
+# Process smoking initiation data
+init_data <- process_smoking_initiation(sample_cchs_data, sex = "M")
+
+# View the first few rows
+head(init_data)
+
+# Plot age of initiation by birth cohort
+ggplot(init_data, aes(x = cohort, y = age)) +
+ geom_point(alpha = 0.3) +
+ geom_smooth() +
+ labs(title = "Age of Smoking Initiation by Birth Cohort",
+ x = "Birth Year",
+ y = "Age of Initiation") +
+ theme_minimal()
+```
+
+## Step-by-step guides
+
+The CSHM documentation provides detailed guides for each step of the workflow:
+
+1. [Data loading and harmonizing](../how-to/data-loading-and-harmonizing.qmd) - Prepare CCHS data for analysis
+2. [Processing CCHS data](../how-to/processing-cchs-data.qmd) - Convert raw survey data into format needed for modeling
+3. [Running the APC model](../how-to/reproducing-manuel-study.qmd) - Apply the Age-Period-Cohort model
+4. [Basic usage examples](basic-usage.qmd) - Common analysis tasks
+5. [Advanced usage](advanced-usage.qmd) - Customization and extensions
+
+## Getting help
+
+If you encounter issues or have questions:
+
+- Check the [FAQ](../reference/faq.qmd)
+- Submit an issue on [GitHub](https://github.com/Big-Life-Lab/cshm-dev/issues)
+- Contact the project maintainers
\ No newline at end of file
diff --git a/docs/workflow/.gitignore b/docs/workflow/.gitignore
new file mode 100644
index 0000000..2ccd777
--- /dev/null
+++ b/docs/workflow/.gitignore
@@ -0,0 +1,4 @@
+/.quarto/
+**/*.quarto_ipynb
+**/*_files/
+*.html
diff --git a/docs/workflow/1-variable-setup.qmd b/docs/workflow/1-variable-setup.qmd
new file mode 100644
index 0000000..1bf9a01
--- /dev/null
+++ b/docs/workflow/1-variable-setup.qmd
@@ -0,0 +1,395 @@
+---
+title: "Stage 1 — Variable setup"
+subtitle: "Study data dictionary and variable roles"
+---
+
+## Purpose
+
+This page serves as the **study data dictionary** — a complete description of every variable used in the CSHM, generated directly from the cchsflow worksheets. If the cchsflow worksheets change, this page updates automatically on re-render.
+
+The data includes both PUMF and Master versions of variables available for all CCHS cycles. Variables in cchsflow are harmonized across cycles. There are many harmonization issues, which are discussed in the cchsflow R package documentation as well as in the protocol. Further, the CCHS undergoes major redevelopment at different periods. Redevelopment includes redevelopment of the study frame and selection process, which affects response bias.
+
+The main difference between the PUMF and Master versions of the CCHS is that the PUMF categorizes continuous variables such as age to reduce the risk of respondent identification. In this study, categorized variables such as age (DHHGAGE, identified as a categorical variable by the "G" before "AGE") are transformed into "pseudo-continuous" variables (DHHGAGE_cont, identified by the "_cont" suffix) by assigning the mid-point value of each category.
+
+## Inputs
+
+| File | Contents |
+|------|----------|
+| `worksheets/cshm-variables.csv` | Study variable list: name, label, type, role, section, source |
+| `worksheets/cchsflow-variable-details.csv` | In-repo snapshot of cchsflow v3 base recoding rules (cycles 2001–2023) |
+| `worksheets/cshm-variable-details.csv` | CSHM extension rows (GEOGPRV and WTS_M, 2019–20 and 2022 cycles) |
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `variables_sheet` | Data frame: one row per study variable |
+| `variable_details_sheet` | Data frame: combined cchsflow base + CSHM extension recoding rules |
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+
+# root must be set before source() calls; use absolute paths here
+repo_root <- normalizePath("../..")
+knitr::opts_knit$set(root.dir = repo_root)
+
+cfg <- config::get(file = file.path(repo_root, "config.yml"))
+source(file.path(repo_root, "R/variables-sheet-utils.R"))
+source(file.path(repo_root, "R/variable-details-sheet-utils.R"))
+
+withr::with_dir(repo_root, {
+ variables_sheet <- targets::tar_read("variables_sheet")
+ variable_details_sheet <- targets::tar_read("variable_details_sheet")
+})
+```
+
+```{r helpers, include=FALSE}
+library(dplyr)
+library(knitr)
+
+# All cycles used in this study (PUMF)
+all_pumf_cycles <- c("2001", "2003", "2005", "2007-08", "2009-10",
+ "2011-12", "2013-14", "2015-16", "2017-18", "2019-20", "2022")
+
+pumf_cycle_map <- c(
+ cchs2001_p = "2001", cchs2003_p = "2003", cchs2005_p = "2005",
+ cchs2007_2008_p = "2007-08", cchs2009_2010_p = "2009-10",
+ cchs2011_2012_p = "2011-12", cchs2013_2014_p = "2013-14",
+ cchs2015_2016_p = "2015-16", cchs2017_2018_p = "2017-18",
+ cchs2019_2020_p = "2019-20", cchs2022_p = "2022"
+)
+# Master coverage: cchsflow v3 uses _m suffix for Master/RDC files.
+# Legacy _i (ICES) and _s (share) suffixes were removed in v3.
+master_cycle_map <- c(
+ cchs2001_m = "2001", cchs2003_m = "2003", cchs2005_m = "2005",
+ cchs2007_2008_m = "2007-08", cchs2009_2010_m = "2009-10",
+ cchs2011_2012_m = "2011-12", cchs2013_2014_m = "2013-14",
+ cchs2015_2016_m = "2015-16", cchs2017_2018_m = "2017-18",
+ cchs2019_2020_m = "2019-20", cchs2021_m = "2021",
+ cchs2022_m = "2022", cchs2023_m = "2023"
+)
+
+# --- helper: extract variable names from variableStart notation ---
+# Returns list(pumf = "VARNAME", master = "MASTERNAME or same")
+extract_var_names <- function(var_name, variable_start) {
+ if (is.na(variable_start) || variable_start == "") return(list(pumf = var_name, master = ""))
+ # fallback name in [brackets] is the canonical (master-file) name
+ fallback <- regmatches(variable_start, regexpr("\\[([A-Za-z0-9_]+)\\]", variable_start))
+ fallback <- if (length(fallback) > 0) gsub("\\[|\\]", "", fallback) else var_name
+ # if fallback differs from var_name, it's the master variable name
+ master_name <- if (fallback != var_name) fallback else ""
+ list(pumf = var_name, master = master_name)
+}
+
+# --- helper: get cycle coverage from variable_details ---
+# Returns list(pumf = "all" or "2001, 2003 ...", master = "all" or "...")
+get_cycle_coverage <- function(var_name, variable_details_sheet) {
+ rows <- variable_details_sheet[variable_details_sheet$variable == var_name, ]
+ if (nrow(rows) == 0) return(list(pumf = "—", master = "—"))
+
+ # expand databaseStart (comma-separated dataset names) across all rows
+ all_datasets <- unlist(strsplit(as.character(rows$databaseStart), ",\\s*"))
+ all_datasets <- trimws(all_datasets)
+
+ pumf_yrs <- unique(pumf_cycle_map[all_datasets[all_datasets %in% names(pumf_cycle_map)]])
+ master_yrs <- unique(master_cycle_map[all_datasets[all_datasets %in% names(master_cycle_map)]])
+ pumf_yrs <- sort(pumf_yrs[!is.na(pumf_yrs)])
+ master_yrs <- sort(master_yrs[!is.na(master_yrs)])
+
+ pumf_str <- if (length(pumf_yrs) == 0) "—"
+ else if (identical(pumf_yrs, all_pumf_cycles)) "all 11 cycles"
+ else paste(pumf_yrs, collapse = ", ")
+
+ master_str <- if (length(master_yrs) == 0) "—"
+ else paste(master_yrs, collapse = ", ")
+
+ list(pumf = pumf_str, master = master_str)
+}
+
+# --- helper: categories as "recEnd: catLabel" entries joined with <br> (one per HTML line); "" if none ---
+get_categories_html <- function(var_name, variable_details_sheet) {
+  rows <- get_unique_rec_end_rows(variable_details_sheet, var_name, include_NA = FALSE)
+  if (nrow(rows) == 0) return("")
+  pairs <- ifelse(
+    is.na(rows$catLabel) | rows$catLabel == "",
+    as.character(rows$recEnd),  # no label: show the code alone
+    paste0("", rows$recEnd, ": ", rows$catLabel)
+  )
+  paste(pairs, collapse = "<br>")  # <br>, not a raw newline: a newline does not break lines in HTML
+}
+
+# --- helper: continuous range ---
+get_range <- function(var_name, variable_details_sheet) {
+ rows <- variable_details_sheet[variable_details_sheet$variable == var_name, ]
+ rows <- rows[!grepl("^NA::", as.character(rows$recEnd)), ]
+ if (nrow(rows) == 0) return("")
+ ends <- suppressWarnings(as.numeric(as.character(rows$recEnd)))
+ ends <- ends[!is.na(ends)]
+ if (length(ends) == 0) return("")
+ paste0(min(ends), "–", max(ends))
+}
+
+# --- helper: missing data codes as "recEnd: catLabel" entries joined with <br>; "—" when there are none ---
+get_na_codes_html <- function(var_name, variable_details_sheet) {
+  rows <- variable_details_sheet[variable_details_sheet$variable == var_name, ]
+  na_rows <- rows[grepl("^NA::", as.character(rows$recEnd)), ]  # keep recEnd values starting with "NA::"
+  if (nrow(na_rows) == 0) return("—")
+  codes <- paste0("", na_rows$recEnd, ": ", na_rows$catLabel)
+  paste(unique(codes), collapse = "<br>")  # <br>, not a raw newline: a newline does not break lines in HTML
+}
+
+# --- helper: cycle coverage as named vectors for matrix table ---
+# Returns list(pumf = named logical, master = named logical) over all_cycles.
+# master_var_name: when the Master file uses a different variable name (e.g.,
+# DHH_AGE for PUMF's DHHGAGE_cont), pass it here to look up Master coverage
+# from that variable's rows in variable_details_sheet.
+all_cycles <- c("2001", "2003", "2005", "2007-08", "2009-10", "2011-12", "2013-14",
+                "2015-16", "2017-18", "2019-20", "2021", "2022", "2023")  # 2021 is mapped by master_cycle_map
+
+get_cycle_coverage_vec <- function(var_name, variable_details_sheet,
+ master_var_name = NULL) {
+ # PUMF coverage: always from the primary variable name
+ rows <- variable_details_sheet[variable_details_sheet$variable == var_name, ]
+ if (nrow(rows) == 0) {
+ pumf_v <- setNames(rep(FALSE, length(all_cycles)), all_cycles)
+ } else {
+ all_datasets <- trimws(unlist(strsplit(as.character(rows$databaseStart), ",\\s*")))
+ pumf_yrs <- unname(pumf_cycle_map[all_datasets[all_datasets %in% names(pumf_cycle_map)]])
+ pumf_v <- setNames(all_cycles %in% pumf_yrs, all_cycles)
+ }
+
+ # Master coverage: use master_var_name if provided, otherwise same variable
+ master_lookup <- if (!is.null(master_var_name) && master_var_name != var_name) {
+ master_var_name
+ } else {
+ var_name
+ }
+ m_rows <- variable_details_sheet[variable_details_sheet$variable == master_lookup, ]
+ if (nrow(m_rows) == 0) {
+ # Also check primary variable rows for _m datasets (variables with same name in both)
+ m_rows <- rows
+ }
+ if (nrow(m_rows) == 0) {
+ master_v <- setNames(rep(FALSE, length(all_cycles)), all_cycles)
+ } else {
+ m_datasets <- trimws(unlist(strsplit(as.character(m_rows$databaseStart), ",\\s*")))
+ master_yrs <- unname(master_cycle_map[m_datasets[m_datasets %in% names(master_cycle_map)]])
+ master_v <- setNames(all_cycles %in% master_yrs, all_cycles)
+ }
+
+ list(pumf = pumf_v, master = master_v)
+}
+
+# --- build config key → pumf/master variable mapping from cfg$survey ---
+# Scalar entries (e.g. cycle) are skipped; only pumf/master list entries are kept.
+# Each source entry is now a list with $var, $min, $max (a bare string is also accepted).
+`%||%` <- function(x, y) if (is.null(x)) y else x  # NULL-coalescing helper
+survey_cfg <- cfg$survey
+survey_map <- do.call(rbind, lapply(names(survey_cfg), function(key) {
+  entry <- survey_cfg[[key]]
+  if (!is.list(entry)) return(NULL) # skip scalars like cycle
+  pumf <- entry[["pumf"]]
+  master <- entry[["master"]]
+  data.frame(
+    config_key = key,
+    pumf_varname = (if (is.list(pumf)) pumf[["var"]] else pumf) %||% NA_character_,  # NA if absent or no $var
+    master_varname = (if (is.list(master)) master[["var"]] else master) %||% NA_character_,  # NULL would drop the column
+    stringsAsFactors = FALSE
+  )
+}))
+```
+
+## Study data dictionary
+
+Each row is one variable. The **Config key: CCHS variable** column shows the `config.yml` key paired with the PUMF variable name; where the Master file uses a different name it is shown on a second line. Categories and ranges are from `variable_details_sheet`.
+
+^[Missing data codes apply uniformly to all variables: **NA(a)** = not applicable; **NA(b)** = don't know or refused; **NA(c)** = not asked in this cycle.]
+
+```{r data-dictionary}
+#| results: asis
+design_vars <- variables_sheet$variable[grepl("design", variables_sheet$role)]
+
+dict <- variables_sheet |>  # one display row per non-design variable
+  filter(!variable %in% design_vars) |>
+  rowwise() |>
+  mutate(
+    config_key = {  # config.yml key whose PUMF variable is this row's variable ("" if none)
+      hit <- survey_map$config_key[!is.na(survey_map$pumf_varname) &
+                                   survey_map$pumf_varname == variable]
+      if (length(hit) > 0) hit[1] else ""
+    },
+    master_name = {  # Master variable name, only when it differs from the PUMF name
+      hit <- survey_map$master_varname[!is.na(survey_map$pumf_varname) &
+                                       survey_map$pumf_varname == variable]
+      if (length(hit) > 0 && !is.na(hit[1]) && hit[1] != variable) hit[1] else ""
+    },
+    key_var = {
+      key <- config_key
+      pumf_part <- if (key != "") paste0(key, ": ", variable) else variable
+      if (master_name != "" && master_name != variable)
+        paste0(pumf_part, "<br>Master: ", master_name, "")  # <br> puts the Master name on a second line
+      else
+        pumf_part
+    },
+    values = if (variableType == "Categorical")
+      get_categories_html(variable, variable_details_sheet)
+    else
+      get_range(variable, variable_details_sheet)
+  ) |>
+  ungroup() |>
+  select(section, key_var, label, variableType, units, values)
+
+sections <- unique(dict$section)
+sections <- sections[!is.na(sections)]
+
+for (sec in sections) {
+ cat("\n\n###", sec, "\n\n")
+ tbl <- dict |>
+ filter(section == sec) |>
+ select(-section) |>
+ rename(
+ `Config key: CCHS variable` = key_var,
+ Label = label,
+ Type = variableType,
+ Units = units,
+ `Values / range` = values
+ )
+ print(kable(tbl, format = "html", escape = FALSE,
+ table.attr = 'class="table table-sm table-striped"'))
+}
+```
+
+## Cycle coverage
+
+Each cell shows whether the variable is available in that survey cycle. **P** = PUMF, **M** = Master, **P+M** = both. Blank = not available. Gaps indicate cycles where the variable was not asked or not yet harmonized.
+
+```{r cycle-coverage}
+#| results: asis
+# design_vars already defined in data-dictionary chunk above
+cov_rows <- variables_sheet |>
+ filter(!variable %in% design_vars) |>
+ rowwise() |>
+ mutate(
+ config_key = {
+ hit <- survey_map$config_key[!is.na(survey_map$pumf_varname) &
+ survey_map$pumf_varname == variable]
+ if (length(hit) > 0) hit[1] else ""
+ },
+ master_var = {
+ hit <- survey_map$master_varname[!is.na(survey_map$pumf_varname) &
+ survey_map$pumf_varname == variable]
+ if (length(hit) > 0 && !is.na(hit[1]) && hit[1] != "") hit[1] else NA_character_
+ },
+ cov = list(get_cycle_coverage_vec(
+ variable, variable_details_sheet,
+ master_var_name = if (is.na(master_var)) NULL else master_var
+ ))
+ ) |>
+ ungroup()
+
+sections <- unique(cov_rows$section)
+sections <- sections[!is.na(sections)]
+
+for (sec in sections) {
+ cat("\n\n###", sec, "\n\n")
+ sec_rows <- cov_rows[!is.na(cov_rows$section) & cov_rows$section == sec, ]
+
+ mat <- do.call(rbind, lapply(seq_len(nrow(sec_rows)), function(i) {
+ cv <- sec_rows$cov[[i]]
+ sapply(all_cycles, function(yr) {
+ p <- cv$pumf[yr]
+ m <- cv$master[yr]
+ if (isTRUE(p) && isTRUE(m)) "P+M"
+ else if (isTRUE(p)) "P"
+ else if (isTRUE(m)) "M"
+ else ""
+ })
+ }))
+
+ tbl <- data.frame(
+ `Config key` = sec_rows$config_key,
+ Label = sec_rows$label,
+ mat,
+ check.names = FALSE,
+ stringsAsFactors = FALSE
+ )
+ colnames(tbl)[1:2] <- c("Config key", "Label")
+
+ print(kable(tbl, format = "html", escape = FALSE,
+ table.attr = 'class="table table-sm table-striped"'))
+}
+```
+
+## Variable roles
+
+Each variable carries one or more **roles** — a comma-separated label in `variables_sheet` that tells the pipeline how to use that variable. Roles allow downstream stages to select exactly the variables they need without any hard-coding.
+
+```{r role-table}
+# Role definitions
+role_defs <- data.frame(
+ Role = c("design", "intermediate", "predictor", "model-stratifier",
+ "table1", "table1-stratifier",
+ "apc-numerator", "apc-denominator",
+ "imputation-predictor", "sensitivity-analysis"),
+ Group = c("Survey design", "Harmonization", "Model", "Model",
+ "Descriptive", "Descriptive",
+ "APC data prep", "APC data prep",
+ "Imputation", "Analysis"),
+ Purpose = c(
+ "Survey infrastructure — cycle identifier, sampling weights",
+ "Raw cchsflow input used to derive a unified variable; not used directly by pipeline code",
+ "Covariate in the APC model or descriptive analysis",
+ "Stratifies APC models into separate fits (e.g., sex)",
+ "Appears as a row in Table 1 descriptive statistics",
+ "Stratifies Table 1 columns (e.g., sex columns)",
+ "Defines the event indicator for APC numerator cells (Stage 7)",
+ "Constructs the at-risk person-year denominator (Stage 7)",
+ "Included in the MICE imputation model (Stage 5)",
+ "Used in sensitivity analyses only"
+ ),
+ stringsAsFactors = FALSE
+)
+kable(role_defs, format = "html",
+ table.attr = 'class="table table-sm table-striped"')
+```
+
+```{r role-variable-crosswalk}
+#| results: asis
+# Which variables carry which roles?
+cat("\n\n### Variables by role\n\n")
+
+# Expand multi-role rows
+role_rows <- lapply(seq_len(nrow(variables_sheet)), function(i) {
+ roles <- trimws(strsplit(variables_sheet$role[i], ",")[[1]])
+ data.frame(role = roles,
+ variable = variables_sheet$variable[i],
+ label = variables_sheet$label[i],
+ source = variables_sheet$source[i],
+ stringsAsFactors = FALSE)
+})
+role_expanded <- do.call(rbind, role_rows)
+
+for (r in sort(unique(role_expanded$role))) {
+ cat("\n\n**`", r, "`**\n\n", sep = "")
+ vars <- role_expanded[role_expanded$role == r, c("variable", "label", "source")]
+ print(kable(vars, format = "html", row.names = FALSE,
+ table.attr = 'class="table table-sm"'))
+}
+```
+
+## Key decisions
+
+**Separation of definition from code.** Variable names, types, categories, and roles are all stored in plain-text CSVs. R code never contains a bare CCHS variable name — it always reads from `survey_var(cfg, "key")` (for key variables) or filters `variables_sheet` by role. This makes the pipeline adaptable to other surveys.
+
+**PUMF vs Master.** The `source` column (`pumf` / `master` / `both`) controls which variables are loaded in each computing environment. `load_study_data()` filters at load time — PUMF variables only in the open environment; Master variables at the Statistics Canada RDC.
+
+**Extension rows.** cchsflow v3 now covers age and sex (`DHH_SEX`, `DHHGAGE_cont`) for all 2001–2023 cycles, so their former extension rows have been removed. The CSHM extension rows (`worksheets/cshm-variable-details.csv`) still add recoding rules for `GEOGPRV` and `WTS_M` for the 2019–20 and 2022 cycles, which cchsflow v3 does not yet harmonize. As cchsflow extends its coverage, the remaining extension rows can be removed.
+
+**Missing data codes.** The CSHM uses `haven::tagged_na()` throughout, preserving the reason for missingness: `NA(a)` = not applicable, `NA(b)` = don't know or refused, `NA(c)` = not asked in this cycle. These are carried through all pipeline stages and used in imputation (Stage 5).
+
+## Related resources
+
+- [Stage 2 — Data loading](2-data-loading.qmd)
+- [Variable reference](../reference/variables.qmd)
+- [How to adapt variables for a different survey](../how-to/custom-models.qmd)
+- `schemas/cshm-variables.yaml` — role vocabulary (LinkML schema)
diff --git a/docs/workflow/2-data-loading.qmd b/docs/workflow/2-data-loading.qmd
new file mode 100644
index 0000000..5530c64
--- /dev/null
+++ b/docs/workflow/2-data-loading.qmd
@@ -0,0 +1,72 @@
+---
+title: "Stage 2 — Data loading"
+subtitle: "Loading and harmonizing 11 CCHS survey cycles"
+---
+
+## Purpose
+
+The Canadian Community Health Survey (CCHS) has been conducted annually or biennially since 2001. Each cycle is released as a separate file with slightly different variable names, response categories, and coding conventions. This stage loads all 11 PUMF cycles and harmonizes them into a single consistent data frame using the [`cchsflow`](https://github.com/Big-Life-Lab/cchsflow) R package.
+
+Harmonization is the most complex and fragile step in the pipeline. This QMD walks through what happens, why certain decisions were made, and what to watch for when extending the pipeline to new cycles.
+
+## Inputs
+
+| Target | Description |
+|--------|-------------|
+| `variables_sheet` | Study variable list (Stage 1) |
+| `variable_details_sheet` | Recoding rules (Stage 1) |
+
+Raw `.RData` files are read directly from `cfg$raw_data_dir` (not a pipeline target).
+
+## Code
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+cfg <- config::get(file = "config.yml")
+```
+
+```{r load-study-data}
+study_data <- targets::tar_read("study_data")
+```
+
+```{r inspect}
+# Total respondents across all cycles
+nrow(study_data)
+
+# Respondents per cycle
+table(study_data[[survey_var(cfg, "cycle")]])
+
+# Variable completeness for key smoking variables
+key_vars <- c(survey_var(cfg, "smoking_status"), survey_var(cfg, "age_first_cigarette"),
+ survey_var(cfg, "years_since_quit"))
+sapply(study_data[, key_vars], function(x) mean(!is.na(x)))
+```
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `study_data` | Combined harmonized data frame (~1.4 million rows × study variables) |
+
+## Key decisions
+
+**Variable filtering by source.** `load_study_data()` filters `variables_sheet` to rows where `source %in% c(cfg$data_source, "both")` before passing them to `rec_with_table()`. As a result, the development environment loads only variables marked `pumf` or `both`, and the RDC loads only variables marked `master` or `both`. The `survey_var(cfg, "cycle")` variable is excluded from this filter because it is derived manually (not recoded by cchsflow).
+
+**Tibble → data.frame conversion.** `cchsflow::rec_with_table()` requires a plain `data.frame`. When input is a tibble, column subsetting with logical indices and `recEnd = "copy"` crashes with "list cannot be coerced to double". The fix (`as.data.frame(raw_data)`) is applied before every `rec_with_table()` call.
+
+**Survey cycle coding.** `SurveyCycle` is added as an integer (1–11) after harmonization. The lookup table in `survey_cycle_code()` maps cchsflow dataset names (e.g., `cchs2013_2014_p`) to integer codes. Integer codes are used throughout for compactness; `cfg$cycle_labels` maps them to display strings for tables and plots.
+
+**cchsflow-data release naming.** The `draft` config profile uses the `cchsflow-data` GitHub release files, which use `CCHS_2001.RData` naming with an internal object called `table` (not `cchs2001_p`). The `raw_data_file_map` config section handles this naming difference; the pipeline supports both conventions.
+
+**Missing variables by cycle.** Many sociodemographic variables are only available in some cycles:
+- `SDCFIMM` (immigration status): 2007–2014 only
+- `EDUDR03`: not available in 2022
+- `SDC_RACEM` (ethnicity): PUMF encodes as binary white/non-white; Master has more categories
+
+These variables will be `NA` for cycles where they are absent. This is expected and handled by the imputation stage.
+
+## Related resources
+
+- [How to load and harmonize CCHS data](../how-to/data-loading-and-harmonizing.qmd)
+- [cchsflow documentation](https://github.com/Big-Life-Lab/cchsflow)
diff --git a/docs/workflow/3-data-cleaning.qmd b/docs/workflow/3-data-cleaning.qmd
new file mode 100644
index 0000000..f8c5587
--- /dev/null
+++ b/docs/workflow/3-data-cleaning.qmd
@@ -0,0 +1,85 @@
+---
+title: "Stage 3 — Data cleaning"
+subtitle: "Age restriction, distribution checks, and outlier truncation"
+---
+
+## Purpose
+
+After harmonization, the data needs two types of cleaning before analysis:
+
+1. **Age restriction** — the APC model is calibrated to adults. Respondents aged 12–17 (the youngest CCHS age group) are excluded because their smoking behaviour is systematically different and their age is recorded only as a group midpoint in the PUMF.
+
+2. **Distribution checks and truncation** — continuous predictors with high skewness (|skewness| ≥ 1) are truncated at the 99th percentile. This prevents extreme values from exerting undue leverage on the APC model while preserving the bulk of the distribution.
+
+Tagged NAs (from `haven::tagged_na()`) are preserved throughout — they carry information about *why* a value is missing (`NA(a)` = not applicable, `NA(b)` = don't know/refused, `NA(c)` = not asked this cycle).
+
+## Inputs
+
+| Target | Description |
+|--------|-------------|
+| `study_data` | Harmonized combined data frame (Stage 2) |
+| `variables_sheet` | Variable metadata — identifies continuous predictors by `variableType == "Continuous"` |
+
+Configuration parameters used:
+
+| Parameter | Location | Value | Meaning |
+|-----------|----------|-------|---------|
+| `age_exclusion_category` | `config.yml` | `1` | Age group code for 12–17 year olds |
+| `age_grouped` | `config.yml survey:` | `survey_var(cfg, "age_grouped")` | Grouped age variable name |
+| `skewness_threshold` | `config.yml` | `1` | \|skewness\| threshold triggering truncation |
+| `truncate_percentile` | `config.yml` | `99` | Upper percentile cap |
+
+## Code
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+cfg <- config::get(file = "config.yml")
+```
+
+```{r load-cleaned-data}
+cleaned_data <- targets::tar_read("cleaned_data")
+study_data <- targets::tar_read("study_data")
+```
+
+```{r age-restriction}
+# Respondents excluded by age restriction
+cat("Before cleaning:", nrow(study_data), "\n")
+cat("After cleaning: ", nrow(cleaned_data), "\n")
+cat("Excluded: ", nrow(study_data) - nrow(cleaned_data), "\n")
+```
+
+```{r skewness-check}
+# The skewness summary is printed during pipeline execution
+# To reproduce:
+variables_sheet <- targets::tar_read("variables_sheet")
+source("R/data-cleaning.R")
+
+continuous_predictors <- variables_sheet$variable[
+ variables_sheet$variableType == "Continuous" &
+ variables_sheet$role %in% c("predictor", "model-stratifier")
+]
+continuous_predictors <- intersect(continuous_predictors, colnames(study_data))
+check_skewness(study_data, continuous_predictors, cfg$skewness_threshold)$summary
+```
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `cleaned_data` | Data frame with 12–17 year olds excluded and skewed variables truncated |
+
+## Key decisions
+
+**Age group 1 = 12–17.** In CCHS PUMF, the grouped age variable (`survey_var(cfg, "age_grouped")`) codes the youngest age group as `1`. The config parameter `age_exclusion_category: 1` encodes this decision explicitly so it can be changed without touching R code (e.g., if applying the model to a survey where the youngest group has a different code).
+
+**Skewness formula.** `check_skewness()` uses the adjusted Fisher–Pearson standardized moment coefficient (the bias-corrected sample skewness, equivalent to `e1071::skewness(x, type = 2)`), which matches SAS's default. This ensures consistency when comparing results to the original SAS implementation.
+
+**99th percentile truncation, not log-transformation.** Log-transformation is common for skewed predictors but changes the interpretation of model coefficients. Truncation at the 99th percentile retains the original scale while capping extreme values that are likely measurement artifacts (e.g., implausibly high cigarettes-per-day values in the PUMF midpoint imputation).
+
+**Tagged NAs pass through.** `dplyr::if_else()` with `missing = x` ensures that `haven::tagged_na` values are not coerced to regular `NA` during truncation. This preserves missingness reason codes for the imputation stage.
+
+## Related resources
+
+- [How to process CCHS data](../how-to/processing-cchs-data.qmd)
+- `R/data-cleaning.R` — `clean_study_data()`, `check_skewness()`, `truncate_continuous()`
diff --git a/docs/workflow/4-descriptive-statistics.qmd b/docs/workflow/4-descriptive-statistics.qmd
new file mode 100644
index 0000000..9f3685c
--- /dev/null
+++ b/docs/workflow/4-descriptive-statistics.qmd
@@ -0,0 +1,72 @@
+---
+title: "Stage 4 — Descriptive statistics (pre-imputation)"
+subtitle: "Table 1a: study population characteristics before imputation"
+---
+
+## Purpose
+
+Before imputation, we describe the study population to document who is in the analysis, how complete the key variables are, and whether the sample composition is consistent across survey cycles. This produces the data underlying **Table 1a** in the manuscript.
+
+Generating descriptive statistics *before* imputation is important for transparency: it shows the actual observed data, including missingness patterns, rather than the imputed values. The post-imputation table (Stage 6) can then be compared to show what imputation changed.
+
+## Inputs
+
+| Target | Description |
+|--------|-------------|
+| `cleaned_data` | Age-restricted, truncated data frame (Stage 3) |
+| `variables_sheet` | Variable metadata — `table1` role selects rows for Table 1 |
+| `variable_details_sheet` | Recoding rules — provides value labels for categorical variables |
+
+## Code
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+cfg <- config::get(file = "config.yml")
+```
+
+```{r load-table-data}
+table_1a_data <- targets::tar_read("table_1a_data")
+variables_sheet <- targets::tar_read("variables_sheet")
+variable_details_sheet <- targets::tar_read("variable_details_sheet")
+```
+
+```{r render-table-1a}
+# Reproduce Table 1a
+source("R/variables-sheet-utils.R")
+source("R/variable-details-sheet-utils.R")
+source("R/get-descriptive-data.R")
+source("R/create-descriptive-tables.R")
+
+predictor_vars <- select_vars_by_role("predictor", variables_sheet)
+
+create_descriptive_table(
+ descriptive_data = table_1a_data,
+ variables_sheet = variables_sheet,
+ variable_details_sheet = variable_details_sheet,
+ variables = predictor_vars,
+ column_stratifier = survey_var(cfg, "sex"),
+ sections_order = c("Sociodemographics", "Health behaviour")
+)
+```
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `table_1a_data` | Named list of descriptive statistics: means, SDs, proportions, N by variable and sex |
+
+## Key decisions
+
+**Sex stratification.** Table 1 is stratified by `survey_var(cfg, "sex")`. This is consistent with the APC models, which are fit separately by sex. The config abstraction means the table code does not contain any hardcoded CCHS variable names.
+
+**Role filtering.** Only variables with `role` containing `"predictor"` appear in Table 1. Variables with `role = "design"` (e.g., survey weights, cycle) or `role = "intermediate"` (raw cchsflow inputs) are excluded. The `select_vars_by_role()` function handles comma-separated multi-role entries correctly.
+
+**Section ordering.** `sections_order` controls the vertical ordering of variable groups in the table. The `section` column in `variables_sheet` assigns each variable to a group (e.g., "Sociodemographics", "Health behaviour"). Groups not listed in `sections_order` appear after the listed ones.
+
+## Related resources
+
+- [Stage 6 — Post-imputation descriptives](6-descriptive-post-imputation.qmd)
+- `docs/results/table-1.qmd` — rendered Table 1 output (reads from pipeline cache)
+- `R/get-descriptive-data.R` — `get_cshm_desc_data()`
+- `R/create-descriptive-tables.R` — `create_descriptive_table()`
diff --git a/docs/workflow/5-imputation.qmd b/docs/workflow/5-imputation.qmd
new file mode 100644
index 0000000..905d91d
--- /dev/null
+++ b/docs/workflow/5-imputation.qmd
@@ -0,0 +1,77 @@
+---
+title: "Stage 5 — Multiple imputation"
+subtitle: "Handling missing data with MICE"
+---
+
+## Purpose
+
+The CCHS has substantial missingness in sociodemographic variables, primarily because some variables are only collected in certain cycles (e.g., immigration status is absent from 2019–20 and 2022) or were refused/unknown by some respondents. Complete-case analysis would discard a large fraction of respondents and introduce selection bias.
+
+Multiple imputation by chained equations (MICE) fills in missing values by iteratively fitting a model for each incomplete variable given all other variables. The result is `m = 5` complete datasets, which are later combined using Rubin's rules. This stage produces `analysis_data` — the primary input for all downstream modelling.
+
+## Inputs
+
+| Target | Description |
+|--------|-------------|
+| `cleaned_data` | Age-restricted, truncated data frame (Stage 3) |
+| `variables_sheet` | Variable metadata — `imputation-predictor` role identifies variables for the MICE model |
+
+Configuration parameters:
+
+| Parameter | Value | Meaning |
+|-----------|-------|---------|
+| `imputation_m` | 5 | Number of imputed datasets |
+| `imputation_maxit` | 5 | MICE iterations per imputation |
+
+## Code
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+cfg <- config::get(file = "config.yml")
+```
+
+```{r load-analysis-data}
+# analysis_data is a mids object (multiple imputation dataset)
+analysis_data <- targets::tar_read("analysis_data")
+```
+
+```{r inspect-imputation}
+# Summary of imputation
+print(analysis_data)
+
+# Missingness before imputation
+cleaned_data <- targets::tar_read("cleaned_data")
+variables_sheet <- targets::tar_read("variables_sheet")
+
+source("R/variables-sheet-utils.R")
+imputation_vars <- select_vars_by_role("imputation-predictor", variables_sheet)
+imputation_vars <- intersect(imputation_vars, colnames(cleaned_data))
+
+# Proportion missing per variable
+sort(sapply(cleaned_data[, imputation_vars], function(x) mean(is.na(x))), decreasing = TRUE)
+```
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `analysis_data` | `mids` object (mice): `m = 5` imputed datasets, stored as `data/analysis_data.rds` |
+
+`analysis_data.rds` is the **primary reproducibility artefact** — sharing it allows others to reproduce all downstream analyses without access to the raw CCHS files.
+
+## Key decisions
+
+**MICE over single imputation.** Single imputation (e.g., mean substitution) underestimates variability and produces overconfident standard errors. MICE preserves uncertainty about missing values by propagating it across `m` imputed datasets and combining results with Rubin's rules.
+
+**Imputation predictor selection.** Only variables with `role` containing `"imputation-predictor"` enter the MICE model. This excludes variables derived from others (e.g., `smoking_status` is derived from raw smoking items) and APC-specific construction variables, which would create circular dependencies.
+
+**`m = 1` in dev/draft profiles.** The `dev` and `draft` config profiles use `imputation_m: 1` and `imputation_maxit: 1` to speed up iteration. The production run uses `m = 5, maxit = 5` per the protocol.
+
+**Cycle-specific missingness.** Some variables (e.g., immigration status, ethnicity) are structurally absent from certain CCHS cycles. These are `NA(c)` (tagged NA: not asked this cycle) in the harmonized data. MICE treats these as missing and imputes them — this is appropriate because the variable *would* have had a value if collected. Tagged NA codes are stripped to regular `NA` before MICE is run.
+
+## Related resources
+
+- `R/imputation.R` — `impute_data()`
+- [mice R package](https://www.rdocumentation.org/packages/mice)
+- Protocol §3.3 — missing data strategy
diff --git a/docs/workflow/6-descriptive-post-imputation.qmd b/docs/workflow/6-descriptive-post-imputation.qmd
new file mode 100644
index 0000000..ed18713
--- /dev/null
+++ b/docs/workflow/6-descriptive-post-imputation.qmd
@@ -0,0 +1,67 @@
+---
+title: "Stage 6 — Descriptive statistics (post-imputation)"
+subtitle: "Table 1b: study population characteristics after imputation"
+---
+
+## Purpose
+
+After imputation, we repeat the descriptive statistics on the completed dataset. Comparing Table 1b to Table 1a (Stage 4) allows readers to see what imputation changed: proportions for variables with high missingness will shift, while variables with complete data should be unchanged.
+
+This is standard practice in studies using multiple imputation — reporting both tables provides full transparency about the role of imputation in the analysis.
+
+## Inputs
+
+| Target | Description |
+|--------|-------------|
+| `analysis_data` | `mids` object — 5 imputed datasets (Stage 5) |
+| `variables_sheet` | Variable metadata |
+| `variable_details_sheet` | Recoding rules and value labels |
+
+## Code
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+cfg <- config::get(file = "config.yml")
+```
+
+```{r load-table-data}
+table_1b_data <- targets::tar_read("table_1b_data")
+variables_sheet <- targets::tar_read("variables_sheet")
+variable_details_sheet <- targets::tar_read("variable_details_sheet")
+```
+
+```{r render-table-1b}
+source("R/variables-sheet-utils.R")
+source("R/variable-details-sheet-utils.R")
+source("R/get-descriptive-data.R")
+source("R/create-descriptive-tables.R")
+
+predictor_vars <- select_vars_by_role("predictor", variables_sheet)
+
+create_descriptive_table(
+ descriptive_data = table_1b_data,
+ variables_sheet = variables_sheet,
+ variable_details_sheet = variable_details_sheet,
+ variables = predictor_vars,
+ column_stratifier = survey_var(cfg, "sex"),
+ sections_order = c("Sociodemographics", "Health behaviour")
+)
+```
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `table_1b_data` | Named list of post-imputation descriptive statistics |
+
+## Key decisions
+
+**Pooled over imputations.** `get_cshm_desc_data()` pools descriptive statistics across the `m = 5` imputed datasets using Rubin's rules for means and proportions. Standard deviations reflect within-imputation variability only (between-imputation variability in descriptives is negligible for reporting purposes).
+
+**Same structure as Table 1a.** The same `create_descriptive_table()` call is used for both tables, ensuring the rows and columns are identical and differences are directly comparable.
+
+## Related resources
+
+- [Stage 4 — Pre-imputation descriptives](4-descriptive-statistics.qmd)
+- `docs/results/table-1.qmd` — rendered Tables 1a and 1b side by side
diff --git a/docs/workflow/7-apc-data-preparation.qmd b/docs/workflow/7-apc-data-preparation.qmd
new file mode 100644
index 0000000..bef5fdd
--- /dev/null
+++ b/docs/workflow/7-apc-data-preparation.qmd
@@ -0,0 +1,98 @@
+---
+title: "Stage 7 — APC data preparation"
+subtitle: "Building numerator and denominator cells for the Age-Period-Cohort models"
+---
+
+## Purpose
+
+The APC models are logistic regressions where each row represents a *person-year at risk*. Before fitting the models, we need to convert the cross-sectional CCHS data into this person-year format by reconstructing each respondent's smoking history up to their survey date, then computing the denominator (person-years at risk of initiating or quitting) and numerator (events: initiations and cessations).
+
+This is the most computationally expensive step. By keeping it as a separate target from Stage 8, `{targets}` can cache the APC dataset independently — changing model parameters (knots, constraints) only reruns Stage 8, not Stage 7.
+
+## Inputs
+
+| Target | Description |
+|--------|-------------|
+| `analysis_data` | Imputed dataset (Stage 5) |
+
+Configuration used:
+
+| Parameter | Location | Description |
+|-----------|----------|-------------|
+| `survey$age` | `config.yml` | Continuous age variable |
+| `survey$sex` | `config.yml` | Sex variable |
+| `survey$cycle` | `config.yml` | Survey cycle integer |
+| `survey$weight` | `config.yml` | Sampling weight |
+| `survey$smoking_status` | `config.yml` | 6-category smoking status |
+| `survey$age_first_cigarette` | `config.yml` | Age at first whole cigarette |
+| `survey$years_since_quit` | `config.yml` | Years since quitting (former daily smokers) |
+| `survey_bound(cfg, "age_first_cigarette", "min")` | `config.yml` | APC floor for initiation age (PUMF: 13, Master: 8) |
+| `survey_bound(cfg, "years_since_quit", "min")` | `config.yml` | APC floor for cessation years since quit (0) |
+| `apc$age_knots` | `config.yml` | `[10, 15, 20, 50, 60]` |
+| `apc$period_knots` | `config.yml` | `[1940, 1950, 1960, 1970, 1980]` |
+| `apc$cohort_knots` | `config.yml` | `[1930, 1940, 1945, 1950, 1955, 1960, 1965, 1970, 1975, 1980]` |
+
+## Code
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+cfg <- config::get(file = "config.yml")
+```
+
+```{r load-apc-data}
+apc_data <- targets::tar_read("apc_data")
+# apc_data is a list: $initiation_men, $initiation_women,
+# $cessation_men, $cessation_women
+names(apc_data)
+```
+
+```{r inspect-apc-data}
+# Initiation data: one row per person-year at risk of initiating
+nrow(apc_data$initiation_men)
+nrow(apc_data$initiation_women)
+
+# Cessation data: one row per person-year at risk of quitting
+nrow(apc_data$cessation_men)
+nrow(apc_data$cessation_women)
+
+# Event rates
+mean(apc_data$initiation_men$init)
+mean(apc_data$cessation_men$init)
+```
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `apc_data` | Named list with four data frames: `initiation_men`, `initiation_women`, `cessation_men`, `cessation_women` |
+
+Each data frame contains:
+
+| Column | Description |
+|--------|-------------|
+| `age` | Age at risk (years) |
+| `period` | Calendar year |
+| `cohort` | Birth year (`period - age`) |
+| `init` | Event indicator (1 = initiated/quit, 0 = at risk but no event) |
+| `weighting` | Survey weight × mortality correction |
+| Natural spline columns | Spline basis for age, period, cohort effects |
+
+## Key decisions
+
+**APC identity: `cohort = period − age`.** This fundamental identity is the basis for the APC framework. Each respondent's smoking history is reconstructed year-by-year using their reported age at initiation/cessation and the survey year. Cohort is not observed independently — it is computed from period and age — so the linear components of the three effects cannot be separated without the identification constraints applied in Stage 8.
+
+**Retrospective history construction.** A current smoker at age 45 in survey year 2014 is recorded as a person-year at risk of cessation at every age from their initiation age to 45. This reconstruction assumes respondents accurately recall their age of initiation and cessation (supported by CCHS validation studies).
+
+**Mortality adjustment.** Ever-smokers are less likely to survive to survey date than never-smokers, creating survival bias. The `weighting` column incorporates a mortality correction. The primary method is MPoRT (not yet implemented); during development, the Peto constant risk ratio is used as a fallback (see `cfg$apc$mortality_method`).
+
+**APC floor ages.** Initiation probability is assumed zero before `survey_bound(cfg, "age_first_cigarette", "min")` (PUMF: 13, Master: 8). Cessation probability is assumed zero before `survey_bound(cfg, "years_since_quit", "min")` (0). In PUMF data, the practical floor for observed initiation is ~13 due to midpoint imputation of grouped categories. The Master analytical floor of 8 captures genuine early initiations.
+
+**Four separate data frames, not one.** Men and women are modelled separately (consistent with Manuel et al. 2020). Initiation and cessation are separate models. Keeping four data frames lets Stage 8 fit each model as its own `{targets}` target, so the four fits are cached independently and can run in parallel.
+
+## Related resources
+
+- [Stage 8 — APC model fitting](8-apc-model.qmd)
+- [APC methodology explanation](../explanation/apc-method.qmd)
+- `R/apc-model.R` — `prepare_apc_data()`, `build_initiation_data()`, `build_cessation_data()`
+- Protocol §3.4 — analytical approach
diff --git a/docs/workflow/8-apc-model.qmd b/docs/workflow/8-apc-model.qmd
new file mode 100644
index 0000000..cb582eb
--- /dev/null
+++ b/docs/workflow/8-apc-model.qmd
@@ -0,0 +1,104 @@
+---
+title: "Stage 8 — APC model fitting"
+subtitle: "Fitting the Age-Period-Cohort logistic regression models"
+---
+
+## Purpose
+
+This stage fits four logistic regression models — initiation and cessation, separately for men and women — using the person-year data from Stage 7. Each model estimates how the probability of initiating or quitting smoking varies by age, calendar period, and birth cohort, after accounting for the APC identification constraint.
+
+The models use constrained natural cubic splines (Holford knot structure) with period and cohort effects held constant beyond the observed data range. This produces smooth, interpretable rate curves that can be projected forward to 2050.
+
+## Inputs
+
+| Target | Description |
+|--------|-------------|
+| `apc_data$initiation_men` | Person-year initiation data, men (Stage 7) |
+| `apc_data$initiation_women` | Person-year initiation data, women (Stage 7) |
+| `apc_data$cessation_men` | Person-year cessation data, men (Stage 7) |
+| `apc_data$cessation_women` | Person-year cessation data, women (Stage 7) |
+
+Configuration used:
+
+| Parameter | Value | Description |
+|-----------|-------|-------------|
+| `apc$age_knots` | `[10, 15, 20, 50, 60]` | Natural spline knots for age effect |
+| `apc$period_knots` | `[1940, 1950, 1960, 1970, 1980]` | Natural spline knots for period effect |
+| `apc$cohort_knots` | `[1930, ..., 1980]` | Natural spline knots for cohort effect (10 knots) |
+| `apc$period_constraints$initiation_men_from` | `1999` | Period effect constant from this year (men) |
+| `apc$period_constraints$initiation_women_from` | `2003` | Period effect constant from this year (women) |
+| `apc$period_constraints$cessation_from` | `2013` | Period effect constant from this year |
+| `apc$cohort_constraints$initiation_prior_to` | `1920` | Cohort effect constant before this year |
+| `apc$cohort_constraints$cessation_from` | `1985` | Cohort effect constant from this year |
+| `apc$spline_library` | `"splines2"` | Primary: `splines2::nsp()` |
+| `apc$spline_type` | `"nsp"` | Natural spline type |
+| `apc$mortality_method` | `"peto"` | Current: Peto (development); primary: MPoRT |
+
+## Code
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = normalizePath("../.."))
+cfg <- config::get(file = "config.yml")
+```
+
+```{r load-models}
+model_init_men <- targets::tar_read("apc_model_initiation_men")
+model_init_women <- targets::tar_read("apc_model_initiation_women")
+model_cess_men <- targets::tar_read("apc_model_cessation_men")
+model_cess_women <- targets::tar_read("apc_model_cessation_women")
+```
+
+```{r inspect-models}
+# Model summaries
+summary(model_init_men)$coefficients |> head(10)
+
+# Model fit statistics
+cat("Initiation men AIC: ", AIC(model_init_men), "\n")
+cat("Initiation women AIC: ", AIC(model_init_women), "\n")
+cat("Cessation men AIC: ", AIC(model_cess_men), "\n")
+cat("Cessation women AIC: ", AIC(model_cess_women), "\n")
+```
+
+```{r plot-age-effects, eval=FALSE}
+# Plot estimated age effects (requires rate table generation — Stage 9)
+# Placeholder: will be populated when Stage 9 is implemented
+```
+
+## Outputs
+
+| Target | Description |
+|--------|-------------|
+| `apc_model_initiation_men` | Fitted `glm` object — initiation model, men |
+| `apc_model_initiation_women` | Fitted `glm` object — initiation model, women |
+| `apc_model_cessation_men` | Fitted `glm` object — cessation model, men |
+| `apc_model_cessation_women` | Fitted `glm` object — cessation model, women |
+
+## Key decisions
+
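+**Four independent targets, not one.** Each model is a separate `{targets}` target. This means changing the cessation constraints only reruns the cessation models — not the initiation models. It also allows the four models to be fitted in parallel once parallel workers are configured (e.g., `tar_option_set(controller = crew::crew_controller_local(workers = 4))` in `_targets.R`); a plain `tar_make()` without a controller builds targets one at a time.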
+
+**Natural cubic splines (`nsp`), not restricted cubic splines (`rcs`).** The primary analysis uses `splines2::nsp()` (natural splines with zero second derivatives at the boundary). Restricted cubic splines (`rms::rcs()`) are a prespecified sensitivity analysis (`cfg$apc$spline_type = "rcs"`). Both impose the same APC identification constraint but differ in boundary behaviour.
+
+**Period constraints.** The period effect is held constant beyond the most recent observed data to prevent extrapolation artifacts in projections. Men's initiation period effect is held constant from 1999; women's from 2003 (reflecting the later peak in women's smoking uptake). These constraint years are based on the original Ontario analysis (Manuel et al. 2020).
+
+**Cohort constraint.** The initiation cohort effect is held constant prior to 1920 (small sample, sparse data). The cessation cohort effect is held constant from 1985 forward (insufficient follow-up time for younger cohorts to accumulate cessation history).
+
+**Mortality method: Peto (current).** The primary analysis specifies MPoRT mortality correction. MPoRT is not yet implemented; the Peto constant risk ratio is used during development. A sensitivity analysis compares Peto and MPoRT results (`cfg$apc$mortality_method`).
+
+## Sensitivity analyses
+
+| Analysis | Parameter | Primary | Alternative |
+|----------|-----------|---------|-------------|
+| Spline type | `cfg$apc$spline_type` | `"nsp"` | `"rcs"` |
+| Mortality method | `cfg$apc$mortality_method` | `"mport"` | `"peto"` |
+| Period constraints | `cfg$apc$period_constraints` | See above | Extended to 2003/2007/2015 |
+
+## Related resources
+
+- [Stage 7 — APC data preparation](7-apc-data-preparation.qmd)
+- [APC methodology explanation](../explanation/apc-method.qmd)
+- `R/apc-model.R` — `fit_apc_model()`
+- Protocol §3.4.3 — APC model specification
+- Holford et al. (2014) — foundational APC methodology
+- Manuel et al. (2020) — Ontario adaptation
diff --git a/docs/workflow/_quarto.yml b/docs/workflow/_quarto.yml
new file mode 100644
index 0000000..fc448cd
--- /dev/null
+++ b/docs/workflow/_quarto.yml
@@ -0,0 +1,7 @@
+format:
+ html:
+ theme: cosmo
+ toc: true
+ code-copy: true
+ code-tools: true
+ highlight-style: github
diff --git a/docs/workflow/index.qmd b/docs/workflow/index.qmd
new file mode 100644
index 0000000..3a9a306
--- /dev/null
+++ b/docs/workflow/index.qmd
@@ -0,0 +1,67 @@
+---
+title: "Pipeline workflow"
+subtitle: "A step-by-step walkthrough of the CSHM analysis pipeline"
+---
+
+The CSHM uses a [`{targets}`](https://docs.ropensci.org/targets/) pipeline to manage the analysis workflow. Each step is defined as a *target* — a named R object with a recipe for computing it. Targets are only recomputed when their inputs change, making the pipeline fast and fully reproducible.
+
+## Pipeline overview
+
+```{r dag, eval=FALSE}
+# Visualise the pipeline dependency graph
+targets::tar_visnetwork()
+```
+
+## Pipeline stages
+
+| Step | Target | What it does |
+|------|--------|-------------|
+| [1. Variable setup](1-variable-setup.qmd) | `variables_sheet`, `variable_details_sheet` | Load the variable dictionary and recoding rules |
+| [2. Data loading](2-data-loading.qmd) | `study_data` | Load and harmonize all 11 CCHS cycles |
+| [3. Data cleaning](3-data-cleaning.qmd) | `cleaned_data` | Age restriction, distribution checks, truncation |
+| [4. Descriptive statistics](4-descriptive-statistics.qmd) | `table_1a_data` | Pre-imputation Table 1 statistics |
+| [5. Imputation](5-imputation.qmd) | `analysis_data` | Multiple imputation (MICE) |
+| [6. Post-imputation descriptives](6-descriptive-post-imputation.qmd) | `table_1b_data` | Post-imputation Table 1 statistics |
+| [7. APC data preparation](7-apc-data-preparation.qmd) | `apc_data` | Build numerator/denominator cells for APC models |
+| [8. APC model fitting](8-apc-model.qmd) | `apc_model_*` | Fit initiation and cessation APC models |
+
+## Running the pipeline
+
+```r
+# Full pipeline
+targets::tar_make()
+
+# Run through a specific stage
+targets::tar_make(apc_data)
+
+# Inspect a target's output
+targets::tar_read(apc_data)
+
+# See what is outdated
+targets::tar_outdated()
+```
+
+## Configuration
+
+All environment-specific settings live in `config.yml`. Set exactly one profile before running (each call below replaces the previous value, so run only the line you need):
+
+```r
+Sys.setenv(R_CONFIG_ACTIVE = "dev") # 10% sample, fast iteration
+Sys.setenv(R_CONFIG_ACTIVE = "prod") # Full PUMF sample
+```
+
+See `config.yml` for the full list of profiles and parameters.
+
+## Reproducibility
+
+- [`{renv}`](https://rstudio.github.io/renv/) locks all R package versions (`renv.lock`)
+- [`{targets}`](https://docs.ropensci.org/targets/) caches outputs — rerunning is safe and fast
+- `execute: freeze: auto` in `_quarto.yml` freezes rendered outputs when pipeline data is unavailable
+
+To fully reproduce the analysis from scratch:
+
+```r
+renv::restore() # Restore R packages
+targets::tar_make() # Run the pipeline
+system2("quarto", "render")  # Render the documentation site (shell command: quarto render)
+```
diff --git a/index.qmd b/index.qmd
new file mode 100644
index 0000000..737942c
--- /dev/null
+++ b/index.qmd
@@ -0,0 +1,64 @@
+---
+title: "Canadian Smoking Histories Model"
+---
+
+The Canadian Smoking Histories Model (CSHM) reconstructs smoking initiation, cessation, and intensity by birth cohort, sex, and province from 1965 to 2023, with projections to 2050. It uses Age-Period-Cohort (APC) models fit to 11 cycles of the Canadian Community Health Survey (CCHS; ~1.4 million respondents) and extends the Ontario analysis of Manuel et al. (2020) to all of Canada.
+
+This repository serves three purposes:
+
+:::{.grid}
+
+:::{.g-col-4}
+### 1. Study protocol
+
+A fully prespecified protocol, written before final analysis, documenting all analytical decisions in advance.
+
+- [Full protocol](docs/protocol/full-protocol.qmd)
+- [One-page summary](docs/protocol/study-summary.qmd)
+:::
+
+:::{.g-col-4}
+### 2. Transparent workflow
+
+A step-by-step walkthrough of every pipeline stage — code, outputs, and the reasoning behind each methodological decision. Designed so an undergraduate student can follow the full analysis.
+
+- [Pipeline overview](docs/workflow/index.qmd)
+- [Stage 2: Data loading](docs/workflow/2-data-loading.qmd)
+- [Stage 7: APC data prep](docs/workflow/7-apc-data-preparation.qmd)
+- [Stage 8: Model fitting](docs/workflow/8-apc-model.qmd)
+:::
+
+:::{.g-col-4}
+### 3. Reproducible manuscript
+
+A Quarto manuscript where every reported number is inline R code reading from the pipeline — no hardcoded values. Run the pipeline, render the manuscript, submit.
+
+- [Manuscript](manuscript/manuscript.qmd) *(render locally)*
+:::
+
+:::
+
+## Quick start
+
+```r
+# Restore R environment
+renv::restore()
+
+# Run the pipeline (dev profile: 10% sample, fast iteration)
+Sys.setenv(R_CONFIG_ACTIVE = "dev")
+targets::tar_make()
+
+# Preview the documentation site (shell command; blocks until the preview is stopped)
+system2("quarto", "preview")
+
+# Render the manuscript to Word
+system2("quarto", c("render", "manuscript/manuscript.qmd"))
+```
+
+## Key reference
+
+> Manuel DG, Wilton AS, Bennett C, Dass R, Laporte A, Holford TR. Smoking patterns based on birth-cohort-specific histories from 1965 to 2013, with projections to 2041. *Health Reports*. 2020;31(11):16–31. [doi:10.25318/82-003-x202001100002-eng](https://doi.org/10.25318/82-003-x202001100002-eng)
+
+## Licence
+
+Code: [MIT License](LICENSE). CCHS data: [Statistics Canada Open Licence](https://www.statcan.gc.ca/eng/reference/licence).
diff --git a/manuscript/_quarto.yml b/manuscript/_quarto.yml
new file mode 100644
index 0000000..071aa3d
--- /dev/null
+++ b/manuscript/_quarto.yml
@@ -0,0 +1,78 @@
+project:
+ type: default
+ output-dir: output
+ pre-render: ../_extensions/docstyle/generate-reference.R
+ post-render: ../_extensions/docstyle/update-field-codes.R
+
+bibliography: ../references.bib
+
+format:
+ docstyle-docx:
+ toc: false
+ number-sections: false
+ reference-doc: ../_docstyle/reference.docx
+
+docstyle:
+ css:
+ - ../styles.css
+ sidecar-dir: ../_docstyle
+ header:
+ enabled: true
+ left: "Canadian Smoking Histories Model"
+ first-page: false
+ style: header
+ footer:
+ enabled: true
+ left: "DRAFT"
+ right: "Page {page} of {pages}"
+ first-page: false
+ style: footer
+ author-plate:
+ enabled: false
+ authors:
+ - name:
+ given: "Douglas"
+ family: "Manuel"
+ email: "dmanuel@ohri.ca"
+ orcid: "0000-0003-0912-0845"
+ corresponding: true
+ affiliations:
+ - ref: ohri
+ - name:
+ given: "Rafael"
+ family: "Meza"
+ orcid: "0000-0002-1076-5037"
+ affiliations:
+ - ref: bccancer
+ - ref: ubc
+ - name:
+ given: "Rochelle E."
+ family: "Garner"
+ email: "rochelle.garner@statcan.gc.ca"
+ affiliations:
+ - ref: statscan
+ - name:
+ given: "Maikol"
+ family: "Diasparra"
+ email: "maikol.diasparra@statcan.gc.ca"
+ affiliations:
+ - ref: statscan
+ affiliations:
+ - id: ohri
+ name: "Ottawa Hospital Research Institute"
+ city: "Ottawa"
+ country: "Canada"
+ - id: bccancer
+ name: "BC Cancer Research Institute"
+ city: "Vancouver"
+ country: "Canada"
+ - id: ubc
+ name: "University of British Columbia"
+ department: "School of Population and Public Health"
+ city: "Vancouver"
+ country: "Canada"
+ - id: statscan
+ name: "Statistics Canada"
+ department: "Health Analysis Division"
+ city: "Ottawa"
+ country: "Canada"
diff --git a/manuscript/manuscript.qmd b/manuscript/manuscript.qmd
new file mode 100644
index 0000000..2503a22
--- /dev/null
+++ b/manuscript/manuscript.qmd
@@ -0,0 +1,150 @@
+---
+title: "Smoking patterns in Canada by birth cohort, 1965–2023, with projections to 2050: the Canadian Smoking Histories Model"
+status: "Draft"
+version-summary:
+ date: "2026-03-16"
+ version: "0.1.0"
+version-history:
+ - version: "0.1.0"
+ date: "2026-03-16"
+ description: "Initial manuscript stub — inline result placeholders."
+---
+
+[{{< meta version-summary.date >}}]{.date} \| Version: [{{< meta version-summary.version >}}]{.version}
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)  # chunk defaults: hide code, messages and warnings
+
+# Use the parent directory as knitr's root.dir so tar_read() finds the pipeline store (assumes rendering starts in manuscript/ — TODO confirm)
+knitr::opts_knit$set(root.dir = normalizePath(".."))
+
+library(targets)
+library(dplyr)
+
+cfg <- config::get(file = here::here("config.yml"))  # active profile is selected via R_CONFIG_ACTIVE
+source(here::here("R/config-utils.R"))  # presumably defines survey_bound(), used inline in the Methods text — verify
+
+# Helper: round to whole numbers and format with comma thousands separators (e.g. 1,234,567)
+fmt_n <- function(x) format(round(x), big.mark = ",", scientific = FALSE)
+
+# Helper: format a proportion as a percentage string; trailing zeros are dropped (0.12 -> "12%")
+fmt_pct <- function(x, digits = 1) paste0(round(x * 100, digits), "%")
+```
+
+```{r load-pipeline-results, include=FALSE}
+# Read the cached pipeline targets — all numbers in this manuscript come from here.
+# tar_read() needs an existing store: run targets::tar_make() first if results are not available.
+study_data <- tar_read("study_data")  # stage 2: loaded and harmonized CCHS cycles
+cleaned_data <- tar_read("cleaned_data")  # stage 3: after age restriction and cleaning
+analysis_data <- tar_read("analysis_data")  # stage 5: post-imputation data
+```
+
+```{r derived-values, include=FALSE}
+# Sample sizes before and after cleaning. NOTE(review): n_excluded_age counts every row dropped between study_data and cleaned_data — confirm cleaning removes rows on age only
+n_respondents_total <- nrow(study_data)
+n_respondents_cleaned <- nrow(cleaned_data)
+n_excluded_age <- n_respondents_total - n_respondents_cleaned
+
+# Number of distinct survey cycles; cfg$survey$cycle is used to select the cycle column of study_data
+n_cycles <- length(unique(study_data[[cfg$survey$cycle]]))
+```
+
+::: author-plate
+:::
+
+::: toc
+:::
+
+# Abstract
+
+**Background.** Smoking behaviour varies across birth cohorts in ways that affect projections of tobacco-attributable disease and evaluation of tobacco control policies in Canada. A pan-Canadian smoking histories model does not yet exist.
+
+**Objectives.** To develop a Canadian Smoking Histories Model (CSHM) describing smoking initiation, cessation, and intensity by birth cohort, sex, and province from 1965 to 2023, with projections to 2050.
+
+**Methods.** We used `r n_cycles` cycles of the Canadian Community Health Survey (CCHS; 2001–2022), comprising `r fmt_n(n_respondents_cleaned)` respondents after exclusions. Age-period-cohort logistic regression models with constrained natural cubic splines were fit separately for men and women to estimate annual probabilities of smoking initiation and cessation. Mortality-adjusted weights were applied to correct for survival bias. Projections were extended to 2050 under status quo conditions.
+
+**Results.** [To be completed when pipeline is fully run.]
+
+**Conclusions.** [To be completed.]
+
+::: {.section-body}
+
+# 1. Introduction
+
+Smoking remains the leading cause of preventable death in Canada, responsible for approximately 48,000 deaths annually [@HealthCanada_SmokingMortality_2024]. Smoking behaviour is not uniform across the population — it varies markedly by birth cohort, sex, province, and socioeconomic position, reflecting decades of evolving social norms, tobacco control policies, and industry marketing [@Manuel_HR_2020; @Holford_AJPM_2014].
+
+Population health models used to evaluate tobacco control policies and project disease burden (e.g., OncoSim, POHEM, SimSmoke) require current smoking histories as inputs. The most recent Canadian parameters were derived from the 1994–2004 National Population Health Survey (NPHS), now over 20 years old [@hennessy2015; @gauvreau2017; @chaiton2021]. Meanwhile, smoking patterns have continued to evolve — particularly among younger cohorts, immigrant populations, and equity-deserving groups.
+
+The CCHS, with `r fmt_n(n_respondents_total)` respondents across `r n_cycles` survey cycles (2001–2022), provides an unparalleled opportunity to address this gap. We apply the age-period-cohort (APC) framework developed by Holford et al. [-@Holford_AJPM_2014] and adapted for Ontario by Manuel et al. [-@Manuel_HR_2020] to all Canadian provinces and territories.
+
+# 2. Methods
+
+## 2.1 Data
+
+We used all available cycles of the CCHS (2001–2022 PUMF; 2001–2023 Master files), comprising `r fmt_n(n_respondents_total)` respondents. After excluding respondents aged 12–17 years (`r fmt_n(n_excluded_age)` excluded), the analytic sample comprised `r fmt_n(n_respondents_cleaned)` respondents. Variables were harmonized across cycles using the `cchsflow` R package [@cchsflow].
+
+## 2.2 Smoking status definitions
+
+| Status | Definition |
+|--------|-----------|
+| Never smoker | <100 lifetime cigarettes AND never smoked a whole cigarette |
+| Current smoker | ≥100 lifetime cigarettes AND currently smokes daily or occasionally |
+| Former smoker | ≥100 lifetime cigarettes AND not currently smoking |
+
+## 2.3 Analytical approach
+
+We fit two logistic regression models using an age-period-cohort framework:
+
+1. **Initiation model** — annual probability of transitioning from never smoker to current smoker, conditional on being a never smoker at age *a*−1. Probability assumed zero before age `r survey_bound(cfg, "age_first_cigarette", "min")`.
+
+2. **Cessation model** — conditional probability of a current smoker quitting at age *a*. Probability assumed zero before `r survey_bound(cfg, "years_since_quit", "min")` years since quitting.
+
+Both models used constrained natural cubic splines with the knot structure from Holford et al. [-@Holford_AJPM_2014]: age knots `r paste0("[", paste(cfg$apc$age_knots, collapse = ", "), "]")`, period knots `r paste0("[", paste(cfg$apc$period_knots, collapse = ", "), "]")`, and cohort knots `r paste0("[", paste(cfg$apc$cohort_knots, collapse = ", "), "]")`.
+
+Period effects were held constant beyond observed data: men's initiation from `r cfg$apc$period_constraints$initiation_men_from`, women's initiation from `r cfg$apc$period_constraints$initiation_women_from`, and cessation from `r cfg$apc$period_constraints$cessation_from`. Cohort effects were held constant prior to `r cfg$apc$cohort_constraints$initiation_prior_to` (initiation) and from `r cfg$apc$cohort_constraints$cessation_from` (cessation).
+
+## 2.4 Missing data
+
+Missing sociodemographic variables were handled using multiple imputation by chained equations (MICE; m = `r cfg$imputation_m`, maxit = `r cfg$imputation_maxit`). Variables structurally absent from some cycles (e.g., immigration status) were imputed using available cycle data.
+
+# 3. Results
+
+[To be completed when pipeline is fully run.]
+
+## 3.1 Study population
+
+[Table 1 — characteristics of `r fmt_n(n_respondents_cleaned)` respondents]
+
+## 3.2 Smoking initiation rates by cohort
+
+[Figures and tables — to be added at Stage 9]
+
+## 3.3 Smoking cessation rates by cohort
+
+[Figures and tables — to be added at Stage 9]
+
+## 3.4 Projected smoking prevalence to 2050
+
+[Projections to `r cfg$apc$projection_max` — to be added at Stage 9]
+
+# 4. Discussion
+
+[To be completed.]
+
+# 5. Conclusions
+
+[To be completed.]
+
+# References
+
+::: bibliography
+:::
+
+:::
+
+::: {.section-body page-break="true"}
+
+::: version-history
+:::
+
+:::
diff --git a/pop-draft-manuscript.css b/pop-draft-manuscript.css
new file mode 100644
index 0000000..692bfa1
--- /dev/null
+++ b/pop-draft-manuscript.css
@@ -0,0 +1,31 @@
+/* POPCORN Draft Manuscript Layer
+ * Overrides popcorn-base.css for draft/review documents
+ *
+ * Changes from base:
+ * - 12pt body text (vs 11pt) for easier reading
+ * - 1.15 line spacing (vs single) for annotation room
+ *
+ * Usage in _quarto.yml:
+ * docstyle:
+ * css:
+ * - popcorn-base.css
+ * - pop-draft-manuscript.css
+ */
+
+/* Override body text for draft review */
+p, body {
+ font-size: 12pt;
+ line-height: 1.15;
+}
+
+/* Lists should match body text */
+ol, ul {
+ font-size: 12pt;
+ line-height: 1.15;
+}
+
+/* Tables can stay at 11pt for density, but bump line height */
+.table-formal,
+.table-grid {
+ line-height: 1.15;
+}
diff --git a/popcorn-base.css b/popcorn-base.css
new file mode 100644
index 0000000..6377409
--- /dev/null
+++ b/popcorn-base.css
@@ -0,0 +1,397 @@
+/* POPCORN Base Styles
+ * Core typography and formatting for POPCORN documents
+ *
+ * This is the base layer. Additional layers can override:
+ * - pop-draft-manuscript.css (12pt body, 1.15 line-height for review)
+ * - pop-web.css (web-specific adjustments)
+ *
+ * Typography:
+ * Headings: Libre Baskerville (14pt/13pt/11pt hierarchy)
+ * Body: Hanken Grotesk (11pt default)
+ */
+
+/* ==========================================================================
+ COLOUR PALETTE (shared with popcorn.css)
+ ========================================================================== */
+
+:root {
+ /* Valhalla - RGB: 38, 44, 58 */
+ --popcorn-valhalla: #262c3a;
+
+ /* Chardonnay - RGB: 253, 200, 130 */
+ --popcorn-chardonnay: #fdc882;
+
+ /* Teal - links */
+ --popcorn-teal: #2ea6cc;
+
+ --popcorn-white: #ffffff;
+}
+
+/* ==========================================================================
+ TYPOGRAPHY - Print/Word optimised
+ ========================================================================== */
+
+/* Headings - Libre Baskerville for formal documents */
+h1 {
+ font-family: "Libre Baskerville", "Georgia", serif;
+ font-size: 14pt;
+ font-weight: bold;
+ color: #262c3a;
+}
+
+h2 {
+ font-family: "Libre Baskerville", "Georgia", serif;
+ font-size: 13pt;
+ font-weight: bold;
+ color: #262c3a;
+}
+
+h3 {
+ font-family: "Libre Baskerville", "Georgia", serif;
+ font-size: 11pt;
+ font-weight: bold;
+ color: #262c3a;
+}
+
+h4 {
+ font-family: "Libre Baskerville", "Georgia", serif;
+ font-size: 11pt;
+ font-weight: bold;
+ font-style: italic;
+ color: #262c3a;
+}
+
+h5 {
+ font-family: "Libre Baskerville", "Georgia", serif;
+ font-size: 11pt;
+ font-weight: normal;
+ color: #262c3a;
+}
+
+/* Body text - Hanken Grotesk (consistent across platforms) */
+p, body {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ font-weight: normal;
+ color: #000000;
+}
+
+/* Links */
+a {
+ color: #2ea6cc;
+}
+
+/* ==========================================================================
+ DOCUMENT STRUCTURE (headers, footers, title page metadata)
+ ========================================================================== */
+
+.footer {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 10pt;
+ color: #000000;
+}
+
+.header {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 10pt;
+ color: #262c3a;
+}
+
+/* Author block styles - for title page metadata */
+.author {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 12pt;
+ text-align: center;
+}
+
+.affiliation {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 10pt;
+ font-style: italic;
+ text-align: center;
+ color: #666666;
+}
+
+/* Version block styles - for title page metadata */
+.date {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ text-align: center;
+}
+
+.version {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ text-align: center;
+ font-style: italic;
+}
+
+/* ==========================================================================
+ SEMANTIC STYLES
+ ========================================================================== */
+
+/* Title style - for document title on cover/first page */
+.title {
+ font-family: "Libre Baskerville", "Georgia", serif;
+ font-size: 18pt;
+ font-weight: bold;
+ color: #262c3a;
+}
+
+/* Subtitle */
+.subtitle {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 14pt;
+ font-weight: normal;
+ color: #262c3a;
+}
+
+/* Emphasis/callout text */
+.callout {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 10pt;
+ font-style: italic;
+ color: #666666;
+}
+
+/* ==========================================================================
+ UTILITY CLASSES (for manual styling in Quarto divs)
+ ========================================================================== */
+
+.libre-baskerville {
+ font-family: "Libre Baskerville", "Georgia", serif;
+}
+
+.hanken-grotesk {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+}
+
+.valhalla {
+ color: #262c3a;
+}
+
+.chardonnay-bg {
+ background-color: #fdc882;
+}
+
+.center {
+ text-align: center;
+}
+
+/* ==========================================================================
+ TABLE OF CONTENTS STYLES
+ Harvested from POPCORN_StrategicAdvisoryCommitteeTOR_V1.0.0.docx
+ - TOC entries use Hanken Grotesk (body font), not headings font
+ - TOC 1: bold, no indent
+ - TOC 2: Hanken Grotesk Light, indented
+ ========================================================================== */
+
+/* TOC Heading - Libre Baskerville italic (matches document heading style) */
+.toc-heading {
+ font-family: "Libre Baskerville", "Georgia", serif;
+ font-size: 14pt;
+ font-weight: bold;
+ font-style: italic;
+ color: #252c56;
+}
+
+/* TOC Level 1 - Hanken Grotesk bold, no indent, single line spacing */
+.toc-1 {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ font-weight: bold;
+ color: #000000;
+ line-height: 1;
+ margin-bottom: 0;
+}
+
+/* TOC Level 2 - Hanken Grotesk Light, indented 12pt (~238 twips) */
+.toc-2 {
+ font-family: "Hanken Grotesk Light", "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ font-weight: normal;
+ color: #000000;
+ margin-left: 12pt;
+ line-height: 1;
+ margin-bottom: 0;
+}
+
+/* TOC Level 3 - Hanken Grotesk Light, indented 24pt */
+.toc-3 {
+ font-family: "Hanken Grotesk Light", "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ font-weight: normal;
+ color: #000000;
+ margin-left: 24pt;
+ line-height: 1;
+ margin-bottom: 0;
+}
+
+/* ==========================================================================
+ FOOTNOTE STYLES
+ Harvested from POPCORN_StrategicAdvisoryCommitteeTOR_V1.0.0.docx
+ - FootnoteText: 10pt Hanken Grotesk, single line spacing, no space after
+ ========================================================================== */
+
+/* Footnote Text - paragraph style for footnote body */
+.footnote-text {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 10pt;
+ font-weight: normal;
+ color: #000000;
+ line-height: 1;
+ margin-bottom: 0;
+}
+
+/* ==========================================================================
+ LIST STYLES - Architecture
+
+ Standard markdown syntax maps to default list types:
+ - Bullet lists: `-` syntax in QMD
+ - Numbered lists: `1.` syntax in QMD
+
+ Pandoc handles rendering. For DOCX, post-processing can modify
+ numbering.xml to achieve custom formats (e.g., 1./a./i. hierarchy).
+
+ Custom list classes (e.g., `.my-custom-list`) can be applied via
+ fenced divs in QMD:
+
+ ::: {.my-custom-list}
+ 1. First item
+ 2. Second item
+ :::
+
+ For HTML: CSS handles custom classes natively.
+ For DOCX: Post-processing required (Pandoc doesn't pass div classes
+ to Word list numbering definitions).
+ ========================================================================== */
+
+/* Default numbered list styling
+ Applied to standard `1.` markdown lists
+ Renders as: 1. 2. 3. (Pandoc default)
+*/
+ol {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+}
+
+/* Default bullet list styling
+ Applied to standard `-` markdown lists
+ Renders as: • ◦ ▪ hierarchy (Pandoc default)
+*/
+ul {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+}
+
+/* ==========================================================================
+ TABLE STYLES
+ Harvested from POPCORN_StrategicAdvisoryCommitteeTOR_V1.0.0.docx
+
+ Usage in QMD:
+ ::: {.table-formal}
+ | Column 1 | Column 2 |
+ |----------|----------|
+ | Data | Data |
+ :::
+
+ The Lua filter (table-style.lua) reads these CSS properties and converts
+ them to OpenXML table formatting during Pandoc rendering.
+ ========================================================================== */
+
+/* Formal table style - top/bottom borders, shaded header row
+ Used for structured data tables like Domain/Responsibility mapping
+*/
+.table-formal {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ border-top: 1pt solid #7F7F7F;
+ border-bottom: 1pt solid #7F7F7F;
+ border-left: none;
+ border-right: none;
+}
+
+.table-formal th {
+ background-color: #D9D9D9;
+ font-weight: normal;
+ padding: 6pt;
+}
+
+.table-formal td {
+ padding: 6pt;
+}
+
+/* Grid table style - full borders on all cells
+ Used for version history and similar tabular data
+*/
+.table-grid {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 11pt;
+ border: 1pt solid #000000;
+ border-collapse: collapse;
+}
+
+.table-grid th,
+.table-grid td {
+ border: 1pt solid #000000;
+ padding: 6pt;
+}
+
+.table-grid th {
+ font-weight: bold;
+}
+
+/* ==========================================================================
+ REVISION STYLES (Track Changes Preview)
+ Used for visualising Word track changes in HTML preview.
+ The Lua filter (revisions-inject.lua) converts these to OpenXML for Word.
+ ========================================================================== */
+
+/* Deletions - strikethrough with red background
+ QMD syntax: [~~deleted text~~]{.del id="x"}
+*/
+.del {
+ background-color: #ffebe9; /* Light red background */
+ color: #6a737d; /* Muted text */
+ text-decoration: line-through;
+}
+
+/* Insertions - underlined with green background
+ QMD syntax: [inserted text]{.ins id="y"}
+*/
+.ins {
+ background-color: #e6ffec; /* Light green background */
+ text-decoration: underline;
+}
+
+/* ==========================================================================
+ COMMENT STYLES (Preview)
+ Used for visualising Word comments in HTML preview.
+ The Lua filter (comment-inject.lua) converts these to OpenXML for Word.
+ ========================================================================== */
+
+/* Comment spans - highlighted text indicating commented content
+ QMD syntax: [commented text]{.comment id="1"}
+*/
+.comment {
+ background-color: #fff3cd; /* Light yellow/amber background */
+ border-bottom: 2px solid #ffc107; /* Amber underline */
+}
+
+/* ==========================================================================
+ BLOCKQUOTE STYLE
+ Left bar (Valhalla) + grey background — for reporting guideline
+ placeholders and callout notes
+ ========================================================================== */
+
+blockquote {
+ font-family: "Hanken Grotesk", "Arial", sans-serif;
+ font-size: 10pt;
+ font-style: italic;
+ color: #444444;
+ background-color: #F2F2F2;
+ border-left: 3pt solid #262c3a;
+ margin-left: 0;
+ margin-right: 0;
+ padding: 6pt 12pt;
+}
diff --git a/references.bib b/references.bib
new file mode 100644
index 0000000..df8c9e3
--- /dev/null
+++ b/references.bib
@@ -0,0 +1,199 @@
+@article{Manuel_HR_2020,
+ author = {Manuel, Douglas G. and Wilton, Andrew S. and Bennett, Carol and Dass, Rohit and Laporte, Audrey and Holford, Theodore R.},
+ title = {{Smoking patterns based on birth-cohort-specific histories from 1965 to 2013, with projections to 2041}},
+ journal = {Health Reports},
+ year = {2020},
+ volume = {31},
+ number = {11},
+ pages = {16--31},
+ doi = {10.25318/82-003-x202001100002-eng},
+ url = {https://www150.statcan.gc.ca/n1/pub/82-003-x/2020011/article/00002-eng.htm}
+}
+
+@article{Tam_AJPM_2023,
+ author = {Tam, Jamie and Jaffri, Mohammed A. and Mok, Yoonseo and Jeon, Jihyoun and Szklo, André S. and Souza, Mirian C. and Holford, Theodore R. and Levy, David T. and Cao, Pianpian and Sánchez-Romero, Luz M. and Meza, Rafael},
+ title = {{Patterns of birth cohort‒specific smoking histories in Brazil}},
+ journal = {American Journal of Preventive Medicine},
+ year = {2023},
+ volume = {64},
+ number = {4},
+ pages = {S63--S71},
+ doi = {10.1016/j.amepre.2022.12.002},
+ url = {https://linkinghub.elsevier.com/retrieve/pii/S0749379722005530}
+}
+
+@article{Meza_J_2021,
+ author = {Meza, Rafael and Jeon, Jihyoun and Toumazis, Iakovos and Ten Haaf, Kevin and Cao, Pianpian and Bastani, Mehrad and Han, Summer S. and Blom, Erik F. and Jonas, Daniel E. and Feuer, Eric J. and Plevritis, Sylvia K. and De Koning, Harry J. and Kong, Chung Yin},
+ title = {{Evaluation of the benefits and harms of lung cancer screening with low-dose computed tomography: modeling study for the US preventive services task force}},
+ journal = {JAMA},
+ year = {2021},
+ volume = {325},
+ number = {10},
+ pages = {988--997},
+ doi = {10.1001/jama.2021.1077},
+ url = {https://jamanetwork.com/journals/jama/fullarticle/2777243}
+}
+
+@article{Holford_AJPM_2014,
+ author = {Holford, T. R. and Levy, D. T. and McKay, L. A. and Clarke, L. and Racine, B. and Meza, R. and Land, S. and Jeon, J. and Feuer, E. J.},
+ title = {{Patterns of birth cohort-specific smoking histories, 1965-2009}},
+ journal = {American Journal of Preventive Medicine},
+ year = {2014},
+ volume = {46},
+ number = {2},
+ pages = {e31--e37},
+ doi = {10.1016/j.amepre.2013.10.022}
+}
+
+@misc{HealthCanada_SmokingMortality_2024,
+ author = {{Health Canada}},
+ title = {{Smoking and mortality}},
+ year = {2024},
+ url = {https://www.canada.ca/en/health-canada/services/health-concerns/tobacco/legislation/tobacco-product-labelling/smoking-mortality.html}
+}
+
+@report{CSUCH_2023,
+ author = {{Canadian Substance Use Costs and Harms Scientific Working Group}},
+ title = {{Canadian substance use costs and harms 2007--2020}},
+ year = {2023},
+ institution = {Canadian Institute for Substance Use Research and Canadian Centre on Substance Use and Addiction},
+ address = {Victoria, BC},
+ url = {https://csuch.ca/publications/csuch-report/}
+}
+
+@article{mitra2015,
+ author = {Mitra, Dipjyoti and Shaw, Amanda and Tjepkema, Michael and Peters, Paul},
+ title = {{Social determinants of lung cancer incidence in Canada: A 13-year prospective study}},
+ journal = {Health Reports},
+ year = {2015},
+ volume = {26},
+ number = {6},
+ pages = {12--20},
+ doi = {10.25318/82-003-x201500614195-eng}
+}
+
+@article{hennessy2015,
+ author = {Hennessy, Deirdre A. and Flanagan, William M. and Tanuseputro, Peter and Bennett, Carol and Tuna, Meltem and Kopec, Jacek and Wolfson, Michael C. and Manuel, Douglas G.},
+ title = {{The Population Health Model (POHEM): an overview of rationale, methods and applications}},
+ journal = {Population Health Metrics},
+ year = {2015},
+ volume = {13},
+ number = {1},
+ pages = {24},
+ doi = {10.1186/s12963-015-0057-x}
+}
+
+@article{gauvreau2017,
+ author = {Gauvreau, C. L. and Fitzgerald, N. R. and Memon, S. and Flanagan, W. M. and Nadeau, C. and Asakawa, K. and Garner, R. and Miller, A. B. and Evans, W. K. and Popadiuk, C. M.},
+ title = {{The OncoSim model: development and use for better decision-making in Canadian cancer control}},
+ journal = {Current Oncology},
+ year = {2017},
+ volume = {24},
+ number = {6},
+ pages = {401--406},
+ doi = {10.3747/co.24.3609}
+}
+
+@article{chaiton2021,
+ author = {Chaiton, Michael and Dubray, Jolene and Guindon, G. Emmanuel and Schwartz, Robert},
+ title = {{Tobacco endgame simulation modelling: assessing the impact of policy changes on smoking prevalence in 2035}},
+ journal = {Forecasting},
+ year = {2021},
+ volume = {3},
+ number = {2},
+ pages = {267--275},
+ doi = {10.3390/forecast3020017}
+}
+
+@article{beland2002,
+ author = {Beland, Y.},
+ title = {{Canadian Community Health Survey -- Methodological overview}},
+ journal = {Health Reports},
+ year = {2002},
+ volume = {13},
+ number = {2},
+ pages = {9--14}
+}
+
+@article{kopasker2023,
+ author = {Kopasker, Daniel and Katikireddi, Srinivasa Vittal and Santos, João Vasco and Richiardi, Matteo and Bronka, Patryk and Rostila, Mikael and Cecchini, Michele and Ali, Shehzad and Emmert-Fees, Karl and Bambra, Clare and Hoven, Hanno and Backhaus, Insa and Balaj, Mirza and Eikemo, Terje Andreas},
+ title = {{Microsimulation as a flexible tool to evaluate policies and their impact on socioeconomic inequalities in health}},
+ journal = {The Lancet Regional Health -- Europe},
+ year = {2023},
+ volume = {34},
+ pages = {100758},
+ doi = {10.1016/j.lanepe.2023.100758}
+}
+
+@article{vasquezlavin2022,
+ author = {Vasquez-Lavin, Felipe and Bratti, Luna and Orrego, Sergio and Barrientos, Manuel},
+ title = {{Assessing the use of pseudo-panels to estimate the value of statistical life}},
+ journal = {Applied Economics},
+ year = {2022},
+ volume = {54},
+ number = {34},
+ pages = {3972--3988},
+ doi = {10.1080/00036846.2021.2019186}
+}
+
+@article{backinger2008,
+ author = {Backinger, Cathy L. and Lawrence, Deirdre and Swan, Judith and Winn, Deborah M. and Breen, Nancy and Hartman, Anne and Grana, Rachel and Tran, David and Farrell, Samantha},
+ title = {{Using the National Health Interview Survey to understand and address the impact of tobacco in the United States: past perspectives and future considerations}},
+ journal = {Epidemiologic Perspectives \& Innovations},
+ year = {2008},
+ volume = {5},
+ pages = {8},
+ doi = {10.1186/1742-5573-5-8},
+ issn = {1742-5573},
+ pmid = {19055824},
+ pmcid = {PMC2627846}
+}
+
+@article{gagne2017,
+ author = {Gagné, Thierry},
+ title = {{Estimation of smoking prevalence in Canada: Implications of survey characteristics in the CCHS and CTUMS/CTADS}},
+ journal = {Canadian Journal of Public Health},
+ year = {2017},
+ volume = {108},
+ number = {3},
+ pages = {e331--e334},
+ doi = {10.17269/CJPH.108.5895},
+ issn = {0008-4263},
+ pmid = {28910259},
+ pmcid = {PMC6972049}
+}
+
+@article{chen2020joinpoint,
+ author = {Chen, Huann-Sheng and Zeichner, Samantha and Anderson, Robert N. and Espey, Donald K. and Kim, Hyune-Ju and Feuer, Eric J.},
+ title = {{The Joinpoint-Jump and Joinpoint-Comparability Ratio Model for Trend Analysis with Applications to Coding Changes in Health Statistics}},
+ journal = {Journal of Official Statistics},
+ year = {2020},
+ volume = {36},
+ number = {1},
+ pages = {49--62},
+ doi = {10.2478/jos-2020-0003},
+ pmcid = {PMC7380682}
+}
+
+@article{opazobretton2022,
+ author = {Opazo Breton, Magdalena and Gillespie, Duncan and Pryce, Robert and Bogdanovica, Ilze and Angus, Colin and Brennan, Alan and Britton, John},
+ title = {{Understanding long-term trends in smoking in England, 1972--2019: an age-period-cohort approach}},
+ journal = {Addiction},
+ year = {2022},
+ volume = {117},
+ number = {5},
+ pages = {1392--1403},
+ doi = {10.1111/add.15696},
+ issn = {0965-2140},
+ pmid = {34590368}
+}
+
+@article{wade2025,
+ author = {Wade, Stephen and Sarich, Peter and Vaneckova, Pavla and others},
+ title = {{Using Bayesian evidence synthesis to quantify uncertainty in population trends in smoking behaviour}},
+ journal = {Statistical Methods in Medical Research},
+ year = {2025},
+ doi = {10.1177/09622802241310326},
+ issn = {0962-2802},
+ pmcid = {PMC11951451}
+}
diff --git a/renv.lock b/renv.lock
new file mode 100644
index 0000000..8eda30d
--- /dev/null
+++ b/renv.lock
@@ -0,0 +1,7455 @@
+{
+ "R": {
+ "Version": "4.4.2",
+ "Repositories": [
+ {
+ "Name": "CRAN",
+ "URL": "https://cran.r-project.org"
+ }
+ ]
+ },
+ "Packages": {
+ "Formula": {
+ "Package": "Formula",
+ "Version": "1.2-5",
+ "Source": "Repository",
+ "Date": "2023-02-23",
+ "Title": "Extended Model Formulas",
+ "Description": "Infrastructure for extended formulas with multiple parts on the right-hand side and/or multiple responses on the left-hand side (see ).",
+ "Authors@R": "c(person(given = \"Achim\", family = \"Zeileis\", role = c(\"aut\", \"cre\"), email = \"Achim.Zeileis@R-project.org\", comment = c(ORCID = \"0000-0003-0918-3766\")), person(given = \"Yves\", family = \"Croissant\", role = \"aut\", email = \"Yves.Croissant@univ-reunion.fr\"))",
+ "Depends": [
+ "R (>= 2.0.0)",
+ "stats"
+ ],
+ "License": "GPL-2 | GPL-3",
+ "NeedsCompilation": "no",
+ "Author": "Achim Zeileis [aut, cre] (), Yves Croissant [aut]",
+ "Maintainer": "Achim Zeileis <Achim.Zeileis@R-project.org>",
+ "Repository": "RSPM",
+ "Encoding": "UTF-8"
+ },
+ "Hmisc": {
+ "Package": "Hmisc",
+ "Version": "5.2-5",
+ "Source": "Repository",
+ "Date": "2026-01-08",
+ "Title": "Harrell Miscellaneous",
+ "Authors@R": "c(person(given = \"Frank E\", family = \"Harrell Jr\", role = c(\"aut\", \"cre\"), email = \"fh@fharrell.com\", comment = c(ORCID = \"0000-0002-8271-5493\")), person(given = \"Cole\", family = \"Beck\", role = c(\"ctb\"), email = \"cole.beck@vumc.org\" ), person(given = \"Charles\", family = \"Dupont\", role = \"ctb\") )",
+ "Depends": [
+ "R (>= 4.2.0)"
+ ],
+ "Imports": [
+ "methods",
+ "ggplot2",
+ "cluster",
+ "rpart",
+ "nnet",
+ "foreign",
+ "gtable",
+ "grid",
+ "gridExtra",
+ "data.table",
+ "htmlTable (>= 1.11.0)",
+ "viridisLite",
+ "htmltools",
+ "base64enc",
+ "colorspace",
+ "rmarkdown",
+ "knitr",
+ "Formula"
+ ],
+ "Suggests": [
+ "survival",
+ "qreport",
+ "acepack",
+ "chron",
+ "rms",
+ "mice",
+ "rstudioapi",
+ "tables",
+ "plotly (>= 4.5.6)",
+ "rlang",
+ "VGAM",
+ "leaps",
+ "pcaPP",
+ "digest",
+ "parallel",
+ "polspline",
+ "abind",
+ "kableExtra",
+ "rio",
+ "lattice",
+ "latticeExtra",
+ "gt",
+ "sparkline",
+ "jsonlite",
+ "htmlwidgets",
+ "qs",
+ "getPass",
+ "keyring",
+ "safer",
+ "htm2txt",
+ "boot"
+ ],
+ "Description": "Contains many functions useful for data analysis, high-level graphics, utility operations, functions for computing sample size and power, simulation, importing and annotating datasets, imputing missing values, advanced table making, variable clustering, character string manipulation, conversion of R objects to LaTeX and html code, recoding variables, caching, simplified parallel computing, encrypting and decrypting data using a safe workflow, general moving window statistical estimation, and assistance in interpreting principal component analysis.",
+ "License": "GPL (>= 2)",
+ "LazyLoad": "Yes",
+ "URL": "https://hbiostat.org/R/Hmisc/",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.3",
+ "NeedsCompilation": "yes",
+ "Author": "Frank E Harrell Jr [aut, cre] (ORCID: ), Cole Beck [ctb], Charles Dupont [ctb]",
+ "Maintainer": "Frank E Harrell Jr <fh@fharrell.com>",
+ "Repository": "CRAN"
+ },
+ "MASS": {
+ "Package": "MASS",
+ "Version": "7.3-64",
+ "Source": "Repository",
+ "Priority": "recommended",
+ "Date": "2025-01-06",
+ "Revision": "$Rev: 3680 $",
+ "Depends": [
+ "R (>= 4.4.0)",
+ "grDevices",
+ "graphics",
+ "stats",
+ "utils"
+ ],
+ "Imports": [
+ "methods"
+ ],
+ "Suggests": [
+ "lattice",
+ "nlme",
+ "nnet",
+ "survival"
+ ],
+ "Authors@R": "c(person(\"Brian\", \"Ripley\", role = c(\"aut\", \"cre\", \"cph\"), email = \"Brian.Ripley@R-project.org\"), person(\"Bill\", \"Venables\", role = c(\"aut\", \"cph\")), person(c(\"Douglas\", \"M.\"), \"Bates\", role = \"ctb\"), person(\"Kurt\", \"Hornik\", role = \"trl\", comment = \"partial port ca 1998\"), person(\"Albrecht\", \"Gebhardt\", role = \"trl\", comment = \"partial port ca 1998\"), person(\"David\", \"Firth\", role = \"ctb\", comment = \"support functions for polr\"))",
+ "Description": "Functions and datasets to support Venables and Ripley, \"Modern Applied Statistics with S\" (4th edition, 2002).",
+ "Title": "Support Functions and Datasets for Venables and Ripley's MASS",
+ "LazyData": "yes",
+ "ByteCompile": "yes",
+ "License": "GPL-2 | GPL-3",
+ "URL": "http://www.stats.ox.ac.uk/pub/MASS4/",
+ "Contact": "",
+ "NeedsCompilation": "yes",
+ "Author": "Brian Ripley [aut, cre, cph], Bill Venables [aut, cph], Douglas M. Bates [ctb], Kurt Hornik [trl] (partial port ca 1998), Albrecht Gebhardt [trl] (partial port ca 1998), David Firth [ctb] (support functions for polr)",
+ "Maintainer": "Brian Ripley <Brian.Ripley@R-project.org>",
+ "Repository": "RSPM",
+ "Encoding": "UTF-8"
+ },
+ "Matrix": {
+ "Package": "Matrix",
+ "Version": "1.7-2",
+ "Source": "Repository",
+ "VersionNote": "do also bump src/version.h, inst/include/Matrix/version.h",
+ "Date": "2025-01-20",
+ "Priority": "recommended",
+ "Title": "Sparse and Dense Matrix Classes and Methods",
+ "Description": "A rich hierarchy of sparse and dense matrix classes, including general, symmetric, triangular, and diagonal matrices with numeric, logical, or pattern entries. Efficient methods for operating on such matrices, often wrapping the 'BLAS', 'LAPACK', and 'SuiteSparse' libraries.",
+ "License": "GPL (>= 2) | file LICENCE",
+ "URL": "https://Matrix.R-forge.R-project.org",
+ "BugReports": "https://R-forge.R-project.org/tracker/?atid=294&group_id=61",
+ "Contact": "Matrix-authors@R-project.org",
+ "Authors@R": "c(person(\"Douglas\", \"Bates\", role = \"aut\", comment = c(ORCID = \"0000-0001-8316-9503\")), person(\"Martin\", \"Maechler\", role = c(\"aut\", \"cre\"), email = \"mmaechler+Matrix@gmail.com\", comment = c(ORCID = \"0000-0002-8685-9910\")), person(\"Mikael\", \"Jagan\", role = \"aut\", comment = c(ORCID = \"0000-0002-3542-2938\")), person(\"Timothy A.\", \"Davis\", role = \"ctb\", comment = c(ORCID = \"0000-0001-7614-6899\", \"SuiteSparse libraries\", \"collaborators listed in dir(system.file(\\\"doc\\\", \\\"SuiteSparse\\\", package=\\\"Matrix\\\"), pattern=\\\"License\\\", full.names=TRUE, recursive=TRUE)\")), person(\"George\", \"Karypis\", role = \"ctb\", comment = c(ORCID = \"0000-0003-2753-1437\", \"METIS library\", \"Copyright: Regents of the University of Minnesota\")), person(\"Jason\", \"Riedy\", role = \"ctb\", comment = c(ORCID = \"0000-0002-4345-4200\", \"GNU Octave's condest() and onenormest()\", \"Copyright: Regents of the University of California\")), person(\"Jens\", \"Oehlschlägel\", role = \"ctb\", comment = \"initial nearPD()\"), person(\"R Core Team\", role = \"ctb\", comment = c(ROR = \"02zz1nj61\", \"base R's matrix implementation\")))",
+ "Depends": [
+ "R (>= 4.4)",
+ "methods"
+ ],
+ "Imports": [
+ "grDevices",
+ "graphics",
+ "grid",
+ "lattice",
+ "stats",
+ "utils"
+ ],
+ "Suggests": [
+ "MASS",
+ "datasets",
+ "sfsmisc",
+ "tools"
+ ],
+ "Enhances": [
+ "SparseM",
+ "graph"
+ ],
+ "LazyData": "no",
+ "LazyDataNote": "not possible, since we use data/*.R and our S4 classes",
+ "BuildResaveData": "no",
+ "Encoding": "UTF-8",
+ "NeedsCompilation": "yes",
+ "Author": "Douglas Bates [aut] (), Martin Maechler [aut, cre] (), Mikael Jagan [aut] (), Timothy A. Davis [ctb] (, SuiteSparse libraries, collaborators listed in dir(system.file(\"doc\", \"SuiteSparse\", package=\"Matrix\"), pattern=\"License\", full.names=TRUE, recursive=TRUE)), George Karypis [ctb] (, METIS library, Copyright: Regents of the University of Minnesota), Jason Riedy [ctb] (, GNU Octave's condest() and onenormest(), Copyright: Regents of the University of California), Jens Oehlschlägel [ctb] (initial nearPD()), R Core Team [ctb] (02zz1nj61, base R's matrix implementation)",
+ "Maintainer": "Martin Maechler <mmaechler+Matrix@gmail.com>",
+ "Repository": "CRAN"
+ },
+ "MatrixModels": {
+ "Package": "MatrixModels",
+ "Version": "0.5-4",
+ "Source": "Repository",
+ "VersionNote": "Released 0.5-3 on 2023-11-06",
+ "Date": "2025-03-25",
+ "Title": "Modelling with Sparse and Dense Matrices",
+ "Contact": "Matrix-authors@R-project.org",
+ "Authors@R": "c( person(\"Douglas\", \"Bates\", role = \"aut\", email = \"bates@stat.wisc.edu\", comment = c(ORCID = \"0000-0001-8316-9503\")), person(\"Martin\", \"Maechler\", role = c(\"aut\", \"cre\"), email = \"mmaechler+Matrix@gmail.com\", comment = c(ORCID = \"0000-0002-8685-9910\")))",
+ "Description": "Generalized Linear Modelling with sparse and dense 'Matrix' matrices, using modular prediction and response module classes.",
+ "Depends": [
+ "R (>= 3.6.0)"
+ ],
+ "Imports": [
+ "stats",
+ "methods",
+ "Matrix (>= 1.6-0)",
+ "Matrix(< 1.8-0)"
+ ],
+ "ImportsNote": "_not_yet_stats4",
+ "Encoding": "UTF-8",
+ "LazyLoad": "yes",
+ "License": "GPL (>= 2)",
+ "URL": "https://Matrix.R-forge.R-project.org/, https://r-forge.r-project.org/R/?group_id=61",
+ "BugReports": "https://R-forge.R-project.org/tracker/?func=add&atid=294&group_id=61",
+ "NeedsCompilation": "no",
+ "Author": "Douglas Bates [aut] (), Martin Maechler [aut, cre] ()",
+ "Maintainer": "Martin Maechler <mmaechler+Matrix@gmail.com>",
+ "Repository": "RSPM"
+ },
+ "R6": {
+ "Package": "R6",
+ "Version": "2.6.1",
+ "Source": "Repository",
+ "Title": "Encapsulated Classes with Reference Semantics",
+ "Authors@R": "c( person(\"Winston\", \"Chang\", , \"winston@posit.co\", role = c(\"aut\", \"cre\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Creates classes with reference semantics, similar to R's built-in reference classes. Compared to reference classes, R6 classes are simpler and lighter-weight, and they are not built on S4 classes so they do not require the methods package. These classes allow public and private members, and they support inheritance, even when the classes are defined in different packages.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://r6.r-lib.org, https://github.com/r-lib/R6",
+ "BugReports": "https://github.com/r-lib/R6/issues",
+ "Depends": [
+ "R (>= 3.6)"
+ ],
+ "Suggests": [
+ "lobstr",
+ "testthat (>= 3.0.0)"
+ ],
+ "Config/Needs/website": "tidyverse/tidytemplate, ggplot2, microbenchmark, scales",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.2",
+ "NeedsCompilation": "no",
+ "Author": "Winston Chang [aut, cre], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Winston Chang <winston@posit.co>",
+ "Repository": "CRAN"
+ },
+ "RColorBrewer": {
+ "Package": "RColorBrewer",
+ "Version": "1.1-3",
+ "Source": "Repository",
+ "Date": "2022-04-03",
+ "Title": "ColorBrewer Palettes",
+ "Authors@R": "c(person(given = \"Erich\", family = \"Neuwirth\", role = c(\"aut\", \"cre\"), email = \"erich.neuwirth@univie.ac.at\"))",
+ "Author": "Erich Neuwirth [aut, cre]",
+ "Maintainer": "Erich Neuwirth <erich.neuwirth@univie.ac.at>",
+ "Depends": [
+ "R (>= 2.0.0)"
+ ],
+ "Description": "Provides color schemes for maps (and other graphics) designed by Cynthia Brewer as described at http://colorbrewer2.org.",
+ "License": "Apache License 2.0",
+ "NeedsCompilation": "no",
+ "Repository": "CRAN"
+ },
+ "Rcpp": {
+ "Package": "Rcpp",
+ "Version": "1.1.1",
+ "Source": "Repository",
+ "Title": "Seamless R and C++ Integration",
+ "Date": "2026-01-07",
+ "Authors@R": "c(person(\"Dirk\", \"Eddelbuettel\", role = c(\"aut\", \"cre\"), email = \"edd@debian.org\", comment = c(ORCID = \"0000-0001-6419-907X\")), person(\"Romain\", \"Francois\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"JJ\", \"Allaire\", role = \"aut\", comment = c(ORCID = \"0000-0003-0174-9868\")), person(\"Kevin\", \"Ushey\", role = \"aut\", comment = c(ORCID = \"0000-0003-2880-7407\")), person(\"Qiang\", \"Kou\", role = \"aut\", comment = c(ORCID = \"0000-0001-6786-5453\")), person(\"Nathan\", \"Russell\", role = \"aut\"), person(\"Iñaki\", \"Ucar\", role = \"aut\", comment = c(ORCID = \"0000-0001-6403-5550\")), person(\"Doug\", \"Bates\", role = \"aut\", comment = c(ORCID = \"0000-0001-8316-9503\")), person(\"John\", \"Chambers\", role = \"aut\"))",
+ "Description": "The 'Rcpp' package provides R functions as well as C++ classes which offer a seamless integration of R and C++. Many R data types and objects can be mapped back and forth to C++ equivalents which facilitates both writing of new code as well as easier integration of third-party libraries. Documentation about 'Rcpp' is provided by several vignettes included in this package, via the 'Rcpp Gallery' site at , the paper by Eddelbuettel and Francois (2011, ), the book by Eddelbuettel (2013, ) and the paper by Eddelbuettel and Balamuta (2018, ); see 'citation(\"Rcpp\")' for details.",
+ "Depends": [
+ "R (>= 3.5.0)"
+ ],
+ "Imports": [
+ "methods",
+ "utils"
+ ],
+ "Suggests": [
+ "tinytest",
+ "inline",
+ "rbenchmark",
+ "pkgKitten (>= 0.1.2)"
+ ],
+ "URL": "https://www.rcpp.org, https://dirk.eddelbuettel.com/code/rcpp.html, https://github.com/RcppCore/Rcpp",
+ "License": "GPL (>= 2)",
+ "BugReports": "https://github.com/RcppCore/Rcpp/issues",
+ "MailingList": "rcpp-devel@lists.r-forge.r-project.org",
+ "RoxygenNote": "6.1.1",
+ "Encoding": "UTF-8",
+ "VignetteBuilder": "Rcpp",
+ "NeedsCompilation": "yes",
+ "Author": "Dirk Eddelbuettel [aut, cre] (ORCID: ), Romain Francois [aut] (ORCID: ), JJ Allaire [aut] (ORCID: ), Kevin Ushey [aut] (ORCID: ), Qiang Kou [aut] (ORCID: ), Nathan Russell [aut], Iñaki Ucar [aut] (ORCID: ), Doug Bates [aut] (ORCID: ), John Chambers [aut]",
+ "Maintainer": "Dirk Eddelbuettel <edd@debian.org>",
+ "Repository": "CRAN"
+ },
+ "RcppArmadillo": {
+ "Package": "RcppArmadillo",
+ "Version": "15.2.3-1",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "'Rcpp' Integration for the 'Armadillo' Templated Linear Algebra Library",
+ "Date": "2025-12-16",
+ "Authors@R": "c(person(\"Dirk\", \"Eddelbuettel\", role = c(\"aut\", \"cre\"), email = \"edd@debian.org\", comment = c(ORCID = \"0000-0001-6419-907X\")), person(\"Romain\", \"Francois\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"Doug\", \"Bates\", role = \"aut\", comment = c(ORCID = \"0000-0001-8316-9503\")), person(\"Binxiang\", \"Ni\", role = \"aut\"), person(\"Conrad\", \"Sanderson\", role = \"aut\", comment = c(ORCID = \"0000-0002-0049-4501\")))",
+ "Description": "'Armadillo' is a templated C++ linear algebra library aiming towards a good balance between speed and ease of use. It provides high-level syntax and functionality deliberately similar to Matlab. It is useful for algorithm development directly in C++, or quick conversion of research code into production environments. It provides efficient classes for vectors, matrices and cubes where dense and sparse matrices are supported. Integer, floating point and complex numbers are supported. A sophisticated expression evaluator (based on template meta-programming) automatically combines several operations to increase speed and efficiency. Dynamic evaluation automatically chooses optimal code paths based on detected matrix structures. Matrix decompositions are provided through integration with LAPACK, or one of its high performance drop-in replacements (such as 'MKL' or 'OpenBLAS'). It can automatically use 'OpenMP' multi-threading (parallelisation) to speed up computationally expensive operations. . The 'RcppArmadillo' package includes the header files from the 'Armadillo' library; users do not need to install 'Armadillo' itself in order to use 'RcppArmadillo'. Starting from release 15.0.0, the minimum compilation standard is C++14 so 'Armadillo' version 14.6.3 is included as a fallback when an R package forces the C++11 standard. Package authors should set a '#define' to select the 'current' version, or select the 'legacy' version (also chosen as default) if they must. See 'GitHub issue #475' for details. . Since release 7.800.0, 'Armadillo' is licensed under Apache License 2; previous releases were under licensed as MPL 2.0 from version 3.800.0 onwards and LGPL-3 prior to that; 'RcppArmadillo' (the 'Rcpp' bindings/bridge to Armadillo) is licensed under the GNU GPL version 2 or later, as is the rest of 'Rcpp'.",
+ "License": "GPL (>= 2)",
+ "LazyLoad": "yes",
+ "Depends": [
+ "R (>= 3.3.0)"
+ ],
+ "LinkingTo": [
+ "Rcpp"
+ ],
+ "Imports": [
+ "Rcpp (>= 1.0.12)",
+ "stats",
+ "utils",
+ "methods"
+ ],
+ "Suggests": [
+ "tinytest",
+ "Matrix (>= 1.3.0)",
+ "pkgKitten",
+ "reticulate",
+ "slam"
+ ],
+ "URL": "https://github.com/RcppCore/RcppArmadillo, https://dirk.eddelbuettel.com/code/rcpp.armadillo.html",
+ "BugReports": "https://github.com/RcppCore/RcppArmadillo/issues",
+ "RoxygenNote": "6.0.1",
+ "NeedsCompilation": "yes",
+ "Author": "Dirk Eddelbuettel [aut, cre] (ORCID: ), Romain Francois [aut] (ORCID: ), Doug Bates [aut] (ORCID: ), Binxiang Ni [aut], Conrad Sanderson [aut] (ORCID: )",
+ "Maintainer": "Dirk Eddelbuettel <edd@debian.org>",
+ "Repository": "CRAN"
+ },
+ "RcppEigen": {
+ "Package": "RcppEigen",
+ "Version": "0.3.4.0.2",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "'Rcpp' Integration for the 'Eigen' Templated Linear Algebra Library",
+ "Date": "2024-08-23",
+ "Authors@R": "c(person(\"Doug\", \"Bates\", role = \"aut\", comment = c(ORCID = \"0000-0001-8316-9503\")), person(\"Dirk\", \"Eddelbuettel\", role = c(\"aut\", \"cre\"), email = \"edd@debian.org\", comment = c(ORCID = \"0000-0001-6419-907X\")), person(\"Romain\", \"Francois\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"Yixuan\", \"Qiu\", role = \"aut\", comment = c(ORCID = \"0000-0003-0109-6692\")), person(\"Authors of\", \"Eigen\", role = \"cph\", comment = \"Authorship and copyright in included Eigen library as detailed in inst/COPYRIGHTS\"))",
+ "Copyright": "See the file COPYRIGHTS for various Eigen copyright details",
+ "Description": "R and 'Eigen' integration using 'Rcpp'. 'Eigen' is a C++ template library for linear algebra: matrices, vectors, numerical solvers and related algorithms. It supports dense and sparse matrices on integer, floating point and complex numbers, decompositions of such matrices, and solutions of linear systems. Its performance on many algorithms is comparable with some of the best implementations based on 'Lapack' and level-3 'BLAS'. The 'RcppEigen' package includes the header files from the 'Eigen' C++ template library. Thus users do not need to install 'Eigen' itself in order to use 'RcppEigen'. Since version 3.1.1, 'Eigen' is licensed under the Mozilla Public License (version 2); earlier version were licensed under the GNU LGPL version 3 or later. 'RcppEigen' (the 'Rcpp' bindings/bridge to 'Eigen') is licensed under the GNU GPL version 2 or later, as is the rest of 'Rcpp'.",
+ "License": "GPL (>= 2) | file LICENSE",
+ "LazyLoad": "yes",
+ "Depends": [
+ "R (>= 3.6.0)"
+ ],
+ "LinkingTo": [
+ "Rcpp"
+ ],
+ "Imports": [
+ "Rcpp (>= 0.11.0)",
+ "stats",
+ "utils"
+ ],
+ "Suggests": [
+ "Matrix",
+ "inline",
+ "tinytest",
+ "pkgKitten",
+ "microbenchmark"
+ ],
+ "URL": "https://github.com/RcppCore/RcppEigen, https://dirk.eddelbuettel.com/code/rcpp.eigen.html",
+ "BugReports": "https://github.com/RcppCore/RcppEigen/issues",
+ "NeedsCompilation": "yes",
+ "Author": "Doug Bates [aut] (), Dirk Eddelbuettel [aut, cre] (), Romain Francois [aut] (), Yixuan Qiu [aut] (), Authors of Eigen [cph] (Authorship and copyright in included Eigen library as detailed in inst/COPYRIGHTS)",
+ "Maintainer": "Dirk Eddelbuettel <edd@debian.org>",
+ "Repository": "CRAN"
+ },
+ "Rdpack": {
+ "Package": "Rdpack",
+ "Version": "2.6.6",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Update and Manipulate Rd Documentation Objects",
+ "Authors@R": "c( person(given = c(\"Georgi\", \"N.\"), family = \"Boshnakov\", role = c(\"aut\", \"cre\"), email = \"georgi.boshnakov@manchester.ac.uk\", comment = c(ORCID = \"0000-0003-2839-346X\")), person(given = \"Duncan\", family = \"Murdoch\", role = \"ctb\", email = \"murdoch.duncan@gmail.com\") )",
+ "Description": "Functions for manipulation of R documentation objects, including functions reprompt() and ereprompt() for updating 'Rd' documentation for functions, methods and classes; 'Rd' macros for citations and import of references from 'bibtex' files for use in 'Rd' files and 'roxygen2' comments; 'Rd' macros for evaluating and inserting snippets of 'R' code and the results of its evaluation or creating graphics on the fly; and many functions for manipulation of references and Rd files.",
+ "URL": "https://geobosh.github.io/Rdpack/ (doc), https://CRAN.R-project.org/package=Rdpack",
+ "BugReports": "https://github.com/GeoBosh/Rdpack/issues",
+ "Depends": [
+ "R (>= 2.15.0)",
+ "methods"
+ ],
+ "Imports": [
+ "tools",
+ "utils",
+ "rbibutils (> 2.4)"
+ ],
+ "Suggests": [
+ "grDevices",
+ "testthat",
+ "rstudioapi",
+ "rprojroot",
+ "gbRd"
+ ],
+ "License": "GPL (>= 2)",
+ "LazyLoad": "yes",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.1.1",
+ "NeedsCompilation": "no",
+ "Author": "Georgi N. Boshnakov [aut, cre] (ORCID: ), Duncan Murdoch [ctb]",
+ "Maintainer": "Georgi N. Boshnakov <georgi.boshnakov@manchester.ac.uk>",
+ "Repository": "CRAN"
+ },
+ "SparseM": {
+ "Package": "SparseM",
+ "Version": "1.84-2",
+ "Source": "Repository",
+ "Authors@R": "c( person(\"Roger\", \"Koenker\", role = c(\"cre\",\"aut\"), email = \"rkoenker@uiuc.edu\"), person(c(\"Pin\", \"Tian\"), \"Ng\", role = c(\"ctb\"), comment = \"Contributions to Sparse QR code\", email = \"pin.ng@nau.edu\") , person(\"Yousef\", \"Saad\", role = c(\"ctb\"), comment = \"author of sparskit2\") , person(\"Ben\", \"Shaby\", role = c(\"ctb\"), comment = \"author of chol2csr\") , person(\"Martin\", \"Maechler\", role = \"ctb\", comment = c(\"chol() tweaks; S4\", ORCID = \"0000-0002-8685-9910\")) )",
+ "Maintainer": "Roger Koenker <rkoenker@uiuc.edu>",
+ "Depends": [
+ "R (>= 2.15)",
+ "methods"
+ ],
+ "Imports": [
+ "graphics",
+ "stats",
+ "utils"
+ ],
+ "VignetteBuilder": "knitr",
+ "Suggests": [
+ "knitr"
+ ],
+ "Description": "Some basic linear algebra functionality for sparse matrices is provided: including Cholesky decomposition and backsolving as well as standard R subsetting and Kronecker products.",
+ "License": "GPL (>= 2)",
+ "Title": "Sparse Linear Algebra",
+ "URL": "http://www.econ.uiuc.edu/~roger/research/sparse/sparse.html",
+ "NeedsCompilation": "yes",
+ "Author": "Roger Koenker [cre, aut], Pin Tian Ng [ctb] (Contributions to Sparse QR code), Yousef Saad [ctb] (author of sparskit2), Ben Shaby [ctb] (author of chol2csr), Martin Maechler [ctb] (chol() tweaks; S4, )",
+ "Repository": "RSPM",
+ "Encoding": "UTF-8"
+ },
+ "TH.data": {
+ "Package": "TH.data",
+ "Version": "1.1-5",
+ "Source": "Repository",
+ "Title": "TH's Data Archive",
+ "Date": "2025-11-17",
+ "Authors@R": "c(person(\"Torsten\", \"Hothorn\", role = c(\"aut\", \"cre\"), email = \"Torsten.Hothorn@R-project.org\"))",
+ "Description": "Contains data sets used in other packages Torsten Hothorn maintains.",
+ "Depends": [
+ "R (>= 3.5.0)",
+ "survival",
+ "MASS"
+ ],
+ "Suggests": [
+ "trtf",
+ "tram",
+ "rms",
+ "coin",
+ "ATR",
+ "multcomp",
+ "gridExtra",
+ "vcd",
+ "colorspace",
+ "lattice",
+ "knitr",
+ "dplyr",
+ "openxlsx",
+ "plyr"
+ ],
+ "LazyData": "yes",
+ "VignetteBuilder": "knitr",
+ "License": "GPL-3",
+ "NeedsCompilation": "no",
+ "Author": "Torsten Hothorn [aut, cre]",
+ "Maintainer": "Torsten Hothorn <Torsten.Hothorn@R-project.org>",
+ "Repository": "CRAN"
+ },
+ "V8": {
+ "Package": "V8",
+ "Version": "8.0.1",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Embedded JavaScript and WebAssembly Engine for R",
+ "Authors@R": "c( person(\"Jeroen\", \"Ooms\", role = c(\"aut\", \"cre\"), email = \"jeroenooms@gmail.com\", comment = c(ORCID = \"0000-0002-4035-0289\")), person(\"George\", \"Stagg\", role = \"ctb\", comment = c(ORCID = \"0009-0006-3173-9846\")), person(\"Jan Marvin\", \"Garbuszus\", role = \"ctb\"))",
+ "Description": "An R interface to V8 : Google's open source JavaScript and WebAssembly engine. This package can be compiled either with V8 or NodeJS when built as a shared library.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://jeroen.r-universe.dev/V8",
+ "BugReports": "https://github.com/jeroen/v8/issues",
+ "SystemRequirements": "On Linux you can build against libv8-dev (Debian) or v8-devel (Fedora). We also provide static libv8 binaries for most platforms, see the README for details.",
+ "NeedsCompilation": "yes",
+ "VignetteBuilder": "knitr",
+ "Imports": [
+ "Rcpp (>= 0.12.12)",
+ "jsonlite (>= 1.0)",
+ "curl (>= 1.0)",
+ "utils"
+ ],
+ "LinkingTo": [
+ "Rcpp"
+ ],
+ "Suggests": [
+ "testthat",
+ "knitr",
+ "rmarkdown"
+ ],
+ "RoxygenNote": "7.3.1",
+ "Language": "en-US",
+ "Encoding": "UTF-8",
+ "Biarch": "true",
+ "Author": "Jeroen Ooms [aut, cre] (ORCID: ), George Stagg [ctb] (ORCID: ), Jan Marvin Garbuszus [ctb]",
+ "Maintainer": "Jeroen Ooms <jeroenooms@gmail.com>",
+ "Repository": "CRAN"
+ },
+ "askpass": {
+ "Package": "askpass",
+ "Version": "1.2.1",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Password Entry Utilities for R, Git, and SSH",
+ "Authors@R": "person(\"Jeroen\", \"Ooms\", role = c(\"aut\", \"cre\"), email = \"jeroenooms@gmail.com\", comment = c(ORCID = \"0000-0002-4035-0289\"))",
+ "Description": "Cross-platform utilities for prompting the user for credentials or a passphrase, for example to authenticate with a server or read a protected key. Includes native programs for MacOS and Windows, hence no 'tcltk' is required. Password entry can be invoked in two different ways: directly from R via the askpass() function, or indirectly as password-entry back-end for 'ssh-agent' or 'git-credential' via the SSH_ASKPASS and GIT_ASKPASS environment variables. Thereby the user can be prompted for credentials or a passphrase if needed when R calls out to git or ssh.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://r-lib.r-universe.dev/askpass",
+ "BugReports": "https://github.com/r-lib/askpass/issues",
+ "Encoding": "UTF-8",
+ "Imports": [
+ "sys (>= 2.1)"
+ ],
+ "RoxygenNote": "7.2.3",
+ "Suggests": [
+ "testthat"
+ ],
+ "Language": "en-US",
+ "NeedsCompilation": "yes",
+ "Author": "Jeroen Ooms [aut, cre] ()",
+ "Maintainer": "Jeroen Ooms <jeroenooms@gmail.com>",
+ "Repository": "CRAN"
+ },
+ "backports": {
+ "Package": "backports",
+ "Version": "1.5.0",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Reimplementations of Functions Introduced Since R-3.0.0",
+ "Authors@R": "c( person(\"Michel\", \"Lang\", NULL, \"michellang@gmail.com\", role = c(\"cre\", \"aut\"), comment = c(ORCID = \"0000-0001-9754-0393\")), person(\"Duncan\", \"Murdoch\", NULL, \"murdoch.duncan@gmail.com\", role = c(\"aut\")), person(\"R Core Team\", role = \"aut\"))",
+ "Maintainer": "Michel Lang <michellang@gmail.com>",
+ "Description": "Functions introduced or changed since R v3.0.0 are re-implemented in this package. The backports are conditionally exported in order to let R resolve the function name to either the implemented backport, or the respective base version, if available. Package developers can make use of new functions or arguments by selectively importing specific backports to support older installations.",
+ "URL": "https://github.com/r-lib/backports",
+ "BugReports": "https://github.com/r-lib/backports/issues",
+ "License": "GPL-2 | GPL-3",
+ "NeedsCompilation": "yes",
+ "ByteCompile": "yes",
+ "Depends": [
+ "R (>= 3.0.0)"
+ ],
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.1",
+ "Author": "Michel Lang [cre, aut] (), Duncan Murdoch [aut], R Core Team [aut]",
+ "Repository": "CRAN"
+ },
+ "base64enc": {
+ "Package": "base64enc",
+ "Version": "0.1-3",
+ "Source": "Repository",
+ "Title": "Tools for base64 encoding",
+ "Author": "Simon Urbanek ",
+ "Maintainer": "Simon Urbanek ",
+ "Depends": [
+ "R (>= 2.9.0)"
+ ],
+ "Enhances": [
+ "png"
+ ],
+ "Description": "This package provides tools for handling base64 encoding. It is more flexible than the orphaned base64 package.",
+ "License": "GPL-2 | GPL-3",
+ "URL": "http://www.rforge.net/base64enc",
+ "NeedsCompilation": "yes",
+ "Repository": "CRAN"
+ },
+ "base64url": {
+ "Package": "base64url",
+ "Version": "1.4",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Fast and URL-Safe Base64 Encoder and Decoder",
+ "Authors@R": "c( person(\"Michel\", \"Lang\", NULL, \"michellang@gmail.com\", role = c(\"cre\", \"aut\"), comment = c(ORCID = \"0000-0001-9754-0393\")), person(NULL, \"Apache Foundation\", NULL, NULL, role = c(\"ctb\", \"cph\")), person(NULL, \"Free Software Foundation\", NULL, NULL, role = c(\"ctb\", \"cph\")) )",
+ "Description": "In contrast to RFC3548, the 62nd character (\"+\") is replaced with \"-\", the 63rd character (\"/\") is replaced with \"_\". Furthermore, the encoder does not fill the string with trailing \"=\". The resulting encoded strings comply to the regular expression pattern \"[A-Za-z0-9_-]\" and thus are safe to use in URLs or for file names. The package also comes with a simple base32 encoder/decoder suited for case insensitive file systems.",
+ "URL": "https://github.com/mllg/base64url",
+ "BugReports": "https://github.com/mllg/base64url/issues",
+ "NeedsCompilation": "yes",
+ "License": "GPL-3",
+ "Encoding": "UTF-8",
+ "Imports": [
+ "backports (>= 1.1.0)"
+ ],
+ "Suggests": [
+ "base64enc",
+ "checkmate",
+ "knitr",
+ "microbenchmark",
+ "openssl",
+ "rmarkdown",
+ "testthat"
+ ],
+ "RoxygenNote": "6.0.1",
+ "VignetteBuilder": "knitr",
+ "Author": "Michel Lang [cre, aut] (), Apache Foundation [ctb, cph], Free Software Foundation [ctb, cph]",
+ "Maintainer": "Michel Lang <michellang@gmail.com>",
+ "Repository": "CRAN"
+ },
+ "bigD": {
+ "Package": "bigD",
+ "Version": "0.3.1",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Flexibly Format Dates and Times to a Given Locale",
+ "Description": "Format dates and times flexibly and to whichever locales make sense. Parses dates, times, and date-times in various formats (including string-based ISO 8601 constructions). The formatting syntax gives the user many options for formatting the date and time output in a precise manner. Time zones in the input can be expressed in multiple ways and there are many options for formatting time zones in the output as well. Several of the provided helper functions allow for automatic generation of locale-aware formatting patterns based on date/time skeleton formats and standardized date/time formats with varying specificity.",
+ "Authors@R": "c( person(\"Richard\", \"Iannone\", , \"rich@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0003-3925-190X\")), person(\"Olivier\", \"Roy\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "License": "MIT + file LICENSE",
+ "URL": "https://rstudio.github.io/bigD/, https://github.com/rstudio/bigD",
+ "BugReports": "https://github.com/rstudio/bigD/issues",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.2",
+ "Depends": [
+ "R (>= 3.6.0)"
+ ],
+ "Suggests": [
+ "testthat (>= 3.0.0)",
+ "vctrs (>= 0.5.0)"
+ ],
+ "Config/testthat/edition": "3",
+ "Config/testthat/parallel": "true",
+ "NeedsCompilation": "no",
+ "Author": "Richard Iannone [aut, cre] (), Olivier Roy [ctb], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Richard Iannone <rich@posit.co>",
+ "Repository": "CRAN"
+ },
+ "bit": {
+ "Package": "bit",
+ "Version": "4.6.0",
+ "Source": "Repository",
+ "Title": "Classes and Methods for Fast Memory-Efficient Boolean Selections",
+ "Authors@R": "c( person(\"Michael\", \"Chirico\", email = \"MichaelChirico4@gmail.com\", role = c(\"aut\", \"cre\")), person(\"Jens\", \"Oehlschlägel\", role = \"aut\"), person(\"Brian\", \"Ripley\", role = \"ctb\") )",
+ "Depends": [
+ "R (>= 3.4.0)"
+ ],
+ "Suggests": [
+ "testthat (>= 3.0.0)",
+ "roxygen2",
+ "knitr",
+ "markdown",
+ "rmarkdown",
+ "microbenchmark",
+ "bit64 (>= 4.0.0)",
+ "ff (>= 4.0.0)"
+ ],
+ "Description": "Provided are classes for boolean and skewed boolean vectors, fast boolean methods, fast unique and non-unique integer sorting, fast set operations on sorted and unsorted sets of integers, and foundations for ff (range index, compression, chunked processing).",
+ "License": "GPL-2 | GPL-3",
+ "LazyLoad": "yes",
+ "ByteCompile": "yes",
+ "Encoding": "UTF-8",
+ "URL": "https://github.com/r-lib/bit",
+ "VignetteBuilder": "knitr, rmarkdown",
+ "RoxygenNote": "7.3.2",
+ "Config/testthat/edition": "3",
+ "NeedsCompilation": "yes",
+ "Author": "Michael Chirico [aut, cre], Jens Oehlschlägel [aut], Brian Ripley [ctb]",
+ "Maintainer": "Michael Chirico ",
+ "Repository": "CRAN"
+ },
+ "bit64": {
+ "Package": "bit64",
+ "Version": "4.6.0-1",
+ "Source": "Repository",
+ "Title": "A S3 Class for Vectors of 64bit Integers",
+ "Authors@R": "c( person(\"Michael\", \"Chirico\", email = \"michaelchirico4@gmail.com\", role = c(\"aut\", \"cre\")), person(\"Jens\", \"Oehlschlägel\", role = \"aut\"), person(\"Leonardo\", \"Silvestri\", role = \"ctb\"), person(\"Ofek\", \"Shilon\", role = \"ctb\") )",
+ "Depends": [
+ "R (>= 3.4.0)",
+ "bit (>= 4.0.0)"
+ ],
+ "Description": "Package 'bit64' provides serializable S3 atomic 64bit (signed) integers. These are useful for handling database keys and exact counting in +-2^63. WARNING: do not use them as replacement for 32bit integers, integer64 are not supported for subscripting by R-core and they have different semantics when combined with double, e.g. integer64 + double => integer64. Class integer64 can be used in vectors, matrices, arrays and data.frames. Methods are available for coercion from and to logicals, integers, doubles, characters and factors as well as many elementwise and summary functions. Many fast algorithmic operations such as 'match' and 'order' support inter- active data exploration and manipulation and optionally leverage caching.",
+ "License": "GPL-2 | GPL-3",
+ "LazyLoad": "yes",
+ "ByteCompile": "yes",
+ "URL": "https://github.com/r-lib/bit64",
+ "Encoding": "UTF-8",
+ "Imports": [
+ "graphics",
+ "methods",
+ "stats",
+ "utils"
+ ],
+ "Suggests": [
+ "testthat (>= 3.0.3)",
+ "withr"
+ ],
+ "Config/testthat/edition": "3",
+ "Config/needs/development": "testthat",
+ "RoxygenNote": "7.3.2",
+ "NeedsCompilation": "yes",
+ "Author": "Michael Chirico [aut, cre], Jens Oehlschlägel [aut], Leonardo Silvestri [ctb], Ofek Shilon [ctb]",
+ "Maintainer": "Michael Chirico ",
+ "Repository": "CRAN"
+ },
+ "bitops": {
+ "Package": "bitops",
+ "Version": "1.0-9",
+ "Source": "Repository",
+ "Date": "2024-10-03",
+ "Authors@R": "c( person(\"Steve\", \"Dutky\", role = \"aut\", email = \"sdutky@terpalum.umd.edu\", comment = \"S original; then (after MM's port) revised and modified\"), person(\"Martin\", \"Maechler\", role = c(\"cre\", \"aut\"), email = \"maechler@stat.math.ethz.ch\", comment = c(\"Initial R port; tweaks\", ORCID = \"0000-0002-8685-9910\")))",
+ "Title": "Bitwise Operations",
+ "Description": "Functions for bitwise operations on integer vectors.",
+ "License": "GPL (>= 2)",
+ "URL": "https://github.com/mmaechler/R-bitops",
+ "BugReports": "https://github.com/mmaechler/R-bitops/issues",
+ "NeedsCompilation": "yes",
+ "Author": "Steve Dutky [aut] (S original; then (after MM's port) revised and modified), Martin Maechler [cre, aut] (Initial R port; tweaks, )",
+ "Maintainer": "Martin Maechler ",
+ "Repository": "CRAN"
+ },
+ "boot": {
+ "Package": "boot",
+ "Version": "1.3-31",
+ "Source": "Repository",
+ "Priority": "recommended",
+ "Date": "2024-08-28",
+ "Authors@R": "c(person(\"Angelo\", \"Canty\", role = \"aut\", email = \"cantya@mcmaster.ca\", comment = \"author of original code for S\"), person(\"Brian\", \"Ripley\", role = c(\"aut\", \"trl\"), email = \"ripley@stats.ox.ac.uk\", comment = \"conversion to R, maintainer 1999--2022, author of parallel support\"), person(\"Alessandra R.\", \"Brazzale\", role = c(\"ctb\", \"cre\"), email = \"brazzale@stat.unipd.it\", comment = \"minor bug fixes\"))",
+ "Maintainer": "Alessandra R. Brazzale ",
+ "Note": "Maintainers are not available to give advice on using a package they did not author.",
+ "Description": "Functions and datasets for bootstrapping from the book \"Bootstrap Methods and Their Application\" by A. C. Davison and D. V. Hinkley (1997, CUP), originally written by Angelo Canty for S.",
+ "Title": "Bootstrap Functions (Originally by Angelo Canty for S)",
+ "Depends": [
+ "R (>= 3.0.0)",
+ "graphics",
+ "stats"
+ ],
+ "Suggests": [
+ "MASS",
+ "survival"
+ ],
+ "LazyData": "yes",
+ "ByteCompile": "yes",
+ "License": "Unlimited",
+ "NeedsCompilation": "no",
+ "Author": "Angelo Canty [aut] (author of original code for S), Brian Ripley [aut, trl] (conversion to R, maintainer 1999--2022, author of parallel support), Alessandra R. Brazzale [ctb, cre] (minor bug fixes)",
+ "Repository": "CRAN"
+ },
+ "brew": {
+ "Package": "brew",
+ "Version": "1.0-10",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Templating Framework for Report Generation",
+ "Authors@R": "c( person(\"Jeffrey\", \"Horner\", role = c(\"aut\", \"cph\")), person(\"Greg\", \"Hunt\", , \"greg@firmansyah.com\", role = c(\"aut\", \"cre\", \"cph\")) )",
+ "Description": "Implements a templating framework for mixing text and R code for report generation. brew template syntax is similar to PHP, Ruby's erb module, Java Server Pages, and Python's psp module.",
+ "License": "GPL (>= 2)",
+ "URL": "https://github.com/gregfrog/brew",
+ "BugReports": "https://github.com/gregfrog/brew/issues",
+ "Suggests": [
+ "testthat (>= 3.0.0)"
+ ],
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "Repository": "CRAN",
+ "NeedsCompilation": "no",
+ "Author": "Jeffrey Horner [aut, cph], Greg Hunt [aut, cre, cph]",
+ "Maintainer": "Greg Hunt "
+ },
+ "brio": {
+ "Package": "brio",
+ "Version": "1.1.5",
+ "Source": "Repository",
+ "Title": "Basic R Input Output",
+ "Authors@R": "c( person(\"Jim\", \"Hester\", role = \"aut\", comment = c(ORCID = \"0000-0002-2739-7082\")), person(\"Gábor\", \"Csárdi\", , \"csardi.gabor@gmail.com\", role = c(\"aut\", \"cre\")), person(given = \"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Functions to handle basic input output, these functions always read and write UTF-8 (8-bit Unicode Transformation Format) files and provide more explicit control over line endings.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://brio.r-lib.org, https://github.com/r-lib/brio",
+ "BugReports": "https://github.com/r-lib/brio/issues",
+ "Depends": [
+ "R (>= 3.6)"
+ ],
+ "Suggests": [
+ "covr",
+ "testthat (>= 3.0.0)"
+ ],
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.2.3",
+ "NeedsCompilation": "yes",
+ "Author": "Jim Hester [aut] (), Gábor Csárdi [aut, cre], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Gábor Csárdi ",
+ "Repository": "CRAN"
+ },
+ "broom": {
+ "Package": "broom",
+ "Version": "1.0.12",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Convert Statistical Objects into Tidy Tibbles",
+ "Authors@R": "c( person(\"David\", \"Robinson\", , \"admiral.david@gmail.com\", role = \"aut\"), person(\"Alex\", \"Hayes\", , \"alexpghayes@gmail.com\", role = \"aut\", comment = c(ORCID = \"0000-0002-4985-5160\")), person(\"Simon\", \"Couch\", , \"simon.couch@posit.co\", role = c(\"aut\"), comment = c(ORCID = \"0000-0001-5676-5107\")), person(\"Emil\", \"Hvitfeldt\", , \"emil.hvitfeldt@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-0679-1945\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")), person(\"Indrajeet\", \"Patil\", , \"patilindrajeet.science@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0003-1995-6531\")), person(\"Derek\", \"Chiu\", , \"dchiu@bccrc.ca\", role = \"ctb\"), person(\"Matthieu\", \"Gomez\", , \"mattg@princeton.edu\", role = \"ctb\"), person(\"Boris\", \"Demeshev\", , \"boris.demeshev@gmail.com\", role = \"ctb\"), person(\"Dieter\", \"Menne\", , \"dieter.menne@menne-biomed.de\", role = \"ctb\"), person(\"Benjamin\", \"Nutter\", , \"nutter@battelle.org\", role = \"ctb\"), person(\"Luke\", \"Johnston\", , \"luke.johnston@mail.utoronto.ca\", role = \"ctb\"), person(\"Ben\", \"Bolker\", , \"bolker@mcmaster.ca\", role = \"ctb\"), person(\"Francois\", \"Briatte\", , \"f.briatte@gmail.com\", role = \"ctb\"), person(\"Jeffrey\", \"Arnold\", , \"jeffrey.arnold@gmail.com\", role = \"ctb\"), person(\"Jonah\", \"Gabry\", , \"jsg2201@columbia.edu\", role = \"ctb\"), person(\"Luciano\", \"Selzer\", , \"luciano.selzer@gmail.com\", role = \"ctb\"), person(\"Gavin\", \"Simpson\", , \"ucfagls@gmail.com\", role = \"ctb\"), person(\"Jens\", \"Preussner\", , \"jens.preussner@mpi-bn.mpg.de\", role = \"ctb\"), person(\"Jay\", \"Hesselberth\", , \"jay.hesselberth@gmail.com\", role = \"ctb\"), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"ctb\"), person(\"Matthew\", \"Lincoln\", , \"matthew.d.lincoln@gmail.com\", role = \"ctb\"), person(\"Alessandro\", 
\"Gasparini\", , \"ag475@leicester.ac.uk\", role = \"ctb\"), person(\"Lukasz\", \"Komsta\", , \"lukasz.komsta@umlub.pl\", role = \"ctb\"), person(\"Frederick\", \"Novometsky\", role = \"ctb\"), person(\"Wilson\", \"Freitas\", role = \"ctb\"), person(\"Michelle\", \"Evans\", role = \"ctb\"), person(\"Jason Cory\", \"Brunson\", , \"cornelioid@gmail.com\", role = \"ctb\"), person(\"Simon\", \"Jackson\", , \"drsimonjackson@gmail.com\", role = \"ctb\"), person(\"Ben\", \"Whalley\", , \"ben.whalley@plymouth.ac.uk\", role = \"ctb\"), person(\"Karissa\", \"Whiting\", , \"karissa.whiting@gmail.com\", role = \"ctb\"), person(\"Yves\", \"Rosseel\", , \"yrosseel@gmail.com\", role = \"ctb\"), person(\"Michael\", \"Kuehn\", , \"mkuehn10@gmail.com\", role = \"ctb\"), person(\"Jorge\", \"Cimentada\", , \"cimentadaj@gmail.com\", role = \"ctb\"), person(\"Erle\", \"Holgersen\", , \"erle.holgersen@gmail.com\", role = \"ctb\"), person(\"Karl\", \"Dunkle Werner\", role = \"ctb\", comment = c(ORCID = \"0000-0003-0523-7309\")), person(\"Ethan\", \"Christensen\", , \"christensen.ej@gmail.com\", role = \"ctb\"), person(\"Steven\", \"Pav\", , \"shabbychef@gmail.com\", role = \"ctb\"), person(\"Paul\", \"PJ\", , \"pjpaul.stephens@gmail.com\", role = \"ctb\"), person(\"Ben\", \"Schneider\", , \"benjamin.julius.schneider@gmail.com\", role = \"ctb\"), person(\"Patrick\", \"Kennedy\", , \"pkqstr@protonmail.com\", role = \"ctb\"), person(\"Lily\", \"Medina\", , \"lilymiru@gmail.com\", role = \"ctb\"), person(\"Brian\", \"Fannin\", , \"captain@pirategrunt.com\", role = \"ctb\"), person(\"Jason\", \"Muhlenkamp\", , \"jason.muhlenkamp@gmail.com\", role = \"ctb\"), person(\"Matt\", \"Lehman\", role = \"ctb\"), person(\"Bill\", \"Denney\", , \"wdenney@humanpredictions.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-5759-428X\")), person(\"Nic\", \"Crane\", role = \"ctb\"), person(\"Andrew\", \"Bates\", role = \"ctb\"), person(\"Vincent\", \"Arel-Bundock\", , 
\"vincent.arel-bundock@umontreal.ca\", role = \"ctb\", comment = c(ORCID = \"0000-0003-2042-7063\")), person(\"Hideaki\", \"Hayashi\", role = \"ctb\"), person(\"Luis\", \"Tobalina\", role = \"ctb\"), person(\"Annie\", \"Wang\", , \"anniewang.uc@gmail.com\", role = \"ctb\"), person(\"Wei Yang\", \"Tham\", , \"weiyang.tham@gmail.com\", role = \"ctb\"), person(\"Clara\", \"Wang\", , \"clara.wang.94@gmail.com\", role = \"ctb\"), person(\"Abby\", \"Smith\", , \"als1@u.northwestern.edu\", role = \"ctb\", comment = c(ORCID = \"0000-0002-3207-0375\")), person(\"Jasper\", \"Cooper\", , \"jaspercooper@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-8639-3188\")), person(\"E Auden\", \"Krauska\", , \"krauskae@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-1466-5850\")), person(\"Alex\", \"Wang\", , \"x249wang@uwaterloo.ca\", role = \"ctb\"), person(\"Malcolm\", \"Barrett\", , \"malcolmbarrett@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0003-0299-5825\")), person(\"Charles\", \"Gray\", , \"charlestigray@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-9978-011X\")), person(\"Jared\", \"Wilber\", role = \"ctb\"), person(\"Vilmantas\", \"Gegzna\", , \"GegznaV@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-9500-5167\")), person(\"Eduard\", \"Szoecs\", , \"eduardszoecs@gmail.com\", role = \"ctb\"), person(\"Frederik\", \"Aust\", , \"frederik.aust@uni-koeln.de\", role = \"ctb\", comment = c(ORCID = \"0000-0003-4900-788X\")), person(\"Angus\", \"Moore\", , \"angusmoore9@gmail.com\", role = \"ctb\"), person(\"Nick\", \"Williams\", , \"ntwilliams.personal@gmail.com\", role = \"ctb\"), person(\"Marius\", \"Barth\", , \"marius.barth.uni.koeln@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-3421-6665\")), person(\"Bruna\", \"Wundervald\", , \"brunadaviesw@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0001-8163-220X\")), person(\"Joyce\", \"Cahoon\", , \"joyceyu48@gmail.com\", role = \"ctb\", comment = 
c(ORCID = \"0000-0001-7217-4702\")), person(\"Grant\", \"McDermott\", , \"grantmcd@uoregon.edu\", role = \"ctb\", comment = c(ORCID = \"0000-0001-7883-8573\")), person(\"Kevin\", \"Zarca\", , \"kevin.zarca@gmail.com\", role = \"ctb\"), person(\"Shiro\", \"Kuriwaki\", , \"shirokuriwaki@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-5687-2647\")), person(\"Lukas\", \"Wallrich\", , \"lukas.wallrich@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0003-2121-5177\")), person(\"James\", \"Martherus\", , \"james@martherus.com\", role = \"ctb\", comment = c(ORCID = \"0000-0002-8285-3300\")), person(\"Chuliang\", \"Xiao\", , \"cxiao@umich.edu\", role = \"ctb\", comment = c(ORCID = \"0000-0002-8466-9398\")), person(\"Joseph\", \"Larmarange\", , \"joseph@larmarange.net\", role = \"ctb\"), person(\"Max\", \"Kuhn\", , \"max@posit.co\", role = \"ctb\"), person(\"Michal\", \"Bojanowski\", , \"michal2992@gmail.com\", role = \"ctb\"), person(\"Hakon\", \"Malmedal\", , \"hmalmedal@gmail.com\", role = \"ctb\"), person(\"Clara\", \"Wang\", role = \"ctb\"), person(\"Sergio\", \"Oller\", , \"sergioller@gmail.com\", role = \"ctb\"), person(\"Luke\", \"Sonnet\", , \"luke.sonnet@gmail.com\", role = \"ctb\"), person(\"Jim\", \"Hester\", , \"jim.hester@posit.co\", role = \"ctb\"), person(\"Ben\", \"Schneider\", , \"benjamin.julius.schneider@gmail.com\", role = \"ctb\"), person(\"Bernie\", \"Gray\", , \"bfgray3@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0001-9190-6032\")), person(\"Mara\", \"Averick\", , \"mara@posit.co\", role = \"ctb\"), person(\"Aaron\", \"Jacobs\", , \"atheriel@gmail.com\", role = \"ctb\"), person(\"Andreas\", \"Bender\", , \"bender.at.R@gmail.com\", role = \"ctb\"), person(\"Sven\", \"Templer\", , \"sven.templer@gmail.com\", role = \"ctb\"), person(\"Paul-Christian\", \"Buerkner\", , \"paul.buerkner@gmail.com\", role = \"ctb\"), person(\"Matthew\", \"Kay\", , \"mjskay@umich.edu\", role = \"ctb\"), person(\"Erwan\", \"Le Pennec\", , 
\"lepennec@gmail.com\", role = \"ctb\"), person(\"Johan\", \"Junkka\", , \"johan.junkka@umu.se\", role = \"ctb\"), person(\"Hao\", \"Zhu\", , \"haozhu233@gmail.com\", role = \"ctb\"), person(\"Benjamin\", \"Soltoff\", , \"soltoffbc@uchicago.edu\", role = \"ctb\"), person(\"Zoe\", \"Wilkinson Saldana\", , \"zoewsaldana@gmail.com\", role = \"ctb\"), person(\"Tyler\", \"Littlefield\", , \"tylurp1@gmail.com\", role = \"ctb\"), person(\"Charles T.\", \"Gray\", , \"charlestigray@gmail.com\", role = \"ctb\"), person(\"Shabbh E.\", \"Banks\", role = \"ctb\"), person(\"Serina\", \"Robinson\", , \"robi0916@umn.edu\", role = \"ctb\"), person(\"Roger\", \"Bivand\", , \"Roger.Bivand@nhh.no\", role = \"ctb\"), person(\"Riinu\", \"Ots\", , \"riinuots@gmail.com\", role = \"ctb\"), person(\"Nicholas\", \"Williams\", , \"ntwilliams.personal@gmail.com\", role = \"ctb\"), person(\"Nina\", \"Jakobsen\", role = \"ctb\"), person(\"Michael\", \"Weylandt\", , \"michael.weylandt@gmail.com\", role = \"ctb\"), person(\"Lisa\", \"Lendway\", , \"llendway@macalester.edu\", role = \"ctb\"), person(\"Karl\", \"Hailperin\", , \"khailper@gmail.com\", role = \"ctb\"), person(\"Josue\", \"Rodriguez\", , \"jerrodriguez@ucdavis.edu\", role = \"ctb\"), person(\"Jenny\", \"Bryan\", , \"jenny@posit.co\", role = \"ctb\"), person(\"Chris\", \"Jarvis\", , \"Christopher1.jarvis@gmail.com\", role = \"ctb\"), person(\"Greg\", \"Macfarlane\", , \"gregmacfarlane@gmail.com\", role = \"ctb\"), person(\"Brian\", \"Mannakee\", , \"bmannakee@gmail.com\", role = \"ctb\"), person(\"Drew\", \"Tyre\", , \"atyre2@unl.edu\", role = \"ctb\"), person(\"Shreyas\", \"Singh\", , \"shreyas.singh.298@gmail.com\", role = \"ctb\"), person(\"Laurens\", \"Geffert\", , \"laurensgeffert@gmail.com\", role = \"ctb\"), person(\"Hong\", \"Ooi\", , \"hongooi@microsoft.com\", role = \"ctb\"), person(\"Henrik\", \"Bengtsson\", , \"henrikb@braju.com\", role = \"ctb\"), person(\"Eduard\", \"Szocs\", , \"eduardszoecs@gmail.com\", role = \"ctb\"), 
person(\"David\", \"Hugh-Jones\", , \"davidhughjones@gmail.com\", role = \"ctb\"), person(\"Matthieu\", \"Stigler\", , \"Matthieu.Stigler@gmail.com\", role = \"ctb\"), person(\"Hugo\", \"Tavares\", , \"hm533@cam.ac.uk\", role = \"ctb\", comment = c(ORCID = \"0000-0001-9373-2726\")), person(\"R. Willem\", \"Vervoort\", , \"Willemvervoort@gmail.com\", role = \"ctb\"), person(\"Brenton M.\", \"Wiernik\", , \"brenton@wiernik.org\", role = \"ctb\"), person(\"Josh\", \"Yamamoto\", , \"joshuayamamoto5@gmail.com\", role = \"ctb\"), person(\"Jasme\", \"Lee\", role = \"ctb\"), person(\"Taren\", \"Sanders\", , \"taren.sanders@acu.edu.au\", role = \"ctb\", comment = c(ORCID = \"0000-0002-4504-6008\")), person(\"Ilaria\", \"Prosdocimi\", , \"prosdocimi.ilaria@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0001-8565-094X\")), person(\"Daniel D.\", \"Sjoberg\", , \"danield.sjoberg@gmail.com\", role = \"ctb\", comment = c(ORCID = \"0000-0003-0862-2018\")), person(\"Alex\", \"Reinhart\", , \"areinhar@stat.cmu.edu\", role = \"ctb\", comment = c(ORCID = \"0000-0002-6658-514X\")) )",
+ "Description": "Summarizes key information about statistical objects in tidy tibbles. This makes it easy to report results, create plots and consistently work with large numbers of models at once. Broom provides three verbs that each provide different types of information about a model. tidy() summarizes information about model components such as coefficients of a regression. glance() reports information about an entire model, such as goodness of fit measures like AIC and BIC. augment() adds information about individual observations to a dataset, such as fitted values or influence measures.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://broom.tidymodels.org/, https://github.com/tidymodels/broom",
+ "BugReports": "https://github.com/tidymodels/broom/issues",
+ "Depends": [
+ "R (>= 4.1)"
+ ],
+ "Imports": [
+ "backports",
+ "cli",
+ "dplyr (>= 1.0.0)",
+ "generics (>= 0.0.2)",
+ "glue",
+ "lifecycle",
+ "purrr",
+ "rlang (>= 1.1.0)",
+ "stringr",
+ "tibble (>= 3.0.0)",
+ "tidyr (>= 1.0.0)"
+ ],
+ "Suggests": [
+ "AER",
+ "AUC",
+ "bbmle",
+ "betareg (>= 3.2-1)",
+ "biglm",
+ "binGroup",
+ "boot",
+ "btergm (>= 1.10.6)",
+ "car (>= 3.1-2)",
+ "carData",
+ "caret",
+ "cluster",
+ "cmprsk",
+ "coda",
+ "covr",
+ "drc",
+ "e1071",
+ "emmeans",
+ "epiR (>= 2.0.85)",
+ "ergm (>= 3.10.4)",
+ "fixest (>= 0.9.0)",
+ "gam (>= 1.15)",
+ "gee",
+ "geepack",
+ "ggplot2",
+ "glmnet",
+ "glmnetUtils",
+ "gmm",
+ "Hmisc",
+ "interp",
+ "irlba",
+ "joineRML",
+ "Kendall",
+ "knitr",
+ "ks",
+ "Lahman",
+ "lavaan (>= 0.6.18)",
+ "leaps",
+ "lfe",
+ "lm.beta",
+ "lme4",
+ "lmodel2",
+ "lmtest (>= 0.9.38)",
+ "lsmeans",
+ "maps",
+ "margins",
+ "MASS",
+ "mclust",
+ "mediation",
+ "metafor",
+ "mfx",
+ "mgcv",
+ "mlogit",
+ "modeldata",
+ "modeltests (>= 0.1.6)",
+ "muhaz",
+ "multcomp",
+ "network",
+ "nnet",
+ "ordinal",
+ "plm",
+ "poLCA",
+ "psych",
+ "quantreg",
+ "rmarkdown",
+ "robust",
+ "robustbase",
+ "rsample",
+ "sandwich",
+ "spatialreg",
+ "spdep (>= 1.1)",
+ "speedglm",
+ "spelling",
+ "stats4",
+ "survey",
+ "survival (>= 3.6-4)",
+ "systemfit",
+ "testthat (>= 3.0.0)",
+ "tseries",
+ "vars",
+ "zoo"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Config/usethis/last-upkeep": "2025-04-25",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.3.3",
+ "Collate": "'aaa-documentation-helper.R' 'null-and-default.R' 'aer.R' 'auc.R' 'base.R' 'bbmle.R' 'betareg.R' 'biglm.R' 'bingroup.R' 'boot.R' 'broom-package.R' 'broom.R' 'btergm.R' 'car.R' 'caret.R' 'cluster.R' 'cmprsk.R' 'data-frame.R' 'deprecated-0-7-0.R' 'drc.R' 'emmeans.R' 'epiR.R' 'ergm.R' 'fixest.R' 'gam.R' 'geepack.R' 'glmnet-cv-glmnet.R' 'glmnet-glmnet.R' 'gmm.R' 'hmisc.R' 'import-standalone-obj-type.R' 'import-standalone-types-check.R' 'joinerml.R' 'kendall.R' 'ks.R' 'lavaan.R' 'leaps.R' 'lfe.R' 'list-irlba.R' 'list-optim.R' 'list-svd.R' 'list-xyz.R' 'list.R' 'lm-beta.R' 'lmodel2.R' 'lmtest.R' 'maps.R' 'margins.R' 'mass-fitdistr.R' 'mass-negbin.R' 'mass-polr.R' 'mass-ridgelm.R' 'stats-lm.R' 'mass-rlm.R' 'mclust.R' 'mediation.R' 'metafor.R' 'mfx.R' 'mgcv.R' 'mlogit.R' 'muhaz.R' 'multcomp.R' 'nnet.R' 'nobs.R' 'ordinal-clm.R' 'ordinal-clmm.R' 'plm.R' 'polca.R' 'psych.R' 'stats-nls.R' 'quantreg-nlrq.R' 'quantreg-rq.R' 'quantreg-rqs.R' 'robust-glmrob.R' 'robust-lmrob.R' 'robustbase-glmrob.R' 'robustbase-lmrob.R' 'sp.R' 'spdep.R' 'speedglm-speedglm.R' 'speedglm-speedlm.R' 'stats-anova.R' 'stats-arima.R' 'stats-decompose.R' 'stats-factanal.R' 'stats-glm.R' 'stats-htest.R' 'stats-kmeans.R' 'stats-loess.R' 'stats-mlm.R' 'stats-prcomp.R' 'stats-smooth.spline.R' 'stats-summary-lm.R' 'stats-time-series.R' 'survey.R' 'survival-aareg.R' 'survival-cch.R' 'survival-coxph.R' 'survival-pyears.R' 'survival-survdiff.R' 'survival-survexp.R' 'survival-survfit.R' 'survival-survreg.R' 'systemfit.R' 'tseries.R' 'utilities.R' 'vars.R' 'zoo.R' 'zzz.R'",
+ "NeedsCompilation": "no",
+ "Author": "David Robinson [aut], Alex Hayes [aut] (ORCID: ), Simon Couch [aut] (ORCID: ), Emil Hvitfeldt [aut, cre] (ORCID: ), Posit Software, PBC [cph, fnd] (ROR: ), Indrajeet Patil [ctb] (ORCID: ), Derek Chiu [ctb], Matthieu Gomez [ctb], Boris Demeshev [ctb], Dieter Menne [ctb], Benjamin Nutter [ctb], Luke Johnston [ctb], Ben Bolker [ctb], Francois Briatte [ctb], Jeffrey Arnold [ctb], Jonah Gabry [ctb], Luciano Selzer [ctb], Gavin Simpson [ctb], Jens Preussner [ctb], Jay Hesselberth [ctb], Hadley Wickham [ctb], Matthew Lincoln [ctb], Alessandro Gasparini [ctb], Lukasz Komsta [ctb], Frederick Novometsky [ctb], Wilson Freitas [ctb], Michelle Evans [ctb], Jason Cory Brunson [ctb], Simon Jackson [ctb], Ben Whalley [ctb], Karissa Whiting [ctb], Yves Rosseel [ctb], Michael Kuehn [ctb], Jorge Cimentada [ctb], Erle Holgersen [ctb], Karl Dunkle Werner [ctb] (ORCID: ), Ethan Christensen [ctb], Steven Pav [ctb], Paul PJ [ctb], Ben Schneider [ctb], Patrick Kennedy [ctb], Lily Medina [ctb], Brian Fannin [ctb], Jason Muhlenkamp [ctb], Matt Lehman [ctb], Bill Denney [ctb] (ORCID: ), Nic Crane [ctb], Andrew Bates [ctb], Vincent Arel-Bundock [ctb] (ORCID: ), Hideaki Hayashi [ctb], Luis Tobalina [ctb], Annie Wang [ctb], Wei Yang Tham [ctb], Clara Wang [ctb], Abby Smith [ctb] (ORCID: ), Jasper Cooper [ctb] (ORCID: ), E Auden Krauska [ctb] (ORCID: ), Alex Wang [ctb], Malcolm Barrett [ctb] (ORCID: ), Charles Gray [ctb] (ORCID: ), Jared Wilber [ctb], Vilmantas Gegzna [ctb] (ORCID: ), Eduard Szoecs [ctb], Frederik Aust [ctb] (ORCID: ), Angus Moore [ctb], Nick Williams [ctb], Marius Barth [ctb] (ORCID: ), Bruna Wundervald [ctb] (ORCID: ), Joyce Cahoon [ctb] (ORCID: ), Grant McDermott [ctb] (ORCID: ), Kevin Zarca [ctb], Shiro Kuriwaki [ctb] (ORCID: ), Lukas Wallrich [ctb] (ORCID: ), James Martherus [ctb] (ORCID: ), Chuliang Xiao [ctb] (ORCID: ), Joseph Larmarange [ctb], Max Kuhn [ctb], Michal Bojanowski [ctb], Hakon Malmedal [ctb], Clara Wang [ctb], Sergio Oller [ctb], Luke Sonnet 
[ctb], Jim Hester [ctb], Ben Schneider [ctb], Bernie Gray [ctb] (ORCID: ), Mara Averick [ctb], Aaron Jacobs [ctb], Andreas Bender [ctb], Sven Templer [ctb], Paul-Christian Buerkner [ctb], Matthew Kay [ctb], Erwan Le Pennec [ctb], Johan Junkka [ctb], Hao Zhu [ctb], Benjamin Soltoff [ctb], Zoe Wilkinson Saldana [ctb], Tyler Littlefield [ctb], Charles T. Gray [ctb], Shabbh E. Banks [ctb], Serina Robinson [ctb], Roger Bivand [ctb], Riinu Ots [ctb], Nicholas Williams [ctb], Nina Jakobsen [ctb], Michael Weylandt [ctb], Lisa Lendway [ctb], Karl Hailperin [ctb], Josue Rodriguez [ctb], Jenny Bryan [ctb], Chris Jarvis [ctb], Greg Macfarlane [ctb], Brian Mannakee [ctb], Drew Tyre [ctb], Shreyas Singh [ctb], Laurens Geffert [ctb], Hong Ooi [ctb], Henrik Bengtsson [ctb], Eduard Szocs [ctb], David Hugh-Jones [ctb], Matthieu Stigler [ctb], Hugo Tavares [ctb] (ORCID: ), R. Willem Vervoort [ctb], Brenton M. Wiernik [ctb], Josh Yamamoto [ctb], Jasme Lee [ctb], Taren Sanders [ctb] (ORCID: ), Ilaria Prosdocimi [ctb] (ORCID: ), Daniel D. Sjoberg [ctb] (ORCID: ), Alex Reinhart [ctb] (ORCID: )",
+ "Maintainer": "Emil Hvitfeldt ",
+ "Repository": "CRAN"
+ },
+ "bslib": {
+ "Package": "bslib",
+ "Version": "0.9.0",
+ "Source": "Repository",
+ "Title": "Custom 'Bootstrap' 'Sass' Themes for 'shiny' and 'rmarkdown'",
+ "Authors@R": "c( person(\"Carson\", \"Sievert\", , \"carson@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-4958-2844\")), person(\"Joe\", \"Cheng\", , \"joe@posit.co\", role = \"aut\"), person(\"Garrick\", \"Aden-Buie\", , \"garrick@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0002-7111-0077\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")), person(, \"Bootstrap contributors\", role = \"ctb\", comment = \"Bootstrap library\"), person(, \"Twitter, Inc\", role = \"cph\", comment = \"Bootstrap library\"), person(\"Javi\", \"Aguilar\", role = c(\"ctb\", \"cph\"), comment = \"Bootstrap colorpicker library\"), person(\"Thomas\", \"Park\", role = c(\"ctb\", \"cph\"), comment = \"Bootswatch library\"), person(, \"PayPal\", role = c(\"ctb\", \"cph\"), comment = \"Bootstrap accessibility plugin\") )",
+ "Description": "Simplifies custom 'CSS' styling of both 'shiny' and 'rmarkdown' via 'Bootstrap' 'Sass'. Supports 'Bootstrap' 3, 4 and 5 as well as their various 'Bootswatch' themes. An interactive widget is also provided for previewing themes in real time.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://rstudio.github.io/bslib/, https://github.com/rstudio/bslib",
+ "BugReports": "https://github.com/rstudio/bslib/issues",
+ "Depends": [
+ "R (>= 2.10)"
+ ],
+ "Imports": [
+ "base64enc",
+ "cachem",
+ "fastmap (>= 1.1.1)",
+ "grDevices",
+ "htmltools (>= 0.5.8)",
+ "jquerylib (>= 0.1.3)",
+ "jsonlite",
+ "lifecycle",
+ "memoise (>= 2.0.1)",
+ "mime",
+ "rlang",
+ "sass (>= 0.4.9)"
+ ],
+ "Suggests": [
+ "bsicons",
+ "curl",
+ "fontawesome",
+ "future",
+ "ggplot2",
+ "knitr",
+ "magrittr",
+ "rappdirs",
+ "rmarkdown (>= 2.7)",
+ "shiny (> 1.8.1)",
+ "testthat",
+ "thematic",
+ "tools",
+ "utils",
+ "withr",
+ "yaml"
+ ],
+ "Config/Needs/deploy": "BH, chiflights22, colourpicker, commonmark, cpp11, cpsievert/chiflights22, cpsievert/histoslider, dplyr, DT, ggplot2, ggridges, gt, hexbin, histoslider, htmlwidgets, lattice, leaflet, lubridate, markdown, modelr, plotly, reactable, reshape2, rprojroot, rsconnect, rstudio/shiny, scales, styler, tibble",
+ "Config/Needs/routine": "chromote, desc, renv",
+ "Config/Needs/website": "brio, crosstalk, dplyr, DT, ggplot2, glue, htmlwidgets, leaflet, lorem, palmerpenguins, plotly, purrr, rprojroot, rstudio/htmltools, scales, stringr, tidyr, webshot2",
+ "Config/testthat/edition": "3",
+ "Config/testthat/parallel": "true",
+ "Config/testthat/start-first": "zzzz-bs-sass, fonts, zzz-precompile, theme-*, rmd-*",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.2",
+ "Collate": "'accordion.R' 'breakpoints.R' 'bs-current-theme.R' 'bs-dependencies.R' 'bs-global.R' 'bs-remove.R' 'bs-theme-layers.R' 'bs-theme-preset-bootswatch.R' 'bs-theme-preset-brand.R' 'bs-theme-preset-builtin.R' 'bs-theme-preset.R' 'utils.R' 'bs-theme-preview.R' 'bs-theme-update.R' 'bs-theme.R' 'bslib-package.R' 'buttons.R' 'card.R' 'deprecated.R' 'files.R' 'fill.R' 'imports.R' 'input-dark-mode.R' 'input-switch.R' 'layout.R' 'nav-items.R' 'nav-update.R' 'navbar_options.R' 'navs-legacy.R' 'navs.R' 'onLoad.R' 'page.R' 'popover.R' 'precompiled.R' 'print.R' 'shiny-devmode.R' 'sidebar.R' 'staticimports.R' 'tooltip.R' 'utils-deps.R' 'utils-shiny.R' 'utils-tags.R' 'value-box.R' 'version-default.R' 'versions.R'",
+ "NeedsCompilation": "no",
+ "Author": "Carson Sievert [aut, cre] (), Joe Cheng [aut], Garrick Aden-Buie [aut] (), Posit Software, PBC [cph, fnd], Bootstrap contributors [ctb] (Bootstrap library), Twitter, Inc [cph] (Bootstrap library), Javi Aguilar [ctb, cph] (Bootstrap colorpicker library), Thomas Park [ctb, cph] (Bootswatch library), PayPal [ctb, cph] (Bootstrap accessibility plugin)",
+ "Maintainer": "Carson Sievert ",
+ "Repository": "CRAN"
+ },
+ "cachem": {
+ "Package": "cachem",
+ "Version": "1.1.0",
+ "Source": "Repository",
+ "Title": "Cache R Objects with Automatic Pruning",
+ "Description": "Key-value stores with automatic pruning. Caches can limit either their total size or the age of the oldest object (or both), automatically pruning objects to maintain the constraints.",
+ "Authors@R": "c( person(\"Winston\", \"Chang\", , \"winston@posit.co\", c(\"aut\", \"cre\")), person(family = \"Posit Software, PBC\", role = c(\"cph\", \"fnd\")))",
+ "License": "MIT + file LICENSE",
+ "Encoding": "UTF-8",
+ "ByteCompile": "true",
+ "URL": "https://cachem.r-lib.org/, https://github.com/r-lib/cachem",
+ "Imports": [
+ "rlang",
+ "fastmap (>= 1.2.0)"
+ ],
+ "Suggests": [
+ "testthat"
+ ],
+ "RoxygenNote": "7.2.3",
+ "Config/Needs/routine": "lobstr",
+ "Config/Needs/website": "pkgdown",
+ "NeedsCompilation": "yes",
+ "Author": "Winston Chang [aut, cre], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Winston Chang ",
+ "Repository": "CRAN"
+ },
+ "callr": {
+ "Package": "callr",
+ "Version": "3.7.6",
+ "Source": "Repository",
+ "Title": "Call R from R",
+ "Authors@R": "c( person(\"Gábor\", \"Csárdi\", , \"csardi.gabor@gmail.com\", role = c(\"aut\", \"cre\", \"cph\"), comment = c(ORCID = \"0000-0001-7098-9676\")), person(\"Winston\", \"Chang\", role = \"aut\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")), person(\"Ascent Digital Services\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "It is sometimes useful to perform a computation in a separate R process, without affecting the current R process at all. This packages does exactly that.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://callr.r-lib.org, https://github.com/r-lib/callr",
+ "BugReports": "https://github.com/r-lib/callr/issues",
+ "Depends": [
+ "R (>= 3.4)"
+ ],
+ "Imports": [
+ "processx (>= 3.6.1)",
+ "R6",
+ "utils"
+ ],
+ "Suggests": [
+ "asciicast (>= 2.3.1)",
+ "cli (>= 1.1.0)",
+ "mockery",
+ "ps",
+ "rprojroot",
+ "spelling",
+ "testthat (>= 3.2.0)",
+ "withr (>= 2.3.0)"
+ ],
+ "Config/Needs/website": "r-lib/asciicast, glue, htmlwidgets, igraph, tibble, tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.3.1.9000",
+ "NeedsCompilation": "no",
+ "Author": "Gábor Csárdi [aut, cre, cph] (), Winston Chang [aut], Posit Software, PBC [cph, fnd], Ascent Digital Services [cph, fnd]",
+ "Maintainer": "Gábor Csárdi ",
+ "Repository": "CRAN"
+ },
+ "cchsflow": {
+ "Package": "cchsflow",
+ "Version": "2.1.0",
+ "Source": "Local",
+ "Type": "Package",
+ "Title": "Transforming and Harmonizing CCHS Variables",
+ "Date": "2022-05-05",
+ "Author": "See Authors@R",
+ "Maintainer": "Kitty Chen ",
+ "Authors@R": "c( person(given = \"Doug\", family = \"Manuel\", role = c(\"aut\", \"cph\"), email = \"dmanuel@ohri.ca\", comment = c(ORCID = \"0000-0003-0912-0845\")), person(given = \"Warsame\", family = \"Yusuf\", role = c(\"aut\"), email = \"waryusuf@ohri.ca\"), person(given = \"Rostyslav\", family = \"Vyuha\", role = c(\"aut\"), email = \"rostikvyuha@gmail.com\"), person(given = \"Kitty\", family = \"Chen\", role = c(\"aut\",\"cre\"), email = \"kitchen@ohri.ca\"), person(given = \"Carol\", family = \"Bennett\", role = c(\"aut\"), email = \"cbennett@ohri.ca\"), person(given = \"Yulric\", family = \"Sequeira\", role = c(\"ctb\"), email = \"ysequeira@ohri.ca\"), person(given = \"The Ottawa Hospital\", role = c(\"cph\"), email = \"dmanuel@ohri.ca\"))",
+ "Depends": [
+ "R (>= 3.5.0)",
+ "haven (>= 1.1.2)",
+ "dplyr (>= 0.8.2)",
+ "sjlabelled (>= 1.0.17)",
+ "stringr (>= 1.2.0)",
+ "magrittr",
+ "yaml",
+ "readr",
+ "purrr",
+ "cli"
+ ],
+ "Description": "Supporting the use of the Canadian Community Health Survey (CCHS) by transforming variables from each cycle into harmonized, consistent versions that span survey cycles (currently, 2001 to 2018). CCHS data used in this library is accessed and adapted in accordance to the Statistics Canada Open Licence Agreement. This package uses rec_with_table(), which was developed from 'sjmisc' rec(). Lüdecke D (2018). \"sjmisc: Data and Variable Transformation Functions\". Journal of Open Source Software, 3(26), 754. .",
+ "License": "MIT + file LICENSE",
+ "Encoding": "UTF-8",
+ "LazyData": "true",
+ "URL": "https://big-life-lab.github.io/cchsflow/, https://github.com/Big-Life-Lab/cchsflow",
+ "BugReports": "https://github.com/Big-Life-Lab/cchsflow/issues",
+ "RoxygenNote": "7.3.3",
+ "Suggests": [
+ "testthat (>= 3.0.0)",
+ "kableExtra",
+ "DT",
+ "rmarkdown",
+ "knitr",
+ "pkgdown"
+ ],
+ "Config/testthat/edition": "3",
+ "RemoteType": "local",
+ "RemoteUrl": "~/github/cchsflow"
+ },
+ "checkmate": {
+ "Package": "checkmate",
+ "Version": "2.3.4",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Fast and Versatile Argument Checks",
+ "Description": "Tests and assertions to perform frequent argument checks. A substantial part of the package was written in C to minimize any worries about execution time overhead.",
+ "Authors@R": "c( person(\"Michel\", \"Lang\", NULL, \"michellang@gmail.com\", role = c(\"cre\", \"aut\"), comment = c(ORCID = \"0000-0001-9754-0393\")), person(\"Bernd\", \"Bischl\", NULL, \"bernd_bischl@gmx.net\", role = \"ctb\"), person(\"Dénes\", \"Tóth\", NULL, \"toth.denes@kogentum.hu\", role = \"ctb\", comment = c(ORCID = \"0000-0003-4262-3217\")) )",
+ "URL": "https://mllg.github.io/checkmate/, https://github.com/mllg/checkmate",
+ "URLNote": "https://github.com/mllg/checkmate",
+ "BugReports": "https://github.com/mllg/checkmate/issues",
+ "NeedsCompilation": "yes",
+ "ByteCompile": "yes",
+ "Encoding": "UTF-8",
+ "Depends": [
+ "R (>= 3.0.0)"
+ ],
+ "Imports": [
+ "backports (>= 1.1.0)",
+ "utils"
+ ],
+ "Suggests": [
+ "R6",
+ "fastmatch",
+ "data.table (>= 1.9.8)",
+ "devtools",
+ "ggplot2",
+ "knitr",
+ "magrittr",
+ "microbenchmark",
+ "rmarkdown",
+ "testthat (>= 3.0.4)",
+ "tinytest (>= 1.1.0)",
+ "tibble"
+ ],
+ "License": "BSD_3_clause + file LICENSE",
+ "VignetteBuilder": "knitr",
+ "RoxygenNote": "7.3.3",
+ "Collate": "'AssertCollection.R' 'allMissing.R' 'anyInfinite.R' 'anyMissing.R' 'anyNaN.R' 'asInteger.R' 'assert.R' 'helper.R' 'makeExpectation.R' 'makeTest.R' 'makeAssertion.R' 'checkAccess.R' 'checkArray.R' 'checkAtomic.R' 'checkAtomicVector.R' 'checkCharacter.R' 'checkChoice.R' 'checkClass.R' 'checkComplex.R' 'checkCount.R' 'checkDataFrame.R' 'checkDataTable.R' 'checkDate.R' 'checkDirectoryExists.R' 'checkDisjunct.R' 'checkDouble.R' 'checkEnvironment.R' 'checkFALSE.R' 'checkFactor.R' 'checkFileExists.R' 'checkFlag.R' 'checkFormula.R' 'checkFunction.R' 'checkInt.R' 'checkInteger.R' 'checkIntegerish.R' 'checkList.R' 'checkLogical.R' 'checkMatrix.R' 'checkMultiClass.R' 'checkNamed.R' 'checkNames.R' 'checkNull.R' 'checkNumber.R' 'checkNumeric.R' 'checkOS.R' 'checkPOSIXct.R' 'checkPathForOutput.R' 'checkPermutation.R' 'checkR6.R' 'checkRaw.R' 'checkScalar.R' 'checkScalarNA.R' 'checkSetEqual.R' 'checkString.R' 'checkSubset.R' 'checkTRUE.R' 'checkTibble.R' 'checkVector.R' 'coalesce.R' 'isIntegerish.R' 'matchArg.R' 'qassert.R' 'qassertr.R' 'vname.R' 'wfwl.R' 'zzz.R'",
+ "Author": "Michel Lang [cre, aut] (ORCID: ), Bernd Bischl [ctb], Dénes Tóth [ctb] (ORCID: )",
+ "Maintainer": "Michel Lang ",
+ "Repository": "CRAN"
+ },
+ "cli": {
+ "Package": "cli",
+ "Version": "3.6.5",
+ "Source": "Repository",
+ "Title": "Helpers for Developing Command Line Interfaces",
+ "Authors@R": "c( person(\"Gábor\", \"Csárdi\", , \"gabor@posit.co\", role = c(\"aut\", \"cre\")), person(\"Hadley\", \"Wickham\", role = \"ctb\"), person(\"Kirill\", \"Müller\", role = \"ctb\"), person(\"Salim\", \"Brüggemann\", , \"salim-b@pm.me\", role = \"ctb\", comment = c(ORCID = \"0000-0002-5329-5987\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "A suite of tools to build attractive command line interfaces ('CLIs'), from semantic elements: headings, lists, alerts, paragraphs, etc. Supports custom themes via a 'CSS'-like language. It also contains a number of lower level 'CLI' elements: rules, boxes, trees, and 'Unicode' symbols with 'ASCII' alternatives. It support ANSI colors and text styles as well.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://cli.r-lib.org, https://github.com/r-lib/cli",
+ "BugReports": "https://github.com/r-lib/cli/issues",
+ "Depends": [
+ "R (>= 3.4)"
+ ],
+ "Imports": [
+ "utils"
+ ],
+ "Suggests": [
+ "callr",
+ "covr",
+ "crayon",
+ "digest",
+ "glue (>= 1.6.0)",
+ "grDevices",
+ "htmltools",
+ "htmlwidgets",
+ "knitr",
+ "methods",
+ "processx",
+ "ps (>= 1.3.4.9000)",
+ "rlang (>= 1.0.2.9003)",
+ "rmarkdown",
+ "rprojroot",
+ "rstudioapi",
+ "testthat (>= 3.2.0)",
+ "tibble",
+ "whoami",
+ "withr"
+ ],
+ "Config/Needs/website": "r-lib/asciicast, bench, brio, cpp11, decor, desc, fansi, prettyunits, sessioninfo, tidyverse/tidytemplate, usethis, vctrs",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.2",
+ "NeedsCompilation": "yes",
+ "Author": "Gábor Csárdi [aut, cre], Hadley Wickham [ctb], Kirill Müller [ctb], Salim Brüggemann [ctb] (), Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Gábor Csárdi ",
+ "Repository": "CRAN"
+ },
+ "clipr": {
+ "Package": "clipr",
+ "Version": "0.8.0",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Read and Write from the System Clipboard",
+ "Authors@R": "c( person(\"Matthew\", \"Lincoln\", , \"matthew.d.lincoln@gmail.com\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-4387-3384\")), person(\"Louis\", \"Maddox\", role = \"ctb\"), person(\"Steve\", \"Simpson\", role = \"ctb\"), person(\"Jennifer\", \"Bryan\", role = \"ctb\") )",
+ "Description": "Simple utility functions to read from and write to the Windows, OS X, and X11 clipboards.",
+ "License": "GPL-3",
+ "URL": "https://github.com/mdlincoln/clipr, http://matthewlincoln.net/clipr/",
+ "BugReports": "https://github.com/mdlincoln/clipr/issues",
+ "Imports": [
+ "utils"
+ ],
+ "Suggests": [
+ "covr",
+ "knitr",
+ "rmarkdown",
+ "rstudioapi (>= 0.5)",
+ "testthat (>= 2.0.0)"
+ ],
+ "VignetteBuilder": "knitr",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.1.2",
+ "SystemRequirements": "xclip (https://github.com/astrand/xclip) or xsel (http://www.vergenet.net/~conrad/software/xsel/) for accessing the X11 clipboard, or wl-clipboard (https://github.com/bugaevc/wl-clipboard) for systems using Wayland.",
+ "NeedsCompilation": "no",
+ "Author": "Matthew Lincoln [aut, cre] (), Louis Maddox [ctb], Steve Simpson [ctb], Jennifer Bryan [ctb]",
+ "Maintainer": "Matthew Lincoln ",
+ "Repository": "CRAN"
+ },
+ "cluster": {
+ "Package": "cluster",
+ "Version": "2.1.6",
+ "Source": "Repository",
+ "Date": "2023-11-30",
+ "Priority": "recommended",
+ "Title": "\"Finding Groups in Data\": Cluster Analysis Extended Rousseeuw et al.",
+ "Description": "Methods for Cluster analysis. Much extended the original from Peter Rousseeuw, Anja Struyf and Mia Hubert, based on Kaufman and Rousseeuw (1990) \"Finding Groups in Data\".",
+ "Maintainer": "Martin Maechler ",
+ "Authors@R": "c(person(\"Martin\",\"Maechler\", role = c(\"aut\",\"cre\"), email=\"maechler@stat.math.ethz.ch\", comment = c(ORCID = \"0000-0002-8685-9910\")) ,person(\"Peter\", \"Rousseeuw\", role=\"aut\", email=\"peter.rousseeuw@kuleuven.be\", comment = c(\"Fortran original\", ORCID = \"0000-0002-3807-5353\")) ,person(\"Anja\", \"Struyf\", role=\"aut\", comment= \"S original\") ,person(\"Mia\", \"Hubert\", role=\"aut\", email= \"Mia.Hubert@uia.ua.ac.be\", comment = c(\"S original\", ORCID = \"0000-0001-6398-4850\")) ,person(\"Kurt\", \"Hornik\", role=c(\"trl\", \"ctb\"), email=\"Kurt.Hornik@R-project.org\", comment=c(\"port to R; maintenance(1999-2000)\", ORCID=\"0000-0003-4198-9911\")) ,person(\"Matthias\", \"Studer\", role=\"ctb\") ,person(\"Pierre\", \"Roudier\", role=\"ctb\") ,person(\"Juan\", \"Gonzalez\", role=\"ctb\") ,person(\"Kamil\", \"Kozlowski\", role=\"ctb\") ,person(\"Erich\", \"Schubert\", role=\"ctb\", comment = c(\"fastpam options for pam()\", ORCID = \"0000-0001-9143-4880\")) ,person(\"Keefe\", \"Murphy\", role=\"ctb\", comment = \"volume.ellipsoid({d >= 3})\") #not yet ,person(\"Fischer-Rasmussen\", \"Kasper\", role = \"ctb\", comment = \"Gower distance for CLARA\") )",
+ "Depends": [
+ "R (>= 3.5.0)"
+ ],
+ "Imports": [
+ "graphics",
+ "grDevices",
+ "stats",
+ "utils"
+ ],
+ "Suggests": [
+ "MASS",
+ "Matrix"
+ ],
+ "SuggestsNote": "MASS: two examples using cov.rob() and mvrnorm(); Matrix tools for testing",
+ "Enhances": [
+ "mvoutlier",
+ "fpc",
+ "ellipse",
+ "sfsmisc"
+ ],
+ "EnhancesNote": "xref-ed in man/*.Rd",
+ "LazyLoad": "yes",
+ "LazyData": "yes",
+ "ByteCompile": "yes",
+ "BuildResaveData": "no",
+ "License": "GPL (>= 2)",
+ "URL": "https://svn.r-project.org/R-packages/trunk/cluster/",
+ "NeedsCompilation": "yes",
+ "Author": "Martin Maechler [aut, cre] (), Peter Rousseeuw [aut] (Fortran original, ), Anja Struyf [aut] (S original), Mia Hubert [aut] (S original, ), Kurt Hornik [trl, ctb] (port to R; maintenance(1999-2000), ), Matthias Studer [ctb], Pierre Roudier [ctb], Juan Gonzalez [ctb], Kamil Kozlowski [ctb], Erich Schubert [ctb] (fastpam options for pam(), ), Keefe Murphy [ctb] (volume.ellipsoid({d >= 3}))",
+ "Repository": "CRAN"
+ },
+ "codetools": {
+ "Package": "codetools",
+ "Version": "0.2-20",
+ "Source": "Repository",
+ "Priority": "recommended",
+ "Author": "Luke Tierney ",
+ "Description": "Code analysis tools for R.",
+ "Title": "Code Analysis Tools for R",
+ "Depends": [
+ "R (>= 2.1)"
+ ],
+ "Maintainer": "Luke Tierney ",
+ "URL": "https://gitlab.com/luke-tierney/codetools",
+ "License": "GPL",
+ "NeedsCompilation": "no",
+ "Repository": "CRAN"
+ },
+ "colorspace": {
+ "Package": "colorspace",
+ "Version": "2.1-1",
+ "Source": "Repository",
+ "Date": "2024-07-26",
+ "Title": "A Toolbox for Manipulating and Assessing Colors and Palettes",
+ "Authors@R": "c(person(given = \"Ross\", family = \"Ihaka\", role = \"aut\", email = \"ihaka@stat.auckland.ac.nz\"), person(given = \"Paul\", family = \"Murrell\", role = \"aut\", email = \"paul@stat.auckland.ac.nz\", comment = c(ORCID = \"0000-0002-3224-8858\")), person(given = \"Kurt\", family = \"Hornik\", role = \"aut\", email = \"Kurt.Hornik@R-project.org\", comment = c(ORCID = \"0000-0003-4198-9911\")), person(given = c(\"Jason\", \"C.\"), family = \"Fisher\", role = \"aut\", email = \"jfisher@usgs.gov\", comment = c(ORCID = \"0000-0001-9032-8912\")), person(given = \"Reto\", family = \"Stauffer\", role = \"aut\", email = \"Reto.Stauffer@uibk.ac.at\", comment = c(ORCID = \"0000-0002-3798-5507\")), person(given = c(\"Claus\", \"O.\"), family = \"Wilke\", role = \"aut\", email = \"wilke@austin.utexas.edu\", comment = c(ORCID = \"0000-0002-7470-9261\")), person(given = c(\"Claire\", \"D.\"), family = \"McWhite\", role = \"aut\", email = \"claire.mcwhite@utmail.utexas.edu\", comment = c(ORCID = \"0000-0001-7346-3047\")), person(given = \"Achim\", family = \"Zeileis\", role = c(\"aut\", \"cre\"), email = \"Achim.Zeileis@R-project.org\", comment = c(ORCID = \"0000-0003-0918-3766\")))",
+ "Description": "Carries out mapping between assorted color spaces including RGB, HSV, HLS, CIEXYZ, CIELUV, HCL (polar CIELUV), CIELAB, and polar CIELAB. Qualitative, sequential, and diverging color palettes based on HCL colors are provided along with corresponding ggplot2 color scales. Color palette choice is aided by an interactive app (with either a Tcl/Tk or a shiny graphical user interface) and shiny apps with an HCL color picker and a color vision deficiency emulator. Plotting functions for displaying and assessing palettes include color swatches, visualizations of the HCL space, and trajectories in HCL and/or RGB spectrum. Color manipulation functions include: desaturation, lightening/darkening, mixing, and simulation of color vision deficiencies (deutanomaly, protanomaly, tritanomaly). Details can be found on the project web page at and in the accompanying scientific paper: Zeileis et al. (2020, Journal of Statistical Software, ).",
+ "Depends": [
+ "R (>= 3.0.0)",
+ "methods"
+ ],
+ "Imports": [
+ "graphics",
+ "grDevices",
+ "stats"
+ ],
+ "Suggests": [
+ "datasets",
+ "utils",
+ "KernSmooth",
+ "MASS",
+ "kernlab",
+ "mvtnorm",
+ "vcd",
+ "tcltk",
+ "shiny",
+ "shinyjs",
+ "ggplot2",
+ "dplyr",
+ "scales",
+ "grid",
+ "png",
+ "jpeg",
+ "knitr",
+ "rmarkdown",
+ "RColorBrewer",
+ "rcartocolor",
+ "scico",
+ "viridis",
+ "wesanderson"
+ ],
+ "VignetteBuilder": "knitr",
+ "License": "BSD_3_clause + file LICENSE",
+ "URL": "https://colorspace.R-Forge.R-project.org/, https://hclwizard.org/",
+ "BugReports": "https://colorspace.R-Forge.R-project.org/contact.html",
+ "LazyData": "yes",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.1",
+ "NeedsCompilation": "yes",
+ "Author": "Ross Ihaka [aut], Paul Murrell [aut] (), Kurt Hornik [aut] (), Jason C. Fisher [aut] (), Reto Stauffer [aut] (), Claus O. Wilke [aut] (), Claire D. McWhite [aut] (), Achim Zeileis [aut, cre] ()",
+ "Maintainer": "Achim Zeileis ",
+ "Repository": "CRAN"
+ },
+ "commonmark": {
+ "Package": "commonmark",
+ "Version": "2.0.0",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "High Performance CommonMark and Github Markdown Rendering in R",
+ "Authors@R": "c( person(\"Jeroen\", \"Ooms\", ,\"jeroenooms@gmail.com\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-4035-0289\")), person(\"John MacFarlane\", role = \"cph\", comment = \"Author of cmark\"))",
+ "Description": "The CommonMark specification defines a rationalized version of markdown syntax. This package uses the 'cmark' reference implementation for converting markdown text into various formats including html, latex and groff man. In addition it exposes the markdown parse tree in xml format. Also includes opt-in support for GFM extensions including tables, autolinks, and strikethrough text.",
+ "License": "BSD_2_clause + file LICENSE",
+ "URL": "https://docs.ropensci.org/commonmark/ https://ropensci.r-universe.dev/commonmark",
+ "BugReports": "https://github.com/r-lib/commonmark/issues",
+ "Suggests": [
+ "curl",
+ "testthat",
+ "xml2"
+ ],
+ "RoxygenNote": "7.3.2",
+ "Language": "en-US",
+ "Encoding": "UTF-8",
+ "NeedsCompilation": "yes",
+ "Author": "Jeroen Ooms [aut, cre] (ORCID: ), John MacFarlane [cph] (Author of cmark)",
+ "Maintainer": "Jeroen Ooms ",
+ "Repository": "CRAN"
+ },
+ "config": {
+ "Package": "config",
+ "Version": "0.3.2",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Manage Environment Specific Configuration Values",
+ "Authors@R": "c( person(\"JJ\", \"Allaire\", role = c(\"aut\"), email = \"jj@rstudio.com\"), person(\"Andrie\", \"de Vries\", role = \"cre\", email = \"apdevries@gmail.com\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Imports": [
+ "yaml (>= 2.1.19)"
+ ],
+ "Suggests": [
+ "testthat",
+ "knitr",
+ "rmarkdown",
+ "covr",
+ "spelling",
+ "withr"
+ ],
+ "Description": "Manage configuration values across multiple environments (e.g. development, test, production). Read values using a function that determines the current environment and returns the appropriate value.",
+ "License": "GPL-3",
+ "URL": "https://rstudio.github.io/config/, https://github.com/rstudio/config",
+ "BugReports": "https://github.com/rstudio/config/issues",
+ "RoxygenNote": "7.2.3",
+ "VignetteBuilder": "knitr",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "Config/testthat/edition": "3",
+ "NeedsCompilation": "no",
+ "Author": "JJ Allaire [aut], Andrie de Vries [cre], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Andrie de Vries ",
+ "Repository": "CRAN"
+ },
+ "cpp11": {
+ "Package": "cpp11",
+ "Version": "0.5.2",
+ "Source": "Repository",
+ "Title": "A C++11 Interface for R's C Interface",
+ "Authors@R": "c( person(\"Davis\", \"Vaughan\", email = \"davis@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0003-4777-038X\")), person(\"Jim\",\"Hester\", role = \"aut\", comment = c(ORCID = \"0000-0002-2739-7082\")), person(\"Romain\", \"François\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"Benjamin\", \"Kietzman\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Provides a header only, C++11 interface to R's C interface. Compared to other approaches 'cpp11' strives to be safe against long jumps from the C API as well as C++ exceptions, conform to normal R function semantics and supports interaction with 'ALTREP' vectors.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://cpp11.r-lib.org, https://github.com/r-lib/cpp11",
+ "BugReports": "https://github.com/r-lib/cpp11/issues",
+ "Depends": [
+ "R (>= 4.0.0)"
+ ],
+ "Suggests": [
+ "bench",
+ "brio",
+ "callr",
+ "cli",
+ "covr",
+ "decor",
+ "desc",
+ "ggplot2",
+ "glue",
+ "knitr",
+ "lobstr",
+ "mockery",
+ "progress",
+ "rmarkdown",
+ "scales",
+ "Rcpp",
+ "testthat (>= 3.2.0)",
+ "tibble",
+ "utils",
+ "vctrs",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Config/Needs/cpp11/cpp_register": "brio, cli, decor, desc, glue, tibble, vctrs",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.2",
+ "NeedsCompilation": "no",
+ "Author": "Davis Vaughan [aut, cre] (), Jim Hester [aut] (), Romain François [aut] (), Benjamin Kietzman [ctb], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Davis Vaughan ",
+ "Repository": "CRAN"
+ },
+ "crayon": {
+ "Package": "crayon",
+ "Version": "1.5.3",
+ "Source": "Repository",
+ "Title": "Colored Terminal Output",
+ "Authors@R": "c( person(\"Gábor\", \"Csárdi\", , \"csardi.gabor@gmail.com\", role = c(\"aut\", \"cre\")), person(\"Brodie\", \"Gaslam\", , \"brodie.gaslam@yahoo.com\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "The crayon package is now superseded. Please use the 'cli' package for new projects. Colored terminal output on terminals that support 'ANSI' color and highlight codes. It also works in 'Emacs' 'ESS'. 'ANSI' color support is automatically detected. Colors and highlighting can be combined and nested. New styles can also be created easily. This package was inspired by the 'chalk' 'JavaScript' project.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://r-lib.github.io/crayon/, https://github.com/r-lib/crayon",
+ "BugReports": "https://github.com/r-lib/crayon/issues",
+ "Imports": [
+ "grDevices",
+ "methods",
+ "utils"
+ ],
+ "Suggests": [
+ "mockery",
+ "rstudioapi",
+ "testthat",
+ "withr"
+ ],
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.1",
+ "Collate": "'aaa-rstudio-detect.R' 'aaaa-rematch2.R' 'aab-num-ansi-colors.R' 'aac-num-ansi-colors.R' 'ansi-256.R' 'ansi-palette.R' 'combine.R' 'string.R' 'utils.R' 'crayon-package.R' 'disposable.R' 'enc-utils.R' 'has_ansi.R' 'has_color.R' 'link.R' 'styles.R' 'machinery.R' 'parts.R' 'print.R' 'style-var.R' 'show.R' 'string_operations.R'",
+ "NeedsCompilation": "no",
+ "Author": "Gábor Csárdi [aut, cre], Brodie Gaslam [ctb], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Gábor Csárdi ",
+ "Repository": "CRAN"
+ },
+ "credentials": {
+ "Package": "credentials",
+ "Version": "2.0.3",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Tools for Managing SSH and Git Credentials",
+ "Authors@R": "person(\"Jeroen\", \"Ooms\", role = c(\"aut\", \"cre\"), email = \"jeroenooms@gmail.com\", comment = c(ORCID = \"0000-0002-4035-0289\"))",
+ "Description": "Setup and retrieve HTTPS and SSH credentials for use with 'git' and other services. For HTTPS remotes the package interfaces the 'git-credential' utility which 'git' uses to store HTTP usernames and passwords. For SSH remotes we provide convenient functions to find or generate appropriate SSH keys. The package both helps the user to setup a local git installation, and also provides a back-end for git/ssh client libraries to authenticate with existing user credentials.",
+ "License": "MIT + file LICENSE",
+ "SystemRequirements": "git (optional)",
+ "Encoding": "UTF-8",
+ "Imports": [
+ "openssl (>= 1.3)",
+ "sys (>= 2.1)",
+ "curl",
+ "jsonlite",
+ "askpass"
+ ],
+ "Suggests": [
+ "testthat",
+ "knitr",
+ "rmarkdown"
+ ],
+ "RoxygenNote": "7.2.1",
+ "VignetteBuilder": "knitr",
+ "Language": "en-US",
+ "URL": "https://docs.ropensci.org/credentials/ https://r-lib.r-universe.dev/credentials",
+ "BugReports": "https://github.com/r-lib/credentials/issues",
+ "NeedsCompilation": "no",
+ "Author": "Jeroen Ooms [aut, cre] (ORCID: )",
+ "Maintainer": "Jeroen Ooms ",
+ "Repository": "CRAN"
+ },
+ "curl": {
+ "Package": "curl",
+ "Version": "7.0.0",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "A Modern and Flexible Web Client for R",
+ "Authors@R": "c( person(\"Jeroen\", \"Ooms\", role = c(\"aut\", \"cre\"), email = \"jeroenooms@gmail.com\", comment = c(ORCID = \"0000-0002-4035-0289\")), person(\"Hadley\", \"Wickham\", role = \"ctb\"), person(\"Posit Software, PBC\", role = \"cph\"))",
+ "Description": "Bindings to 'libcurl' for performing fully configurable HTTP/FTP requests where responses can be processed in memory, on disk, or streaming via the callback or connection interfaces. Some knowledge of 'libcurl' is recommended; for a more-user-friendly web client see the 'httr2' package which builds on this package with http specific tools and logic.",
+ "License": "MIT + file LICENSE",
+ "SystemRequirements": "libcurl (>= 7.73): libcurl-devel (rpm) or libcurl4-openssl-dev (deb)",
+ "URL": "https://jeroen.r-universe.dev/curl",
+ "BugReports": "https://github.com/jeroen/curl/issues",
+ "Suggests": [
+ "spelling",
+ "testthat (>= 1.0.0)",
+ "knitr",
+ "jsonlite",
+ "later",
+ "rmarkdown",
+ "httpuv (>= 1.4.4)",
+ "webutils"
+ ],
+ "VignetteBuilder": "knitr",
+ "Depends": [
+ "R (>= 3.0.0)"
+ ],
+ "RoxygenNote": "7.3.2",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "NeedsCompilation": "yes",
+ "Author": "Jeroen Ooms [aut, cre] (ORCID: ), Hadley Wickham [ctb], Posit Software, PBC [cph]",
+ "Maintainer": "Jeroen Ooms ",
+ "Repository": "CRAN"
+ },
+ "data.table": {
+ "Package": "data.table",
+ "Version": "1.18.2.1",
+ "Source": "Repository",
+ "Title": "Extension of `data.frame`",
+ "Depends": [
+ "R (>= 3.4.0)"
+ ],
+ "Imports": [
+ "methods"
+ ],
+ "Suggests": [
+ "bit64 (>= 4.0.0)",
+ "bit (>= 4.0.4)",
+ "R.utils (>= 2.13.0)",
+ "xts",
+ "zoo (>= 1.8-1)",
+ "yaml",
+ "knitr",
+ "markdown"
+ ],
+ "Description": "Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development.",
+ "License": "MPL-2.0 | file LICENSE",
+ "URL": "https://r-datatable.com, https://Rdatatable.gitlab.io/data.table, https://github.com/Rdatatable/data.table",
+ "BugReports": "https://github.com/Rdatatable/data.table/issues",
+ "VignetteBuilder": "knitr",
+ "Encoding": "UTF-8",
+ "ByteCompile": "TRUE",
+ "Authors@R": "c( person(\"Tyson\",\"Barrett\", role=c(\"aut\",\"cre\"), email=\"t.barrett88@gmail.com\", comment = c(ORCID=\"0000-0002-2137-1391\")), person(\"Matt\",\"Dowle\", role=\"aut\", email=\"mattjdowle@gmail.com\"), person(\"Arun\",\"Srinivasan\", role=\"aut\", email=\"asrini@pm.me\"), person(\"Jan\",\"Gorecki\", role=\"aut\", email=\"j.gorecki@wit.edu.pl\"), person(\"Michael\",\"Chirico\", role=\"aut\", email=\"michaelchirico4@gmail.com\", comment = c(ORCID=\"0000-0003-0787-087X\")), person(\"Toby\",\"Hocking\", role=\"aut\", email=\"toby.hocking@r-project.org\", comment = c(ORCID=\"0000-0002-3146-0865\")), person(\"Benjamin\",\"Schwendinger\",role=\"aut\", comment = c(ORCID=\"0000-0003-3315-8114\")), person(\"Ivan\", \"Krylov\", role=\"aut\", email=\"ikrylov@disroot.org\", comment = c(ORCID=\"0000-0002-0172-3812\")), person(\"Pasha\",\"Stetsenko\", role=\"ctb\"), person(\"Tom\",\"Short\", role=\"ctb\"), person(\"Steve\",\"Lianoglou\", role=\"ctb\"), person(\"Eduard\",\"Antonyan\", role=\"ctb\"), person(\"Markus\",\"Bonsch\", role=\"ctb\"), person(\"Hugh\",\"Parsonage\", role=\"ctb\"), person(\"Scott\",\"Ritchie\", role=\"ctb\"), person(\"Kun\",\"Ren\", role=\"ctb\"), person(\"Xianying\",\"Tan\", role=\"ctb\"), person(\"Rick\",\"Saporta\", role=\"ctb\"), person(\"Otto\",\"Seiskari\", role=\"ctb\"), person(\"Xianghui\",\"Dong\", role=\"ctb\"), person(\"Michel\",\"Lang\", role=\"ctb\"), person(\"Watal\",\"Iwasaki\", role=\"ctb\"), person(\"Seth\",\"Wenchel\", role=\"ctb\"), person(\"Karl\",\"Broman\", role=\"ctb\"), person(\"Tobias\",\"Schmidt\", role=\"ctb\"), person(\"David\",\"Arenburg\", role=\"ctb\"), person(\"Ethan\",\"Smith\", role=\"ctb\"), person(\"Francois\",\"Cocquemas\", role=\"ctb\"), person(\"Matthieu\",\"Gomez\", role=\"ctb\"), person(\"Philippe\",\"Chataignon\", role=\"ctb\"), person(\"Nello\",\"Blaser\", role=\"ctb\"), person(\"Dmitry\",\"Selivanov\", role=\"ctb\"), person(\"Andrey\",\"Riabushenko\", role=\"ctb\"), 
person(\"Cheng\",\"Lee\", role=\"ctb\"), person(\"Declan\",\"Groves\", role=\"ctb\"), person(\"Daniel\",\"Possenriede\", role=\"ctb\"), person(\"Felipe\",\"Parages\", role=\"ctb\"), person(\"Denes\",\"Toth\", role=\"ctb\"), person(\"Mus\",\"Yaramaz-David\", role=\"ctb\"), person(\"Ayappan\",\"Perumal\", role=\"ctb\"), person(\"James\",\"Sams\", role=\"ctb\"), person(\"Martin\",\"Morgan\", role=\"ctb\"), person(\"Michael\",\"Quinn\", role=\"ctb\"), person(given=\"@javrucebo\", role=\"ctb\", comment=\"GitHub user\"), person(\"Marc\",\"Halperin\", role=\"ctb\"), person(\"Roy\",\"Storey\", role=\"ctb\"), person(\"Manish\",\"Saraswat\", role=\"ctb\"), person(\"Morgan\",\"Jacob\", role=\"ctb\"), person(\"Michael\",\"Schubmehl\", role=\"ctb\"), person(\"Davis\",\"Vaughan\", role=\"ctb\"), person(\"Leonardo\",\"Silvestri\", role=\"ctb\"), person(\"Jim\",\"Hester\", role=\"ctb\"), person(\"Anthony\",\"Damico\", role=\"ctb\"), person(\"Sebastian\",\"Freundt\", role=\"ctb\"), person(\"David\",\"Simons\", role=\"ctb\"), person(\"Elliott\",\"Sales de Andrade\", role=\"ctb\"), person(\"Cole\",\"Miller\", role=\"ctb\"), person(\"Jens Peder\",\"Meldgaard\", role=\"ctb\"), person(\"Vaclav\",\"Tlapak\", role=\"ctb\"), person(\"Kevin\",\"Ushey\", role=\"ctb\"), person(\"Dirk\",\"Eddelbuettel\", role=\"ctb\"), person(\"Tony\",\"Fischetti\", role=\"ctb\"), person(\"Ofek\",\"Shilon\", role=\"ctb\"), person(\"Vadim\",\"Khotilovich\", role=\"ctb\"), person(\"Hadley\",\"Wickham\", role=\"ctb\"), person(\"Bennet\",\"Becker\", role=\"ctb\"), person(\"Kyle\",\"Haynes\", role=\"ctb\"), person(\"Boniface Christian\",\"Kamgang\", role=\"ctb\"), person(\"Olivier\",\"Delmarcell\", role=\"ctb\"), person(\"Josh\",\"O'Brien\", role=\"ctb\"), person(\"Dereck\",\"de Mezquita\", role=\"ctb\"), person(\"Michael\",\"Czekanski\", role=\"ctb\"), person(\"Dmitry\", \"Shemetov\", role=\"ctb\"), person(\"Nitish\", \"Jha\", role=\"ctb\"), person(\"Joshua\", \"Wu\", role=\"ctb\"), person(\"Iago\", 
\"Giné-Vázquez\", role=\"ctb\"), person(\"Anirban\", \"Chetia\", role=\"ctb\"), person(\"Doris\", \"Amoakohene\", role=\"ctb\"), person(\"Angel\", \"Feliz\", role=\"ctb\"), person(\"Michael\",\"Young\", role=\"ctb\"), person(\"Mark\", \"Seeto\", role=\"ctb\"), person(\"Philippe\", \"Grosjean\", role=\"ctb\"), person(\"Vincent\", \"Runge\", role=\"ctb\"), person(\"Christian\", \"Wia\", role=\"ctb\"), person(\"Elise\", \"Maigné\", role=\"ctb\"), person(\"Vincent\", \"Rocher\", role=\"ctb\"), person(\"Vijay\", \"Lulla\", role=\"ctb\"), person(\"Aljaž\", \"Sluga\", role=\"ctb\"), person(\"Bill\", \"Evans\", role=\"ctb\"), person(\"Reino\", \"Bruner\", role=\"ctb\"), person(given=\"@badasahog\", role=\"ctb\", comment=\"GitHub user\"), person(\"Vinit\", \"Thakur\", role=\"ctb\"), person(\"Mukul\", \"Kumar\", role=\"ctb\"), person(\"Ildikó\", \"Czeller\", role=\"ctb\"), person(\"Manmita\", \"Das\", role=\"ctb\") )",
+ "NeedsCompilation": "yes",
+ "Author": "Tyson Barrett [aut, cre] (ORCID: ), Matt Dowle [aut], Arun Srinivasan [aut], Jan Gorecki [aut], Michael Chirico [aut] (ORCID: ), Toby Hocking [aut] (ORCID: ), Benjamin Schwendinger [aut] (ORCID: ), Ivan Krylov [aut] (ORCID: ), Pasha Stetsenko [ctb], Tom Short [ctb], Steve Lianoglou [ctb], Eduard Antonyan [ctb], Markus Bonsch [ctb], Hugh Parsonage [ctb], Scott Ritchie [ctb], Kun Ren [ctb], Xianying Tan [ctb], Rick Saporta [ctb], Otto Seiskari [ctb], Xianghui Dong [ctb], Michel Lang [ctb], Watal Iwasaki [ctb], Seth Wenchel [ctb], Karl Broman [ctb], Tobias Schmidt [ctb], David Arenburg [ctb], Ethan Smith [ctb], Francois Cocquemas [ctb], Matthieu Gomez [ctb], Philippe Chataignon [ctb], Nello Blaser [ctb], Dmitry Selivanov [ctb], Andrey Riabushenko [ctb], Cheng Lee [ctb], Declan Groves [ctb], Daniel Possenriede [ctb], Felipe Parages [ctb], Denes Toth [ctb], Mus Yaramaz-David [ctb], Ayappan Perumal [ctb], James Sams [ctb], Martin Morgan [ctb], Michael Quinn [ctb], @javrucebo [ctb] (GitHub user), Marc Halperin [ctb], Roy Storey [ctb], Manish Saraswat [ctb], Morgan Jacob [ctb], Michael Schubmehl [ctb], Davis Vaughan [ctb], Leonardo Silvestri [ctb], Jim Hester [ctb], Anthony Damico [ctb], Sebastian Freundt [ctb], David Simons [ctb], Elliott Sales de Andrade [ctb], Cole Miller [ctb], Jens Peder Meldgaard [ctb], Vaclav Tlapak [ctb], Kevin Ushey [ctb], Dirk Eddelbuettel [ctb], Tony Fischetti [ctb], Ofek Shilon [ctb], Vadim Khotilovich [ctb], Hadley Wickham [ctb], Bennet Becker [ctb], Kyle Haynes [ctb], Boniface Christian Kamgang [ctb], Olivier Delmarcell [ctb], Josh O'Brien [ctb], Dereck de Mezquita [ctb], Michael Czekanski [ctb], Dmitry Shemetov [ctb], Nitish Jha [ctb], Joshua Wu [ctb], Iago Giné-Vázquez [ctb], Anirban Chetia [ctb], Doris Amoakohene [ctb], Angel Feliz [ctb], Michael Young [ctb], Mark Seeto [ctb], Philippe Grosjean [ctb], Vincent Runge [ctb], Christian Wia [ctb], Elise Maigné [ctb], Vincent Rocher [ctb], Vijay Lulla [ctb], Aljaž Sluga [ctb], Bill 
Evans [ctb], Reino Bruner [ctb], @badasahog [ctb] (GitHub user), Vinit Thakur [ctb], Mukul Kumar [ctb], Ildikó Czeller [ctb], Manmita Das [ctb]",
+ "Maintainer": "Tyson Barrett ",
+ "Repository": "CRAN"
+ },
+ "datawizard": {
+ "Package": "datawizard",
+ "Version": "1.3.0",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Easy Data Wrangling and Statistical Transformations",
+ "Authors@R": "c( person(\"Indrajeet\", \"Patil\", , \"patilindrajeet.science@gmail.com\", role = \"aut\", comment = c(ORCID = \"0000-0003-1995-6531\")), person(\"Etienne\", \"Bacher\", , \"etienne.bacher@protonmail.com\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-9271-5075\")), person(\"Dominique\", \"Makowski\", , \"dom.makowski@gmail.com\", role = \"aut\", comment = c(ORCID = \"0000-0001-5375-9967\")), person(\"Daniel\", \"Lüdecke\", , \"d.luedecke@uke.de\", role = \"aut\", comment = c(ORCID = \"0000-0002-8895-3206\")), person(\"Mattan S.\", \"Ben-Shachar\", , \"matanshm@post.bgu.ac.il\", role = \"aut\", comment = c(ORCID = \"0000-0002-4287-4801\")), person(\"Brenton M.\", \"Wiernik\", , \"brenton@wiernik.org\", role = \"aut\", comment = c(ORCID = \"0000-0001-9560-6336\")), person(\"Rémi\", \"Thériault\", , \"remi.theriault@mail.mcgill.ca\", role = \"ctb\", comment = c(ORCID = \"0000-0003-4315-6788\")), person(\"Thomas J.\", \"Faulkenberry\", , \"faulkenberry@tarleton.edu\", role = \"rev\"), person(\"Robert\", \"Garrett\", , \"rcg4@illinois.edu\", role = \"rev\") )",
+ "Maintainer": "Etienne Bacher ",
+ "Description": "A lightweight package to assist in key steps involved in any data analysis workflow: (1) wrangling the raw data to get it in the needed form, (2) applying preprocessing steps and statistical transformations, and (3) compute statistical summaries of data properties and distributions. It is also the data wrangling backend for packages in 'easystats' ecosystem. References: Patil et al. (2022) .",
+ "License": "MIT + file LICENSE",
+ "URL": "https://easystats.github.io/datawizard/",
+ "BugReports": "https://github.com/easystats/datawizard/issues",
+ "Depends": [
+ "R (>= 4.0)"
+ ],
+ "Imports": [
+ "insight (>= 1.4.2)",
+ "stats",
+ "utils"
+ ],
+ "Suggests": [
+ "bayestestR",
+ "boot",
+ "BH",
+ "brms",
+ "curl",
+ "data.table",
+ "dplyr (>= 1.1)",
+ "effectsize",
+ "emmeans",
+ "gamm4",
+ "ggplot2 (>= 3.5.0)",
+ "gt",
+ "haven",
+ "httr",
+ "knitr",
+ "lme4",
+ "mediation",
+ "modelbased",
+ "nanoparquet",
+ "parameters (>= 0.21.7)",
+ "performance (>= 0.14.0)",
+ "poorman (>= 0.2.7)",
+ "psych",
+ "readxl",
+ "readr",
+ "rio",
+ "rmarkdown",
+ "rstanarm",
+ "see",
+ "testthat (>= 3.2.1)",
+ "tibble",
+ "tidyr",
+ "tinytable (>= 0.13.0)",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.3.3",
+ "Config/testthat/edition": "3",
+ "Config/testthat/parallel": "true",
+ "Config/Needs/website": "easystats/easystatstemplate",
+ "NeedsCompilation": "no",
+ "Author": "Indrajeet Patil [aut] (ORCID: ), Etienne Bacher [aut, cre] (ORCID: ), Dominique Makowski [aut] (ORCID: ), Daniel Lüdecke [aut] (ORCID: ), Mattan S. Ben-Shachar [aut] (ORCID: ), Brenton M. Wiernik [aut] (ORCID: ), Rémi Thériault [ctb] (ORCID: ), Thomas J. Faulkenberry [rev], Robert Garrett [rev]",
+ "Repository": "RSPM"
+ },
+ "desc": {
+ "Package": "desc",
+ "Version": "1.4.3",
+ "Source": "Repository",
+ "Title": "Manipulate DESCRIPTION Files",
+ "Authors@R": "c( person(\"Gábor\", \"Csárdi\", , \"csardi.gabor@gmail.com\", role = c(\"aut\", \"cre\")), person(\"Kirill\", \"Müller\", role = \"aut\"), person(\"Jim\", \"Hester\", , \"james.f.hester@gmail.com\", role = \"aut\"), person(\"Maëlle\", \"Salmon\", role = \"ctb\", comment = c(ORCID = \"0000-0002-2815-0399\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Maintainer": "Gábor Csárdi ",
+ "Description": "Tools to read, write, create, and manipulate DESCRIPTION files. It is intended for packages that create or manipulate other packages.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://desc.r-lib.org/, https://github.com/r-lib/desc",
+ "BugReports": "https://github.com/r-lib/desc/issues",
+ "Depends": [
+ "R (>= 3.4)"
+ ],
+ "Imports": [
+ "cli",
+ "R6",
+ "utils"
+ ],
+ "Suggests": [
+ "callr",
+ "covr",
+ "gh",
+ "spelling",
+ "testthat",
+ "whoami",
+ "withr"
+ ],
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.2.3",
+ "Collate": "'assertions.R' 'authors-at-r.R' 'built.R' 'classes.R' 'collate.R' 'constants.R' 'deps.R' 'desc-package.R' 'description.R' 'encoding.R' 'find-package-root.R' 'latex.R' 'non-oo-api.R' 'package-archives.R' 'read.R' 'remotes.R' 'str.R' 'syntax_checks.R' 'urls.R' 'utils.R' 'validate.R' 'version.R'",
+ "NeedsCompilation": "no",
+ "Author": "Gábor Csárdi [aut, cre], Kirill Müller [aut], Jim Hester [aut], Maëlle Salmon [ctb] (), Posit Software, PBC [cph, fnd]",
+ "Repository": "CRAN"
+ },
+ "devtools": {
+ "Package": "devtools",
+ "Version": "2.4.6",
+ "Source": "Repository",
+ "Title": "Tools to Make Developing R Packages Easier",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", role = \"aut\"), person(\"Jim\", \"Hester\", role = \"aut\"), person(\"Winston\", \"Chang\", role = \"aut\"), person(\"Jennifer\", \"Bryan\", , \"jenny@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-6983-2759\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )",
+ "Description": "Collection of package development tools.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://devtools.r-lib.org/, https://github.com/r-lib/devtools",
+ "BugReports": "https://github.com/r-lib/devtools/issues",
+ "Depends": [
+ "R (>= 4.1)",
+ "usethis (>= 3.2.1)"
+ ],
+ "Imports": [
+ "cli (>= 3.6.5)",
+ "desc (>= 1.4.3)",
+ "ellipsis (>= 0.3.2)",
+ "fs (>= 1.6.6)",
+ "lifecycle (>= 1.0.4)",
+ "memoise (>= 2.0.1)",
+ "miniUI (>= 0.1.2)",
+ "pkgbuild (>= 1.4.8)",
+ "pkgdown (>= 2.1.3)",
+ "pkgload (>= 1.4.1)",
+ "profvis (>= 0.4.0)",
+ "rcmdcheck (>= 1.4.0)",
+ "remotes (>= 2.5.0)",
+ "rlang (>= 1.1.6)",
+ "roxygen2 (>= 7.3.3)",
+ "rversions (>= 2.1.2)",
+ "sessioninfo (>= 1.2.3)",
+ "stats",
+ "testthat (>= 3.2.3)",
+ "tools",
+ "urlchecker (>= 1.0.1)",
+ "utils",
+ "withr (>= 3.0.2)"
+ ],
+ "Suggests": [
+ "BiocManager (>= 1.30.18)",
+ "callr (>= 3.7.1)",
+ "covr (>= 3.5.1)",
+ "curl (>= 4.3.2)",
+ "digest (>= 0.6.29)",
+ "DT (>= 0.23)",
+ "foghorn (>= 1.4.2)",
+ "gh (>= 1.3.0)",
+ "gmailr (>= 1.0.1)",
+ "httr (>= 1.4.3)",
+ "knitr (>= 1.39)",
+ "lintr (>= 3.0.0)",
+ "MASS",
+ "mockery (>= 0.4.3)",
+ "pingr (>= 2.0.1)",
+ "rhub (>= 1.1.1)",
+ "rmarkdown (>= 2.14)",
+ "rstudioapi (>= 0.13)",
+ "spelling (>= 2.2)"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.3.3",
+ "NeedsCompilation": "no",
+ "Author": "Hadley Wickham [aut], Jim Hester [aut], Winston Chang [aut], Jennifer Bryan [aut, cre] (ORCID: ), Posit Software, PBC [cph, fnd] (ROR: )",
+ "Maintainer": "Jennifer Bryan ",
+ "Repository": "CRAN"
+ },
+ "diffobj": {
+ "Package": "diffobj",
+ "Version": "0.3.5",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Diffs for R Objects",
+ "Description": "Generate a colorized diff of two R objects for an intuitive visualization of their differences.",
+ "Authors@R": "c( person( \"Brodie\", \"Gaslam\", email=\"brodie.gaslam@yahoo.com\", role=c(\"aut\", \"cre\")), person( \"Michael B.\", \"Allen\", email=\"ioplex@gmail.com\", role=c(\"ctb\", \"cph\"), comment=\"Original C implementation of Myers Diff Algorithm\"))",
+ "Depends": [
+ "R (>= 3.1.0)"
+ ],
+ "License": "GPL-2 | GPL-3",
+ "URL": "https://github.com/brodieG/diffobj",
+ "BugReports": "https://github.com/brodieG/diffobj/issues",
+ "RoxygenNote": "7.1.1",
+ "VignetteBuilder": "knitr",
+ "Encoding": "UTF-8",
+ "Suggests": [
+ "knitr",
+ "rmarkdown"
+ ],
+ "Collate": "'capt.R' 'options.R' 'pager.R' 'check.R' 'finalizer.R' 'misc.R' 'html.R' 'styles.R' 's4.R' 'core.R' 'diff.R' 'get.R' 'guides.R' 'hunks.R' 'layout.R' 'myerssimple.R' 'rdiff.R' 'rds.R' 'set.R' 'subset.R' 'summmary.R' 'system.R' 'text.R' 'tochar.R' 'trim.R' 'word.R'",
+ "Imports": [
+ "crayon (>= 1.3.2)",
+ "tools",
+ "methods",
+ "utils",
+ "stats"
+ ],
+ "NeedsCompilation": "yes",
+ "Author": "Brodie Gaslam [aut, cre], Michael B. Allen [ctb, cph] (Original C implementation of Myers Diff Algorithm)",
+ "Maintainer": "Brodie Gaslam ",
+ "Repository": "CRAN"
+ },
+ "digest": {
+ "Package": "digest",
+ "Version": "0.6.37",
+ "Source": "Repository",
+ "Authors@R": "c(person(\"Dirk\", \"Eddelbuettel\", role = c(\"aut\", \"cre\"), email = \"edd@debian.org\", comment = c(ORCID = \"0000-0001-6419-907X\")), person(\"Antoine\", \"Lucas\", role=\"ctb\"), person(\"Jarek\", \"Tuszynski\", role=\"ctb\"), person(\"Henrik\", \"Bengtsson\", role=\"ctb\", comment = c(ORCID = \"0000-0002-7579-5165\")), person(\"Simon\", \"Urbanek\", role=\"ctb\", comment = c(ORCID = \"0000-0003-2297-1732\")), person(\"Mario\", \"Frasca\", role=\"ctb\"), person(\"Bryan\", \"Lewis\", role=\"ctb\"), person(\"Murray\", \"Stokely\", role=\"ctb\"), person(\"Hannes\", \"Muehleisen\", role=\"ctb\"), person(\"Duncan\", \"Murdoch\", role=\"ctb\"), person(\"Jim\", \"Hester\", role=\"ctb\"), person(\"Wush\", \"Wu\", role=\"ctb\", comment = c(ORCID = \"0000-0001-5180-0567\")), person(\"Qiang\", \"Kou\", role=\"ctb\", comment = c(ORCID = \"0000-0001-6786-5453\")), person(\"Thierry\", \"Onkelinx\", role=\"ctb\", comment = c(ORCID = \"0000-0001-8804-4216\")), person(\"Michel\", \"Lang\", role=\"ctb\", comment = c(ORCID = \"0000-0001-9754-0393\")), person(\"Viliam\", \"Simko\", role=\"ctb\"), person(\"Kurt\", \"Hornik\", role=\"ctb\", comment = c(ORCID = \"0000-0003-4198-9911\")), person(\"Radford\", \"Neal\", role=\"ctb\", comment = c(ORCID = \"0000-0002-2473-3407\")), person(\"Kendon\", \"Bell\", role=\"ctb\", comment = c(ORCID = \"0000-0002-9093-8312\")), person(\"Matthew\", \"de Queljoe\", role=\"ctb\"), person(\"Dmitry\", \"Selivanov\", role=\"ctb\"), person(\"Ion\", \"Suruceanu\", role=\"ctb\"), person(\"Bill\", \"Denney\", role=\"ctb\"), person(\"Dirk\", \"Schumacher\", role=\"ctb\"), person(\"András\", \"Svraka\", role=\"ctb\"), person(\"Sergey\", \"Fedorov\", role=\"ctb\"), person(\"Will\", \"Landau\", role=\"ctb\", comment = c(ORCID = \"0000-0003-1878-3253\")), person(\"Floris\", \"Vanderhaeghe\", role=\"ctb\", comment = c(ORCID = \"0000-0002-6378-6229\")), person(\"Kevin\", \"Tappe\", role=\"ctb\"), person(\"Harris\", \"McGehee\", role=\"ctb\"), 
person(\"Tim\", \"Mastny\", role=\"ctb\"), person(\"Aaron\", \"Peikert\", role=\"ctb\", comment = c(ORCID = \"0000-0001-7813-818X\")), person(\"Mark\", \"van der Loo\", role=\"ctb\", comment = c(ORCID = \"0000-0002-9807-4686\")), person(\"Chris\", \"Muir\", role=\"ctb\", comment = c(ORCID = \"0000-0003-2555-3878\")), person(\"Moritz\", \"Beller\", role=\"ctb\", comment = c(ORCID = \"0000-0003-4852-0526\")), person(\"Sebastian\", \"Campbell\", role=\"ctb\"), person(\"Winston\", \"Chang\", role=\"ctb\", comment = c(ORCID = \"0000-0002-1576-2126\")), person(\"Dean\", \"Attali\", role=\"ctb\", comment = c(ORCID = \"0000-0002-5645-3493\")), person(\"Michael\", \"Chirico\", role=\"ctb\", comment = c(ORCID = \"0000-0003-0787-087X\")), person(\"Kevin\", \"Ushey\", role=\"ctb\"))",
+ "Date": "2024-08-19",
+ "Title": "Create Compact Hash Digests of R Objects",
+ "Description": "Implementation of a function 'digest()' for the creation of hash digests of arbitrary R objects (using the 'md5', 'sha-1', 'sha-256', 'crc32', 'xxhash', 'murmurhash', 'spookyhash', 'blake3', 'crc32c', 'xxh3_64', and 'xxh3_128' algorithms) permitting easy comparison of R language objects, as well as functions such as'hmac()' to create hash-based message authentication code. Please note that this package is not meant to be deployed for cryptographic purposes for which more comprehensive (and widely tested) libraries such as 'OpenSSL' should be used.",
+ "URL": "https://github.com/eddelbuettel/digest, https://dirk.eddelbuettel.com/code/digest.html",
+ "BugReports": "https://github.com/eddelbuettel/digest/issues",
+ "Depends": [
+ "R (>= 3.3.0)"
+ ],
+ "Imports": [
+ "utils"
+ ],
+ "License": "GPL (>= 2)",
+ "Suggests": [
+ "tinytest",
+ "simplermarkdown"
+ ],
+ "VignetteBuilder": "simplermarkdown",
+ "Encoding": "UTF-8",
+ "NeedsCompilation": "yes",
+ "Author": "Dirk Eddelbuettel [aut, cre] (), Antoine Lucas [ctb], Jarek Tuszynski [ctb], Henrik Bengtsson [ctb] (), Simon Urbanek [ctb] (), Mario Frasca [ctb], Bryan Lewis [ctb], Murray Stokely [ctb], Hannes Muehleisen [ctb], Duncan Murdoch [ctb], Jim Hester [ctb], Wush Wu [ctb] (), Qiang Kou [ctb] (), Thierry Onkelinx [ctb] (), Michel Lang [ctb] (), Viliam Simko [ctb], Kurt Hornik [ctb] (), Radford Neal [ctb] (), Kendon Bell [ctb] (), Matthew de Queljoe [ctb], Dmitry Selivanov [ctb], Ion Suruceanu [ctb], Bill Denney [ctb], Dirk Schumacher [ctb], András Svraka [ctb], Sergey Fedorov [ctb], Will Landau [ctb] (), Floris Vanderhaeghe [ctb] (), Kevin Tappe [ctb], Harris McGehee [ctb], Tim Mastny [ctb], Aaron Peikert [ctb] (), Mark van der Loo [ctb] (), Chris Muir [ctb] (), Moritz Beller [ctb] (), Sebastian Campbell [ctb], Winston Chang [ctb] (), Dean Attali [ctb] (), Michael Chirico [ctb] (), Kevin Ushey [ctb]",
+ "Maintainer": "Dirk Eddelbuettel ",
+ "Repository": "CRAN"
+ },
+ "docstyle": {
+ "Package": "docstyle",
+ "Version": "0.9.0",
+ "Source": "Local",
+ "Title": "Style-as-Code for Scientific Publishing",
+ "Authors@R": "person(\"Douglas\", \"Manuel\", , \"dmanuel@ohri.ca\", role = c(\"aut\", \"cre\"))",
+ "Description": "Generate Word, PDF, HTML, and PowerPoint assets from a single CSS + YAML configuration. Provides round-trip collaboration support for scientific publishing workflows, allowing reviewer edits from Word to be imported back into Quarto/Markdown source files.",
+ "License": "MIT + file LICENSE",
+ "Encoding": "UTF-8",
+ "Roxygen": "list(markdown = TRUE)",
+ "RoxygenNote": "7.3.3",
+ "Imports": [
+ "digest",
+ "httr",
+ "jsonlite",
+ "officer",
+ "rstudioapi",
+ "xml2",
+ "yaml"
+ ],
+ "Suggests": [
+ "devtools",
+ "knitr",
+ "quarto",
+ "rmarkdown",
+ "testthat (>= 3.0.0)"
+ ],
+ "Config/testthat/edition": "3",
+ "VignetteBuilder": "quarto",
+ "URL": "https://github.com/dmanuel/docstyle",
+ "BugReports": "https://github.com/dmanuel/docstyle/issues",
+ "Author": "Douglas Manuel [aut, cre]",
+ "Maintainer": "Douglas Manuel ",
+ "RemoteType": "local",
+ "RemoteUrl": "~/github/docstyle"
+ },
+ "downlit": {
+ "Package": "downlit",
+ "Version": "0.4.5",
+ "Source": "Repository",
+ "Title": "Syntax Highlighting and Automatic Linking",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Syntax highlighting of R code, specifically designed for the needs of 'RMarkdown' packages like 'pkgdown', 'hugodown', and 'bookdown'. It includes linking of function calls to their documentation on the web, and automatic translation of ANSI escapes in output to the equivalent HTML.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://downlit.r-lib.org/, https://github.com/r-lib/downlit",
+ "BugReports": "https://github.com/r-lib/downlit/issues",
+ "Depends": [
+ "R (>= 4.0.0)"
+ ],
+ "Imports": [
+ "brio",
+ "desc",
+ "digest",
+ "evaluate",
+ "fansi",
+ "memoise",
+ "rlang",
+ "vctrs",
+ "withr",
+ "yaml"
+ ],
+ "Suggests": [
+ "covr",
+ "htmltools",
+ "jsonlite",
+ "MASS",
+ "MassSpecWavelet",
+ "pkgload",
+ "rmarkdown",
+ "testthat (>= 3.0.0)",
+ "xml2"
+ ],
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.3",
+ "NeedsCompilation": "no",
+ "Author": "Hadley Wickham [aut, cre], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Hadley Wickham ",
+ "Repository": "CRAN"
+ },
+ "dplyr": {
+ "Package": "dplyr",
+ "Version": "1.1.4",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "A Grammar of Data Manipulation",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Romain\", \"François\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"Lionel\", \"Henry\", role = \"aut\"), person(\"Kirill\", \"Müller\", role = \"aut\", comment = c(ORCID = \"0000-0002-1416-3412\")), person(\"Davis\", \"Vaughan\", , \"davis@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0003-4777-038X\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "A fast, consistent tool for working with data frame like objects, both in memory and out of memory.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://dplyr.tidyverse.org, https://github.com/tidyverse/dplyr",
+ "BugReports": "https://github.com/tidyverse/dplyr/issues",
+ "Depends": [
+ "R (>= 3.5.0)"
+ ],
+ "Imports": [
+ "cli (>= 3.4.0)",
+ "generics",
+ "glue (>= 1.3.2)",
+ "lifecycle (>= 1.0.3)",
+ "magrittr (>= 1.5)",
+ "methods",
+ "pillar (>= 1.9.0)",
+ "R6",
+ "rlang (>= 1.1.0)",
+ "tibble (>= 3.2.0)",
+ "tidyselect (>= 1.2.0)",
+ "utils",
+ "vctrs (>= 0.6.4)"
+ ],
+ "Suggests": [
+ "bench",
+ "broom",
+ "callr",
+ "covr",
+ "DBI",
+ "dbplyr (>= 2.2.1)",
+ "ggplot2",
+ "knitr",
+ "Lahman",
+ "lobstr",
+ "microbenchmark",
+ "nycflights13",
+ "purrr",
+ "rmarkdown",
+ "RMySQL",
+ "RPostgreSQL",
+ "RSQLite",
+ "stringi (>= 1.7.6)",
+ "testthat (>= 3.1.5)",
+ "tidyr (>= 1.3.0)",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "tidyverse, shiny, pkgdown, tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "LazyData": "true",
+ "RoxygenNote": "7.2.3",
+ "NeedsCompilation": "yes",
+ "Author": "Hadley Wickham [aut, cre] (), Romain François [aut] (), Lionel Henry [aut], Kirill Müller [aut] (), Davis Vaughan [aut] (), Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Hadley Wickham ",
+ "Repository": "CRAN"
+ },
+ "ellipsis": {
+ "Package": "ellipsis",
+ "Version": "0.3.2",
+ "Source": "Repository",
+ "Title": "Tools for Working with ...",
+ "Description": "The ellipsis is a powerful tool for extending functions. Unfortunately this power comes at a cost: misspelled arguments will be silently ignored. The ellipsis package provides a collection of functions to catch problems and alert the user.",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@rstudio.com\", role = c(\"aut\", \"cre\")), person(\"RStudio\", role = \"cph\") )",
+ "License": "MIT + file LICENSE",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.1.1",
+ "URL": "https://ellipsis.r-lib.org, https://github.com/r-lib/ellipsis",
+ "BugReports": "https://github.com/r-lib/ellipsis/issues",
+ "Depends": [
+ "R (>= 3.2)"
+ ],
+ "Imports": [
+ "rlang (>= 0.3.0)"
+ ],
+ "Suggests": [
+ "covr",
+ "testthat"
+ ],
+ "NeedsCompilation": "yes",
+ "Author": "Hadley Wickham [aut, cre], RStudio [cph]",
+ "Maintainer": "Hadley Wickham ",
+ "Repository": "CRAN"
+ },
+ "evaluate": {
+ "Package": "evaluate",
+ "Version": "1.0.3",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Parsing and Evaluation Tools that Provide More Details than the Default",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\")), person(\"Yihui\", \"Xie\", role = \"aut\", comment = c(ORCID = \"0000-0003-0645-5666\")), person(\"Michael\", \"Lawrence\", role = \"ctb\"), person(\"Thomas\", \"Kluyver\", role = \"ctb\"), person(\"Jeroen\", \"Ooms\", role = \"ctb\"), person(\"Barret\", \"Schloerke\", role = \"ctb\"), person(\"Adam\", \"Ryczkowski\", role = \"ctb\"), person(\"Hiroaki\", \"Yutani\", role = \"ctb\"), person(\"Michel\", \"Lang\", role = \"ctb\"), person(\"Karolis\", \"Koncevičius\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Parsing and evaluation tools that make it easy to recreate the command line behaviour of R.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://evaluate.r-lib.org/, https://github.com/r-lib/evaluate",
+ "BugReports": "https://github.com/r-lib/evaluate/issues",
+ "Depends": [
+ "R (>= 3.6.0)"
+ ],
+ "Suggests": [
+ "callr",
+ "covr",
+ "ggplot2 (>= 3.3.6)",
+ "lattice",
+ "methods",
+ "pkgload",
+ "rlang",
+ "knitr",
+ "testthat (>= 3.0.0)",
+ "withr"
+ ],
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.2",
+ "NeedsCompilation": "no",
+ "Author": "Hadley Wickham [aut, cre], Yihui Xie [aut] (), Michael Lawrence [ctb], Thomas Kluyver [ctb], Jeroen Ooms [ctb], Barret Schloerke [ctb], Adam Ryczkowski [ctb], Hiroaki Yutani [ctb], Michel Lang [ctb], Karolis Koncevičius [ctb], Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Hadley Wickham ",
+ "Repository": "CRAN"
+ },
+ "fansi": {
+ "Package": "fansi",
+ "Version": "1.0.6",
+ "Source": "Repository",
+ "Title": "ANSI Control Sequence Aware String Functions",
+ "Description": "Counterparts to R string manipulation functions that account for the effects of ANSI text formatting control sequences.",
+ "Authors@R": "c( person(\"Brodie\", \"Gaslam\", email=\"brodie.gaslam@yahoo.com\", role=c(\"aut\", \"cre\")), person(\"Elliott\", \"Sales De Andrade\", role=\"ctb\"), person(family=\"R Core Team\", email=\"R-core@r-project.org\", role=\"cph\", comment=\"UTF8 byte length calcs from src/util.c\" ))",
+ "Depends": [
+ "R (>= 3.1.0)"
+ ],
+ "License": "GPL-2 | GPL-3",
+ "URL": "https://github.com/brodieG/fansi",
+ "BugReports": "https://github.com/brodieG/fansi/issues",
+ "VignetteBuilder": "knitr",
+ "Suggests": [
+ "unitizer",
+ "knitr",
+ "rmarkdown"
+ ],
+ "Imports": [
+ "grDevices",
+ "utils"
+ ],
+ "RoxygenNote": "7.2.3",
+ "Encoding": "UTF-8",
+ "Collate": "'constants.R' 'fansi-package.R' 'internal.R' 'load.R' 'misc.R' 'nchar.R' 'strwrap.R' 'strtrim.R' 'strsplit.R' 'substr2.R' 'trimws.R' 'tohtml.R' 'unhandled.R' 'normalize.R' 'sgr.R'",
+ "NeedsCompilation": "yes",
+ "Author": "Brodie Gaslam [aut, cre], Elliott Sales De Andrade [ctb], R Core Team [cph] (UTF8 byte length calcs from src/util.c)",
+ "Maintainer": "Brodie Gaslam ",
+ "Repository": "CRAN"
+ },
+ "farver": {
+ "Package": "farver",
+ "Version": "2.1.2",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "High Performance Colour Space Manipulation",
+ "Authors@R": "c( person(\"Thomas Lin\", \"Pedersen\", , \"thomas.pedersen@posit.co\", role = c(\"cre\", \"aut\"), comment = c(ORCID = \"0000-0002-5147-4711\")), person(\"Berendea\", \"Nicolae\", role = \"aut\", comment = \"Author of the ColorSpace C++ library\"), person(\"Romain\", \"François\", , \"romain@purrple.cat\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"Posit, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "The encoding of colour can be handled in many different ways, using different colour spaces. As different colour spaces have different uses, efficient conversion between these representations are important. The 'farver' package provides a set of functions that gives access to very fast colour space conversion and comparisons implemented in C++, and offers speed improvements over the 'convertColor' function in the 'grDevices' package.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://farver.data-imaginist.com, https://github.com/thomasp85/farver",
+ "BugReports": "https://github.com/thomasp85/farver/issues",
+ "Suggests": [
+ "covr",
+ "testthat (>= 3.0.0)"
+ ],
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.1",
+ "NeedsCompilation": "yes",
+ "Author": "Thomas Lin Pedersen [cre, aut] (), Berendea Nicolae [aut] (Author of the ColorSpace C++ library), Romain François [aut] (), Posit, PBC [cph, fnd]",
+ "Maintainer": "Thomas Lin Pedersen ",
+ "Repository": "CRAN"
+ },
+ "fastmap": {
+ "Package": "fastmap",
+ "Version": "1.2.0",
+ "Source": "Repository",
+ "Title": "Fast Data Structures",
+ "Authors@R": "c( person(\"Winston\", \"Chang\", email = \"winston@posit.co\", role = c(\"aut\", \"cre\")), person(given = \"Posit Software, PBC\", role = c(\"cph\", \"fnd\")), person(given = \"Tessil\", role = \"cph\", comment = \"hopscotch_map library\") )",
+ "Description": "Fast implementation of data structures, including a key-value store, stack, and queue. Environments are commonly used as key-value stores in R, but every time a new key is used, it is added to R's global symbol table, causing a small amount of memory leakage. This can be problematic in cases where many different keys are used. Fastmap avoids this memory leak issue by implementing the map using data structures in C++.",
+ "License": "MIT + file LICENSE",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.2.3",
+ "Suggests": [
+ "testthat (>= 2.1.1)"
+ ],
+ "URL": "https://r-lib.github.io/fastmap/, https://github.com/r-lib/fastmap",
+ "BugReports": "https://github.com/r-lib/fastmap/issues",
+ "NeedsCompilation": "yes",
+ "Author": "Winston Chang [aut, cre], Posit Software, PBC [cph, fnd], Tessil [cph] (hopscotch_map library)",
+ "Maintainer": "Winston Chang ",
+ "Repository": "CRAN"
+ },
+ "fontawesome": {
+ "Package": "fontawesome",
+ "Version": "0.5.3",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Easily Work with 'Font Awesome' Icons",
+ "Description": "Easily and flexibly insert 'Font Awesome' icons into 'R Markdown' documents and 'Shiny' apps. These icons can be inserted into HTML content through inline 'SVG' tags or 'i' tags. There is also a utility function for exporting 'Font Awesome' icons as 'PNG' images for those situations where raster graphics are needed.",
+ "Authors@R": "c( person(\"Richard\", \"Iannone\", , \"rich@posit.co\", c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0003-3925-190X\")), person(\"Christophe\", \"Dervieux\", , \"cderv@posit.co\", role = \"ctb\", comment = c(ORCID = \"0000-0003-4474-2498\")), person(\"Winston\", \"Chang\", , \"winston@posit.co\", role = \"ctb\"), person(\"Dave\", \"Gandy\", role = c(\"ctb\", \"cph\"), comment = \"Font-Awesome font\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "License": "MIT + file LICENSE",
+ "URL": "https://github.com/rstudio/fontawesome, https://rstudio.github.io/fontawesome/",
+ "BugReports": "https://github.com/rstudio/fontawesome/issues",
+ "Encoding": "UTF-8",
+ "ByteCompile": "true",
+ "RoxygenNote": "7.3.2",
+ "Depends": [
+ "R (>= 3.3.0)"
+ ],
+ "Imports": [
+ "rlang (>= 1.0.6)",
+ "htmltools (>= 0.5.1.1)"
+ ],
+ "Suggests": [
+ "covr",
+ "dplyr (>= 1.0.8)",
+ "gt (>= 0.9.0)",
+ "knitr (>= 1.31)",
+ "testthat (>= 3.0.0)",
+ "rsvg"
+ ],
+ "Config/testthat/edition": "3",
+ "NeedsCompilation": "no",
+ "Author": "Richard Iannone [aut, cre] (), Christophe Dervieux [ctb] (), Winston Chang [ctb], Dave Gandy [ctb, cph] (Font-Awesome font), Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Richard Iannone ",
+ "Repository": "CRAN"
+ },
+ "forcats": {
+ "Package": "forcats",
+ "Version": "1.0.0",
+ "Source": "Repository",
+ "Title": "Tools for Working with Categorical Variables (Factors)",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@rstudio.com\", role = c(\"aut\", \"cre\")), person(\"RStudio\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Helpers for reordering factor levels (including moving specified levels to front, ordering by first appearance, reversing, and randomly shuffling), and tools for modifying factor levels (including collapsing rare levels into other, 'anonymising', and manually 'recoding').",
+ "License": "MIT + file LICENSE",
+ "URL": "https://forcats.tidyverse.org/, https://github.com/tidyverse/forcats",
+ "BugReports": "https://github.com/tidyverse/forcats/issues",
+ "Depends": [
+ "R (>= 3.4)"
+ ],
+ "Imports": [
+ "cli (>= 3.4.0)",
+ "glue",
+ "lifecycle",
+ "magrittr",
+ "rlang (>= 1.0.0)",
+ "tibble"
+ ],
+ "Suggests": [
+ "covr",
+ "dplyr",
+ "ggplot2",
+ "knitr",
+ "readr",
+ "rmarkdown",
+ "testthat (>= 3.0.0)",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "LazyData": "true",
+ "RoxygenNote": "7.2.3",
+ "NeedsCompilation": "no",
+ "Author": "Hadley Wickham [aut, cre], RStudio [cph, fnd]",
+ "Maintainer": "Hadley Wickham ",
+ "Repository": "CRAN"
+ },
+ "foreach": {
+ "Package": "foreach",
+ "Version": "1.5.2",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Provides Foreach Looping Construct",
+ "Authors@R": "c(person(\"Folashade\", \"Daniel\", role=\"cre\", email=\"fdaniel@microsoft.com\"), person(\"Hong\", \"Ooi\", role=\"ctb\"), person(\"Rich\", \"Calaway\", role=\"ctb\"), person(\"Microsoft\", role=c(\"aut\", \"cph\")), person(\"Steve\", \"Weston\", role=\"aut\"))",
+ "Description": "Support for the foreach looping construct. Foreach is an idiom that allows for iterating over elements in a collection, without the use of an explicit loop counter. This package in particular is intended to be used for its return value, rather than for its side effects. In that sense, it is similar to the standard lapply function, but doesn't require the evaluation of a function. Using foreach without side effects also facilitates executing the loop in parallel.",
+ "License": "Apache License (== 2.0)",
+ "URL": "https://github.com/RevolutionAnalytics/foreach",
+ "BugReports": "https://github.com/RevolutionAnalytics/foreach/issues",
+ "Depends": [
+ "R (>= 2.5.0)"
+ ],
+ "Imports": [
+ "codetools",
+ "utils",
+ "iterators"
+ ],
+ "Suggests": [
+ "randomForest",
+ "doMC",
+ "doParallel",
+ "testthat",
+ "knitr",
+ "rmarkdown"
+ ],
+ "VignetteBuilder": "knitr",
+ "RoxygenNote": "7.1.1",
+ "Collate": "'callCombine.R' 'foreach.R' 'do.R' 'foreach-ext.R' 'foreach-pkg.R' 'getDoPar.R' 'getDoSeq.R' 'getsyms.R' 'iter.R' 'nextElem.R' 'onLoad.R' 'setDoPar.R' 'setDoSeq.R' 'times.R' 'utils.R'",
+ "NeedsCompilation": "no",
+ "Author": "Folashade Daniel [cre], Hong Ooi [ctb], Rich Calaway [ctb], Microsoft [aut, cph], Steve Weston [aut]",
+ "Maintainer": "Folashade Daniel ",
+ "Repository": "RSPM",
+ "Encoding": "UTF-8"
+ },
+ "foreign": {
+ "Package": "foreign",
+ "Version": "0.8-87",
+ "Source": "Repository",
+ "Priority": "recommended",
+ "Date": "2024-06-25",
+ "Title": "Read Data Stored by 'Minitab', 'S', 'SAS', 'SPSS', 'Stata', 'Systat', 'Weka', 'dBase', ...",
+ "Depends": [
+ "R (>= 4.0.0)"
+ ],
+ "Imports": [
+ "methods",
+ "utils",
+ "stats"
+ ],
+ "Authors@R": "c( person(\"R Core Team\", email = \"R-core@R-project.org\", role = c(\"aut\", \"cph\", \"cre\")), person(\"Roger\", \"Bivand\", role = c(\"ctb\", \"cph\")), person(c(\"Vincent\", \"J.\"), \"Carey\", role = c(\"ctb\", \"cph\")), person(\"Saikat\", \"DebRoy\", role = c(\"ctb\", \"cph\")), person(\"Stephen\", \"Eglen\", role = c(\"ctb\", \"cph\")), person(\"Rajarshi\", \"Guha\", role = c(\"ctb\", \"cph\")), person(\"Swetlana\", \"Herbrandt\", role = \"ctb\"), person(\"Nicholas\", \"Lewin-Koh\", role = c(\"ctb\", \"cph\")), person(\"Mark\", \"Myatt\", role = c(\"ctb\", \"cph\")), person(\"Michael\", \"Nelson\", role = \"ctb\"), person(\"Ben\", \"Pfaff\", role = \"ctb\"), person(\"Brian\", \"Quistorff\", role = \"ctb\"), person(\"Frank\", \"Warmerdam\", role = c(\"ctb\", \"cph\")), person(\"Stephen\", \"Weigand\", role = c(\"ctb\", \"cph\")), person(\"Free Software Foundation, Inc.\", role = \"cph\"))",
+ "Contact": "see 'MailingList'",
+ "Copyright": "see file COPYRIGHTS",
+ "Description": "Reading and writing data stored by some versions of 'Epi Info', 'Minitab', 'S', 'SAS', 'SPSS', 'Stata', 'Systat', 'Weka', and for reading and writing some 'dBase' files.",
+ "ByteCompile": "yes",
+ "Biarch": "yes",
+ "License": "GPL (>= 2)",
+ "BugReports": "https://bugs.r-project.org",
+ "MailingList": "R-help@r-project.org",
+ "URL": "https://svn.r-project.org/R-packages/trunk/foreign/",
+ "NeedsCompilation": "yes",
+ "Author": "R Core Team [aut, cph, cre], Roger Bivand [ctb, cph], Vincent J. Carey [ctb, cph], Saikat DebRoy [ctb, cph], Stephen Eglen [ctb, cph], Rajarshi Guha [ctb, cph], Swetlana Herbrandt [ctb], Nicholas Lewin-Koh [ctb, cph], Mark Myatt [ctb, cph], Michael Nelson [ctb], Ben Pfaff [ctb], Brian Quistorff [ctb], Frank Warmerdam [ctb, cph], Stephen Weigand [ctb, cph], Free Software Foundation, Inc. [cph]",
+ "Maintainer": "R Core Team ",
+ "Repository": "CRAN"
+ },
+ "fs": {
+ "Package": "fs",
+ "Version": "1.6.7",
+ "Source": "Repository",
+ "Title": "Cross-Platform File System Operations Based on 'libuv'",
+ "Authors@R": "c( person(\"Jim\", \"Hester\", role = \"aut\"), person(\"Hadley\", \"Wickham\", role = \"aut\"), person(\"Gábor\", \"Csárdi\", role = \"aut\"), person(\"Jeroen\", \"Ooms\", , \"jeroenooms@gmail.com\", role = \"cre\"), person(\"libuv project contributors\", role = \"cph\", comment = \"libuv library\"), person(\"Joyent, Inc. and other Node contributors\", role = \"cph\", comment = \"libuv library\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )",
+ "Description": "A cross-platform interface to file system operations, built on top of the 'libuv' C library.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://fs.r-lib.org, https://github.com/r-lib/fs",
+ "BugReports": "https://github.com/r-lib/fs/issues",
+ "Depends": [
+ "R (>= 4.1)"
+ ],
+ "Imports": [
+ "methods"
+ ],
+ "Suggests": [
+ "covr",
+ "crayon",
+ "knitr",
+ "pillar (>= 1.0.0)",
+ "rmarkdown",
+ "spelling",
+ "testthat (>= 3.0.0)",
+ "tibble (>= 1.1.0)",
+ "vctrs (>= 0.3.0)",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "ByteCompile": "true",
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Config/usethis/last-upkeep": "2025-04-23",
+ "Copyright": "file COPYRIGHTS",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.3.3",
+ "SystemRequirements": "GNU make",
+ "NeedsCompilation": "yes",
+ "Author": "Jim Hester [aut], Hadley Wickham [aut], Gábor Csárdi [aut], Jeroen Ooms [cre], libuv project contributors [cph] (libuv library), Joyent, Inc. and other Node contributors [cph] (libuv library), Posit Software, PBC [cph, fnd] (ROR: )",
+ "Maintainer": "Jeroen Ooms ",
+ "Repository": "CRAN"
+ },
+ "generics": {
+ "Package": "generics",
+ "Version": "0.1.3",
+ "Source": "Repository",
+ "Title": "Common S3 Generics not Provided by Base R Methods Related to Model Fitting",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@rstudio.com\", role = c(\"aut\", \"cre\")), person(\"Max\", \"Kuhn\", , \"max@rstudio.com\", role = \"aut\"), person(\"Davis\", \"Vaughan\", , \"davis@rstudio.com\", role = \"aut\"), person(\"RStudio\", role = \"cph\") )",
+ "Description": "In order to reduce potential package dependencies and conflicts, generics provides a number of commonly used S3 generics.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://generics.r-lib.org, https://github.com/r-lib/generics",
+ "BugReports": "https://github.com/r-lib/generics/issues",
+ "Depends": [
+ "R (>= 3.2)"
+ ],
+ "Imports": [
+ "methods"
+ ],
+ "Suggests": [
+ "covr",
+ "pkgload",
+ "testthat (>= 3.0.0)",
+ "tibble",
+ "withr"
+ ],
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.2.0",
+ "NeedsCompilation": "no",
+ "Author": "Hadley Wickham [aut, cre], Max Kuhn [aut], Davis Vaughan [aut], RStudio [cph]",
+ "Maintainer": "Hadley Wickham ",
+ "Repository": "CRAN"
+ },
+ "gert": {
+ "Package": "gert",
+ "Version": "2.3.1",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Simple Git Client for R",
+ "Authors@R": "c( person(\"Jeroen\", \"Ooms\", role = c(\"aut\", \"cre\"), email = \"jeroenooms@gmail.com\", comment = c(ORCID = \"0000-0002-4035-0289\")), person(\"Jennifer\", \"Bryan\", role = \"ctb\", email = \"jenny@posit.co\", comment = c(ORCID = \"0000-0002-6983-2759\")))",
+ "Description": "Simple git client for R based on 'libgit2' with support for SSH and HTTPS remotes. All functions in 'gert' use basic R data types (such as vectors and data-frames) for their arguments and return values. User credentials are shared with command line 'git' through the git-credential store and ssh keys stored on disk or ssh-agent.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://docs.ropensci.org/gert/, https://ropensci.r-universe.dev/gert",
+ "BugReports": "https://github.com/r-lib/gert/issues",
+ "Imports": [
+ "askpass",
+ "credentials (>= 1.2.1)",
+ "openssl (>= 2.0.3)",
+ "rstudioapi (>= 0.11)",
+ "sys",
+ "zip (>= 2.1.0)"
+ ],
+ "Suggests": [
+ "spelling",
+ "knitr",
+ "rmarkdown",
+ "testthat"
+ ],
+ "VignetteBuilder": "knitr",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.3",
+ "SystemRequirements": "libgit2 (>= 1.0): libgit2-devel (rpm) or libgit2-dev (deb)",
+ "Language": "en-US",
+ "NeedsCompilation": "yes",
+ "Author": "Jeroen Ooms [aut, cre] (ORCID: ), Jennifer Bryan [ctb] (ORCID: )",
+ "Maintainer": "Jeroen Ooms ",
+ "Repository": "CRAN"
+ },
+ "ggplot2": {
+ "Package": "ggplot2",
+ "Version": "3.5.1",
+ "Source": "Repository",
+ "Title": "Create Elegant Data Visualisations Using the Grammar of Graphics",
+ "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Winston\", \"Chang\", role = \"aut\", comment = c(ORCID = \"0000-0002-1576-2126\")), person(\"Lionel\", \"Henry\", role = \"aut\"), person(\"Thomas Lin\", \"Pedersen\", , \"thomas.pedersen@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-5147-4711\")), person(\"Kohske\", \"Takahashi\", role = \"aut\"), person(\"Claus\", \"Wilke\", role = \"aut\", comment = c(ORCID = \"0000-0002-7470-9261\")), person(\"Kara\", \"Woo\", role = \"aut\", comment = c(ORCID = \"0000-0002-5125-4188\")), person(\"Hiroaki\", \"Yutani\", role = \"aut\", comment = c(ORCID = \"0000-0002-3385-7233\")), person(\"Dewey\", \"Dunnington\", role = \"aut\", comment = c(ORCID = \"0000-0002-9415-4582\")), person(\"Teun\", \"van den Brand\", role = \"aut\", comment = c(ORCID = \"0000-0002-9335-7468\")), person(\"Posit, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "A system for 'declaratively' creating graphics, based on \"The Grammar of Graphics\". You provide the data, tell 'ggplot2' how to map variables to aesthetics, what graphical primitives to use, and it takes care of the details.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://ggplot2.tidyverse.org, https://github.com/tidyverse/ggplot2",
+ "BugReports": "https://github.com/tidyverse/ggplot2/issues",
+ "Depends": [
+ "R (>= 3.5)"
+ ],
+ "Imports": [
+ "cli",
+ "glue",
+ "grDevices",
+ "grid",
+ "gtable (>= 0.1.1)",
+ "isoband",
+ "lifecycle (> 1.0.1)",
+ "MASS",
+ "mgcv",
+ "rlang (>= 1.1.0)",
+ "scales (>= 1.3.0)",
+ "stats",
+ "tibble",
+ "vctrs (>= 0.6.0)",
+ "withr (>= 2.5.0)"
+ ],
+ "Suggests": [
+ "covr",
+ "dplyr",
+ "ggplot2movies",
+ "hexbin",
+ "Hmisc",
+ "knitr",
+ "mapproj",
+ "maps",
+ "multcomp",
+ "munsell",
+ "nlme",
+ "profvis",
+ "quantreg",
+ "ragg (>= 1.2.6)",
+ "RColorBrewer",
+ "rmarkdown",
+ "rpart",
+ "sf (>= 0.7-3)",
+ "svglite (>= 2.1.2)",
+ "testthat (>= 3.1.2)",
+ "vdiffr (>= 1.0.6)",
+ "xml2"
+ ],
+ "Enhances": [
+ "sp"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "ggtext, tidyr, forcats, tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "LazyData": "true",
+ "RoxygenNote": "7.3.1",
+ "Collate": "'ggproto.R' 'ggplot-global.R' 'aaa-.R' 'aes-colour-fill-alpha.R' 'aes-evaluation.R' 'aes-group-order.R' 'aes-linetype-size-shape.R' 'aes-position.R' 'compat-plyr.R' 'utilities.R' 'aes.R' 'utilities-checks.R' 'legend-draw.R' 'geom-.R' 'annotation-custom.R' 'annotation-logticks.R' 'geom-polygon.R' 'geom-map.R' 'annotation-map.R' 'geom-raster.R' 'annotation-raster.R' 'annotation.R' 'autolayer.R' 'autoplot.R' 'axis-secondary.R' 'backports.R' 'bench.R' 'bin.R' 'coord-.R' 'coord-cartesian-.R' 'coord-fixed.R' 'coord-flip.R' 'coord-map.R' 'coord-munch.R' 'coord-polar.R' 'coord-quickmap.R' 'coord-radial.R' 'coord-sf.R' 'coord-transform.R' 'data.R' 'docs_layer.R' 'facet-.R' 'facet-grid-.R' 'facet-null.R' 'facet-wrap.R' 'fortify-lm.R' 'fortify-map.R' 'fortify-multcomp.R' 'fortify-spatial.R' 'fortify.R' 'stat-.R' 'geom-abline.R' 'geom-rect.R' 'geom-bar.R' 'geom-bin2d.R' 'geom-blank.R' 'geom-boxplot.R' 'geom-col.R' 'geom-path.R' 'geom-contour.R' 'geom-count.R' 'geom-crossbar.R' 'geom-segment.R' 'geom-curve.R' 'geom-defaults.R' 'geom-ribbon.R' 'geom-density.R' 'geom-density2d.R' 'geom-dotplot.R' 'geom-errorbar.R' 'geom-errorbarh.R' 'geom-freqpoly.R' 'geom-function.R' 'geom-hex.R' 'geom-histogram.R' 'geom-hline.R' 'geom-jitter.R' 'geom-label.R' 'geom-linerange.R' 'geom-point.R' 'geom-pointrange.R' 'geom-quantile.R' 'geom-rug.R' 'geom-sf.R' 'geom-smooth.R' 'geom-spoke.R' 'geom-text.R' 'geom-tile.R' 'geom-violin.R' 'geom-vline.R' 'ggplot2-package.R' 'grob-absolute.R' 'grob-dotstack.R' 'grob-null.R' 'grouping.R' 'theme-elements.R' 'guide-.R' 'guide-axis.R' 'guide-axis-logticks.R' 'guide-axis-stack.R' 'guide-axis-theta.R' 'guide-legend.R' 'guide-bins.R' 'guide-colorbar.R' 'guide-colorsteps.R' 'guide-custom.R' 'layer.R' 'guide-none.R' 'guide-old.R' 'guides-.R' 'guides-grid.R' 'hexbin.R' 'import-standalone-obj-type.R' 'import-standalone-types-check.R' 'labeller.R' 'labels.R' 'layer-sf.R' 'layout.R' 'limits.R' 'margins.R' 'performance.R' 'plot-build.R' 
'plot-construction.R' 'plot-last.R' 'plot.R' 'position-.R' 'position-collide.R' 'position-dodge.R' 'position-dodge2.R' 'position-identity.R' 'position-jitter.R' 'position-jitterdodge.R' 'position-nudge.R' 'position-stack.R' 'quick-plot.R' 'reshape-add-margins.R' 'save.R' 'scale-.R' 'scale-alpha.R' 'scale-binned.R' 'scale-brewer.R' 'scale-colour.R' 'scale-continuous.R' 'scale-date.R' 'scale-discrete-.R' 'scale-expansion.R' 'scale-gradient.R' 'scale-grey.R' 'scale-hue.R' 'scale-identity.R' 'scale-linetype.R' 'scale-linewidth.R' 'scale-manual.R' 'scale-shape.R' 'scale-size.R' 'scale-steps.R' 'scale-type.R' 'scale-view.R' 'scale-viridis.R' 'scales-.R' 'stat-align.R' 'stat-bin.R' 'stat-bin2d.R' 'stat-bindot.R' 'stat-binhex.R' 'stat-boxplot.R' 'stat-contour.R' 'stat-count.R' 'stat-density-2d.R' 'stat-density.R' 'stat-ecdf.R' 'stat-ellipse.R' 'stat-function.R' 'stat-identity.R' 'stat-qq-line.R' 'stat-qq.R' 'stat-quantilemethods.R' 'stat-sf-coordinates.R' 'stat-sf.R' 'stat-smooth-methods.R' 'stat-smooth.R' 'stat-sum.R' 'stat-summary-2d.R' 'stat-summary-bin.R' 'stat-summary-hex.R' 'stat-summary.R' 'stat-unique.R' 'stat-ydensity.R' 'summarise-plot.R' 'summary.R' 'theme.R' 'theme-defaults.R' 'theme-current.R' 'utilities-break.R' 'utilities-grid.R' 'utilities-help.R' 'utilities-matrix.R' 'utilities-patterns.R' 'utilities-resolution.R' 'utilities-tidy-eval.R' 'zxx.R' 'zzz.R'",
+ "NeedsCompilation": "no",
+ "Author": "Hadley Wickham [aut] (), Winston Chang [aut] (), Lionel Henry [aut], Thomas Lin Pedersen [aut, cre] (), Kohske Takahashi [aut], Claus Wilke [aut] (), Kara Woo [aut] (), Hiroaki Yutani [aut] (), Dewey Dunnington [aut] (), Teun van den Brand [aut] (), Posit, PBC [cph, fnd]",
+ "Maintainer": "Thomas Lin Pedersen ",
+ "Repository": "CRAN"
+ },
+ "gh": {
+ "Package": "gh",
+ "Version": "1.5.0",
+ "Source": "Repository",
+ "Title": "'GitHub' 'API'",
+ "Authors@R": "c( person(\"Gábor\", \"Csárdi\", , \"csardi.gabor@gmail.com\", role = c(\"cre\", \"ctb\")), person(\"Jennifer\", \"Bryan\", role = \"aut\"), person(\"Hadley\", \"Wickham\", role = \"aut\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )",
+ "Description": "Minimal client to access the 'GitHub' 'API'.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://gh.r-lib.org/, https://github.com/r-lib/gh#readme",
+ "BugReports": "https://github.com/r-lib/gh/issues",
+ "Depends": [
+ "R (>= 4.1)"
+ ],
+ "Imports": [
+ "cli (>= 3.0.1)",
+ "gitcreds",
+ "glue",
+ "httr2 (>= 1.0.6)",
+ "ini",
+ "jsonlite",
+ "lifecycle",
+ "rlang (>= 1.0.0)"
+ ],
+ "Suggests": [
+ "connectcreds",
+ "covr",
+ "knitr",
+ "rmarkdown",
+ "rprojroot",
+ "spelling",
+ "testthat (>= 3.0.0)",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Config/usethis/last-upkeep": "2025-04-29",
+ "Encoding": "UTF-8",
+ "Language": "en-US",
+ "RoxygenNote": "7.3.2.9000",
+ "NeedsCompilation": "no",
+ "Author": "Gábor Csárdi [cre, ctb], Jennifer Bryan [aut], Hadley Wickham [aut], Posit Software, PBC [cph, fnd] (ROR: )",
+ "Maintainer": "Gábor Csárdi ",
+ "Repository": "CRAN"
+ },
+ "gitcreds": {
+ "Package": "gitcreds",
+ "Version": "0.1.2",
+ "Source": "Repository",
+ "Title": "Query 'git' Credentials from 'R'",
+ "Authors@R": "c( person(\"Gábor\", \"Csárdi\", , \"csardi.gabor@gmail.com\", role = c(\"aut\", \"cre\")), person(\"RStudio\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Query, set, delete credentials from the 'git' credential store. Manage 'GitHub' tokens and other 'git' credentials. This package is to be used by other packages that need to authenticate to 'GitHub' and/or other 'git' repositories.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://gitcreds.r-lib.org/, https://github.com/r-lib/gitcreds",
+ "BugReports": "https://github.com/r-lib/gitcreds/issues",
+ "Depends": [
+ "R (>= 3.4)"
+ ],
+ "Suggests": [
+ "codetools",
+ "covr",
+ "knitr",
+ "mockery",
+ "oskeyring",
+ "rmarkdown",
+ "testthat (>= 3.0.0)",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "Config/Needs/website": "tidyverse/tidytemplate",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.2.1.9000",
+ "SystemRequirements": "git",
+ "Config/testthat/edition": "3",
+ "NeedsCompilation": "no",
+ "Author": "Gábor Csárdi [aut, cre], RStudio [cph, fnd]",
+ "Maintainer": "Gábor Csárdi ",
+ "Repository": "CRAN"
+ },
+ "glmnet": {
+ "Package": "glmnet",
+ "Version": "4.1-10",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Lasso and Elastic-Net Regularized Generalized Linear Models",
+ "Date": "2025-07-15",
+ "Authors@R": "c(person(\"Jerome\", \"Friedman\", role=c(\"aut\")), person(\"Trevor\", \"Hastie\", role=c(\"aut\", \"cre\"), email = \"hastie@stanford.edu\"), person(\"Rob\", \"Tibshirani\", role=c(\"aut\")), person(\"Balasubramanian\", \"Narasimhan\", role=c(\"aut\")), person(\"Kenneth\",\"Tay\",role=c(\"aut\")), person(\"Noah\", \"Simon\", role=c(\"aut\")), person(\"Junyang\", \"Qian\", role=c(\"ctb\")), person(\"James\", \"Yang\", role=c(\"aut\")))",
+ "Depends": [
+ "R (>= 3.6.0)",
+ "Matrix (>= 1.0-6)"
+ ],
+ "Imports": [
+ "methods",
+ "utils",
+ "foreach",
+ "shape",
+ "survival",
+ "Rcpp"
+ ],
+ "Suggests": [
+ "knitr",
+ "lars",
+ "testthat",
+ "xfun",
+ "rmarkdown"
+ ],
+ "SystemRequirements": "C++17",
+ "Description": "Extremely efficient procedures for fitting the entire lasso or elastic-net regularization path for linear regression, logistic and multinomial regression models, Poisson regression, Cox model, multiple-response Gaussian, and the grouped multinomial regression; see and . There are two new and important additions. The family argument can be a GLM family object, which opens the door to any programmed family (). This comes with a modest computational cost, so when the built-in families suffice, they should be used instead. The other novelty is the relax option, which refits each of the active sets in the path unpenalized. The algorithm uses cyclical coordinate descent in a path-wise fashion, as described in the papers cited.",
+ "License": "GPL-2",
+ "VignetteBuilder": "knitr",
+ "Encoding": "UTF-8",
+ "URL": "https://glmnet.stanford.edu",
+ "RoxygenNote": "7.3.2",
+ "LinkingTo": [
+ "RcppEigen",
+ "Rcpp"
+ ],
+ "NeedsCompilation": "yes",
+ "Author": "Jerome Friedman [aut], Trevor Hastie [aut, cre], Rob Tibshirani [aut], Balasubramanian Narasimhan [aut], Kenneth Tay [aut], Noah Simon [aut], Junyang Qian [ctb], James Yang [aut]",
+ "Maintainer": "Trevor Hastie ",
+ "Repository": "CRAN"
+ },
+ "glue": {
+ "Package": "glue",
+ "Version": "1.8.0",
+ "Source": "Repository",
+ "Title": "Interpreted String Literals",
+ "Authors@R": "c( person(\"Jim\", \"Hester\", role = \"aut\", comment = c(ORCID = \"0000-0002-2739-7082\")), person(\"Jennifer\", \"Bryan\", , \"jenny@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-6983-2759\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "An implementation of interpreted string literals, inspired by Python's Literal String Interpolation and Docstrings and Julia's Triple-Quoted String Literals .",
+ "License": "MIT + file LICENSE",
+ "URL": "https://glue.tidyverse.org/, https://github.com/tidyverse/glue",
+ "BugReports": "https://github.com/tidyverse/glue/issues",
+ "Depends": [
+ "R (>= 3.6)"
+ ],
+ "Imports": [
+ "methods"
+ ],
+ "Suggests": [
+ "crayon",
+ "DBI (>= 1.2.0)",
+ "dplyr",
+ "knitr",
+ "magrittr",
+ "rlang",
+ "rmarkdown",
+ "RSQLite",
+ "testthat (>= 3.2.0)",
+ "vctrs (>= 0.3.0)",
+ "waldo (>= 0.5.3)",
+ "withr"
+ ],
+ "VignetteBuilder": "knitr",
+ "ByteCompile": "true",
+ "Config/Needs/website": "bench, forcats, ggbeeswarm, ggplot2, R.utils, rprintf, tidyr, tidyverse/tidytemplate",
+ "Config/testthat/edition": "3",
+ "Encoding": "UTF-8",
+ "RoxygenNote": "7.3.2",
+ "NeedsCompilation": "yes",
+ "Author": "Jim Hester [aut] (), Jennifer Bryan [aut, cre] (), Posit Software, PBC [cph, fnd]",
+ "Maintainer": "Jennifer Bryan ",
+ "Repository": "CRAN"
+ },
+ "gridExtra": {
+ "Package": "gridExtra",
+ "Version": "2.3",
+ "Source": "Repository",
+ "Authors@R": "c(person(\"Baptiste\", \"Auguie\", email = \"baptiste.auguie@gmail.com\", role = c(\"aut\", \"cre\")), person(\"Anton\", \"Antonov\", email = \"tonytonov@gmail.com\", role = c(\"ctb\")))",
+ "License": "GPL (>= 2)",
+ "Title": "Miscellaneous Functions for \"Grid\" Graphics",
+ "Type": "Package",
+ "Description": "Provides a number of user-level functions to work with \"grid\" graphics, notably to arrange multiple grid-based plots on a page, and draw tables.",
+ "VignetteBuilder": "knitr",
+ "Imports": [
+ "gtable",
+ "grid",
+ "grDevices",
+ "graphics",
+ "utils"
+ ],
+ "Suggests": [
+ "ggplot2",
+ "egg",
+ "lattice",
+ "knitr",
+ "testthat"
+ ],
+ "RoxygenNote": "6.0.1",
+ "NeedsCompilation": "no",
+ "Author": "Baptiste Auguie [aut, cre], Anton Antonov [ctb]",
+ "Maintainer": "Baptiste Auguie ",
+ "Repository": "RSPM",
+ "Encoding": "UTF-8"
+ },
+ "gt": {
+ "Package": "gt",
+ "Version": "1.3.0",
+ "Source": "Repository",
+ "Type": "Package",
+ "Title": "Easily Create Presentation-Ready Display Tables",
+ "Authors@R": "c( person(\"Richard\", \"Iannone\", , \"rich@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0003-3925-190X\")), person(\"Joe\", \"Cheng\", , \"joe@posit.co\", role = \"aut\"), person(\"Barret\", \"Schloerke\", , \"barret@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0001-9986-114X\")), person(\"Shannon\", \"Haughton\", , \"shannon.l.haughton@gsk.com\", role = \"aut\"), person(\"Ellis\", \"Hughes\", , \"ellis.h.hughes@gsk.com\", role = \"aut\", comment = c(ORCID = \"0000-0003-0637-4436\")), person(\"Alexandra\", \"Lauer\", , \"alexandralauer1@gmail.com\", role = \"aut\", comment = c(ORCID = \"0000-0002-4191-6301\")), person(\"Romain\", \"François\", , \"romain@tada.science\", role = \"aut\"), person(\"JooYoung\", \"Seo\", , \"jseo1005@illinois.edu\", role = \"aut\", comment = c(ORCID = \"0000-0002-4064-6012\")), person(\"Ken\", \"Brevoort\", , \"ken@brevoort.com\", role = \"aut\", comment = c(ORCID = \"0000-0002-4001-8358\")), person(\"Olivier\", \"Roy\", role = \"aut\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+ "Description": "Build display tables from tabular data with an easy-to-use set of functions. With its progressive approach, we can construct display tables with a cohesive set of table parts. Table values can be formatted using any of the included formatting functions. Footnotes and cell styles can be precisely added through a location targeting system. The way in which 'gt' handles things for you means that you don't often have to worry about the fine details.",
+ "License": "MIT + file LICENSE",
+ "URL": "https://gt.rstudio.com, https://github.com/rstudio/gt",
+ "BugReports": "https://github.com/rstudio/gt/issues",
+ "Depends": [
+ "R (>= 4.1.0)"
+ ],
+ "Imports": [
+ "base64enc (>= 0.1-3)",
+ "bigD (>= 0.2)",
+ "bitops (>= 1.0-7)",
+ "cli (>= 3.6.3)",
+ "commonmark (>= 1.9.1)",
+ "dplyr (>= 1.1.4)",
+ "fs (>= 1.6.4)",
+ "glue (>= 1.8.0)",
+ "htmltools (>= 0.5.8.1)",
+ "htmlwidgets (>= 1.6.4)",
+ "juicyjuice (>= 0.1.0)",
+ "magrittr (>= 2.0.3)",
+ "markdown (>= 1.13)",
+ "reactable (>= 0.4.4)",
+ "rlang (>= 1.1.4)",
+ "sass (>= 0.4.9)",
+ "scales (>= 1.3.0)",
+ "tidyselect (>= 1.2.1)",
+ "vctrs",
+ "xml2 (>= 1.3.6)"
+ ],
+ "Suggests": [
+ "bit64",
+ "farver",
+ "fontawesome (>= 0.5.2)",
+ "ggplot2",
+ "grid",
+ "gtable (>= 0.3.6)",
+ "katex (>= 1.4.1)",
+ "knitr",
+ "lubridate",
+ "magick",
+ "paletteer",
+ "RColorBrewer",
+ "rmarkdown (>= 2.20)",
+ "rsvg",
+ "rvest",
+ "shiny (>= 1.9.1)",
+ "testthat (>= 3.1.9)",
+ "tidyr (>= 1.0.0)",
+ "webshot2 (>= 0.1.0)",
+ "withr"
+ ],
+ "Config/Needs/coverage": "officer",
+ "Config/Needs/website": "quarto",
+ "ByteCompile": "true",
+ "Config/testthat/edition": "3",
+ "Config/testthat/parallel": "true",
+ "Encoding": "UTF-8",
+ "LazyData": "true",
+ "RoxygenNote": "7.3.3",
+ "NeedsCompilation": "no",
+ "Author": "Richard Iannone [aut, cre] (ORCID: ), Joe Cheng [aut], Barret Schloerke [aut] (ORCID: ), Shannon Haughton [aut], Ellis Hughes [aut] (ORCID: ), Alexandra Lauer [aut] (ORCID: ), Romain François [aut], JooYoung Seo [aut] (ORCID: