diff --git a/.gitignore b/.gitignore index 54f27cb..6284008 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ docs inst/doc /doc/ /Meta/ +.vscode/launch.json +..Rcheck/00check.log diff --git a/DESCRIPTION b/DESCRIPTION index 5391d2e..7f78e32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: lighthouse.codebook Title: Summarize Datasets for Lighthouse Institute Projects -Version: 0.3.2 +Version: 0.4.0 Authors@R: c( person("Casey", "Sarapas", email = "ccsarapas@chestnut.org", @@ -14,11 +14,10 @@ Depends: Imports: cli, data.table, - dplyr, + dplyr (>= 1.2.0), haven, labelled, lighthouse, - moments, openxlsx2, purrr, rlang, @@ -35,7 +34,9 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.3 URL: https://github.com/ccsarapas/lighthouse.codebook, https://ccsarapas.github.io/lighthouse.codebook/ BugReports: https://github.com/ccsarapas/lighthouse.codebook/issues -Suggests: +Suggests: knitr, - rmarkdown + rmarkdown, + testthat (>= 3.0.0) VignetteBuilder: knitr + diff --git a/NAMESPACE b/NAMESPACE index af707b1..3ae72bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,8 +13,17 @@ export(cb_summarize_categorical) export(cb_summarize_numeric) export(cb_summarize_text) export(cb_write) +export(kurtosis) +export(max_if_any) +export(min_if_any) +export(se_mean) +export(skew) +export(spread) importFrom(lighthouse,"%<-%") importFrom(lighthouse,glue_chr) +importFrom(lighthouse,max_if_any) +importFrom(lighthouse,min_if_any) +importFrom(lighthouse,se_mean) importFrom(lighthouse,untidyselect) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) diff --git a/NEWS.md b/NEWS.md index e5a1183..f078d5b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,27 @@ +# lighthouse.codebook 0.4.0 + +## Added + +* Summary statistics returned by `cb_summarize_numeric()` can now be specified using the new `stats` argument. Summary statistics included on the numeric summary tab of workbooks written by `cb_write()` can likewise be specified using the `stats_numeric` argument. + +* A handful of stats helpers for use in the new `stats` / `stats_numeric` arguments, including `skew()`, `kurtosis()`, `spread()`, and re-exports from the lighthouse package `min_if_any()`, `max_if_any()`, and `se_mean()`. + +## Fixed + +* User missing values defined in SPSS datasets or `"haven_labelled"` vectors are now consistently recognized (fixes #32). + +* Specfying numeric grouping columns no longer throws an error (fixes #31). + +* `cb_create()` no longer throws an error when `.val_labels = NULL` (fixes #34). + +* `cb_summarize_categorical()` no longer throws an error when a variable contains no value labels. + +## Internal + +* Added a test suite. + +* Dropped dependency on moments package. + # lighthouse.codebook 0.3.2 ## Fixed diff --git a/R/cb_create.r b/R/cb_create.r index 7e0fdab..13316d4 100644 --- a/R/cb_create.r +++ b/R/cb_create.r @@ -172,6 +172,7 @@ cb_create <- function(data, incompatible = .options$user_missing_incompatible ) |> cb_add_lookups(sep1 = .val_labs_sep1, sep2 = .val_labs_sep2) |> + cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |> cb_label_data(conflict = .options$user_missing_conflict) |> cb_zap_data() |> cb_add_dims() |> @@ -378,7 +379,10 @@ cb_user_missings <- function(cb, user_missing, match_type = TRUE, incompatible = c("ignore", "warn", "error")) { - if (is.null(user_missing)) return(set_attrs(cb, user_missing = list())) + if (is.null(user_missing)) { + attr(cb, "user_missing") <- attr(cb, "user_missing") %||% list() + return(cb) + } user_missing <- check_user_missing_arg(user_missing) for (um in user_missing) { cb <- cb_user_missings_across( @@ -410,8 +414,12 @@ lookups_from_string <- function(cb, data, sep1, sep2) { labs <- x[, 2] setNames(vals, labs) } + # if (!("values" %in% names(cb))) { + # return(setNames(character(), character())) + # } + # early return if values col doesn't exist, length 0, or all NA + if (!("values" %in% names(cb)) || !sum(!is.na(cb$values))) return(list()) val_labels <- na.omit(setNames(cb$values, cb$name)) - if (!length(val_labels)) return(val_labels) if (is.null(sep1) || is.null(sep2)) { cli::cli_abort( "{.arg sep1} and {.arg sep2} must be specified if value labels are provided." @@ -493,12 +501,36 @@ reconcile_missing_labels <- function(val_labs, list(val_labs = val_labs, missings = missings) } +cb_reconcile_missing_labels <- function(cb, + conflict = c("val_label", "missing_label")) { + conflict <- match.arg(conflict) + vals_by_label <- attr(cb, "vals_by_label") + user_missing <- attr(cb, "user_missing") + factors <- attr(cb, "factors") %||% character() + vars <- setdiff(intersect(names(vals_by_label), names(user_missing)), factors) + if (!length(vars)) return(cb) + + for (nm in vars) { + if (is.null(vals_by_label[[nm]]) || is.null(user_missing[[nm]])) next + vals <- reconcile_missing_labels( + val_labs = sort(vals_by_label[[nm]]), + missings = sort(user_missing[[nm]]), + conflict = conflict + ) + user_missing[[nm]] <- sort(vals$missings) + vals_by_label[[nm]] <- vals$val_labs[ + order(vals$val_labs %in% vals$missings, vals$val_labs) + ] + } + set_attrs(cb, vals_by_label = vals_by_label, user_missing = user_missing) +} + cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) { data <- attr(cb, "data") vals_by_label <- attr(cb, "vals_by_label") factors <- attr(cb, "factors") user_missing <- attr(cb, "user_missing") - label_vars <- unique(c(names(vals_by_label), names(user_missing))) + label_vars <- union(names(vals_by_label), names(user_missing)) for (nm in label_vars) { missings <- sort(user_missing[[nm]]) if (nm %in% factors) { @@ -506,15 +538,7 @@ cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) { } else { val_labs <- sort(vals_by_label[[nm]]) if (!is.null(val_labs) && !is.null(missings)) { - vals <- reconcile_missing_labels( - val_labs = val_labs, - missings = missings, - conflict = conflict - ) - missings <- sort(vals$missings) - val_labs <- vals$val_labs[ - order(vals$val_labs %in% vals$missings, vals$val_labs) - ] + val_labs <- val_labs[order(val_labs %in% missings, val_labs)] } data[[nm]] <- haven::labelled_spss( data[[nm]], labels = val_labs, na_values = missings @@ -581,12 +605,23 @@ cb_add_val_labels_col <- function(cb, user_missing_col = c("if_any", "yes", "no" } else { missings <- NULL } - val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors")) - dplyr::mutate(cb, values = val_labs, user_missings = missings) + include_values <- "values" %in% names(cb) || + any(vapply(val_labs, \(x) !is.null(x) && length(x) > 0, logical(1))) + if (include_values) { + val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors")) + cb <- dplyr::mutate(cb, values = val_labs) + } + if (user_missing_col) { + cb <- dplyr::mutate(cb, user_missings = missings) + } + cb } cb_split_labels_col <- function(cb, split_var_labels = NULL) { if (is.null(split_var_labels)) return(cb) + if (!("label" %in% names(cb))) { + cli::cli_abort("{.arg .split_var_labels} requires a {.code label} column.") + } if (rlang::is_call(split_var_labels) && rlang::call_name(split_var_labels) == "list") { split_var_labels <- rlang::call_args(split_var_labels) } else { diff --git a/R/cb_create_redcap.r b/R/cb_create_redcap.r index ccb8c58..ad25e69 100644 --- a/R/cb_create_redcap.r +++ b/R/cb_create_redcap.r @@ -115,6 +115,7 @@ cb_create_redcap <- function(data, cb <- cb_propagate_user_missing_checkboxes_rc(cb) } cb |> + cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |> cb_label_data(conflict = .options$user_missing_conflict) |> cb_zap_data() |> cb_add_dims() |> diff --git a/R/cb_create_spss.r b/R/cb_create_spss.r index 3fc7789..9823e40 100644 --- a/R/cb_create_spss.r +++ b/R/cb_create_spss.r @@ -96,7 +96,7 @@ cb_user_missings_from_spss <- function(cb) { user_missings[[nm]] <- c(user_missings[[nm]], miss_vals) } } - + if (length(user_missings)) attr(cb, "user_missing") <- user_missings cb } @@ -106,13 +106,14 @@ cb_update_labels_spss <- function(cb, user_missing_conflict = c("val_label", "missing_label"), user_missing_incompatible = c("ignore", "warn", "error")) { data <- attr(cb, "data") - cb <- cb |> - cb_user_missings_from_spss() |> - cb_user_missings( - user_missing = user_missing, - incompatible = user_missing_incompatible - ) |> - cb_add_lookups() + cb <- cb |> + cb_user_missings_from_spss() |> + cb_user_missings( + user_missing = user_missing, + incompatible = user_missing_incompatible + ) |> + cb_add_lookups() |> + cb_reconcile_missing_labels(conflict = user_missing_conflict) if (is.null(user_missing)) { cb |> set_attrs(data_labelled = data) } else { diff --git a/R/cb_summarize.r b/R/cb_summarize.r index d2c4650..a477eaf 100644 --- a/R/cb_summarize.r +++ b/R/cb_summarize.r @@ -1,12 +1,14 @@ #' Summarize numeric variables from a codebook object -#' +#' #' `cb_summarize_numeric()` generates a summary table for all numeric variables -#' from a codebook object, optionally by group. Future releases will include options -#' to specify the summary statistics used. Currently, summary statistics are valid -#' n and %; mean and SD; median, MAD, min, max, and range; skewness, and kurtosis. +#' from a codebook object, optionally by group. #' #' @param cb An object of class `"li_codebook"` as produced by [`cb_create()`] or #' a variant. +#' @param stats A named list of summary functions to include. The defaults include +#' mean and standard deviation (SD); median and median absolute deviation (MAD); +#' minimum, maximum, and range; and adjusted skewness and kurtosis. See details +#' and examples. #' @param group_by <[`tidy-select`][dplyr_tidy_select]> Column or columns to group #' by. #' @param warn_if_none Should a warning be issued if there are no numeric variables @@ -21,25 +23,101 @@ #' a non-missing label stem. #' - `label`: variable label #' - `valid_n`, `valid_pct`: number and proportion of non-missing values -#' - summary statistic columns: by default, these include `mean` and standard -#' deviation (`SD`); `median`, median absolute deviation (`MAD`), `min`, `max`, -#' and `range`; skewness (`skew`), and kurtosis (`kurt`). -#' +#' - Summary statistic columns as specified in `stats` +#' +#' @details +#' The `stats` argument controls which summary statistics will be computed. It takes +#' a named list of functions, where the names will be used as column names. +#' +#' `cb_summarize_numeric()` will set `na.rm` to `TRUE` for any function that takes +#' a `na.rm` argument. +#' +#' You can include anonymous functions. If wrapping a function that takes a `na.rm` +#' argument, it is recommended you explicitly set `na.rm` to `TRUE`. (e.g., to include +#' the 25th quantile, use `q25 = \(x) quantile(x, 0.25, na.rm = TRUE)`. +#' +# #' Names will be formatted when written to an Excel codebook: +# #' - "_" will be replaced with " " +# #' - "pct" will be replaced with "%" +# #' - names will generally be changed to Title Case, except that "n" will not be +# #' capitalized, and any words already containing capital letters will be left as +# #' is. +# #' +#' @examples +#' cb_storms <- dplyr::storms |> +#' dplyr::mutate(year = factor(year)) |> +#' dplyr::filter(status %in% c("tropical storm", "hurricane")) |> +#' cb_create() +#' +#' # ungrouped summary with default stats +#' cb_summarize_numeric(cb_storms) +#' +#' # with subset of default stats +#' cb_summarize_numeric( +#' cb_storms, +#' stats = list(mean = mean, SD = sd) +#' ) +#' +#' # grouped summary +#' cb_summarize_numeric( +#' cb_storms, +#' stats = list(mean = mean, SD = sd), +#' group_by = status +#' ) +#' +#' # with custom stats +#' cb_summarize_numeric( +#' cb_storms, +#' stats = list( +#' median = median, +#' q25 = \(x) quantile(x, 0.25, na.rm = TRUE), +#' q75 = \(x) quantile(x, 0.75, na.rm = TRUE), +#' IQR = IQR +#' ) +#' ) +#' #' @export -cb_summarize_numeric <- function(cb, group_by = NULL, warn_if_none = TRUE) { +cb_summarize_numeric <- function(cb, + group_by = NULL, + stats = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), + warn_if_none = TRUE) { check_codebook(cb) group_by <- cb_untidyselect(cb, {{ group_by }}) cb_summarize_numeric_impl( - cb = cb, group_by = group_by, warn_if_none = warn_if_none + cb = cb, group_by = group_by, stats = stats, warn_if_none = warn_if_none ) } cb_summarize_numeric_impl <- function(cb, group_by = NULL, + stats = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), warn_if_none = FALSE, group_rows = NULL) { data <- attr(cb, "data_zapped")[cb$name] - nms_num <- names(data)[vapply(data, is.numeric, logical(1))] + nms_num <- setdiff( + names(data)[vapply(data, is.numeric, logical(1))], + group_by + ) id_cols <- intersect(c("name", "label_stem", "label"), names(cb)) out <- cb |> dplyr::filter(name %in% nms_num) |> @@ -47,9 +125,15 @@ cb_summarize_numeric_impl <- function(cb, if (!nrow(out)) { if (warn_if_none) { - cli::cli_warn(c( - "!" = "No numeric variables in codebook; returning `NULL`." - )) + if (is.null(group_by)) { + cli::cli_warn(c( + "!" = "No numeric variables in codebook; returning `NULL`." + )) + } else { + cli::cli_warn(c( + "i" = "No numeric variables in codebook after grouping; returning `NULL`." + )) + } } return(NULL) } @@ -60,17 +144,19 @@ cb_summarize_numeric_impl <- function(cb, id_cols <- setdiff(id_cols, "label_stem") } - res <- lighthouse::summary_table( - data, - valid_n = lighthouse::n_valid, valid_pct = lighthouse::pct_valid, - mean, SD = sd, - median, MAD = mad, min = lighthouse::min_if_any, max = lighthouse::max_if_any, - range = spread_if_any, - skew = moments::skewness, kurt = moments::kurtosis, + args <- c( + list( + .data = data, na.rm = TRUE, - .vars = all_of(nms_num), - .rows_group_by = all_of(group_by) - ) |> + .vars = rlang::expr(all_of(nms_num)), + .rows_group_by = rlang::expr(all_of(group_by)), + valid_n = lighthouse::n_valid, + valid_pct = lighthouse::pct_valid + ), + stats + ) + + res <- do.call(lighthouse::summary_table, args) |> dplyr::mutate(dplyr::across( all_of(group_by), \(x) fct_replace_na(factor(x), "(Missing)") diff --git a/R/cb_write.r b/R/cb_write.r index 962b964..992d536 100644 --- a/R/cb_write.r +++ b/R/cb_write.r @@ -27,6 +27,10 @@ #' @param group_rows_numeric,group_rows_categorical <[`tidy-select`][dplyr_tidy_select]> #' Column or columns to group by in rows on grouped numeric or categorical summary #' tab. +#' @param stats_numeric A named list of summary functions to include on the numeric +#' summary tab. Defaults include mean and standard deviation (SD); median and +#' median absolute deviation (MAD); minimum, maximum, and range; and adjusted +#' skewness and kurtosis. See `?cb_summarize_numeric` for details and examples. #' @param detail_missing Include detailed missing value information on ungrouped #' categorical and text summary tabs? (Detailed missing information for grouped #' summary tabs is not currently supported.) @@ -62,6 +66,17 @@ cb_write <- function(cb, group_rows = NULL, group_rows_numeric = group_rows, group_rows_categorical = group_rows, + stats_numeric = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), detail_missing = c("if_any_user_missing", "yes", "no"), n_text_vals = 5, incl_date = TRUE, @@ -93,6 +108,7 @@ cb_write <- function(cb, group_rows = group_rows, group_rows_numeric = group_rows_numeric, group_rows_categorical = group_rows_categorical, + stats_numeric = stats_numeric, detail_missing = detail_missing, n_text_vals = n_text_vals, incl_date = incl_date, @@ -109,6 +125,17 @@ cb_write_impl <- function(cb, group_rows = NULL, group_rows_numeric = group_rows, group_rows_categorical = group_rows, + stats_numeric = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), detail_missing = c("if_any_user_missing", "yes", "no"), n_text_vals = 5, incl_date = TRUE, @@ -119,7 +146,7 @@ cb_write_impl <- function(cb, detail_missing == "if_any_user_missing" && length(attr(cb, "user_missing")) ) summaries <- list( - num = cb_summarize_numeric_impl(cb), + num = cb_summarize_numeric_impl(cb, stats = stats_numeric), cat = cb_summarize_categorical_impl(cb, detail_missing = detail_missing), txt = cb_summarize_text_impl( cb, @@ -131,7 +158,8 @@ cb_write_impl <- function(cb, summaries$num_grp <- cb_summarize_numeric_impl( cb, group_by = group_by, - group_rows = group_rows_numeric + group_rows = group_rows_numeric, + stats = stats_numeric ) summaries$cat_grp <- cb_summarize_categorical_impl( cb, @@ -377,12 +405,12 @@ var_name_hyperlinks <- function(params) { hl <- hl_rows$overview |> dplyr::mutate( overview_nm = params$overview$sheet_name, - sheet = dplyr::case_match( + sheet = dplyr::recode_values( name, hl_rows$num$name %||% NA ~ "num", hl_rows$cat$name %||% NA ~ "cat", hl_rows$txt$name %||% NA ~ "txt", - .default = NA + default = NA ), ) |> dplyr::mutate( diff --git a/R/stats.r b/R/stats.r new file mode 100644 index 0000000..f8efb1d --- /dev/null +++ b/R/stats.r @@ -0,0 +1,91 @@ +#' Statistics for numeric summaries +#' +#' @description +#' Functions for computing summary statistics for use in `cb_summarize_numeric()`. +#' - `skew()` and `kurtosis()` return adjusted skewness and kurtosis. (Unadjusted +#' estimates can be obtained by setting `adjusted = FALSE`.) +#' - `spread()` returns the difference between a vector's minimum and maximum values. +#' - `min_if_any()` and `max_if_any()` return minima and maxima with alternate behavior +#' if all values are missing. (Re-exported from the lighthouse package. See `lighthouse::min_if_any` +#' for more details.) +#' - `se_mean()` returns the standard error of the mean. (Re-exported from the lighthouse +#' package. See `lighthouse::se_mean` for more details.) +#' +#' @param x A numeric vector. +#' +#' @param ... A numeric vector or vectors. +#' +#' @param na.rm Should missing values be removed? (Note that `cb_summarize_numeric()` +#' automatically sets na.rm to `TRUE` for all functions). +#' +#' @param adjusted If `TRUE`, returns adjusted skewness (_G_1) or kurtosis +#' (_G_2) by applying a small-sample correction. If `FALSE`, returns +#' the unadjusted skewness (_g_1) or kurtosis (_g_2). (Note +#' that `TRUE` corresponds to behavior of software such as SPSS, SAS, and Excel.) +#' +#' @param excess If `TRUE`, returns excess kurtosis by (total kurtosis - 3). +#' +#' @name stats +#' +#' @rdname stats +#' @export +skew <- function(x, adjusted = TRUE, na.rm = FALSE) { + if (na.rm) x <- x[!is.na(x)] + n <- length(x) + + if (n < 3) return(NA_real_) + + m <- mean(x) + m2 <- mean((x - m)^2) + m3 <- mean((x - m)^3) + + if (m2 == 0) return(0) + + g1 <- m3 / (m2^(3/2)) + + if (!adjusted) return(g1) + + sqrt(n * (n - 1)) / (n - 2) * g1 +} +#' +#' @rdname stats +#' @export +kurtosis <- function(x, adjusted = TRUE, excess = TRUE, na.rm = FALSE) { + if (na.rm) x <- x[!is.na(x)] + n <- length(x) + if (n < 4) return(NA_real_) + + m <- mean(x) + m2 <- mean((x - m)^2) + m4 <- mean((x - m)^4) + + if (m2 == 0) return(if (excess) 0 else 3) + + g2 <- m4 / (m2^2) + g2_excess <- g2 - 3 + + if (!adjusted) return(if (excess) g2_excess else g2) + + G2_excess <- ((n - 1) / ((n - 2) * (n - 3))) * ((n + 1) * g2_excess + 6) + + if (excess) G2_excess else G2_excess + 3 +} +#' +#' @rdname stats +#' @export +spread <- function(x, na.rm = FALSE) { + max_if_any(x, na.rm = na.rm) - min_if_any(x, na.rm = na.rm) +} +#' +#' @importFrom lighthouse min_if_any max_if_any se_mean +#' @rdname stats +#' @export +lighthouse::min_if_any +#' +#' @rdname stats +#' @export +lighthouse::max_if_any +#' +#' @rdname stats +#' @export +lighthouse::se_mean diff --git a/R/utils.r b/R/utils.r index 6f49877..59d481a 100644 --- a/R/utils.r +++ b/R/utils.r @@ -76,10 +76,11 @@ try_sort_numeric <- function(x, sort(x, decreasing = decreasing, ...) } } -try_sort_numeric(letters) - coercible <- lighthouse::is_coercible_numeric(letters, na = "TRUE") -class_collapse <- function(x, sep = ", ") stringr::str_c(class(x), collapse = sep) +class_collapse <- function(x, sep = ", ") { + stringr::str_c(class(x), collapse = sep) +} + strip_html <- function(x) { stopifnot(is.character(x)) has_tags <- grepl("<[A-Za-z!/]", x) @@ -172,7 +173,6 @@ cb_match_type <- function(nm, as_named <- function(x, class) setNames(as(x, class), names(x)) - has_val_labels <- function(x) !is.null(labelled::val_labels(x)) to_labelled_chr <- function(x, @@ -185,9 +185,6 @@ to_labelled_chr <- function(x, ) } -spread_if_any <- function(..., na.rm = TRUE) { - lighthouse::max_if_any(..., na.rm = na.rm) - lighthouse::min_if_any(..., na.rm = na.rm) -} #' @export nan_to_na.default <- function(x) dplyr::if_else(is.nan(x), NA, x) diff --git a/README.Rmd b/README.Rmd index a60cfe5..be922f8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -27,7 +27,8 @@ customized in a number of ways, including options for grouped summaries. You can install lighthouse.codebook by running: ```r -# install.packages("remotes") +## git2r needed only if Git isn't installed on your system +# install.packages(c("git2r", "remotes")) remotes::install_github("ccsarapas/lighthouse.codebook") ``` @@ -73,9 +74,9 @@ dat_rc$data |> There are many options for controlling how data is interpreted, summarized, and presented. See the [introduction to lighthouse.codebook](https://ccsarapas.github.io/lighthouse.codebook/articles/lighthouse-codebook.html) -for some of the most useful options, including grouped data summaries and specifying -user missing codes. Further options are detailed in the help pages for `cb_create()` -and `cb_write()`. +for some of the most useful options, including grouped data summaries, summary statistics +for numeric variables, and specifying user missing codes. Further options are detailed +in the help pages for `cb_create()` and `cb_write()`.