diff --git a/.gitignore b/.gitignore
index 54f27cb..6284008 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@ docs
inst/doc
/doc/
/Meta/
+.vscode/launch.json
+..Rcheck/00check.log
diff --git a/DESCRIPTION b/DESCRIPTION
index 5391d2e..7f78e32 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: lighthouse.codebook
Title: Summarize Datasets for Lighthouse Institute Projects
-Version: 0.3.2
+Version: 0.4.0
Authors@R: c(
person("Casey", "Sarapas",
email = "ccsarapas@chestnut.org",
@@ -14,11 +14,10 @@ Depends:
Imports:
cli,
data.table,
- dplyr,
+ dplyr (>= 1.2.0),
haven,
labelled,
lighthouse,
- moments,
openxlsx2,
purrr,
rlang,
@@ -35,7 +34,9 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
URL: https://github.com/ccsarapas/lighthouse.codebook, https://ccsarapas.github.io/lighthouse.codebook/
BugReports: https://github.com/ccsarapas/lighthouse.codebook/issues
-Suggests:
+Suggests:
knitr,
- rmarkdown
+ rmarkdown,
+ testthat (>= 3.0.0)
VignetteBuilder: knitr
+
diff --git a/NAMESPACE b/NAMESPACE
index af707b1..3ae72bf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,8 +13,17 @@ export(cb_summarize_categorical)
export(cb_summarize_numeric)
export(cb_summarize_text)
export(cb_write)
+export(kurtosis)
+export(max_if_any)
+export(min_if_any)
+export(se_mean)
+export(skew)
+export(spread)
importFrom(lighthouse,"%<-%")
importFrom(lighthouse,glue_chr)
+importFrom(lighthouse,max_if_any)
+importFrom(lighthouse,min_if_any)
+importFrom(lighthouse,se_mean)
importFrom(lighthouse,untidyselect)
importFrom(tidyselect,all_of)
importFrom(tidyselect,any_of)
diff --git a/NEWS.md b/NEWS.md
index e5a1183..f078d5b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,27 @@
+# lighthouse.codebook 0.4.0
+
+## Added
+
+* Summary statistics returned by `cb_summarize_numeric()` can now be specified using the new `stats` argument. Summary statistics included on the numeric summary tab of workbooks written by `cb_write()` can likewise be specified using the `stats_numeric` argument.
+
+* A handful of stats helpers for use in the new `stats` / `stats_numeric` arguments, including `skew()`, `kurtosis()`, `spread()`, and re-exports from the lighthouse package `min_if_any()`, `max_if_any()`, and `se_mean()`.
+
+## Fixed
+
+* User missing values defined in SPSS datasets or `"haven_labelled"` vectors are now consistently recognized (fixes #32).
+
+* Specfying numeric grouping columns no longer throws an error (fixes #31).
+
+* `cb_create()` no longer throws an error when `.val_labels = NULL` (fixes #34).
+
+* `cb_summarize_categorical()` no longer throws an error when a variable contains no value labels.
+
+## Internal
+
+* Added a test suite.
+
+* Dropped dependency on moments package.
+
# lighthouse.codebook 0.3.2
## Fixed
diff --git a/R/cb_create.r b/R/cb_create.r
index 7e0fdab..13316d4 100644
--- a/R/cb_create.r
+++ b/R/cb_create.r
@@ -172,6 +172,7 @@ cb_create <- function(data,
incompatible = .options$user_missing_incompatible
) |>
cb_add_lookups(sep1 = .val_labs_sep1, sep2 = .val_labs_sep2) |>
+ cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |>
cb_label_data(conflict = .options$user_missing_conflict) |>
cb_zap_data() |>
cb_add_dims() |>
@@ -378,7 +379,10 @@ cb_user_missings <- function(cb,
user_missing,
match_type = TRUE,
incompatible = c("ignore", "warn", "error")) {
- if (is.null(user_missing)) return(set_attrs(cb, user_missing = list()))
+ if (is.null(user_missing)) {
+ attr(cb, "user_missing") <- attr(cb, "user_missing") %||% list()
+ return(cb)
+ }
user_missing <- check_user_missing_arg(user_missing)
for (um in user_missing) {
cb <- cb_user_missings_across(
@@ -410,8 +414,12 @@ lookups_from_string <- function(cb, data, sep1, sep2) {
labs <- x[, 2]
setNames(vals, labs)
}
+ # if (!("values" %in% names(cb))) {
+ # return(setNames(character(), character()))
+ # }
+ # early return if values col doesn't exist, length 0, or all NA
+ if (!("values" %in% names(cb)) || !sum(!is.na(cb$values))) return(list())
val_labels <- na.omit(setNames(cb$values, cb$name))
- if (!length(val_labels)) return(val_labels)
if (is.null(sep1) || is.null(sep2)) {
cli::cli_abort(
"{.arg sep1} and {.arg sep2} must be specified if value labels are provided."
@@ -493,12 +501,36 @@ reconcile_missing_labels <- function(val_labs,
list(val_labs = val_labs, missings = missings)
}
+cb_reconcile_missing_labels <- function(cb,
+ conflict = c("val_label", "missing_label")) {
+ conflict <- match.arg(conflict)
+ vals_by_label <- attr(cb, "vals_by_label")
+ user_missing <- attr(cb, "user_missing")
+ factors <- attr(cb, "factors") %||% character()
+ vars <- setdiff(intersect(names(vals_by_label), names(user_missing)), factors)
+ if (!length(vars)) return(cb)
+
+ for (nm in vars) {
+ if (is.null(vals_by_label[[nm]]) || is.null(user_missing[[nm]])) next
+ vals <- reconcile_missing_labels(
+ val_labs = sort(vals_by_label[[nm]]),
+ missings = sort(user_missing[[nm]]),
+ conflict = conflict
+ )
+ user_missing[[nm]] <- sort(vals$missings)
+ vals_by_label[[nm]] <- vals$val_labs[
+ order(vals$val_labs %in% vals$missings, vals$val_labs)
+ ]
+ }
+ set_attrs(cb, vals_by_label = vals_by_label, user_missing = user_missing)
+}
+
cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) {
data <- attr(cb, "data")
vals_by_label <- attr(cb, "vals_by_label")
factors <- attr(cb, "factors")
user_missing <- attr(cb, "user_missing")
- label_vars <- unique(c(names(vals_by_label), names(user_missing)))
+ label_vars <- union(names(vals_by_label), names(user_missing))
for (nm in label_vars) {
missings <- sort(user_missing[[nm]])
if (nm %in% factors) {
@@ -506,15 +538,7 @@ cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) {
} else {
val_labs <- sort(vals_by_label[[nm]])
if (!is.null(val_labs) && !is.null(missings)) {
- vals <- reconcile_missing_labels(
- val_labs = val_labs,
- missings = missings,
- conflict = conflict
- )
- missings <- sort(vals$missings)
- val_labs <- vals$val_labs[
- order(vals$val_labs %in% vals$missings, vals$val_labs)
- ]
+ val_labs <- val_labs[order(val_labs %in% missings, val_labs)]
}
data[[nm]] <- haven::labelled_spss(
data[[nm]], labels = val_labs, na_values = missings
@@ -581,12 +605,23 @@ cb_add_val_labels_col <- function(cb, user_missing_col = c("if_any", "yes", "no"
} else {
missings <- NULL
}
- val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors"))
- dplyr::mutate(cb, values = val_labs, user_missings = missings)
+ include_values <- "values" %in% names(cb) ||
+ any(vapply(val_labs, \(x) !is.null(x) && length(x) > 0, logical(1)))
+ if (include_values) {
+ val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors"))
+ cb <- dplyr::mutate(cb, values = val_labs)
+ }
+ if (user_missing_col) {
+ cb <- dplyr::mutate(cb, user_missings = missings)
+ }
+ cb
}
cb_split_labels_col <- function(cb, split_var_labels = NULL) {
if (is.null(split_var_labels)) return(cb)
+ if (!("label" %in% names(cb))) {
+ cli::cli_abort("{.arg .split_var_labels} requires a {.code label} column.")
+ }
if (rlang::is_call(split_var_labels) && rlang::call_name(split_var_labels) == "list") {
split_var_labels <- rlang::call_args(split_var_labels)
} else {
diff --git a/R/cb_create_redcap.r b/R/cb_create_redcap.r
index ccb8c58..ad25e69 100644
--- a/R/cb_create_redcap.r
+++ b/R/cb_create_redcap.r
@@ -115,6 +115,7 @@ cb_create_redcap <- function(data,
cb <- cb_propagate_user_missing_checkboxes_rc(cb)
}
cb |>
+ cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |>
cb_label_data(conflict = .options$user_missing_conflict) |>
cb_zap_data() |>
cb_add_dims() |>
diff --git a/R/cb_create_spss.r b/R/cb_create_spss.r
index 3fc7789..9823e40 100644
--- a/R/cb_create_spss.r
+++ b/R/cb_create_spss.r
@@ -96,7 +96,7 @@ cb_user_missings_from_spss <- function(cb) {
user_missings[[nm]] <- c(user_missings[[nm]], miss_vals)
}
}
-
+
if (length(user_missings)) attr(cb, "user_missing") <- user_missings
cb
}
@@ -106,13 +106,14 @@ cb_update_labels_spss <- function(cb,
user_missing_conflict = c("val_label", "missing_label"),
user_missing_incompatible = c("ignore", "warn", "error")) {
data <- attr(cb, "data")
- cb <- cb |>
- cb_user_missings_from_spss() |>
- cb_user_missings(
- user_missing = user_missing,
- incompatible = user_missing_incompatible
- ) |>
- cb_add_lookups()
+ cb <- cb |>
+ cb_user_missings_from_spss() |>
+ cb_user_missings(
+ user_missing = user_missing,
+ incompatible = user_missing_incompatible
+ ) |>
+ cb_add_lookups() |>
+ cb_reconcile_missing_labels(conflict = user_missing_conflict)
if (is.null(user_missing)) {
cb |> set_attrs(data_labelled = data)
} else {
diff --git a/R/cb_summarize.r b/R/cb_summarize.r
index d2c4650..a477eaf 100644
--- a/R/cb_summarize.r
+++ b/R/cb_summarize.r
@@ -1,12 +1,14 @@
#' Summarize numeric variables from a codebook object
-#'
+#'
#' `cb_summarize_numeric()` generates a summary table for all numeric variables
-#' from a codebook object, optionally by group. Future releases will include options
-#' to specify the summary statistics used. Currently, summary statistics are valid
-#' n and %; mean and SD; median, MAD, min, max, and range; skewness, and kurtosis.
+#' from a codebook object, optionally by group.
#'
#' @param cb An object of class `"li_codebook"` as produced by [`cb_create()`] or
#' a variant.
+#' @param stats A named list of summary functions to include. The defaults include
+#' mean and standard deviation (SD); median and median absolute deviation (MAD);
+#' minimum, maximum, and range; and adjusted skewness and kurtosis. See details
+#' and examples.
#' @param group_by <[`tidy-select`][dplyr_tidy_select]> Column or columns to group
#' by.
#' @param warn_if_none Should a warning be issued if there are no numeric variables
@@ -21,25 +23,101 @@
#' a non-missing label stem.
#' - `label`: variable label
#' - `valid_n`, `valid_pct`: number and proportion of non-missing values
-#' - summary statistic columns: by default, these include `mean` and standard
-#' deviation (`SD`); `median`, median absolute deviation (`MAD`), `min`, `max`,
-#' and `range`; skewness (`skew`), and kurtosis (`kurt`).
-#'
+#' - Summary statistic columns as specified in `stats`
+#'
+#' @details
+#' The `stats` argument controls which summary statistics will be computed. It takes
+#' a named list of functions, where the names will be used as column names.
+#'
+#' `cb_summarize_numeric()` will set `na.rm` to `TRUE` for any function that takes
+#' a `na.rm` argument.
+#'
+#' You can include anonymous functions. If wrapping a function that takes a `na.rm`
+#' argument, it is recommended you explicitly set `na.rm` to `TRUE`. (e.g., to include
+#' the 25th quantile, use `q25 = \(x) quantile(x, 0.25, na.rm = TRUE)`.
+#'
+# #' Names will be formatted when written to an Excel codebook:
+# #' - "_" will be replaced with " "
+# #' - "pct" will be replaced with "%"
+# #' - names will generally be changed to Title Case, except that "n" will not be
+# #' capitalized, and any words already containing capital letters will be left as
+# #' is.
+# #'
+#' @examples
+#' cb_storms <- dplyr::storms |>
+#' dplyr::mutate(year = factor(year)) |>
+#' dplyr::filter(status %in% c("tropical storm", "hurricane")) |>
+#' cb_create()
+#'
+#' # ungrouped summary with default stats
+#' cb_summarize_numeric(cb_storms)
+#'
+#' # with subset of default stats
+#' cb_summarize_numeric(
+#' cb_storms,
+#' stats = list(mean = mean, SD = sd)
+#' )
+#'
+#' # grouped summary
+#' cb_summarize_numeric(
+#' cb_storms,
+#' stats = list(mean = mean, SD = sd),
+#' group_by = status
+#' )
+#'
+#' # with custom stats
+#' cb_summarize_numeric(
+#' cb_storms,
+#' stats = list(
+#' median = median,
+#' q25 = \(x) quantile(x, 0.25, na.rm = TRUE),
+#' q75 = \(x) quantile(x, 0.75, na.rm = TRUE),
+#' IQR = IQR
+#' )
+#' )
+#'
#' @export
-cb_summarize_numeric <- function(cb, group_by = NULL, warn_if_none = TRUE) {
+cb_summarize_numeric <- function(cb,
+ group_by = NULL,
+ stats = list(
+ mean = mean,
+ SD = sd,
+ median = median,
+ MAD = mad,
+ min = min_if_any,
+ max = max_if_any,
+ range = spread,
+ skew = skew,
+ kurt = kurtosis
+ ),
+ warn_if_none = TRUE) {
check_codebook(cb)
group_by <- cb_untidyselect(cb, {{ group_by }})
cb_summarize_numeric_impl(
- cb = cb, group_by = group_by, warn_if_none = warn_if_none
+ cb = cb, group_by = group_by, stats = stats, warn_if_none = warn_if_none
)
}
cb_summarize_numeric_impl <- function(cb,
group_by = NULL,
+ stats = list(
+ mean = mean,
+ SD = sd,
+ median = median,
+ MAD = mad,
+ min = min_if_any,
+ max = max_if_any,
+ range = spread,
+ skew = skew,
+ kurt = kurtosis
+ ),
warn_if_none = FALSE,
group_rows = NULL) {
data <- attr(cb, "data_zapped")[cb$name]
- nms_num <- names(data)[vapply(data, is.numeric, logical(1))]
+ nms_num <- setdiff(
+ names(data)[vapply(data, is.numeric, logical(1))],
+ group_by
+ )
id_cols <- intersect(c("name", "label_stem", "label"), names(cb))
out <- cb |>
dplyr::filter(name %in% nms_num) |>
@@ -47,9 +125,15 @@ cb_summarize_numeric_impl <- function(cb,
if (!nrow(out)) {
if (warn_if_none) {
- cli::cli_warn(c(
- "!" = "No numeric variables in codebook; returning `NULL`."
- ))
+ if (is.null(group_by)) {
+ cli::cli_warn(c(
+ "!" = "No numeric variables in codebook; returning `NULL`."
+ ))
+ } else {
+ cli::cli_warn(c(
+ "i" = "No numeric variables in codebook after grouping; returning `NULL`."
+ ))
+ }
}
return(NULL)
}
@@ -60,17 +144,19 @@ cb_summarize_numeric_impl <- function(cb,
id_cols <- setdiff(id_cols, "label_stem")
}
- res <- lighthouse::summary_table(
- data,
- valid_n = lighthouse::n_valid, valid_pct = lighthouse::pct_valid,
- mean, SD = sd,
- median, MAD = mad, min = lighthouse::min_if_any, max = lighthouse::max_if_any,
- range = spread_if_any,
- skew = moments::skewness, kurt = moments::kurtosis,
+ args <- c(
+ list(
+ .data = data,
na.rm = TRUE,
- .vars = all_of(nms_num),
- .rows_group_by = all_of(group_by)
- ) |>
+ .vars = rlang::expr(all_of(nms_num)),
+ .rows_group_by = rlang::expr(all_of(group_by)),
+ valid_n = lighthouse::n_valid,
+ valid_pct = lighthouse::pct_valid
+ ),
+ stats
+ )
+
+ res <- do.call(lighthouse::summary_table, args) |>
dplyr::mutate(dplyr::across(
all_of(group_by),
\(x) fct_replace_na(factor(x), "(Missing)")
diff --git a/R/cb_write.r b/R/cb_write.r
index 962b964..992d536 100644
--- a/R/cb_write.r
+++ b/R/cb_write.r
@@ -27,6 +27,10 @@
#' @param group_rows_numeric,group_rows_categorical <[`tidy-select`][dplyr_tidy_select]>
#' Column or columns to group by in rows on grouped numeric or categorical summary
#' tab.
+#' @param stats_numeric A named list of summary functions to include on the numeric
+#' summary tab. Defaults include mean and standard deviation (SD); median and
+#' median absolute deviation (MAD); minimum, maximum, and range; and adjusted
+#' skewness and kurtosis. See `?cb_summarize_numeric` for details and examples.
#' @param detail_missing Include detailed missing value information on ungrouped
#' categorical and text summary tabs? (Detailed missing information for grouped
#' summary tabs is not currently supported.)
@@ -62,6 +66,17 @@ cb_write <- function(cb,
group_rows = NULL,
group_rows_numeric = group_rows,
group_rows_categorical = group_rows,
+ stats_numeric = list(
+ mean = mean,
+ SD = sd,
+ median = median,
+ MAD = mad,
+ min = min_if_any,
+ max = max_if_any,
+ range = spread,
+ skew = skew,
+ kurt = kurtosis
+ ),
detail_missing = c("if_any_user_missing", "yes", "no"),
n_text_vals = 5,
incl_date = TRUE,
@@ -93,6 +108,7 @@ cb_write <- function(cb,
group_rows = group_rows,
group_rows_numeric = group_rows_numeric,
group_rows_categorical = group_rows_categorical,
+ stats_numeric = stats_numeric,
detail_missing = detail_missing,
n_text_vals = n_text_vals,
incl_date = incl_date,
@@ -109,6 +125,17 @@ cb_write_impl <- function(cb,
group_rows = NULL,
group_rows_numeric = group_rows,
group_rows_categorical = group_rows,
+ stats_numeric = list(
+ mean = mean,
+ SD = sd,
+ median = median,
+ MAD = mad,
+ min = min_if_any,
+ max = max_if_any,
+ range = spread,
+ skew = skew,
+ kurt = kurtosis
+ ),
detail_missing = c("if_any_user_missing", "yes", "no"),
n_text_vals = 5,
incl_date = TRUE,
@@ -119,7 +146,7 @@ cb_write_impl <- function(cb,
detail_missing == "if_any_user_missing" && length(attr(cb, "user_missing"))
)
summaries <- list(
- num = cb_summarize_numeric_impl(cb),
+ num = cb_summarize_numeric_impl(cb, stats = stats_numeric),
cat = cb_summarize_categorical_impl(cb, detail_missing = detail_missing),
txt = cb_summarize_text_impl(
cb,
@@ -131,7 +158,8 @@ cb_write_impl <- function(cb,
summaries$num_grp <- cb_summarize_numeric_impl(
cb,
group_by = group_by,
- group_rows = group_rows_numeric
+ group_rows = group_rows_numeric,
+ stats = stats_numeric
)
summaries$cat_grp <- cb_summarize_categorical_impl(
cb,
@@ -377,12 +405,12 @@ var_name_hyperlinks <- function(params) {
hl <- hl_rows$overview |>
dplyr::mutate(
overview_nm = params$overview$sheet_name,
- sheet = dplyr::case_match(
+ sheet = dplyr::recode_values(
name,
hl_rows$num$name %||% NA ~ "num",
hl_rows$cat$name %||% NA ~ "cat",
hl_rows$txt$name %||% NA ~ "txt",
- .default = NA
+ default = NA
),
) |>
dplyr::mutate(
diff --git a/R/stats.r b/R/stats.r
new file mode 100644
index 0000000..f8efb1d
--- /dev/null
+++ b/R/stats.r
@@ -0,0 +1,91 @@
+#' Statistics for numeric summaries
+#'
+#' @description
+#' Functions for computing summary statistics for use in `cb_summarize_numeric()`.
+#' - `skew()` and `kurtosis()` return adjusted skewness and kurtosis. (Unadjusted
+#' estimates can be obtained by setting `adjusted = FALSE`.)
+#' - `spread()` returns the difference between a vector's minimum and maximum values.
+#' - `min_if_any()` and `max_if_any()` return minima and maxima with alternate behavior
+#' if all values are missing. (Re-exported from the lighthouse package. See `lighthouse::min_if_any`
+#' for more details.)
+#' - `se_mean()` returns the standard error of the mean. (Re-exported from the lighthouse
+#' package. See `lighthouse::se_mean` for more details.)
+#'
+#' @param x A numeric vector.
+#'
+#' @param ... A numeric vector or vectors.
+#'
+#' @param na.rm Should missing values be removed? (Note that `cb_summarize_numeric()`
+#' automatically sets na.rm to `TRUE` for all functions).
+#'
+#' @param adjusted If `TRUE`, returns adjusted skewness (_G_1) or kurtosis
+#' (_G_2) by applying a small-sample correction. If `FALSE`, returns
+#' the unadjusted skewness (_g_1) or kurtosis (_g_2). (Note
+#' that `TRUE` corresponds to behavior of software such as SPSS, SAS, and Excel.)
+#'
+#' @param excess If `TRUE`, returns excess kurtosis by (total kurtosis - 3).
+#'
+#' @name stats
+#'
+#' @rdname stats
+#' @export
+skew <- function(x, adjusted = TRUE, na.rm = FALSE) {
+ if (na.rm) x <- x[!is.na(x)]
+ n <- length(x)
+
+ if (n < 3) return(NA_real_)
+
+ m <- mean(x)
+ m2 <- mean((x - m)^2)
+ m3 <- mean((x - m)^3)
+
+ if (m2 == 0) return(0)
+
+ g1 <- m3 / (m2^(3/2))
+
+ if (!adjusted) return(g1)
+
+ sqrt(n * (n - 1)) / (n - 2) * g1
+}
+#'
+#' @rdname stats
+#' @export
+kurtosis <- function(x, adjusted = TRUE, excess = TRUE, na.rm = FALSE) {
+ if (na.rm) x <- x[!is.na(x)]
+ n <- length(x)
+ if (n < 4) return(NA_real_)
+
+ m <- mean(x)
+ m2 <- mean((x - m)^2)
+ m4 <- mean((x - m)^4)
+
+ if (m2 == 0) return(if (excess) 0 else 3)
+
+ g2 <- m4 / (m2^2)
+ g2_excess <- g2 - 3
+
+ if (!adjusted) return(if (excess) g2_excess else g2)
+
+ G2_excess <- ((n - 1) / ((n - 2) * (n - 3))) * ((n + 1) * g2_excess + 6)
+
+ if (excess) G2_excess else G2_excess + 3
+}
+#'
+#' @rdname stats
+#' @export
+spread <- function(x, na.rm = FALSE) {
+ max_if_any(x, na.rm = na.rm) - min_if_any(x, na.rm = na.rm)
+}
+#'
+#' @importFrom lighthouse min_if_any max_if_any se_mean
+#' @rdname stats
+#' @export
+lighthouse::min_if_any
+#'
+#' @rdname stats
+#' @export
+lighthouse::max_if_any
+#'
+#' @rdname stats
+#' @export
+lighthouse::se_mean
diff --git a/R/utils.r b/R/utils.r
index 6f49877..59d481a 100644
--- a/R/utils.r
+++ b/R/utils.r
@@ -76,10 +76,11 @@ try_sort_numeric <- function(x,
sort(x, decreasing = decreasing, ...)
}
}
-try_sort_numeric(letters)
- coercible <- lighthouse::is_coercible_numeric(letters, na = "TRUE")
-class_collapse <- function(x, sep = ", ") stringr::str_c(class(x), collapse = sep)
+class_collapse <- function(x, sep = ", ") {
+ stringr::str_c(class(x), collapse = sep)
+}
+
strip_html <- function(x) {
stopifnot(is.character(x))
has_tags <- grepl("<[A-Za-z!/]", x)
@@ -172,7 +173,6 @@ cb_match_type <- function(nm,
as_named <- function(x, class) setNames(as(x, class), names(x))
-
has_val_labels <- function(x) !is.null(labelled::val_labels(x))
to_labelled_chr <- function(x,
@@ -185,9 +185,6 @@ to_labelled_chr <- function(x,
)
}
-spread_if_any <- function(..., na.rm = TRUE) {
- lighthouse::max_if_any(..., na.rm = na.rm) - lighthouse::min_if_any(..., na.rm = na.rm)
-}
#' @export
nan_to_na.default <- function(x) dplyr::if_else(is.nan(x), NA, x)
diff --git a/README.Rmd b/README.Rmd
index a60cfe5..be922f8 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -27,7 +27,8 @@ customized in a number of ways, including options for grouped summaries.
You can install lighthouse.codebook by running:
```r
-# install.packages("remotes")
+## git2r needed only if Git isn't installed on your system
+# install.packages(c("git2r", "remotes"))
remotes::install_github("ccsarapas/lighthouse.codebook")
```
@@ -73,9 +74,9 @@ dat_rc$data |>
There are many options for controlling how data is interpreted, summarized, and presented.
See the [introduction to lighthouse.codebook](https://ccsarapas.github.io/lighthouse.codebook/articles/lighthouse-codebook.html)
-for some of the most useful options, including grouped data summaries and specifying
-user missing codes. Further options are detailed in the help pages for `cb_create()`
-and `cb_write()`.
+for some of the most useful options, including grouped data summaries, summary statistics
+for numeric variables, and specifying user missing codes. Further options are detailed
+in the help pages for `cb_create()` and `cb_write()`.