From 28e0b1d7b83d8a8d56c477503eb2404f4cf76c32 Mon Sep 17 00:00:00 2001 From: ccsarapas Date: Thu, 19 Feb 2026 10:33:53 -0600 Subject: [PATCH 01/14] add tests --- .gitignore | 1 + DESCRIPTION | 6 +- tests/testthat.R | 4 ++ tests/testthat/helper.R | 65 +++++++++++++++++++++ tests/testthat/test-cb_create.R | 93 +++++++++++++++++++++++++++++++ tests/testthat/test-cb_get_data.R | 46 +++++++++++++++ tests/testthat/test-cb_write.R | 55 ++++++++++++++++++ tests/testthat/test-redcap.R | 27 +++++++++ tests/testthat/test-spss.R | 28 ++++++++++ tests/testthat/test-summarize.R | 62 +++++++++++++++++++++ tests/testthat/test-validation.R | 66 ++++++++++++++++++++++ 11 files changed, 451 insertions(+), 2 deletions(-) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/helper.R create mode 100644 tests/testthat/test-cb_create.R create mode 100644 tests/testthat/test-cb_get_data.R create mode 100644 tests/testthat/test-cb_write.R create mode 100644 tests/testthat/test-redcap.R create mode 100644 tests/testthat/test-spss.R create mode 100644 tests/testthat/test-summarize.R create mode 100644 tests/testthat/test-validation.R diff --git a/.gitignore b/.gitignore index 54f27cb..efec64a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ docs inst/doc /doc/ /Meta/ +.vscode/launch.json diff --git a/DESCRIPTION b/DESCRIPTION index 5391d2e..6152f8e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,9 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.3 URL: https://github.com/ccsarapas/lighthouse.codebook, https://ccsarapas.github.io/lighthouse.codebook/ BugReports: https://github.com/ccsarapas/lighthouse.codebook/issues -Suggests: +Suggests: knitr, - rmarkdown + rmarkdown, + testthat (>= 3.0.0) VignetteBuilder: knitr + diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..1c249f4 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(lighthouse.codebook) + +?test_check("lighthouse.codebook") diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R new file mode 100644 index 0000000..b5042e6 --- /dev/null +++ b/tests/testthat/helper.R @@ -0,0 +1,65 @@ +fixture_core <- function() { + data <- tibble::tibble( + id = 1:6, + num_score = c(10, 12, 99, NA, 15, 99), + cat_code = c(1, 2, 1, 2, NA, 2), + mh_red = c(1, 0, 1, 0, 1, 0), + mh_blue = c(0, 1, 0, 1, 0, 1), + txt_note = c("alpha", "beta", "alpha", NA, "gamma", "alpha"), + event_date = as.Date(c("2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05", "2024-01-06")) + ) + + metadata <- tibble::tibble( + name = names(data), + label = c( + "Record ID", + "Numeric score", + "Binary category", + "Mood: Red", + "Mood: Blue", + "Free-text note", + "Event date" + ), + val_labels = c( + NA, + NA, + "1 = Yes; 2 = No", + "0 = No; 1 = Yes", + "0 = No; 1 = Yes", + NA, + NA + ) + ) + + list(data = data, metadata = metadata) +} + +fixture_redcap <- function() { + data <- tibble::tibble( + q1___0 = c(1, 0, 0), + q1___1 = c(0, 1, 0), + q1___9 = c(0, 0, 1) + ) + + metadata <- tibble::tibble( + field_name = "q1", + field_label = "Select all that apply", + select_choices_or_calculations = "0, Option A | 1, Option B | 9, Not asked", + field_type = "checkbox", + form_name = "form_a", + text_validation_type_or_show_slider_number = NA_character_ + ) + + list(data = data, metadata = metadata) +} + +fixture_spss <- function() { + x <- haven::labelled_spss( + c(1, 2, 9, 1), + labels = c(Yes = 1, No = 2, Refused = 9), + na_values = 9, + label = "SPSS demo var" + ) + + tibble::tibble(spss_var = x) +} diff --git a/tests/testthat/test-cb_create.R b/tests/testthat/test-cb_create.R new file mode 100644 index 0000000..e991fc5 --- /dev/null +++ b/tests/testthat/test-cb_create.R @@ -0,0 +1,93 @@ +test_that("cb_create returns li_codebook and preserves variable order", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + expect_s3_class(cb, "li_codebook") + expect_identical(cb$name, names(fx$data)) + expect_true(all(c("name", "type", "label", "values", "missing") %in% names(cb))) +}) + +test_that("cb_create parses metadata value labels and errors without separators", { + fx <- fixture_core() + + expect_error( + lighthouse.codebook::cb_create(data = fx$data, metadata = fx$metadata), + "sep1" + ) + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + cat_vals <- cb$values[cb$name == "cat_code"] + expect_match(cat_vals, "\\[1\\].*Yes") + expect_match(cat_vals, "\\[2\\].*No") +}) + +test_that("cb_create handles user missing incompatibility according to options", { + fx <- fixture_core() + + opts_warn <- lighthouse.codebook::cb_create_options(user_missing_incompatible = "warn") + expect_warning( + lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = event_date ~ as.Date("2024-01-01"), + .options = opts_warn + ), + "not compatible" + ) + + opts_error <- lighthouse.codebook::cb_create_options(user_missing_incompatible = "error") + expect_error( + lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = event_date ~ as.Date("2024-01-01"), + .options = opts_error + ), + "not compatible" + ) +}) + +test_that("cb_create split_var_labels creates label_stem and rejects overlaps", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .split_var_labels = tidyselect::starts_with("mh_") + ) + + expect_true("label_stem" %in% names(cb)) + mh_idx <- cb$name %in% c("mh_red", "mh_blue") + expect_equal(length(unique(stats::na.omit(cb$label_stem[mh_idx]))), 1) + expect_setequal(cb$label[mh_idx], c("Red", "Blue")) + + expect_error( + lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .split_var_labels = list(tidyselect::starts_with("mh_"), mh_red) + ), + "captured by more than one expression" + ) +}) + diff --git a/tests/testthat/test-cb_get_data.R b/tests/testthat/test-cb_get_data.R new file mode 100644 index 0000000..f6452ad --- /dev/null +++ b/tests/testthat/test-cb_get_data.R @@ -0,0 +1,46 @@ +test_that("cb_get_data factors format converts labelled vars and zaps user missings", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = num_score ~ 99 + ) + + out <- lighthouse.codebook::cb_get_data(cb, format = "factors") + + expect_s3_class(out, "data.frame") + expect_true(is.factor(out$cat_code)) + expect_true(any(is.na(out$num_score))) + expect_false(any(out$num_score == 99, na.rm = TRUE)) +}) + +test_that("cb_get_data haven format returns labelled vectors with missing metadata", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = num_score ~ 99 + ) + + out <- lighthouse.codebook::cb_get_data(cb, format = "haven") + + expect_true("haven_labelled_spss" %in% class(out$cat_code)) + expect_true("haven_labelled_spss" %in% class(out$num_score)) + expect_true(99 %in% labelled::na_values(out$num_score)) +}) + +test_that("cb_get_data rejects deprecated format values", { + fx <- fixture_core() + cb <- lighthouse.codebook::cb_create(data = fx$data) + + expect_error( + lighthouse.codebook::cb_get_data(cb, format = "values"), + "no longer supported" + ) +}) diff --git a/tests/testthat/test-cb_write.R b/tests/testthat/test-cb_write.R new file mode 100644 index 0000000..e17a3e7 --- /dev/null +++ b/tests/testthat/test-cb_write.R @@ -0,0 +1,55 @@ +test_that("cb_write writes workbook and includes core summary sheets", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + out_file <- tempfile(fileext = ".xlsx") + on.exit(unlink(out_file), add = TRUE) + out <- lighthouse.codebook::cb_write(cb, file = out_file, overwrite = TRUE) + + expect_identical(out, out_file) + expect_true(file.exists(out_file)) + + wb <- openxlsx2::wb_load(out_file) + sheets <- openxlsx2::wb_get_sheet_names(wb) + + expect_true(all(c( + "Overview", + "Summary - Numeric", + "Summary - Categorical", + "Summary - Text" + ) %in% sheets)) +}) + +test_that("cb_write grouped mode adds grouped summary sheets", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + out_file <- tempfile(fileext = ".xlsx") + on.exit(unlink(out_file), add = TRUE) + lighthouse.codebook::cb_write( + cb, + file = out_file, + group_by = mh_red, + overwrite = TRUE + ) + + wb <- openxlsx2::wb_load(out_file) + sheets <- openxlsx2::wb_get_sheet_names(wb) + + expect_true(all(c( + "Grouped Summary - Numeric", + "Grouped Summary - Categorical" + ) %in% sheets)) +}) diff --git a/tests/testthat/test-redcap.R b/tests/testthat/test-redcap.R new file mode 100644 index 0000000..121c2a4 --- /dev/null +++ b/tests/testthat/test-redcap.R @@ -0,0 +1,27 @@ +test_that("cb_create_redcap smoke: checkbox relabeling and missing propagation", { + fx <- fixture_redcap() + + cb <- lighthouse.codebook::cb_create_redcap( + data = fx$data, + metadata = fx$metadata, + .user_missing = ~ 9, + .options = lighthouse.codebook::cb_create_redcap_options( + checkbox_resp_values = TRUE, + propagate_checkbox_missings = TRUE + ) + ) + + expect_s3_class(cb, "li_codebook") + expect_true(all(c("q1___0", "q1___1", "q1___9") %in% cb$name)) + + vals_0 <- cb$values[cb$name == "q1___0"] + vals_9 <- cb$values[cb$name == "q1___9"] + expect_match(vals_0, "Option A") + expect_match(vals_9, "Not asked") + + dat_haven <- lighthouse.codebook::cb_get_data(cb, format = "haven") + expect_equal(as.numeric(dat_haven$q1___0[3]), 9) + expect_equal(as.numeric(dat_haven$q1___1[3]), 9) + expect_true(9 %in% labelled::na_values(dat_haven$q1___0)) + expect_true(9 %in% labelled::na_values(dat_haven$q1___1)) +}) diff --git a/tests/testthat/test-spss.R b/tests/testthat/test-spss.R new file mode 100644 index 0000000..30d1865 --- /dev/null +++ b/tests/testthat/test-spss.R @@ -0,0 +1,28 @@ +test_that("cb_create_spss smoke: imports labels and SPSS user missing metadata", { + dat <- fixture_spss() + + cb <- lighthouse.codebook::cb_create_spss(dat) + + expect_s3_class(cb, "li_codebook") + expect_true("spss_var" %in% cb$name) + + vals <- cb$values[cb$name == "spss_var"] + expect_match(vals, "\\[1\\].*Yes") + expect_match(vals, "\\[2\\].*No") + + expect_true(9 %in% unname(attr(cb, "user_missing")$spss_var)) + + dat_haven <- lighthouse.codebook::cb_get_data(cb, format = "haven") + expect_true("Refused" %in% names(labelled::val_labels(dat_haven$spss_var))) + expect_true(9 %in% labelled::na_values(dat_haven$spss_var)) +}) + +test_that("cb_create_spss + cb_get_data factors zaps user missing values", { + dat <- fixture_spss() + cb <- lighthouse.codebook::cb_create_spss(dat) + + out <- lighthouse.codebook::cb_get_data(cb, format = "factors") + + expect_true(is.factor(out$spss_var)) + expect_true(any(is.na(out$spss_var))) +}) diff --git a/tests/testthat/test-summarize.R b/tests/testthat/test-summarize.R new file mode 100644 index 0000000..7b16858 --- /dev/null +++ b/tests/testthat/test-summarize.R @@ -0,0 +1,62 @@ +test_that("cb_summarize_numeric returns expected columns and NULL when no numeric vars", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + num <- lighthouse.codebook::cb_summarize_numeric(cb) + expect_true(all(c("name", "valid_n", "valid_pct", "mean", "SD") %in% names(num))) + + dat_chr <- tibble::tibble(a = c("x", "y", NA), b = c("m", "m", "n")) + cb_chr <- lighthouse.codebook::cb_create(dat_chr) + expect_warning( + out <- lighthouse.codebook::cb_summarize_numeric(cb_chr, warn_if_none = TRUE), + "No numeric variables" + ) + expect_null(out) +}) + +test_that("cb_summarize_categorical toggles detailed missing columns and supports grouping", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = cat_code ~ c(Skipped = 2) + ) + + cat_detail <- lighthouse.codebook::cb_summarize_categorical(cb, detail_missing = TRUE) + expect_true("is_missing" %in% names(cat_detail)) + expect_true("pct_of_missing" %in% names(cat_detail)) + + cat_simple <- lighthouse.codebook::cb_summarize_categorical(cb, detail_missing = FALSE) + expect_false("is_missing" %in% names(cat_simple)) + expect_false("pct_of_missing" %in% names(cat_simple)) + + cat_group <- lighthouse.codebook::cb_summarize_categorical(cb, group_by = mh_red) + expect_true("mh_red" %in% names(cat_group)) + expect_gt(nrow(cat_group), 0) +}) + +test_that("cb_summarize_text truncates displayed values with n_text_vals", { + fx <- fixture_core() + + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + txt <- lighthouse.codebook::cb_summarize_text(cb, n_text_vals = 1, detail_missing = FALSE) + + txt_note <- txt[txt$name == "txt_note", ] + expect_true(any(grepl("other values", txt_note$value, fixed = TRUE))) +}) + diff --git a/tests/testthat/test-validation.R b/tests/testthat/test-validation.R new file mode 100644 index 0000000..8028d54 --- /dev/null +++ b/tests/testthat/test-validation.R @@ -0,0 +1,66 @@ +test_that("cb_create and cb_create_redcap enforce options class", { + fx <- fixture_core() + + expect_error( + lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .options = lighthouse.codebook::cb_create_redcap_options() + ), + "Did you mean to call" + ) + + rc <- fixture_redcap() + expect_error( + lighthouse.codebook::cb_create_redcap( + data = rc$data, + metadata = rc$metadata, + .options = lighthouse.codebook::cb_create_options() + ), + "must be created from" + ) +}) + +test_that("cb_create validates .user_missing argument type", { + fx <- fixture_core() + + expect_error( + lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = c(98, 99) + ), + "must be a formula or list of formulas" + ) +}) + +test_that("cb_write validates group_rows arguments", { + fx <- fixture_core() + cb <- lighthouse.codebook::cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + expect_error( + lighthouse.codebook::cb_write( + cb, + file = tempfile(fileext = ".xlsx"), + group_rows = mh_red + ), + "group_by" + ) + + expect_error( + lighthouse.codebook::cb_write( + cb, + file = tempfile(fileext = ".xlsx"), + group_by = mh_red, + group_rows = cat_code + ), + "must also be included in" + ) +}) From 299a78b04bdab4d08f21726e6415281fe1ea3150 Mon Sep 17 00:00:00 2001 From: ccsarapas Date: Mon, 23 Feb 2026 16:21:26 -0600 Subject: [PATCH 02/14] tweak tests and utils --- DESCRIPTION | 2 +- NEWS.md | 6 +++ R/utils.r | 8 ++-- tests/testthat/test-cb_create.R | 26 +++++----- tests/testthat/test-cb_get_data.R | 18 +++---- tests/testthat/test-cb_write.R | 12 ++--- tests/testthat/test-redcap.R | 8 ++-- tests/testthat/test-spss.R | 12 ++--- tests/testthat/test-summarize.R | 79 ++++++++++++++++++++++++------- tests/testthat/test-validation.R | 22 ++++----- 10 files changed, 123 insertions(+), 70 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6152f8e..c8259e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: lighthouse.codebook Title: Summarize Datasets for Lighthouse Institute Projects -Version: 0.3.2 +Version: 0.3.9000 Authors@R: c( person("Casey", "Sarapas", email = "ccsarapas@chestnut.org", diff --git a/NEWS.md b/NEWS.md index e5a1183..6d564bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# lighthouse.codebook 0.4.0 + +## Internal + +* Added a test suite. + # lighthouse.codebook 0.3.2 ## Fixed diff --git a/R/utils.r b/R/utils.r index 6f49877..136057a 100644 --- a/R/utils.r +++ b/R/utils.r @@ -76,10 +76,11 @@ try_sort_numeric <- function(x, sort(x, decreasing = decreasing, ...) } } -try_sort_numeric(letters) - coercible <- lighthouse::is_coercible_numeric(letters, na = "TRUE") -class_collapse <- function(x, sep = ", ") stringr::str_c(class(x), collapse = sep) +class_collapse <- function(x, sep = ", ") { + stringr::str_c(class(x), collapse = sep) +} + strip_html <- function(x) { stopifnot(is.character(x)) has_tags <- grepl("<[A-Za-z!/]", x) @@ -172,7 +173,6 @@ cb_match_type <- function(nm, as_named <- function(x, class) setNames(as(x, class), names(x)) - has_val_labels <- function(x) !is.null(labelled::val_labels(x)) to_labelled_chr <- function(x, diff --git a/tests/testthat/test-cb_create.R b/tests/testthat/test-cb_create.R index e991fc5..904a473 100644 --- a/tests/testthat/test-cb_create.R +++ b/tests/testthat/test-cb_create.R @@ -1,7 +1,7 @@ -test_that("cb_create returns li_codebook and preserves variable order", { +test_that('`cb_create()` returns "li_codebook" and preserves variable order', { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -13,15 +13,15 @@ test_that("cb_create returns li_codebook and preserves variable order", { expect_true(all(c("name", "type", "label", "values", "missing") %in% names(cb))) }) -test_that("cb_create parses metadata value labels and errors without separators", { +test_that("`cb_create()` parses metadata value labels and errors without separators", { fx <- fixture_core() expect_error( - lighthouse.codebook::cb_create(data = fx$data, metadata = fx$metadata), + cb_create(data = fx$data, metadata = fx$metadata), "sep1" ) - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -33,12 +33,12 @@ test_that("cb_create parses metadata value labels and errors without separators" expect_match(cat_vals, "\\[2\\].*No") }) -test_that("cb_create handles user missing incompatibility according to options", { +test_that("`cb_create()` handles user missing incompatibility according to options", { fx <- fixture_core() - opts_warn <- lighthouse.codebook::cb_create_options(user_missing_incompatible = "warn") + opts_warn <- cb_create_options(user_missing_incompatible = "warn") expect_warning( - lighthouse.codebook::cb_create( + cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -49,9 +49,9 @@ test_that("cb_create handles user missing incompatibility according to options", "not compatible" ) - opts_error <- lighthouse.codebook::cb_create_options(user_missing_incompatible = "error") + opts_error <- cb_create_options(user_missing_incompatible = "error") expect_error( - lighthouse.codebook::cb_create( + cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -63,10 +63,10 @@ test_that("cb_create handles user missing incompatibility according to options", ) }) -test_that("cb_create split_var_labels creates label_stem and rejects overlaps", { +test_that("`cb_create()` `split_var_labels` creates `label_stem` and rejects overlaps", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -80,7 +80,7 @@ test_that("cb_create split_var_labels creates label_stem and rejects overlaps", expect_setequal(cb$label[mh_idx], c("Red", "Blue")) expect_error( - lighthouse.codebook::cb_create( + cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", diff --git a/tests/testthat/test-cb_get_data.R b/tests/testthat/test-cb_get_data.R index f6452ad..557ceb8 100644 --- a/tests/testthat/test-cb_get_data.R +++ b/tests/testthat/test-cb_get_data.R @@ -1,7 +1,7 @@ -test_that("cb_get_data factors format converts labelled vars and zaps user missings", { +test_that("`cb_get_data()` factors format converts labelled vars and zaps user missings", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -9,7 +9,7 @@ test_that("cb_get_data factors format converts labelled vars and zaps user missi .user_missing = num_score ~ 99 ) - out <- lighthouse.codebook::cb_get_data(cb, format = "factors") + out <- cb_get_data(cb, format = "factors") expect_s3_class(out, "data.frame") expect_true(is.factor(out$cat_code)) @@ -17,10 +17,10 @@ test_that("cb_get_data factors format converts labelled vars and zaps user missi expect_false(any(out$num_score == 99, na.rm = TRUE)) }) -test_that("cb_get_data haven format returns labelled vectors with missing metadata", { +test_that("`cb_get_data()` haven format returns labelled vectors with missing metadata", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -28,19 +28,19 @@ test_that("cb_get_data haven format returns labelled vectors with missing metada .user_missing = num_score ~ 99 ) - out <- lighthouse.codebook::cb_get_data(cb, format = "haven") + out <- cb_get_data(cb, format = "haven") expect_true("haven_labelled_spss" %in% class(out$cat_code)) expect_true("haven_labelled_spss" %in% class(out$num_score)) expect_true(99 %in% labelled::na_values(out$num_score)) }) -test_that("cb_get_data rejects deprecated format values", { +test_that("`cb_get_data()` rejects deprecated format values", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create(data = fx$data) + cb <- cb_create(data = fx$data) expect_error( - lighthouse.codebook::cb_get_data(cb, format = "values"), + cb_get_data(cb, format = "values"), "no longer supported" ) }) diff --git a/tests/testthat/test-cb_write.R b/tests/testthat/test-cb_write.R index e17a3e7..21e9ffe 100644 --- a/tests/testthat/test-cb_write.R +++ b/tests/testthat/test-cb_write.R @@ -1,7 +1,7 @@ -test_that("cb_write writes workbook and includes core summary sheets", { +test_that("`cb_write()` writes workbook and includes core summary sheets", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -10,7 +10,7 @@ test_that("cb_write writes workbook and includes core summary sheets", { out_file <- tempfile(fileext = ".xlsx") on.exit(unlink(out_file), add = TRUE) - out <- lighthouse.codebook::cb_write(cb, file = out_file, overwrite = TRUE) + out <- cb_write(cb, file = out_file, overwrite = TRUE) expect_identical(out, out_file) expect_true(file.exists(out_file)) @@ -26,10 +26,10 @@ test_that("cb_write writes workbook and includes core summary sheets", { ) %in% sheets)) }) -test_that("cb_write grouped mode adds grouped summary sheets", { +test_that("`cb_write()` `group_by` adds grouped summary sheets", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -38,7 +38,7 @@ test_that("cb_write grouped mode adds grouped summary sheets", { out_file <- tempfile(fileext = ".xlsx") on.exit(unlink(out_file), add = TRUE) - lighthouse.codebook::cb_write( + cb_write( cb, file = out_file, group_by = mh_red, diff --git a/tests/testthat/test-redcap.R b/tests/testthat/test-redcap.R index 121c2a4..ab08bf9 100644 --- a/tests/testthat/test-redcap.R +++ b/tests/testthat/test-redcap.R @@ -1,11 +1,11 @@ -test_that("cb_create_redcap smoke: checkbox relabeling and missing propagation", { +test_that("`cb_create_redcap()` checkbox relabeling and missing propagation", { fx <- fixture_redcap() - cb <- lighthouse.codebook::cb_create_redcap( + cb <- cb_create_redcap( data = fx$data, metadata = fx$metadata, .user_missing = ~ 9, - .options = lighthouse.codebook::cb_create_redcap_options( + .options = cb_create_redcap_options( checkbox_resp_values = TRUE, propagate_checkbox_missings = TRUE ) @@ -19,7 +19,7 @@ test_that("cb_create_redcap smoke: checkbox relabeling and missing propagation", expect_match(vals_0, "Option A") expect_match(vals_9, "Not asked") - dat_haven <- lighthouse.codebook::cb_get_data(cb, format = "haven") + dat_haven <- cb_get_data(cb, format = "haven") expect_equal(as.numeric(dat_haven$q1___0[3]), 9) expect_equal(as.numeric(dat_haven$q1___1[3]), 9) expect_true(9 %in% labelled::na_values(dat_haven$q1___0)) diff --git a/tests/testthat/test-spss.R b/tests/testthat/test-spss.R index 30d1865..566f95c 100644 --- a/tests/testthat/test-spss.R +++ b/tests/testthat/test-spss.R @@ -1,7 +1,7 @@ -test_that("cb_create_spss smoke: imports labels and SPSS user missing metadata", { +test_that("`cb_create_spss()` imports labels and SPSS user missing metadata", { dat <- fixture_spss() - cb <- lighthouse.codebook::cb_create_spss(dat) + cb <- cb_create_spss(dat) expect_s3_class(cb, "li_codebook") expect_true("spss_var" %in% cb$name) @@ -12,16 +12,16 @@ test_that("cb_create_spss smoke: imports labels and SPSS user missing metadata", expect_true(9 %in% unname(attr(cb, "user_missing")$spss_var)) - dat_haven <- lighthouse.codebook::cb_get_data(cb, format = "haven") + dat_haven <- cb_get_data(cb, format = "haven") expect_true("Refused" %in% names(labelled::val_labels(dat_haven$spss_var))) expect_true(9 %in% labelled::na_values(dat_haven$spss_var)) }) -test_that("cb_create_spss + cb_get_data factors zaps user missing values", { +test_that("`cb_create_spss()` + `cb_get_data()` factors zaps user missing values", { dat <- fixture_spss() - cb <- lighthouse.codebook::cb_create_spss(dat) + cb <- cb_create_spss(dat) - out <- lighthouse.codebook::cb_get_data(cb, format = "factors") + out <- cb_get_data(cb, format = "factors") expect_true(is.factor(out$spss_var)) expect_true(any(is.na(out$spss_var))) diff --git a/tests/testthat/test-summarize.R b/tests/testthat/test-summarize.R index 7b16858..2a6ea0f 100644 --- a/tests/testthat/test-summarize.R +++ b/tests/testthat/test-summarize.R @@ -1,29 +1,44 @@ -test_that("cb_summarize_numeric returns expected columns and NULL when no numeric vars", { +test_that("`cb_summarize_numeric()` returns expected columns", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", .val_labs_sep2 = "; " ) - num <- lighthouse.codebook::cb_summarize_numeric(cb) + num <- cb_summarize_numeric(cb) expect_true(all(c("name", "valid_n", "valid_pct", "mean", "SD") %in% names(num))) +}) - dat_chr <- tibble::tibble(a = c("x", "y", NA), b = c("m", "m", "n")) - cb_chr <- lighthouse.codebook::cb_create(dat_chr) - expect_warning( - out <- lighthouse.codebook::cb_summarize_numeric(cb_chr, warn_if_none = TRUE), - "No numeric variables" - ) +test_that("`cb_summarize_numeric()` returns `NULL` when no numeric vars", { + dat_chr <- data.frame(a = c("x", "y", NA), b = c("m", "m", "n")) + cb_chr <- cb_create(dat_chr) + expect_warning(out <- cb_summarize_numeric(cb_chr), "No numeric variables") expect_null(out) }) +test_that("`cb_summarize_numeric()` supports grouping", { + fx <- fixture_core() + + cb <- cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; " + ) + + num_group <- cb_summarize_numeric(cb, group_by = mh_red) + expect_true("mh_red" %in% names(num_group)) + expect_gt(nrow(num_group), 0) +}) test_that("cb_summarize_categorical toggles detailed missing columns and supports grouping", { +test_that( + "`cb_summarize_categorical()` `detail_missing` toggles detailed missing columns", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -31,30 +46,62 @@ test_that("cb_summarize_categorical toggles detailed missing columns and support .user_missing = cat_code ~ c(Skipped = 2) ) - cat_detail <- lighthouse.codebook::cb_summarize_categorical(cb, detail_missing = TRUE) + cat_detail <- cb_summarize_categorical(cb, detail_missing = TRUE) expect_true("is_missing" %in% names(cat_detail)) expect_true("pct_of_missing" %in% names(cat_detail)) - cat_simple <- lighthouse.codebook::cb_summarize_categorical(cb, detail_missing = FALSE) + cat_simple <- cb_summarize_categorical(cb, detail_missing = FALSE) expect_false("is_missing" %in% names(cat_simple)) expect_false("pct_of_missing" %in% names(cat_simple)) +}) + +test_that("`cb_summarize_categorical()` supports grouping", { + fx <- fixture_core() + + cb <- cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = cat_code ~ c(Skipped = 2) + ) - cat_group <- lighthouse.codebook::cb_summarize_categorical(cb, group_by = mh_red) + cat_group <- cb_summarize_categorical(cb, group_by = mh_red) expect_true("mh_red" %in% names(cat_group)) expect_gt(nrow(cat_group), 0) }) -test_that("cb_summarize_text truncates displayed values with n_text_vals", { +test_that("`cb_summarize_text()` `detail_missing` toggles detailed missing columns", { + fx <- fixture_core() + + cb <- cb_create( + data = fx$data, + metadata = fx$metadata, + .val_labs_sep1 = " = ", + .val_labs_sep2 = "; ", + .user_missing = txt_note ~ c(Skipped = "SKIP") + ) + + txt_detail <- cb_summarize_text(cb, detail_missing = TRUE) + expect_true("is_missing" %in% names(txt_detail)) + expect_true("pct_of_missing" %in% names(txt_detail)) + + txt_simple <- cb_summarize_text(cb, detail_missing = FALSE) + expect_false("is_missing" %in% names(txt_simple)) + expect_false("pct_of_missing" %in% names(txt_simple)) +}) + +test_that("`cb_summarize_text()` truncates displayed values with `n_text_vals`", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", .val_labs_sep2 = "; " ) - txt <- lighthouse.codebook::cb_summarize_text(cb, n_text_vals = 1, detail_missing = FALSE) + txt <- cb_summarize_text(cb, n_text_vals = 1, detail_missing = FALSE) txt_note <- txt[txt$name == "txt_note", ] expect_true(any(grepl("other values", txt_note$value, fixed = TRUE))) diff --git a/tests/testthat/test-validation.R b/tests/testthat/test-validation.R index 8028d54..5b4a630 100644 --- a/tests/testthat/test-validation.R +++ b/tests/testthat/test-validation.R @@ -1,31 +1,31 @@ -test_that("cb_create and cb_create_redcap enforce options class", { +test_that("`cb_create()` and `cb_create_redcap()` enforce options class", { fx <- fixture_core() expect_error( - lighthouse.codebook::cb_create( + cb_create( data = fx$data, metadata = fx$metadata, - .options = lighthouse.codebook::cb_create_redcap_options() + .options = cb_create_redcap_options() ), "Did you mean to call" ) rc <- fixture_redcap() expect_error( - lighthouse.codebook::cb_create_redcap( + cb_create_redcap( data = rc$data, metadata = rc$metadata, - .options = lighthouse.codebook::cb_create_options() + .options = cb_create_options() ), "must be created from" ) }) -test_that("cb_create validates .user_missing argument type", { +test_that("`cb_create()` validates `.user_missing` argument type", { fx <- fixture_core() expect_error( - lighthouse.codebook::cb_create( + cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -36,9 +36,9 @@ test_that("cb_create validates .user_missing argument type", { ) }) -test_that("cb_write validates group_rows arguments", { +test_that("`cb_write()` validates `group_rows` arguments", { fx <- fixture_core() - cb <- lighthouse.codebook::cb_create( + cb <- cb_create( data = fx$data, metadata = fx$metadata, .val_labs_sep1 = " = ", @@ -46,7 +46,7 @@ test_that("cb_write validates group_rows arguments", { ) expect_error( - lighthouse.codebook::cb_write( + cb_write( cb, file = tempfile(fileext = ".xlsx"), group_rows = mh_red @@ -55,7 +55,7 @@ test_that("cb_write validates group_rows arguments", { ) expect_error( - lighthouse.codebook::cb_write( + cb_write( cb, file = tempfile(fileext = ".xlsx"), group_by = mh_red, From 5362cf903421818f876e20b9eaae8801b3405a0b Mon Sep 17 00:00:00 2001 From: ccsarapas Date: Mon, 23 Feb 2026 16:23:32 -0600 Subject: [PATCH 03/14] add stats helpers --- DESCRIPTION | 1 - NAMESPACE | 9 +++++ NEWS.md | 6 +++ R/stats.r | 91 ++++++++++++++++++++++++++++++++++++++++++++ R/utils.r | 3 -- man/stats.Rd | 52 +++++++++++++++++++++++++ pkgdown/_pkgdown.yml | 12 +++++- 7 files changed, 168 insertions(+), 6 deletions(-) create mode 100644 R/stats.r create mode 100644 man/stats.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c8259e6..d973b98 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,6 @@ Imports: haven, labelled, lighthouse, - moments, openxlsx2, purrr, rlang, diff --git a/NAMESPACE b/NAMESPACE index af707b1..3ae72bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,8 +13,17 @@ export(cb_summarize_categorical) export(cb_summarize_numeric) export(cb_summarize_text) export(cb_write) +export(kurtosis) +export(max_if_any) +export(min_if_any) +export(se_mean) +export(skew) +export(spread) importFrom(lighthouse,"%<-%") importFrom(lighthouse,glue_chr) +importFrom(lighthouse,max_if_any) +importFrom(lighthouse,min_if_any) +importFrom(lighthouse,se_mean) importFrom(lighthouse,untidyselect) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) diff --git a/NEWS.md b/NEWS.md index 6d564bb..1effc81 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,15 @@ # lighthouse.codebook 0.4.0 +## Added + +* A handful of stats helpers for use in the new `stats` / `stats_numeric` arguments, including `skew()`, `kurtosis()`, `spread()`, and re-exports from the lighthouse package `min_if_any()`, `max_if_any()`, and `se_mean()`. + ## Internal * Added a test suite. +* Dropped dependency on moments package. + # lighthouse.codebook 0.3.2 ## Fixed diff --git a/R/stats.r b/R/stats.r new file mode 100644 index 0000000..f8efb1d --- /dev/null +++ b/R/stats.r @@ -0,0 +1,91 @@ +#' Statistics for numeric summaries +#' +#' @description +#' Functions for computing summary statistics for use in `cb_summarize_numeric()`. +#' - `skew()` and `kurtosis()` return adjusted skewness and kurtosis. (Unadjusted +#' estimates can be obtained by setting `adjusted = FALSE`.) +#' - `spread()` returns the difference between a vector's minimum and maximum values. +#' - `min_if_any()` and `max_if_any()` return minima and maxima with alternate behavior +#' if all values are missing. (Re-exported from the lighthouse package. See `lighthouse::min_if_any` +#' for more details.) +#' - `se_mean()` returns the standard error of the mean. (Re-exported from the lighthouse +#' package. See `lighthouse::se_mean` for more details.) +#' +#' @param x A numeric vector. +#' +#' @param ... A numeric vector or vectors. +#' +#' @param na.rm Should missing values be removed? (Note that `cb_summarize_numeric()` +#' automatically sets na.rm to `TRUE` for all functions). +#' +#' @param adjusted If `TRUE`, returns adjusted skewness (_G_1) or kurtosis +#' (_G_2) by applying a small-sample correction. If `FALSE`, returns +#' the unadjusted skewness (_g_1) or kurtosis (_g_2). (Note +#' that `TRUE` corresponds to behavior of software such as SPSS, SAS, and Excel.) +#' +#' @param excess If `TRUE`, returns excess kurtosis by (total kurtosis - 3). +#' +#' @name stats +#' +#' @rdname stats +#' @export +skew <- function(x, adjusted = TRUE, na.rm = FALSE) { + if (na.rm) x <- x[!is.na(x)] + n <- length(x) + + if (n < 3) return(NA_real_) + + m <- mean(x) + m2 <- mean((x - m)^2) + m3 <- mean((x - m)^3) + + if (m2 == 0) return(0) + + g1 <- m3 / (m2^(3/2)) + + if (!adjusted) return(g1) + + sqrt(n * (n - 1)) / (n - 2) * g1 +} +#' +#' @rdname stats +#' @export +kurtosis <- function(x, adjusted = TRUE, excess = TRUE, na.rm = FALSE) { + if (na.rm) x <- x[!is.na(x)] + n <- length(x) + if (n < 4) return(NA_real_) + + m <- mean(x) + m2 <- mean((x - m)^2) + m4 <- mean((x - m)^4) + + if (m2 == 0) return(if (excess) 0 else 3) + + g2 <- m4 / (m2^2) + g2_excess <- g2 - 3 + + if (!adjusted) return(if (excess) g2_excess else g2) + + G2_excess <- ((n - 1) / ((n - 2) * (n - 3))) * ((n + 1) * g2_excess + 6) + + if (excess) G2_excess else G2_excess + 3 +} +#' +#' @rdname stats +#' @export +spread <- function(x, na.rm = FALSE) { + max_if_any(x, na.rm = na.rm) - min_if_any(x, na.rm = na.rm) +} +#' +#' @importFrom lighthouse min_if_any max_if_any se_mean +#' @rdname stats +#' @export +lighthouse::min_if_any +#' +#' @rdname stats +#' @export +lighthouse::max_if_any +#' +#' @rdname stats +#' @export +lighthouse::se_mean diff --git a/R/utils.r b/R/utils.r index 136057a..59d481a 100644 --- a/R/utils.r +++ b/R/utils.r @@ -185,9 +185,6 @@ to_labelled_chr <- function(x, ) } -spread_if_any <- function(..., na.rm = TRUE) { - lighthouse::max_if_any(..., na.rm = na.rm) - lighthouse::min_if_any(..., na.rm = na.rm) -} #' @export nan_to_na.default <- function(x) dplyr::if_else(is.nan(x), NA, x) diff --git a/man/stats.Rd b/man/stats.Rd new file mode 100644 index 0000000..e4c4d1a --- /dev/null +++ b/man/stats.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stats.r +\name{stats} +\alias{stats} +\alias{skew} +\alias{kurtosis} +\alias{spread} +\alias{min_if_any} +\alias{max_if_any} +\alias{se_mean} +\title{Statistics for numeric summaries} +\usage{ +skew(x, adjusted = TRUE, na.rm = FALSE) + +kurtosis(x, adjusted = TRUE, excess = TRUE, na.rm = FALSE) + +spread(x, na.rm = FALSE) + +min_if_any(..., na.rm = TRUE) + +max_if_any(..., na.rm = TRUE) + +se_mean(x, na.rm = FALSE) +} +\arguments{ +\item{x}{A numeric vector.} + +\item{adjusted}{If \code{TRUE}, returns adjusted skewness (\emph{G}\if{html}{\out{}}1\if{html}{\out{}}) or kurtosis +(\emph{G}\if{html}{\out{}}2\if{html}{\out{}}) by applying a small-sample correction. If \code{FALSE}, returns +the unadjusted skewness (\emph{g}\if{html}{\out{}}1\if{html}{\out{}}) or kurtosis (\emph{g}\if{html}{\out{}}2\if{html}{\out{}}). (Note +that \code{TRUE} corresponds to behavior of software such as SPSS, SAS, and Excel.)} + +\item{na.rm}{Should missing values be removed? (Note that \code{cb_summarize_numeric()} +automatically sets na.rm to \code{TRUE} for all functions).} + +\item{excess}{If \code{TRUE}, returns excess kurtosis by (total kurtosis - 3).} + +\item{...}{A numeric vector or vectors.} +} +\description{ +Functions for computing summary statistics for use in \code{cb_summarize_numeric()}. +\itemize{ +\item \code{skew()} and \code{kurtosis()} return adjusted skewness and kurtosis. (Unadjusted +estimates can be obtained by setting \code{adjusted = FALSE}.) +\item \code{spread()} returns the difference between a vector's minimum and maximum values. +\item \code{min_if_any()} and \code{max_if_any()} return minima and maxima with alternate behavior +if all values are missing. (Re-exported from the lighthouse package. See \code{lighthouse::min_if_any} +for more details.) +\item \code{se_mean()} returns the standard error of the mean. (Re-exported from the lighthouse +package. See \code{lighthouse::se_mean} for more details.) +} +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index a77cf55..2f83ada 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -22,12 +22,20 @@ reference: - cb_create_spss - cb_create_redcap - cb_write -- title: Other functions - desc: Functions to set options or get information from codebook objects +- title: Other codebook functions + desc: Extract data or generate summaries from codebook objects contents: - cb_get_data - cb_summarize_numeric - cb_summarize_categorical - cb_summarize_text +- title: Helpers + contents: - cb_create_options - cb_create_redcap_options + - skew + - kurtosis + - spread + - min_if_any + - max_if_any + - se_mean From b5f33bf2b5f6a2ae533a164e8ed9bc42bd76a686 Mon Sep 17 00:00:00 2001 From: ccsarapas Date: Mon, 23 Feb 2026 16:51:44 -0600 Subject: [PATCH 04/14] implement `stats` and `stats_numeric` args --- NEWS.md | 2 + R/cb_summarize.r | 111 +++++++++++++++++++++++++----- R/cb_write.r | 32 ++++++++- README.Rmd | 6 +- README.md | 7 +- man/cb_summarize_numeric.Rd | 62 +++++++++++++++-- man/cb_write.Rd | 7 ++ tests/testthat/test-cb_write.R | 26 +++++++ tests/testthat/test-summarize.R | 42 ++++++++++- vignettes/lighthouse-codebook.Rmd | 41 +++++++++-- 10 files changed, 300 insertions(+), 36 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1effc81..5b211bd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ ## Added +* Summary statistics returned by `cb_summarize_numeric()` can now be specified using the new `stats` argument. Summary statistics included on the numeric summary tab of workbooks written by `cb_write()` can likewise be specified using the `stats_numeric` argument. + * A handful of stats helpers for use in the new `stats` / `stats_numeric` arguments, including `skew()`, `kurtosis()`, `spread()`, and re-exports from the lighthouse package `min_if_any()`, `max_if_any()`, and `se_mean()`. ## Internal diff --git a/R/cb_summarize.r b/R/cb_summarize.r index d2c4650..e2d8405 100644 --- a/R/cb_summarize.r +++ b/R/cb_summarize.r @@ -1,5 +1,5 @@ #' Summarize numeric variables from a codebook object -#' +#' #' `cb_summarize_numeric()` generates a summary table for all numeric variables #' from a codebook object, optionally by group. Future releases will include options #' to specify the summary statistics used. Currently, summary statistics are valid @@ -7,6 +7,10 @@ #' #' @param cb An object of class `"li_codebook"` as produced by [`cb_create()`] or #' a variant. +#' @param stats A named list of summary functions to include. The defaults include +#' mean and standard deviation (SD); median and median absolute deviation (MAD); +#' minimum, maximum, and range; and adjusted skewness and kurtosis. See details +#' and examples. #' @param group_by <[`tidy-select`][dplyr_tidy_select]> Column or columns to group #' by. #' @param warn_if_none Should a warning be issued if there are no numeric variables @@ -21,21 +25,94 @@ #' a non-missing label stem. #' - `label`: variable label #' - `valid_n`, `valid_pct`: number and proportion of non-missing values -#' - summary statistic columns: by default, these include `mean` and standard -#' deviation (`SD`); `median`, median absolute deviation (`MAD`), `min`, `max`, -#' and `range`; skewness (`skew`), and kurtosis (`kurt`). -#' +#' - Summary statistic columns as specified in `stats` +#' +#' @details +#' The `stats` argument controls which summary statistics will be computed. It takes +#' a named list of functions, where the names will be used as column names. +#' +#' `cb_summarize_numeric()` will set `na.rm` to `TRUE` for any function that takes +#' a `na.rm` argument. +#' +#' You can include anonymous functions. If wrapping a function that takes a `na.rm` +#' argument, it is recommended you explicitly set `na.rm` to `TRUE`. (e.g., to include +#' the 25th quantile, use `q25 = \(x) quantile(x, 0.25, na.rm = TRUE)`. +#' +# #' Names will be formatted when written to an Excel codebook: +# #' - "_" will be replaced with " " +# #' - "pct" will be replaced with "%" +# #' - names will generally be changed to Title Case, except that "n" will not be +# #' capitalized, and any words already containing capital letters will be left as +# #' is. +# #' +#' @examples +#' cb_storms <- dplyr::storms |> +#' dplyr::mutate(year = factor(year)) |> +#' dplyr::filter(status %in% c("tropical storm", "hurricane")) |> +#' cb_create() +#' +#' # ungrouped summary with default stats +#' cb_summarize_numeric(cb_storms) +#' +#' # with subset of default stats +#' cb_summarize_numeric( +#' cb_storms, +#' stats = list(mean = mean, SD = sd) +#' ) +#' +#' # grouped summary +#' cb_summarize_numeric( +#' cb_storms, +#' stats = list(mean = mean, SD = sd), +#' group_by = status +#' ) +#' +#' # with custom stats +#' cb_summarize_numeric( +#' cb_storms, +#' stats = list( +#' median = median, +#' q25 = \(x) quantile(x, 0.25, na.rm = TRUE), +#' q75 = \(x) quantile(x, 0.75, na.rm = TRUE), +#' IQR = IQR +#' ) +#' ) +#' #' @export -cb_summarize_numeric <- function(cb, group_by = NULL, warn_if_none = TRUE) { +cb_summarize_numeric <- function(cb, + group_by = NULL, + stats = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), + warn_if_none = TRUE) { check_codebook(cb) group_by <- cb_untidyselect(cb, {{ group_by }}) cb_summarize_numeric_impl( - cb = cb, group_by = group_by, warn_if_none = warn_if_none + cb = cb, group_by = group_by, stats = stats, warn_if_none = warn_if_none ) } cb_summarize_numeric_impl <- function(cb, group_by = NULL, + stats = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), warn_if_none = FALSE, group_rows = NULL) { data <- attr(cb, "data_zapped")[cb$name] @@ -60,17 +137,19 @@ cb_summarize_numeric_impl <- function(cb, id_cols <- setdiff(id_cols, "label_stem") } - res <- lighthouse::summary_table( - data, - valid_n = lighthouse::n_valid, valid_pct = lighthouse::pct_valid, - mean, SD = sd, - median, MAD = mad, min = lighthouse::min_if_any, max = lighthouse::max_if_any, - range = spread_if_any, - skew = moments::skewness, kurt = moments::kurtosis, + args <- c( + list( + .data = data, na.rm = TRUE, .vars = all_of(nms_num), - .rows_group_by = all_of(group_by) - ) |> + .rows_group_by = all_of(group_by), + valid_n = lighthouse::n_valid, + valid_pct = lighthouse::pct_valid + ), + stats + ) + + res <- do.call(lighthouse::summary_table, args) |> dplyr::mutate(dplyr::across( all_of(group_by), \(x) fct_replace_na(factor(x), "(Missing)") diff --git a/R/cb_write.r b/R/cb_write.r index 962b964..794b5e1 100644 --- a/R/cb_write.r +++ b/R/cb_write.r @@ -27,6 +27,10 @@ #' @param group_rows_numeric,group_rows_categorical <[`tidy-select`][dplyr_tidy_select]> #' Column or columns to group by in rows on grouped numeric or categorical summary #' tab. +#' @param stats_numeric A named list of summary functions to include on the numeric +#' summary tab. Defaults include mean and standard deviation (SD); median and +#' median absolute deviation (MAD); minimum, maximum, and range; and adjusted +#' skewness and kurtosis. See `?cb_summarize_numeric` for details and examples. #' @param detail_missing Include detailed missing value information on ungrouped #' categorical and text summary tabs? (Detailed missing information for grouped #' summary tabs is not currently supported.) @@ -62,6 +66,17 @@ cb_write <- function(cb, group_rows = NULL, group_rows_numeric = group_rows, group_rows_categorical = group_rows, + stats_numeric = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), detail_missing = c("if_any_user_missing", "yes", "no"), n_text_vals = 5, incl_date = TRUE, @@ -93,6 +108,7 @@ cb_write <- function(cb, group_rows = group_rows, group_rows_numeric = group_rows_numeric, group_rows_categorical = group_rows_categorical, + stats_numeric = stats_numeric, detail_missing = detail_missing, n_text_vals = n_text_vals, incl_date = incl_date, @@ -109,6 +125,17 @@ cb_write_impl <- function(cb, group_rows = NULL, group_rows_numeric = group_rows, group_rows_categorical = group_rows, + stats_numeric = list( + mean = mean, + SD = sd, + median = median, + MAD = mad, + min = min_if_any, + max = max_if_any, + range = spread, + skew = skew, + kurt = kurtosis + ), detail_missing = c("if_any_user_missing", "yes", "no"), n_text_vals = 5, incl_date = TRUE, @@ -119,7 +146,7 @@ cb_write_impl <- function(cb, detail_missing == "if_any_user_missing" && length(attr(cb, "user_missing")) ) summaries <- list( - num = cb_summarize_numeric_impl(cb), + num = cb_summarize_numeric_impl(cb, stats = stats_numeric), cat = cb_summarize_categorical_impl(cb, detail_missing = detail_missing), txt = cb_summarize_text_impl( cb, @@ -131,7 +158,8 @@ cb_write_impl <- function(cb, summaries$num_grp <- cb_summarize_numeric_impl( cb, group_by = group_by, - group_rows = group_rows_numeric + group_rows = group_rows_numeric, + stats = stats_numeric ) summaries$cat_grp <- cb_summarize_categorical_impl( cb, diff --git a/README.Rmd b/README.Rmd index a60cfe5..d558513 100644 --- a/README.Rmd +++ b/README.Rmd @@ -73,9 +73,9 @@ dat_rc$data |> There are many options for controlling how data is interpreted, summarized, and presented. See the [introduction to lighthouse.codebook](https://ccsarapas.github.io/lighthouse.codebook/articles/lighthouse-codebook.html) -for some of the most useful options, including grouped data summaries and specifying -user missing codes. Further options are detailed in the help pages for `cb_create()` -and `cb_write()`. +for some of the most useful options, including grouped data summaries, summary statistics +for numeric variables, and specifying user missing codes. Further options are detailed +in the help pages for `cb_create()` and `cb_write()`.