diff --git a/.gitignore b/.gitignore
index 54f27cb..6284008 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@ docs
 inst/doc
 /doc/
 /Meta/
+.vscode/launch.json
+..Rcheck/00check.log
diff --git a/DESCRIPTION b/DESCRIPTION
index 5391d2e..7f78e32 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: lighthouse.codebook
 Title: Summarize Datasets for Lighthouse Institute Projects
-Version: 0.3.2
+Version: 0.4.0
 Authors@R: c(
     person("Casey", "Sarapas", 
            email = "ccsarapas@chestnut.org", 
@@ -14,11 +14,10 @@ Depends:
 Imports:
     cli,
     data.table,
-    dplyr,
+    dplyr (>= 1.2.0),
     haven,
     labelled,
     lighthouse,
-    moments,
     openxlsx2,
     purrr,
     rlang,
@@ -35,7 +34,9 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.3
 URL: https://github.com/ccsarapas/lighthouse.codebook, https://ccsarapas.github.io/lighthouse.codebook/
 BugReports: https://github.com/ccsarapas/lighthouse.codebook/issues
-Suggests: 
+Suggests:
     knitr,
-    rmarkdown
+    rmarkdown,
+    testthat (>= 3.0.0)
 VignetteBuilder: knitr
+
diff --git a/NAMESPACE b/NAMESPACE
index af707b1..3ae72bf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,8 +13,17 @@ export(cb_summarize_categorical)
 export(cb_summarize_numeric)
 export(cb_summarize_text)
 export(cb_write)
+export(kurtosis)
+export(max_if_any)
+export(min_if_any)
+export(se_mean)
+export(skew)
+export(spread)
 importFrom(lighthouse,"%<-%")
 importFrom(lighthouse,glue_chr)
+importFrom(lighthouse,max_if_any)
+importFrom(lighthouse,min_if_any)
+importFrom(lighthouse,se_mean)
 importFrom(lighthouse,untidyselect)
 importFrom(tidyselect,all_of)
 importFrom(tidyselect,any_of)
diff --git a/NEWS.md b/NEWS.md
index e5a1183..f078d5b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,27 @@
+# lighthouse.codebook 0.4.0
+
+## Added
+
+* Summary statistics returned by `cb_summarize_numeric()` can now be specified using the new `stats` argument. Summary statistics included on the numeric summary tab of workbooks written by `cb_write()` can likewise be specified using the `stats_numeric` argument. 
+
+* A handful of stats helpers for use in the new `stats` / `stats_numeric` arguments, including `skew()`, `kurtosis()`, and `spread()`, as well as `min_if_any()`, `max_if_any()`, and `se_mean()`, which are re-exported from the lighthouse package.
+
+## Fixed
+
+* User missing values defined in SPSS datasets or `"haven_labelled"` vectors are now consistently recognized (fixes #32).
+
+* Specifying numeric grouping columns no longer throws an error (fixes #31).
+
+* `cb_create()` no longer throws an error when `.val_labels = NULL` (fixes #34).
+
+* `cb_summarize_categorical()` no longer throws an error when a variable contains no value labels.
+
+## Internal
+
+* Added a test suite.
+
+* Dropped dependency on moments package.
+
 # lighthouse.codebook 0.3.2
 
 ## Fixed
diff --git a/R/cb_create.r b/R/cb_create.r
index 7e0fdab..13316d4 100644
--- a/R/cb_create.r
+++ b/R/cb_create.r
@@ -172,6 +172,7 @@ cb_create <- function(data,
       incompatible = .options$user_missing_incompatible
     ) |>
     cb_add_lookups(sep1 = .val_labs_sep1, sep2 = .val_labs_sep2) |>
+    cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |>
     cb_label_data(conflict = .options$user_missing_conflict) |>
     cb_zap_data() |>
     cb_add_dims() |>
@@ -378,7 +379,10 @@ cb_user_missings <- function(cb,
                              user_missing, 
                              match_type = TRUE,
                              incompatible = c("ignore", "warn", "error")) {
-  if (is.null(user_missing)) return(set_attrs(cb, user_missing = list()))
+  if (is.null(user_missing)) {
+    attr(cb, "user_missing") <- attr(cb, "user_missing") %||% list()
+    return(cb)
+  }
   user_missing <- check_user_missing_arg(user_missing)
   for (um in user_missing) {
     cb <- cb_user_missings_across(
@@ -410,8 +414,12 @@ lookups_from_string <- function(cb, data, sep1, sep2) {
     labs <- x[, 2]
     setNames(vals, labs)
   }
+  # if (!("values" %in% names(cb))) {
+  #   return(setNames(character(), character()))
+  # }
+  # early return if values col doesn't exist, length 0, or all NA
+  if (!("values" %in% names(cb)) || !sum(!is.na(cb$values))) return(list())
   val_labels <- na.omit(setNames(cb$values, cb$name))
-  if (!length(val_labels)) return(val_labels)
   if (is.null(sep1) || is.null(sep2)) {
     cli::cli_abort(
       "{.arg sep1} and {.arg sep2} must be specified if value labels are provided."
@@ -493,12 +501,36 @@ reconcile_missing_labels <- function(val_labs,
   list(val_labs = val_labs, missings = missings)
 }
 
+cb_reconcile_missing_labels <- function(cb,
+                                        conflict = c("val_label", "missing_label")) {
+  conflict <- match.arg(conflict)
+  labs_all <- attr(cb, "vals_by_label")
+  miss_all <- attr(cb, "user_missing")
+  # only non-factor variables that carry both value labels and user missings
+  shared <- intersect(names(labs_all), names(miss_all))
+  shared <- setdiff(shared, attr(cb, "factors") %||% character())
+  if (length(shared) == 0) return(cb)
+  for (v in shared) {
+    labs <- labs_all[[v]]
+    miss <- miss_all[[v]]
+    if (is.null(labs) || is.null(miss)) next
+    res <- reconcile_missing_labels(
+      val_labs = sort(labs), missings = sort(miss), conflict = conflict
+    )
+    miss_all[[v]] <- sort(res$missings)
+    # non-missing labels first, then user-missing labels; each sorted by value
+    is_missing <- res$val_labs %in% res$missings
+    labs_all[[v]] <- res$val_labs[order(is_missing, res$val_labs)]
+  }
+  set_attrs(cb, vals_by_label = labs_all, user_missing = miss_all)
+}
+
 cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) {
   data <- attr(cb, "data")
   vals_by_label <- attr(cb, "vals_by_label")
   factors <- attr(cb, "factors")
   user_missing <- attr(cb, "user_missing")
-  label_vars <- unique(c(names(vals_by_label), names(user_missing)))
+  label_vars <- union(names(vals_by_label), names(user_missing))
   for (nm in label_vars) {
     missings <- sort(user_missing[[nm]])
     if (nm %in% factors) {
@@ -506,15 +538,7 @@ cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) {
     } else {
       val_labs <- sort(vals_by_label[[nm]])
       if (!is.null(val_labs) && !is.null(missings)) {
-        vals <- reconcile_missing_labels(
-          val_labs = val_labs,
-          missings = missings,
-          conflict = conflict
-        )
-        missings <- sort(vals$missings)
-        val_labs <- vals$val_labs[
-          order(vals$val_labs %in% vals$missings, vals$val_labs)
-        ]
+        val_labs <- val_labs[order(val_labs %in% missings, val_labs)]
       }
       data[[nm]] <- haven::labelled_spss(
         data[[nm]], labels = val_labs, na_values = missings
@@ -581,12 +605,23 @@ cb_add_val_labels_col <- function(cb, user_missing_col = c("if_any", "yes", "no"
   } else {
     missings <- NULL
   }
-  val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors"))
-  dplyr::mutate(cb, values = val_labs, user_missings = missings)
+  include_values <- "values" %in% names(cb) ||
+    any(vapply(val_labs, \(x) !is.null(x) && length(x) > 0, logical(1)))
+  if (include_values) {
+    val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors"))
+    cb <- dplyr::mutate(cb, values = val_labs)
+  }
+  if (user_missing_col) {
+    cb <- dplyr::mutate(cb, user_missings = missings)
+  }
+  cb
 }
 
 cb_split_labels_col <- function(cb, split_var_labels = NULL) {
   if (is.null(split_var_labels)) return(cb)
+  if (!("label" %in% names(cb))) {
+    cli::cli_abort("{.arg .split_var_labels} requires a {.code label} column.")
+  }
   if (rlang::is_call(split_var_labels) && rlang::call_name(split_var_labels) == "list") {
     split_var_labels <- rlang::call_args(split_var_labels)
   } else {
diff --git a/R/cb_create_redcap.r b/R/cb_create_redcap.r
index ccb8c58..ad25e69 100644
--- a/R/cb_create_redcap.r
+++ b/R/cb_create_redcap.r
@@ -115,6 +115,7 @@ cb_create_redcap <- function(data,
     cb <- cb_propagate_user_missing_checkboxes_rc(cb)
   }
   cb |>
+    cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |>
     cb_label_data(conflict = .options$user_missing_conflict) |>
     cb_zap_data() |>
     cb_add_dims() |>
diff --git a/R/cb_create_spss.r b/R/cb_create_spss.r
index 3fc7789..9823e40 100644
--- a/R/cb_create_spss.r
+++ b/R/cb_create_spss.r
@@ -96,7 +96,7 @@ cb_user_missings_from_spss <- function(cb) {
       user_missings[[nm]] <- c(user_missings[[nm]], miss_vals)      
     }
   }
-  
+
   if (length(user_missings)) attr(cb, "user_missing") <- user_missings
   cb
 }
@@ -106,13 +106,14 @@ cb_update_labels_spss <- function(cb,
                                   user_missing_conflict = c("val_label", "missing_label"),
                                   user_missing_incompatible = c("ignore", "warn", "error")) {
   data <- attr(cb, "data")
-    cb <- cb |>
-      cb_user_missings_from_spss() |>
-      cb_user_missings(
-        user_missing = user_missing,
-        incompatible = user_missing_incompatible
-      ) |>
-      cb_add_lookups()
+  cb <- cb |>
+    cb_user_missings_from_spss() |>
+    cb_user_missings(
+      user_missing = user_missing,
+      incompatible = user_missing_incompatible
+    ) |>
+    cb_add_lookups() |>
+    cb_reconcile_missing_labels(conflict = user_missing_conflict)
   if (is.null(user_missing)) {
     cb |> set_attrs(data_labelled = data)
   } else {
diff --git a/R/cb_summarize.r b/R/cb_summarize.r
index d2c4650..a477eaf 100644
--- a/R/cb_summarize.r
+++ b/R/cb_summarize.r
@@ -1,12 +1,14 @@
 #' Summarize numeric variables from a codebook object
-#'
+#' 
 #' `cb_summarize_numeric()` generates a summary table for all numeric variables
-#' from a codebook object, optionally by group. Future releases will include options
-#' to specify the summary statistics used. Currently, summary statistics are valid
-#' n and %; mean and SD; median, MAD, min, max, and range; skewness, and kurtosis.
+#' from a codebook object, optionally by group.
 #'
 #' @param cb An object of class `"li_codebook"` as produced by [`cb_create()`] or
 #'   a variant.
+#' @param stats A named list of summary functions to include. The defaults include 
+#'   mean and standard deviation (SD); median and median absolute deviation (MAD); 
+#'   minimum, maximum, and range; and adjusted skewness and kurtosis. See details 
+#'   and examples.
 #' @param group_by <[`tidy-select`][dplyr_tidy_select]> Column or columns to group
 #'   by.
 #' @param warn_if_none Should a warning be issued if there are no numeric variables
@@ -21,25 +23,101 @@
 #'      a non-missing label stem.
 #'   - `label`: variable label
 #'   - `valid_n`, `valid_pct`: number and proportion of non-missing values
-#'   - summary statistic columns: by default, these include `mean` and standard 
-#'     deviation (`SD`); `median`, median absolute deviation (`MAD`), `min`, `max`, 
-#'     and `range`; skewness (`skew`), and kurtosis (`kurt`).
-#'
+#'   - Summary statistic columns as specified in `stats`
+#' 
+#' @details
+#' The `stats` argument controls which summary statistics will be computed. It takes 
+#' a named list of functions, where the names will be used as column names. 
+#' 
+#' `cb_summarize_numeric()` will set `na.rm` to `TRUE` for any function that takes 
+#' a `na.rm` argument. 
+#' 
+#' You can include anonymous functions. If wrapping a function that takes a `na.rm`
+#' argument, it is recommended that you explicitly set `na.rm` to `TRUE` (e.g., to 
+#' include the 25th percentile, use `q25 = \(x) quantile(x, 0.25, na.rm = TRUE)`).
+#' 
+# #' Names will be formatted when written to an Excel codebook:
+# #' - "_" will be replaced with " "
+# #' - "pct" will be replaced with "%"
+# #' - names will generally be changed to Title Case, except that "n" will not be 
+# #' capitalized, and any words already containing capital letters will be left as 
+# #' is.
+# #' 
+#' @examples
+#' cb_storms <- dplyr::storms |>
+#'   dplyr::mutate(year = factor(year)) |>
+#'   dplyr::filter(status %in% c("tropical storm", "hurricane")) |>
+#'   cb_create()
+#' 
+#' # ungrouped summary with default stats
+#' cb_summarize_numeric(cb_storms)
+#' 
+#' # with subset of default stats
+#' cb_summarize_numeric(
+#'   cb_storms,
+#'   stats = list(mean = mean, SD = sd)
+#' )
+#' 
+#' # grouped summary
+#' cb_summarize_numeric(
+#'   cb_storms,
+#'   stats = list(mean = mean, SD = sd),
+#'   group_by = status
+#' )
+#' 
+#' # with custom stats
+#' cb_summarize_numeric(
+#'   cb_storms,
+#'   stats = list(
+#'     median = median,
+#'     q25 = \(x) quantile(x, 0.25, na.rm = TRUE),
+#'     q75 = \(x) quantile(x, 0.75, na.rm = TRUE),
+#'     IQR = IQR
+#'   )
+#' )
+#' 
 #' @export
-cb_summarize_numeric <- function(cb, group_by = NULL, warn_if_none = TRUE) {
+cb_summarize_numeric <- function(cb, 
+                                 group_by = NULL, 
+                                 stats = list(
+                                   mean = mean, 
+                                   SD = sd,
+                                   median = median, 
+                                   MAD = mad, 
+                                   min = min_if_any, 
+                                   max = max_if_any,
+                                   range = spread,
+                                   skew = skew, 
+                                   kurt = kurtosis
+                                 ),
+                                 warn_if_none = TRUE) {
   check_codebook(cb)
   group_by <- cb_untidyselect(cb, {{ group_by }})
   cb_summarize_numeric_impl(
-    cb = cb, group_by = group_by, warn_if_none = warn_if_none
+    cb = cb, group_by = group_by, stats = stats, warn_if_none = warn_if_none
   )
 }
 
 cb_summarize_numeric_impl <- function(cb, 
                                       group_by = NULL, 
+                                      stats = list(
+                                        mean = mean, 
+                                        SD = sd,
+                                        median = median, 
+                                        MAD = mad, 
+                                        min = min_if_any, 
+                                        max = max_if_any,
+                                        range = spread,
+                                        skew = skew, 
+                                        kurt = kurtosis
+                                      ),
                                       warn_if_none = FALSE,
                                       group_rows = NULL) {
   data <- attr(cb, "data_zapped")[cb$name]
-  nms_num <- names(data)[vapply(data, is.numeric, logical(1))]
+  nms_num <- setdiff(
+    names(data)[vapply(data, is.numeric, logical(1))],
+    group_by
+  )
   id_cols <- intersect(c("name", "label_stem", "label"), names(cb))
   out <- cb |>
     dplyr::filter(name %in% nms_num) |>
@@ -47,9 +125,15 @@ cb_summarize_numeric_impl <- function(cb,
   
   if (!nrow(out)) {
     if (warn_if_none) {
-      cli::cli_warn(c(
-        "!" = "No numeric variables in codebook; returning `NULL`."
-      ))
+      if (is.null(group_by)) {
+        cli::cli_warn(c(
+          "!" = "No numeric variables in codebook; returning `NULL`."
+        ))
+      } else {
+        cli::cli_warn(c(
+          "i" = "No numeric variables in codebook after grouping; returning `NULL`."
+        ))
+      }
     }
     return(NULL)
   }
@@ -60,17 +144,19 @@ cb_summarize_numeric_impl <- function(cb,
     id_cols <- setdiff(id_cols, "label_stem")    
   }
   
-  res <- lighthouse::summary_table(
-      data,
-      valid_n = lighthouse::n_valid, valid_pct = lighthouse::pct_valid,
-      mean, SD = sd,
-      median, MAD = mad, min = lighthouse::min_if_any, max = lighthouse::max_if_any, 
-      range = spread_if_any,
-      skew = moments::skewness, kurt = moments::kurtosis,
+  args <- c(
+    list(
+      .data = data,
       na.rm = TRUE,
-      .vars = all_of(nms_num),
-      .rows_group_by = all_of(group_by)
-    ) |>
+      .vars = rlang::expr(all_of(nms_num)),
+      .rows_group_by = rlang::expr(all_of(group_by)),
+      valid_n = lighthouse::n_valid,
+      valid_pct = lighthouse::pct_valid
+    ),
+    stats
+  )
+  
+  res <- do.call(lighthouse::summary_table, args) |> 
     dplyr::mutate(dplyr::across(
       all_of(group_by),
       \(x) fct_replace_na(factor(x), "(Missing)")
diff --git a/R/cb_write.r b/R/cb_write.r
index 962b964..992d536 100644
--- a/R/cb_write.r
+++ b/R/cb_write.r
@@ -27,6 +27,10 @@
 #' @param group_rows_numeric,group_rows_categorical <[`tidy-select`][dplyr_tidy_select]> 
 #'   Column or columns to group by in rows on grouped numeric or categorical summary 
 #'   tab.
+#' @param stats_numeric A named list of summary functions to include on the numeric 
+#'   summary tab. Defaults include mean and standard deviation (SD); median and 
+#'   median absolute deviation (MAD); minimum, maximum, and range; and adjusted 
+#'   skewness and kurtosis. See `?cb_summarize_numeric` for details and examples.
 #' @param detail_missing Include detailed missing value information on ungrouped 
 #'   categorical and text summary tabs? (Detailed missing information for grouped 
 #'   summary tabs is not currently supported.)
@@ -62,6 +66,17 @@ cb_write <- function(cb,
                      group_rows = NULL,
                      group_rows_numeric = group_rows,
                      group_rows_categorical = group_rows,
+                     stats_numeric = list(
+                        mean = mean, 
+                        SD = sd,
+                        median = median, 
+                        MAD = mad, 
+                        min = min_if_any, 
+                        max = max_if_any,
+                        range = spread,
+                        skew = skew, 
+                        kurt = kurtosis
+                      ),
                      detail_missing = c("if_any_user_missing", "yes", "no"),
                      n_text_vals = 5,
                      incl_date = TRUE,
@@ -93,6 +108,7 @@ cb_write <- function(cb,
     group_rows = group_rows,
     group_rows_numeric = group_rows_numeric,
     group_rows_categorical = group_rows_categorical,
+    stats_numeric = stats_numeric,
     detail_missing = detail_missing,
     n_text_vals = n_text_vals,
     incl_date = incl_date,
@@ -109,6 +125,17 @@ cb_write_impl <- function(cb,
                           group_rows = NULL,
                           group_rows_numeric = group_rows,
                           group_rows_categorical = group_rows,
+                          stats_numeric = list(
+                            mean = mean, 
+                            SD = sd,
+                            median = median, 
+                            MAD = mad, 
+                            min = min_if_any, 
+                            max = max_if_any,
+                            range = spread,
+                            skew = skew, 
+                            kurt = kurtosis
+                          ),
                           detail_missing = c("if_any_user_missing", "yes", "no"),
                           n_text_vals = 5,
                           incl_date = TRUE,
@@ -119,7 +146,7 @@ cb_write_impl <- function(cb,
     detail_missing == "if_any_user_missing" && length(attr(cb, "user_missing"))
   )
   summaries <- list(
-    num = cb_summarize_numeric_impl(cb),
+    num = cb_summarize_numeric_impl(cb, stats = stats_numeric),
     cat = cb_summarize_categorical_impl(cb, detail_missing = detail_missing),
     txt = cb_summarize_text_impl(
       cb,
@@ -131,7 +158,8 @@ cb_write_impl <- function(cb,
     summaries$num_grp <- cb_summarize_numeric_impl(
       cb, 
       group_by = group_by, 
-      group_rows = group_rows_numeric
+      group_rows = group_rows_numeric,
+      stats = stats_numeric
     )
     summaries$cat_grp <- cb_summarize_categorical_impl(
       cb,
@@ -377,12 +405,12 @@ var_name_hyperlinks <- function(params) {
   hl <- hl_rows$overview |>
     dplyr::mutate(
       overview_nm = params$overview$sheet_name,
-      sheet = dplyr::case_match(
+      sheet = dplyr::recode_values(
         name,
         hl_rows$num$name %||% NA ~ "num",
         hl_rows$cat$name %||% NA ~ "cat",
         hl_rows$txt$name %||% NA ~ "txt",
-        .default = NA
+        default = NA
       ),
     ) |>
     dplyr::mutate(
diff --git a/R/stats.r b/R/stats.r
new file mode 100644
index 0000000..f8efb1d
--- /dev/null
+++ b/R/stats.r
@@ -0,0 +1,91 @@
+#' Statistics for numeric summaries
+#' 
+#' @description
+#' Functions for computing summary statistics for use in `cb_summarize_numeric()`.
+#' - `skew()` and `kurtosis()` return adjusted skewness and kurtosis. (Unadjusted 
+#'   estimates can be obtained by setting `adjusted = FALSE`.)
+#' - `spread()` returns the difference between a vector's minimum and maximum values.
+#' - `min_if_any()` and `max_if_any()` return minima and maxima with alternate behavior 
+#'   if all values are missing. (Re-exported from the lighthouse package. See `lighthouse::min_if_any` 
+#'   for more details.)
+#' - `se_mean()` returns the standard error of the mean. (Re-exported from the lighthouse 
+#'   package. See `lighthouse::se_mean` for more details.)
+#' 
+#' @param x A numeric vector.
+#' 
+#' @param ... A numeric vector or vectors.
+#' 
+#' @param na.rm Should missing values be removed? (Note that `cb_summarize_numeric()` 
+#' automatically sets `na.rm` to `TRUE` for all functions that accept it.)
+#' 
+#' @param adjusted If `TRUE`, returns adjusted skewness (_G_<sub>1</sub>) or kurtosis 
+#' (_G_<sub>2</sub>) by applying a small-sample correction. If `FALSE`, returns 
+#' the unadjusted skewness (_g_<sub>1</sub>) or kurtosis (_g_<sub>2</sub>). (Note 
+#' that `TRUE` corresponds to behavior of software such as SPSS, SAS, and Excel.)
+#' 
+#' @param excess If `TRUE`, returns excess kurtosis (kurtosis minus 3); if `FALSE`, returns raw kurtosis.
+#' 
+#' @name stats
+#' 
+#' @rdname stats
+#' @export
+skew <- function(x, adjusted = TRUE, na.rm = FALSE) {
+  if (na.rm) x <- x[!is.na(x)]
+  n <- length(x)
+  if (n < 3) return(NA_real_)
+
+  m  <- mean(x)
+  m2 <- mean((x - m)^2)
+  m3 <- mean((x - m)^3)
+  # NA / non-finite input makes m2 NA or NaN; return NA rather than letting
+  # `m2 == 0` fail with "missing value where TRUE/FALSE needed".
+  if (is.na(m2)) return(NA_real_)
+  if (m2 == 0) return(0)
+
+  g1 <- m3 / (m2^(3/2))
+  if (!adjusted) return(g1)
+
+  sqrt(n * (n - 1)) / (n - 2) * g1
+}
+#' 
+#' @rdname stats
+#' @export
+kurtosis <- function(x, adjusted = TRUE, excess = TRUE, na.rm = FALSE) {
+  if (na.rm) x <- x[!is.na(x)]
+  n <- length(x)
+  if (n < 4) return(NA_real_)
+
+  m  <- mean(x)
+  m2 <- mean((x - m)^2)
+  m4 <- mean((x - m)^4)
+  # NA / non-finite input makes m2 NA or NaN; return NA rather than letting
+  # `m2 == 0` fail with "missing value where TRUE/FALSE needed".
+  if (is.na(m2)) return(NA_real_)
+  if (m2 == 0) return(if (excess) 0 else 3)
+
+  g2 <- m4 / (m2^2)
+  g2_excess <- g2 - 3
+  if (!adjusted) return(if (excess) g2_excess else g2)
+  # small-sample correction is applied on the excess scale, then shifted back
+  G2_excess <- ((n - 1) / ((n - 2) * (n - 3))) * ((n + 1) * g2_excess + 6)
+  if (excess) G2_excess else G2_excess + 3
+}
+#' 
+#' @rdname stats
+#' @export
+spread <- function(x, na.rm = FALSE) { # range width: maximum minus minimum
+  max_if_any(x, na.rm = na.rm) - min_if_any(x, na.rm = na.rm)
+}
+#' 
+#' @importFrom lighthouse min_if_any max_if_any se_mean
+#' @rdname stats
+#' @export
+lighthouse::min_if_any
+#' 
+#' @rdname stats
+#' @export
+lighthouse::max_if_any
+#' 
+#' @rdname stats
+#' @export
+lighthouse::se_mean
diff --git a/R/utils.r b/R/utils.r
index 6f49877..59d481a 100644
--- a/R/utils.r
+++ b/R/utils.r
@@ -76,10 +76,11 @@ try_sort_numeric <- function(x,
     sort(x, decreasing = decreasing, ...)
   }
 }
-try_sort_numeric(letters)
-  coercible <- lighthouse::is_coercible_numeric(letters, na = "TRUE")
 
-class_collapse <- function(x, sep = ", ") stringr::str_c(class(x), collapse = sep)
+class_collapse <- function(x, sep = ", ") {
+  x |> class() |> stringr::str_c(collapse = sep) # all classes of x, as one string
+}
+
 strip_html <- function(x) {
   stopifnot(is.character(x))
   has_tags <- grepl("<[A-Za-z!/]", x)
@@ -172,7 +173,6 @@ cb_match_type <- function(nm,
 
 as_named <- function(x, class) setNames(as(x, class), names(x))
 
-
 has_val_labels <- function(x) !is.null(labelled::val_labels(x))
 
 to_labelled_chr <- function(x, 
@@ -185,9 +185,6 @@ to_labelled_chr <- function(x,
   )
 }
 
-spread_if_any <- function(..., na.rm = TRUE) {
-  lighthouse::max_if_any(..., na.rm = na.rm) - lighthouse::min_if_any(..., na.rm = na.rm)
-}
 
 #' @export
 nan_to_na.default <- function(x) dplyr::if_else(is.nan(x), NA, x)
diff --git a/README.Rmd b/README.Rmd
index a60cfe5..be922f8 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -27,7 +27,8 @@ customized in a number of ways, including options for grouped summaries.
 You can install lighthouse.codebook by running:
 
 ```r
-# install.packages("remotes")
+## git2r needed only if Git isn't installed on your system
+# install.packages(c("git2r", "remotes"))
 remotes::install_github("ccsarapas/lighthouse.codebook")
 ```
 
@@ -73,9 +74,9 @@ dat_rc$data |>
 
 There are many options for controlling how data is interpreted, summarized, and presented. 
 See the [introduction to lighthouse.codebook](https://ccsarapas.github.io/lighthouse.codebook/articles/lighthouse-codebook.html)
-for some of the most useful options, including grouped data summaries and specifying 
-user missing codes. Further options are detailed in the help pages for `cb_create()` 
-and `cb_write()`.
+for some of the most useful options, including grouped data summaries, summary statistics
+for numeric variables, and specifying user missing codes. Further options are detailed 
+in the help pages for `cb_create()` and `cb_write()`.
 
 <!-- - The "Creating Codebooks" vignette covers options for controlling how data and 
 metadata are _interpreted,_ such as by applying value labels, specifying user missing 
diff --git a/README.md b/README.md
index 9259cfa..32148ec 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,8 @@ summaries.
 You can install lighthouse.codebook by running:
 
 ``` r
-# install.packages("remotes")
+## git2r needed only if Git isn't installed on your system
+# install.packages(c("git2r", "remotes"))
 remotes::install_github("ccsarapas/lighthouse.codebook")
 ```
 
@@ -64,9 +65,10 @@ dat_rc$data |>
 There are many options for controlling how data is interpreted,
 summarized, and presented. See the [introduction to
 lighthouse.codebook](https://ccsarapas.github.io/lighthouse.codebook/articles/lighthouse-codebook.html)
-for some of the most useful options, including grouped data summaries
-and specifying user missing codes. Further options are detailed in the
-help pages for `cb_create()` and `cb_write()`.
+for some of the most useful options, including grouped data summaries,
+summary statistics for numeric variables, and specifying user missing
+codes. Further options are detailed in the help pages for `cb_create()`
+and `cb_write()`.
 
 <!-- - The "Creating Codebooks" vignette covers options for controlling how data and 
 metadata are _interpreted,_ such as by applying value labels, specifying user missing 
diff --git a/man/cb_summarize_numeric.Rd b/man/cb_summarize_numeric.Rd
index adb486f..fb81fe3 100644
--- a/man/cb_summarize_numeric.Rd
+++ b/man/cb_summarize_numeric.Rd
@@ -4,7 +4,13 @@
 \alias{cb_summarize_numeric}
 \title{Summarize numeric variables from a codebook object}
 \usage{
-cb_summarize_numeric(cb, group_by = NULL, warn_if_none = TRUE)
+cb_summarize_numeric(
+  cb,
+  group_by = NULL,
+  stats = list(mean = mean, SD = sd, median = median, MAD = mad, min = min_if_any, max =
+    max_if_any, range = spread, skew = skew, kurt = kurtosis),
+  warn_if_none = TRUE
+)
 }
 \arguments{
 \item{cb}{An object of class \code{"li_codebook"} as produced by \code{\link[=cb_create]{cb_create()}} or
@@ -13,6 +19,11 @@ a variant.}
 \item{group_by}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Column or columns to group
 by.}
 
+\item{stats}{A named list of summary functions to include. The defaults include
+mean and standard deviation (SD); median and median absolute deviation (MAD);
+minimum, maximum, and range; and adjusted skewness and kurtosis. See details
+and examples.}
+
 \item{warn_if_none}{Should a warning be issued if there are no numeric variables
 in \code{cb}?}
 }
@@ -27,14 +38,55 @@ columns:
 a non-missing label stem.
 \item \code{label}: variable label
 \item \code{valid_n}, \code{valid_pct}: number and proportion of non-missing values
-\item summary statistic columns: by default, these include \code{mean} and standard
-deviation (\code{SD}); \code{median}, median absolute deviation (\code{MAD}), \code{min}, \code{max},
-and \code{range}; skewness (\code{skew}), and kurtosis (\code{kurt}).
+\item Summary statistic columns as specified in \code{stats}
 }
 }
 \description{
 \code{cb_summarize_numeric()} generates a summary table for all numeric variables
-from a codebook object, optionally by group. Future releases will include options
-to specify the summary statistics used. Currently, summary statistics are valid
-n and \%; mean and SD; median, MAD, min, max, and range; skewness, and kurtosis.
+from a codebook object, optionally by group.
+}
+\details{
+The \code{stats} argument controls which summary statistics will be computed. It takes
+a named list of functions, where the names will be used as column names.
+
+\code{cb_summarize_numeric()} will set \code{na.rm} to \code{TRUE} for any function that takes
+a \code{na.rm} argument.
+
+You can include anonymous functions. If wrapping a function that takes a \code{na.rm}
+argument, it is recommended that you explicitly set \code{na.rm} to \code{TRUE} (e.g., to
+include the 25th percentile, use \verb{q25 = \\(x) quantile(x, 0.25, na.rm = TRUE)}).
+}
+\examples{
+cb_storms <- dplyr::storms |>
+  dplyr::mutate(year = factor(year)) |>
+  dplyr::filter(status \%in\% c("tropical storm", "hurricane")) |>
+  cb_create()
+
+# ungrouped summary with default stats
+cb_summarize_numeric(cb_storms)
+
+# with subset of default stats
+cb_summarize_numeric(
+  cb_storms,
+  stats = list(mean = mean, SD = sd)
+)
+
+# grouped summary
+cb_summarize_numeric(
+  cb_storms,
+  stats = list(mean = mean, SD = sd),
+  group_by = status
+)
+
+# with custom stats
+cb_summarize_numeric(
+  cb_storms,
+  stats = list(
+    median = median,
+    q25 = \(x) quantile(x, 0.25, na.rm = TRUE),
+    q75 = \(x) quantile(x, 0.75, na.rm = TRUE),
+    IQR = IQR
+  )
+)
+
 }
diff --git a/man/cb_write.Rd b/man/cb_write.Rd
index 1699465..83c7dd7 100644
--- a/man/cb_write.Rd
+++ b/man/cb_write.Rd
@@ -12,6 +12,8 @@ cb_write(
   group_rows = NULL,
   group_rows_numeric = group_rows,
   group_rows_categorical = group_rows,
+  stats_numeric = list(mean = mean, SD = sd, median = median, MAD = mad, min =
+    min_if_any, max = max_if_any, range = spread, skew = skew, kurt = kurtosis),
   detail_missing = c("if_any_user_missing", "yes", "no"),
   n_text_vals = 5,
   incl_date = TRUE,
@@ -43,6 +45,11 @@ in \code{group_rows_numeric} or \code{group_rows_categorical}.}
 Column or columns to group by in rows on grouped numeric or categorical summary
 tab.}
 
+\item{stats_numeric}{A named list of summary functions to include on the numeric
+summary tab. Defaults include mean and standard deviation (SD); median and
+median absolute deviation (MAD); minimum, maximum, and range; and adjusted
+skewness and kurtosis. See \code{?cb_summarize_numeric} for details and examples.}
+
 \item{detail_missing}{Include detailed missing value information on ungrouped
 categorical and text summary tabs? (Detailed missing information for grouped
 summary tabs is not currently supported.)}
diff --git a/man/stats.Rd b/man/stats.Rd
new file mode 100644
index 0000000..e4c4d1a
--- /dev/null
+++ b/man/stats.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/stats.r
+\name{stats}
+\alias{stats}
+\alias{skew}
+\alias{kurtosis}
+\alias{spread}
+\alias{min_if_any}
+\alias{max_if_any}
+\alias{se_mean}
+\title{Statistics for numeric summaries}
+\usage{
+skew(x, adjusted = TRUE, na.rm = FALSE)
+
+kurtosis(x, adjusted = TRUE, excess = TRUE, na.rm = FALSE)
+
+spread(x, na.rm = FALSE)
+
+min_if_any(..., na.rm = TRUE)
+
+max_if_any(..., na.rm = TRUE)
+
+se_mean(x, na.rm = FALSE)
+}
+\arguments{
+\item{x}{A numeric vector.}
+
+\item{adjusted}{If \code{TRUE}, returns adjusted skewness (\emph{G}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}) or kurtosis
+(\emph{G}\if{html}{\out{<sub>}}2\if{html}{\out{</sub>}}) by applying a small-sample correction. If \code{FALSE}, returns
+the unadjusted skewness (\emph{g}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}) or kurtosis (\emph{g}\if{html}{\out{<sub>}}2\if{html}{\out{</sub>}}). (Note
+that \code{TRUE} corresponds to behavior of software such as SPSS, SAS, and Excel.)}
+
+\item{na.rm}{Should missing values be removed? (Note that \code{cb_summarize_numeric()}
+automatically sets \code{na.rm} to \code{TRUE} for all functions.)}
+
+\item{excess}{If \code{TRUE}, returns excess kurtosis (total kurtosis minus 3).}
+
+\item{...}{A numeric vector or vectors.}
+}
+\description{
+Functions for computing summary statistics for use in \code{cb_summarize_numeric()}.
+\itemize{
+\item \code{skew()} and \code{kurtosis()} return adjusted skewness and kurtosis. (Unadjusted
+estimates can be obtained by setting \code{adjusted = FALSE}.)
+\item \code{spread()} returns the difference between a vector's minimum and maximum values.
+\item \code{min_if_any()} and \code{max_if_any()} return minima and maxima with alternate behavior
+if all values are missing. (Re-exported from the lighthouse package. See \code{lighthouse::min_if_any}
+for more details.)
+\item \code{se_mean()} returns the standard error of the mean. (Re-exported from the lighthouse
+package. See \code{lighthouse::se_mean} for more details.)
+}
+}
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index a77cf55..2f83ada 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -22,12 +22,20 @@ reference:
   - cb_create_spss
   - cb_create_redcap
   - cb_write
-- title: Other functions
-  desc: Functions to set options or get information from codebook objects
+- title: Other codebook functions
+  desc: Extract data or generate summaries from codebook objects
   contents:
   - cb_get_data
   - cb_summarize_numeric
   - cb_summarize_categorical
   - cb_summarize_text
+- title: Helpers
+  contents:
   - cb_create_options
   - cb_create_redcap_options
+  - skew
+  - kurtosis
+  - spread
+  - min_if_any
+  - max_if_any
+  - se_mean
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..8a490c0
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(lighthouse.codebook)
+
+test_check("lighthouse.codebook")
diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
new file mode 100644
index 0000000..a3e945b
--- /dev/null
+++ b/tests/testthat/helper.R
@@ -0,0 +1,94 @@
+fixture_core <- function() {
+  data <- tibble::tibble(
+    id = 1:6,
+    num_score = c(10, 12, 99, NA, 15, 99),
+    cat_code = c(1, 2, 1, 2, NA, 2),
+    mh_red = c(1, 0, 1, 0, 1, 0),
+    mh_blue = c(0, 1, 0, 1, 0, 1),
+    txt_note = c("alpha", "beta", "alpha", NA, "gamma", "alpha"),
+    event_date = as.Date(c("2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05", "2024-01-06"))
+  )
+
+  metadata <- tibble::tibble(
+    name = names(data),
+    label = c(
+      "Record ID",
+      "Numeric score",
+      "Binary category",
+      "Mood: Red",
+      "Mood: Blue",
+      "Free-text note",
+      "Event date"
+    ),
+    val_labels = c(
+      NA,
+      NA,
+      "1 = Yes; 2 = No",
+      "0 = No; 1 = Yes",
+      "0 = No; 1 = Yes",
+      NA,
+      NA
+    )
+  )
+
+  list(data = data, metadata = metadata)
+}
+
+fixture_redcap <- function() {
+  data <- tibble::tibble(
+    q1___0 = c(1, 0, 0),
+    q1___1 = c(0, 1, 0),
+    q1___9 = c(0, 0, 1)
+  )
+
+  metadata <- tibble::tibble(
+    field_name = "q1",
+    field_label = "Select all that apply",
+    select_choices_or_calculations = "0, Option A | 1, Option B | 9, Not asked",
+    field_type = "checkbox",
+    form_name = "form_a",
+    text_validation_type_or_show_slider_number = NA_character_
+  )
+
+  list(data = data, metadata = metadata)
+}
+
+fixture_spss <- function() {
+  vals <- c(1, 2, -9)
+
+  tibble::tibble(
+    num_na_vals = haven::labelled_spss(
+      vals,
+      na_values = -9,
+      label = "Numeric NA values"
+    ),
+    num_na_range = haven::labelled_spss(
+      vals,
+      na_range = c(-9, -1),
+      label = "Numeric NA range"
+    ),
+    cat_na_vals_unlabelled = haven::labelled_spss(
+      vals,
+      labels = c(Yes = 1, No = 2),
+      na_values = -9,
+      label = "Categorical NA values (unlabelled missing)"
+    ),
+    cat_na_vals_labelled = haven::labelled_spss(
+      vals,
+      labels = c(Yes = 1, No = 2, Refused = -9),
+      na_values = -9,
+      label = "Categorical NA values (labelled missing)"
+    ),
+    cat_na_range = haven::labelled_spss(
+      vals,
+      labels = c(Yes = 1, No = 2),
+      na_range = c(-9, -1),
+      label = "Categorical NA range"
+    ),
+    txt_na_vals = haven::labelled_spss(
+      as.character(vals),
+      na_values = "-9",
+      label = "Text NA values"
+    )
+  )
+}
diff --git a/tests/testthat/test-cb_create.R b/tests/testthat/test-cb_create.R
new file mode 100644
index 0000000..f1b2ede
--- /dev/null
+++ b/tests/testthat/test-cb_create.R
@@ -0,0 +1,157 @@
+test_that('`cb_create()` returns "li_codebook" and preserves variable order', {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  expect_s3_class(cb, "li_codebook")
+  expect_identical(cb$name, names(fx$data))
+  expect_true(all(c("name", "type", "label", "values", "missing") %in% names(cb)))
+})
+
+test_that("`cb_create()` parses metadata value labels and errors without separators", {
+  fx <- fixture_core()
+
+  expect_error(
+    cb_create(data = fx$data, metadata = fx$metadata),
+    "sep1"
+  )
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  cat_vals <- cb$values[cb$name == "cat_code"]
+  expect_match(cat_vals, "\\[1\\].*Yes")
+  expect_match(cat_vals, "\\[2\\].*No")
+})
+
+test_that("`cb_create()` accepts omitted variable labels from metadata (issue #34)", {
+  meta_no_var_lab <- data.frame(
+    name = "cat_code",
+    val_labels = "1 = Yes; 2 = No"
+  )
+
+  cb <- expect_no_error(
+    cb_create(
+      data = fixture_core()$data,
+      metadata = meta_no_var_lab,
+      .var_label = NULL,
+      .val_labs_sep1 = " = ",
+      .val_labs_sep2 = "; "
+    )
+  )
+
+  expect_false("label" %in% names(cb))
+  expect_true("values" %in% names(cb))
+})
+
+test_that("`cb_create()` accepts omitted value labels from metadata (issue #34)", {
+  meta_no_val_labs <- data.frame(
+    name = "cat_code",
+    label = "Binary category"
+  )
+
+  cb <- expect_no_error(
+    cb_create(
+      data = fixture_core()$data,
+      metadata = meta_no_val_labs,
+      .val_labels = NULL
+    )
+  )
+
+  expect_true("label" %in% names(cb))
+  expect_false("values" %in% names(cb))
+})
+
+test_that("`cb_create()` still adds `values` for data-derived labels when metadata labels are omitted", {
+  dat <- tibble::tibble(cat = factor(c("Yes", "No", "Yes")))
+  meta <- data.frame(name = "cat", label = "Category")
+
+  cb <- cb_create(
+    data = dat,
+    metadata = meta,
+    .val_labels = NULL
+  )
+
+  expect_true("values" %in% names(cb))
+  expect_identical(unname(cb$values), "No; Yes")
+})
+
+test_that("`cb_create()` handles user missing incompatibility according to options", {
+  fx <- fixture_core()
+
+  opts_warn <- cb_create_options(user_missing_incompatible = "warn")
+  expect_warning(
+    cb_create(
+      data = fx$data,
+      metadata = fx$metadata,
+      .val_labs_sep1 = " = ",
+      .val_labs_sep2 = "; ",
+      .user_missing = event_date ~ as.Date("2024-01-01"),
+      .options = opts_warn
+    ),
+    "not compatible"
+  )
+
+  opts_error <- cb_create_options(user_missing_incompatible = "error")
+  expect_error(
+    cb_create(
+      data = fx$data,
+      metadata = fx$metadata,
+      .val_labs_sep1 = " = ",
+      .val_labs_sep2 = "; ",
+      .user_missing = event_date ~ as.Date("2024-01-01"),
+      .options = opts_error
+    ),
+    "not compatible"
+  )
+})
+
+test_that("`cb_create()` `split_var_labels` creates `label_stem` and rejects overlaps", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; ",
+    .split_var_labels = tidyselect::starts_with("mh_")
+  )
+
+  expect_true("label_stem" %in% names(cb))
+  mh_idx <- cb$name %in% c("mh_red", "mh_blue")
+  expect_equal(length(unique(stats::na.omit(cb$label_stem[mh_idx]))), 1)
+  expect_setequal(cb$label[mh_idx], c("Red", "Blue"))
+
+  expect_error(
+    cb_create(
+      data = fx$data,
+      metadata = fx$metadata,
+      .val_labs_sep1 = " = ",
+      .val_labs_sep2 = "; ",
+      .split_var_labels = list(tidyselect::starts_with("mh_"), mh_red)
+    ),
+    "captured by more than one expression"
+  )
+
+  expect_error(
+    cb_create(
+      data = fx$data,
+      metadata = dplyr::select(fx$metadata, name, val_labels),
+      .var_label = NULL,
+      .val_labs_sep1 = " = ",
+      .val_labs_sep2 = "; ",
+      .split_var_labels = tidyselect::starts_with("mh_")
+    ),
+    "requires a `label` column"
+  )
+})
+
diff --git a/tests/testthat/test-cb_get_data.R b/tests/testthat/test-cb_get_data.R
new file mode 100644
index 0000000..557ceb8
--- /dev/null
+++ b/tests/testthat/test-cb_get_data.R
@@ -0,0 +1,46 @@
+test_that("`cb_get_data()` factors format converts labelled vars and zaps user missings", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; ",
+    .user_missing = num_score ~ 99
+  )
+
+  out <- cb_get_data(cb, format = "factors")
+
+  expect_s3_class(out, "data.frame")
+  expect_true(is.factor(out$cat_code))
+  expect_true(any(is.na(out$num_score)))
+  expect_false(any(out$num_score == 99, na.rm = TRUE))
+})
+
+test_that("`cb_get_data()` haven format returns labelled vectors with missing metadata", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; ",
+    .user_missing = num_score ~ 99
+  )
+
+  out <- cb_get_data(cb, format = "haven")
+  # `expect_s3_class()` checks inheritance and reports the actual class on failure
+  expect_s3_class(out$cat_code, "haven_labelled_spss")
+  expect_s3_class(out$num_score, "haven_labelled_spss")
+  expect_true(99 %in% labelled::na_values(out$num_score))
+})
+
+test_that("`cb_get_data()` rejects deprecated format values", {
+  fx <- fixture_core()
+  cb <- cb_create(data = fx$data)
+
+  expect_error(
+    cb_get_data(cb, format = "values"),
+    "no longer supported"
+  )
+})
diff --git a/tests/testthat/test-cb_write.R b/tests/testthat/test-cb_write.R
new file mode 100644
index 0000000..230ab3b
--- /dev/null
+++ b/tests/testthat/test-cb_write.R
@@ -0,0 +1,81 @@
+test_that("`cb_write()` writes workbook and includes core summary sheets", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  out_file <- tempfile(fileext = ".xlsx")
+  on.exit(unlink(out_file), add = TRUE)
+  out <- cb_write(cb, file = out_file, overwrite = TRUE)
+
+  expect_identical(out, out_file)
+  expect_true(file.exists(out_file))
+
+  wb <- openxlsx2::wb_load(out_file)
+  sheets <- openxlsx2::wb_get_sheet_names(wb)
+
+  expect_true(all(c(
+    "Overview",
+    "Summary - Numeric",
+    "Summary - Categorical",
+    "Summary - Text"
+  ) %in% sheets))
+})
+
+test_that("`cb_write()` `stats_numeric` controls numeric summary columns", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  out_file <- tempfile(fileext = ".xlsx")
+  on.exit(unlink(out_file), add = TRUE)
+  cb_write(
+    cb,
+    file = out_file,
+    stats_numeric = list(mean = mean),
+    overwrite = TRUE
+  )
+
+  wb <- openxlsx2::wb_load(out_file)
+  num <- openxlsx2::wb_to_df(wb, sheet = "Summary - Numeric", start_row = 2)
+
+  expect_true("Mean" %in% names(num))
+  expect_false("SD" %in% names(num))
+})
+
+test_that("`cb_write()` `group_by` adds grouped summary sheets", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  out_file <- tempfile(fileext = ".xlsx")
+  on.exit(unlink(out_file), add = TRUE)
+  cb_write(
+    cb,
+    file = out_file,
+    group_by = mh_red,
+    overwrite = TRUE
+  )
+
+  wb <- openxlsx2::wb_load(out_file)
+  sheets <- openxlsx2::wb_get_sheet_names(wb)
+
+  expect_true(all(c(
+    "Grouped Summary - Numeric",
+    "Grouped Summary - Categorical"
+  ) %in% sheets))
+})
diff --git a/tests/testthat/test-redcap.R b/tests/testthat/test-redcap.R
new file mode 100644
index 0000000..ab08bf9
--- /dev/null
+++ b/tests/testthat/test-redcap.R
@@ -0,0 +1,27 @@
+test_that("`cb_create_redcap()` checkbox relabeling and missing propagation", {
+  fx <- fixture_redcap()
+
+  cb <- cb_create_redcap(
+    data = fx$data,
+    metadata = fx$metadata,
+    .user_missing = ~ 9,
+    .options = cb_create_redcap_options(
+      checkbox_resp_values = TRUE,
+      propagate_checkbox_missings = TRUE
+    )
+  )
+
+  expect_s3_class(cb, "li_codebook")
+  expect_true(all(c("q1___0", "q1___1", "q1___9") %in% cb$name))
+
+  vals_0 <- cb$values[cb$name == "q1___0"]
+  vals_9 <- cb$values[cb$name == "q1___9"]
+  expect_match(vals_0, "Option A")
+  expect_match(vals_9, "Not asked")
+
+  dat_haven <- cb_get_data(cb, format = "haven")
+  expect_equal(as.numeric(dat_haven$q1___0[3]), 9)
+  expect_equal(as.numeric(dat_haven$q1___1[3]), 9)
+  expect_true(9 %in% labelled::na_values(dat_haven$q1___0))
+  expect_true(9 %in% labelled::na_values(dat_haven$q1___1))
+})
diff --git a/tests/testthat/test-spss.R b/tests/testthat/test-spss.R
new file mode 100644
index 0000000..ac9f7f2
--- /dev/null
+++ b/tests/testthat/test-spss.R
@@ -0,0 +1,131 @@
+spss_issue32_vars <- c(
+  "num_na_vals",
+  "num_na_range",
+  "cat_na_vals_unlabelled",
+  "cat_na_vals_labelled",
+  "cat_na_range",
+  "txt_na_vals"
+)
+
+spss_issue32_cat_vars <- c(
+  "cat_na_vals_unlabelled",
+  "cat_na_vals_labelled",
+  "cat_na_range"
+)
+
+test_that("`cb_create_spss()` imports variable labels and value labels for issue #32 fixture", {
+  dat <- fixture_spss()
+  cb <- suppressWarnings(cb_create_spss(dat))
+
+  expect_s3_class(cb, "li_codebook")
+  expect_identical(cb$name, names(dat))
+
+  expected_labels <- c(
+    num_na_vals = "Numeric NA values",
+    num_na_range = "Numeric NA range",
+    cat_na_vals_unlabelled = "Categorical NA values (unlabelled missing)",
+    cat_na_vals_labelled = "Categorical NA values (labelled missing)",
+    cat_na_range = "Categorical NA range",
+    txt_na_vals = "Text NA values"
+  )
+
+  expect_mapequal(setNames(cb$label, cb$name), expected_labels)
+
+  expect_match(cb$values[cb$name == "cat_na_vals_unlabelled"], "\\[1\\].*Yes")
+  expect_match(cb$values[cb$name == "cat_na_vals_unlabelled"], "\\[2\\].*No")
+  expect_match(cb$user_missings[cb$name == "cat_na_vals_labelled"], "\\[-9\\].*Refused")
+
+  expect_mapequal(
+    attr(cb, "vals_by_label")$cat_na_vals_unlabelled,
+    c(Yes = 1, No = 2)
+  )
+})
+
+test_that("`cb_create_spss()` imports user missings from `na_values` and `na_range` (issue #32)", {
+  dat <- fixture_spss()
+  expect_warning(
+    cb <- cb_create_spss(dat),
+    "User missing ranges will be treated as discrete user missing values"
+  )
+
+  attr_user_miss <- attr(cb, "user_missing")
+  attr_user_miss_names <- names(attr_user_miss)
+  if (is.null(attr_user_miss_names)) attr_user_miss_names <- character()
+
+  expect_setequal(attr_user_miss_names, spss_issue32_vars)
+
+  if (length(attr_user_miss_names)) {
+    expect_in(-9, attr_user_miss$num_na_vals)
+    expect_in(-9, attr_user_miss$num_na_range)
+    expect_in(-9, attr_user_miss$cat_na_vals_unlabelled)
+    expect_in(-9, attr_user_miss$cat_na_vals_labelled)
+    expect_in(-9, attr_user_miss$cat_na_range)
+    expect_in("-9", as.character(attr_user_miss$txt_na_vals))
+  }
+
+  expect_true("user_missings" %in% names(cb))
+  expect_true(all(!is.na(cb$user_missings[match(spss_issue32_vars, cb$name)])))
+})
+
+test_that("`cb_summarize_categorical()` with `detail_missing = TRUE` flags SPSS user missings", {
+  cb <- suppressWarnings(cb_create_spss(fixture_spss()))
+  out <- cb_summarize_categorical(cb, detail_missing = TRUE)
+  out_cat <- out[out$name %in% spss_issue32_cat_vars, ]
+
+  missing_rows <- out_cat[out_cat$is_missing, c("name", "n", "pct_of_missing")]
+  expect_setequal(unique(missing_rows$name), spss_issue32_cat_vars)
+  expect_true(all(missing_rows$n == 1L))
+  expect_true(all(missing_rows$pct_of_missing == 1))
+
+  valid_rows <- out_cat[!out_cat$is_missing, ]
+  expect_true(all(valid_rows$pct_of_valid == 0.5))
+
+  refused <- out_cat[
+    out_cat$name == "cat_na_vals_labelled" & grepl("Refused", out_cat$value, fixed = TRUE),
+  ]
+  expect_equal(nrow(refused), 1)
+  expect_true(refused$is_missing)
+  expect_equal(refused$n, 1L)
+})
+
+test_that("`cb_summarize_categorical()` with `detail_missing = FALSE` does not duplicate labelled user missings", {
+  cb <- suppressWarnings(cb_create_spss(fixture_spss()))
+  out <- cb_summarize_categorical(cb, detail_missing = FALSE)
+  out_cat <- out[out$name %in% spss_issue32_cat_vars, ]
+
+  missing_names <- out_cat$name[out_cat$value == "(Missing)"]
+  expect_setequal(missing_names, spss_issue32_cat_vars)
+
+  expect_false(any(
+    out_cat$name == "cat_na_vals_labelled" & grepl("Refused", out_cat$value, fixed = TRUE)
+  ))
+})
+
+test_that("`cb_summarize_text()` detailed missing rows keep SPSS user missing values", {
+  cb <- suppressWarnings(cb_create_spss(fixture_spss()))
+  out <- cb_summarize_text(cb, detail_missing = TRUE)
+
+  miss <- out[out$name == "txt_na_vals" & out$is_missing, ]
+
+  expect_equal(nrow(miss), 1)
+  expect_false(is.na(miss$value))
+  expect_match(miss$value, "-9", fixed = TRUE)
+  expect_equal(miss$n, 1L)
+  expect_equal(miss$pct_of_missing, 1)
+})
+
+test_that("`cb_create_spss()` + `cb_get_data()` factors zaps SPSS user missing values", {
+  cb <- suppressWarnings(cb_create_spss(fixture_spss()))
+  out <- cb_get_data(cb, format = "factors")
+
+  expect_true(any(is.na(out$num_na_vals)))
+  expect_true(any(is.na(out$num_na_range)))
+  expect_true(any(is.na(out$cat_na_vals_unlabelled)))
+  expect_true(any(is.na(out$cat_na_vals_labelled)))
+  expect_true(any(is.na(out$cat_na_range)))
+  expect_true(any(is.na(out$txt_na_vals)))
+
+  expect_false(any(out$num_na_vals == -9, na.rm = TRUE))
+  expect_false(any(out$num_na_range == -9, na.rm = TRUE))
+  expect_false(any(out$txt_na_vals == "-9", na.rm = TRUE))
+})
diff --git a/tests/testthat/test-summarize.R b/tests/testthat/test-summarize.R
new file mode 100644
index 0000000..01265b0
--- /dev/null
+++ b/tests/testthat/test-summarize.R
@@ -0,0 +1,165 @@
+test_that("`cb_summarize_numeric()` returns expected columns", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  num <- cb_summarize_numeric(cb)
+  expect_true(all(c("name", "valid_n", "valid_pct", "mean", "SD") %in% names(num)))
+})
+
+test_that("`cb_summarize_numeric()` returns `NULL` when no numeric vars", {
+  dat_chr <- data.frame(a = c("x", "y", NA), b = c("m", "m", "n"))
+  cb_chr <- cb_create(dat_chr)
+  expect_warning(out <- cb_summarize_numeric(cb_chr), "No numeric variables")
+  expect_null(out)
+})
+
+test_that("`cb_summarize_numeric()` supports grouping", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  num_group <- cb_summarize_numeric(cb, group_by = mh_red)
+  expect_true("mh_red" %in% names(num_group))
+  expect_gt(nrow(num_group), 0)
+})
+
+test_that("`cb_summarize_numeric()` supports numeric grouping variables (issue #31)", {
+  dat <- data.frame(
+    grp = c(1, 2, 1, 2),
+    num = c(1, 2, 3, 4)
+  )
+
+  cb <- cb_create(dat)
+
+  num_group <- NULL
+  expect_no_error(
+    num_group <- cb_summarize_numeric(cb, group_by = grp)
+  )
+  # unconditional: a `NULL` result must fail here rather than be silently skipped
+  expect_true("grp" %in% names(num_group))
+})
+
+test_that("`cb_summarize_numeric()` `stats` controls included statistic columns", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  num <- cb_summarize_numeric(
+    cb,
+    stats = list(mean = mean)
+  )
+
+  expect_true(all(c("valid_n", "valid_pct", "mean") %in% names(num)))
+  expect_false("SD" %in% names(num))
+  expect_false("median" %in% names(num))
+})
+
+test_that("`cb_summarize_numeric()` accepts non-default custom stats functions", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  num <- cb_summarize_numeric(
+    cb,
+    stats = list(q25 = \(x) quantile(x, 0.25, na.rm = TRUE))
+  )
+
+  expect_true("q25" %in% names(num))
+  score <- num[num$name == "num_score", "q25", drop = TRUE]
+  expect_equal(score, quantile(fx$data$num_score, 0.25, na.rm = TRUE))
+})
+
+test_that(
+  "`cb_summarize_categorical()` `detail_missing` toggles detailed missing columns", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; ",
+    .user_missing = cat_code ~ c(Skipped = 2)
+  )
+
+  cat_detail <- cb_summarize_categorical(cb, detail_missing = TRUE)
+  expect_true("is_missing" %in% names(cat_detail))
+  expect_true("pct_of_missing" %in% names(cat_detail))
+
+  cat_simple <- cb_summarize_categorical(cb, detail_missing = FALSE)
+  expect_false("is_missing" %in% names(cat_simple))
+  expect_false("pct_of_missing" %in% names(cat_simple))
+})
+
+test_that("`cb_summarize_categorical()` supports grouping", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; ",
+    .user_missing = cat_code ~ c(Skipped = 2)
+  )
+
+  cat_group <- cb_summarize_categorical(cb, group_by = mh_red)
+  expect_true("mh_red" %in% names(cat_group))
+  expect_gt(nrow(cat_group), 0)
+})
+
+test_that("`cb_summarize_text()` `detail_missing` toggles detailed missing columns", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; ",
+    .user_missing = txt_note ~ c(Skipped = "SKIP")
+  )
+
+  txt_detail <- cb_summarize_text(cb, detail_missing = TRUE)
+  expect_true("is_missing" %in% names(txt_detail))
+  expect_true("pct_of_missing" %in% names(txt_detail))
+
+  txt_simple <- cb_summarize_text(cb, detail_missing = FALSE)
+  expect_false("is_missing" %in% names(txt_simple))
+  expect_false("pct_of_missing" %in% names(txt_simple))
+})
+
+test_that("`cb_summarize_text()` truncates displayed values with `n_text_vals`", {
+  fx <- fixture_core()
+
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  txt <- cb_summarize_text(cb, n_text_vals = 1, detail_missing = FALSE)
+
+  txt_note <- txt[txt$name == "txt_note", ]
+  expect_true(any(grepl("other values", txt_note$value, fixed = TRUE)))
+})
+
diff --git a/tests/testthat/test-validation.R b/tests/testthat/test-validation.R
new file mode 100644
index 0000000..5b4a630
--- /dev/null
+++ b/tests/testthat/test-validation.R
@@ -0,0 +1,66 @@
+test_that("`cb_create()` and `cb_create_redcap()` enforce options class", {
+  fx <- fixture_core()
+
+  expect_error(
+    cb_create(
+      data = fx$data,
+      metadata = fx$metadata,
+      .options = cb_create_redcap_options()
+    ),
+    "Did you mean to call"
+  )
+
+  rc <- fixture_redcap()
+  expect_error(
+    cb_create_redcap(
+      data = rc$data,
+      metadata = rc$metadata,
+      .options = cb_create_options()
+    ),
+    "must be created from"
+  )
+})
+
+test_that("`cb_create()` validates `.user_missing` argument type", {
+  fx <- fixture_core()
+
+  expect_error(
+    cb_create(
+      data = fx$data,
+      metadata = fx$metadata,
+      .val_labs_sep1 = " = ",
+      .val_labs_sep2 = "; ",
+      .user_missing = c(98, 99)
+    ),
+    "must be a formula or list of formulas"
+  )
+})
+
+test_that("`cb_write()` validates `group_rows` arguments", {
+  fx <- fixture_core()
+  cb <- cb_create(
+    data = fx$data,
+    metadata = fx$metadata,
+    .val_labs_sep1 = " = ",
+    .val_labs_sep2 = "; "
+  )
+
+  expect_error(
+    cb_write(
+      cb,
+      file = tempfile(fileext = ".xlsx"),
+      group_rows = mh_red
+    ),
+    "group_by"
+  )
+
+  expect_error(
+    cb_write(
+      cb,
+      file = tempfile(fileext = ".xlsx"),
+      group_by = mh_red,
+      group_rows = cat_code
+    ),
+    "must also be included in"
+  )
+})
diff --git a/vignettes/lighthouse-codebook.Rmd b/vignettes/lighthouse-codebook.Rmd
index 2e5cc8e..2e17c07 100644
--- a/vignettes/lighthouse-codebook.Rmd
+++ b/vignettes/lighthouse-codebook.Rmd
@@ -105,11 +105,11 @@ are detailed in the documentation for `cb_create()` and `cb_write()`.
 Numeric and categorical data summaries can be grouped by one or more variables by 
 specifying them in the `group_by` argument to `cb_write()`.
 ```r
-cb_create(data, metadata) |>
-  cb_write("cb.xlsx", group_by = treatment_group)
+cb <- cb_create(data, metadata)
 
-cb_create(data, metadata) |>
-  cb_write("cb.xlsx", group_by = c(treatment_group, timepoint, age_group))
+cb_write(cb, "cb.xlsx", group_by = treatment_group)
+
+cb_write(cb, "cb.xlsx", group_by = c(treatment_group, timepoint, age_group))
 ```
 
 By default, values for each subgroup are shown in separate columns, with decked 
@@ -117,7 +117,7 @@ heads if more than one grouping variable is specified. However, some or all grou
 variables can instead be shown in rows using the `group_rows` argument.
 ```r
 # show `treatment_group` in columns and `timepoint` in rows
-cb_create(data, metadata) |>
+cb |>
   cb_write(
     "cb.xlsx", 
     group_by = c(treatment_group, timepoint),
@@ -129,7 +129,7 @@ summary tabs using the `group_rows_numeric` and `group_rows_categorical` argumen
 ```r
 # for numeric summary, show `treatment_group` in columns and `timepoint` in rows;
 # for categorical summary, show all grouping variables in columns
-cb_create(data, metadata) |>
+cb |>
   cb_write(
     "cb.xlsx", 
     group_by = c(treatment_group, timepoint),
@@ -138,7 +138,7 @@ cb_create(data, metadata) |>
 
 # for numeric summary, show all grouping variables in rows; 
 # for categorical summary, show `treatment_group` in rows
-cb_create(data, metadata) |>
+cb |>
   cb_write(
     "cb.xlsx", 
     group_by = c(treatment_group, timepoint),
@@ -147,6 +147,33 @@ cb_create(data, metadata) |>
   )
 ```
 
+### Statistics for numeric summaries
+
+Summary statistics shown on numeric summary tabs can be customized using the
+`stats_numeric` argument to `cb_write()`. This argument takes a named list of
+functions, where names are used as column names.
+
+For any function that has a `na.rm` argument, `cb_write()` will automatically
+set `na.rm = TRUE`. Anonymous functions can also be used. If an anonymous
+function wraps a function that takes a `na.rm` argument, explicitly set
+`na.rm = TRUE` inside the wrapper, as in the `q25` and `q75` examples below.
+
+```r
+cb <- cb_create(data, metadata)
+
+cb |>
+  cb_write(
+    "cb.xlsx",
+    stats_numeric = list(
+      mean = mean,
+      SD = sd,
+      q25 = \(x) quantile(x, 0.25, na.rm = TRUE),
+      q75 = \(x) quantile(x, 0.75, na.rm = TRUE),
+      IQR = IQR
+    )
+  )
+```
+
 ### User missing values
 
 User missing values (also known as nonresponse codes, reserve codes, or special