Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ docs
inst/doc
/doc/
/Meta/
.vscode/launch.json
..Rcheck/00check.log
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: lighthouse.codebook
Title: Summarize Datasets for Lighthouse Institute Projects
Version: 0.3.2
Version: 0.4.0
Authors@R: c(
person("Casey", "Sarapas",
email = "ccsarapas@chestnut.org",
Expand All @@ -14,11 +14,10 @@ Depends:
Imports:
cli,
data.table,
dplyr,
dplyr (>= 1.2.0),
haven,
labelled,
lighthouse,
moments,
openxlsx2,
purrr,
rlang,
Expand All @@ -35,7 +34,9 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
URL: https://github.com/ccsarapas/lighthouse.codebook, https://ccsarapas.github.io/lighthouse.codebook/
BugReports: https://github.com/ccsarapas/lighthouse.codebook/issues
Suggests:
Suggests:
knitr,
rmarkdown
rmarkdown,
testthat (>= 3.0.0)
VignetteBuilder: knitr

9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,17 @@ export(cb_summarize_categorical)
export(cb_summarize_numeric)
export(cb_summarize_text)
export(cb_write)
export(kurtosis)
export(max_if_any)
export(min_if_any)
export(se_mean)
export(skew)
export(spread)
importFrom(lighthouse,"%<-%")
importFrom(lighthouse,glue_chr)
importFrom(lighthouse,max_if_any)
importFrom(lighthouse,min_if_any)
importFrom(lighthouse,se_mean)
importFrom(lighthouse,untidyselect)
importFrom(tidyselect,all_of)
importFrom(tidyselect,any_of)
24 changes: 24 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
# lighthouse.codebook 0.4.0

## Added

* Summary statistics returned by `cb_summarize_numeric()` can now be specified using the new `stats` argument. Summary statistics included on the numeric summary tab of workbooks written by `cb_write()` can likewise be specified using the `stats_numeric` argument.

* A handful of stats helpers for use in the new `stats` / `stats_numeric` arguments, including `skew()`, `kurtosis()`, `spread()`, and re-exports from the lighthouse package `min_if_any()`, `max_if_any()`, and `se_mean()`.

## Fixed

* User missing values defined in SPSS datasets or `"haven_labelled"` vectors are now consistently recognized (fixes #32).

* Specfying numeric grouping columns no longer throws an error (fixes #31).

* `cb_create()` no longer throws an error when `.val_labels = NULL` (fixes #34).

* `cb_summarize_categorical()` no longer throws an error when a variable contains no value labels.

## Internal

* Added a test suite.

* Dropped dependency on moments package.

# lighthouse.codebook 0.3.2

## Fixed
Expand Down
63 changes: 49 additions & 14 deletions R/cb_create.r
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ cb_create <- function(data,
incompatible = .options$user_missing_incompatible
) |>
cb_add_lookups(sep1 = .val_labs_sep1, sep2 = .val_labs_sep2) |>
cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |>
cb_label_data(conflict = .options$user_missing_conflict) |>
cb_zap_data() |>
cb_add_dims() |>
Expand Down Expand Up @@ -378,7 +379,10 @@ cb_user_missings <- function(cb,
user_missing,
match_type = TRUE,
incompatible = c("ignore", "warn", "error")) {
if (is.null(user_missing)) return(set_attrs(cb, user_missing = list()))
if (is.null(user_missing)) {
attr(cb, "user_missing") <- attr(cb, "user_missing") %||% list()
return(cb)
}
user_missing <- check_user_missing_arg(user_missing)
for (um in user_missing) {
cb <- cb_user_missings_across(
Expand Down Expand Up @@ -410,8 +414,12 @@ lookups_from_string <- function(cb, data, sep1, sep2) {
labs <- x[, 2]
setNames(vals, labs)
}
# if (!("values" %in% names(cb))) {
# return(setNames(character(), character()))
# }
# early return if values col doesn't exist, length 0, or all NA
if (!("values" %in% names(cb)) || !sum(!is.na(cb$values))) return(list())
val_labels <- na.omit(setNames(cb$values, cb$name))
if (!length(val_labels)) return(val_labels)
if (is.null(sep1) || is.null(sep2)) {
cli::cli_abort(
"{.arg sep1} and {.arg sep2} must be specified if value labels are provided."
Expand Down Expand Up @@ -493,28 +501,44 @@ reconcile_missing_labels <- function(val_labs,
list(val_labs = val_labs, missings = missings)
}

cb_reconcile_missing_labels <- function(cb,
conflict = c("val_label", "missing_label")) {
conflict <- match.arg(conflict)
vals_by_label <- attr(cb, "vals_by_label")
user_missing <- attr(cb, "user_missing")
factors <- attr(cb, "factors") %||% character()
vars <- setdiff(intersect(names(vals_by_label), names(user_missing)), factors)
if (!length(vars)) return(cb)

for (nm in vars) {
if (is.null(vals_by_label[[nm]]) || is.null(user_missing[[nm]])) next
vals <- reconcile_missing_labels(
val_labs = sort(vals_by_label[[nm]]),
missings = sort(user_missing[[nm]]),
conflict = conflict
)
user_missing[[nm]] <- sort(vals$missings)
vals_by_label[[nm]] <- vals$val_labs[
order(vals$val_labs %in% vals$missings, vals$val_labs)
]
}
set_attrs(cb, vals_by_label = vals_by_label, user_missing = user_missing)
}

cb_label_data <- function(cb, conflict = c("val_label", "missing_label")) {
data <- attr(cb, "data")
vals_by_label <- attr(cb, "vals_by_label")
factors <- attr(cb, "factors")
user_missing <- attr(cb, "user_missing")
label_vars <- unique(c(names(vals_by_label), names(user_missing)))
label_vars <- union(names(vals_by_label), names(user_missing))
for (nm in label_vars) {
missings <- sort(user_missing[[nm]])
if (nm %in% factors) {
data[[nm]] <- to_labelled_chr(data[[nm]], na_values = missings)
} else {
val_labs <- sort(vals_by_label[[nm]])
if (!is.null(val_labs) && !is.null(missings)) {
vals <- reconcile_missing_labels(
val_labs = val_labs,
missings = missings,
conflict = conflict
)
missings <- sort(vals$missings)
val_labs <- vals$val_labs[
order(vals$val_labs %in% vals$missings, vals$val_labs)
]
val_labs <- val_labs[order(val_labs %in% missings, val_labs)]
}
data[[nm]] <- haven::labelled_spss(
data[[nm]], labels = val_labs, na_values = missings
Expand Down Expand Up @@ -581,12 +605,23 @@ cb_add_val_labels_col <- function(cb, user_missing_col = c("if_any", "yes", "no"
} else {
missings <- NULL
}
val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors"))
dplyr::mutate(cb, values = val_labs, user_missings = missings)
include_values <- "values" %in% names(cb) ||
any(vapply(val_labs, \(x) !is.null(x) && length(x) > 0, logical(1)))
if (include_values) {
val_labs <- string_from_lookups(val_labs, no_prefix = attr(cb, "factors"))
cb <- dplyr::mutate(cb, values = val_labs)
}
if (user_missing_col) {
cb <- dplyr::mutate(cb, user_missings = missings)
}
cb
}

cb_split_labels_col <- function(cb, split_var_labels = NULL) {
if (is.null(split_var_labels)) return(cb)
if (!("label" %in% names(cb))) {
cli::cli_abort("{.arg .split_var_labels} requires a {.code label} column.")
}
if (rlang::is_call(split_var_labels) && rlang::call_name(split_var_labels) == "list") {
split_var_labels <- rlang::call_args(split_var_labels)
} else {
Expand Down
1 change: 1 addition & 0 deletions R/cb_create_redcap.r
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ cb_create_redcap <- function(data,
cb <- cb_propagate_user_missing_checkboxes_rc(cb)
}
cb |>
cb_reconcile_missing_labels(conflict = .options$user_missing_conflict) |>
cb_label_data(conflict = .options$user_missing_conflict) |>
cb_zap_data() |>
cb_add_dims() |>
Expand Down
17 changes: 9 additions & 8 deletions R/cb_create_spss.r
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ cb_user_missings_from_spss <- function(cb) {
user_missings[[nm]] <- c(user_missings[[nm]], miss_vals)
}
}

if (length(user_missings)) attr(cb, "user_missing") <- user_missings
cb
}
Expand All @@ -106,13 +106,14 @@ cb_update_labels_spss <- function(cb,
user_missing_conflict = c("val_label", "missing_label"),
user_missing_incompatible = c("ignore", "warn", "error")) {
data <- attr(cb, "data")
cb <- cb |>
cb_user_missings_from_spss() |>
cb_user_missings(
user_missing = user_missing,
incompatible = user_missing_incompatible
) |>
cb_add_lookups()
cb <- cb |>
cb_user_missings_from_spss() |>
cb_user_missings(
user_missing = user_missing,
incompatible = user_missing_incompatible
) |>
cb_add_lookups() |>
cb_reconcile_missing_labels(conflict = user_missing_conflict)
if (is.null(user_missing)) {
cb |> set_attrs(data_labelled = data)
} else {
Expand Down
Loading
Loading