Skip to content

internal haven dataset returned by cb_get_data() lacks variable labels #41

Description

@ccsarapas

The behavior differs between codebooks made with cb_create() and those made with cb_create_spss(). For cb_create(), the haven dataset has no variable labels at all:

# Reprex 1: codebook built with cb_create() from a plain tibble plus a
# separate metadata table holding the variable labels.
library(lighthouse.codebook)

# Example data: an integer column, a factor, and a copy of the integer column.
dat <- tibble::tibble(
  var1 = 1:3,
  var2 = factor(c("a", "b", "c")),
  var3 = var1
)
# One row per variable: its name and the label to attach.
meta <- tibble::tibble(
  name = c("var1", "var2", "var3"), 
  label = c("var1 numeric", "var2 factor", "var3 numeric")
)

## variable labels are in codebook (see the `label` column in the output)
## (the outer parentheses print the codebook as it is assigned)
(cb <- dat |>
  cb_create(metadata = meta, .val_labels = NULL))
# # A tibble: 3 × 5
#   name  type        label        values  missing
#   <chr> <chr>       <chr>        <chr>     <dbl>
# 1 var1  integer     var1 numeric NA            0
# 2 var2  categorical var2 factor  a; b; c       0
# 3 var3  integer     var3 numeric NA            0

## but not in the haven dataset: var_label() returns NULL for every variable
cb |>
  cb_get_data("haven") |>
  labelled::var_label()
# $var1
# NULL

# $var2
# NULL

# $var3
# NULL

For cb_create_spss(), variable labels are present, except on any variables specified in .user_missing (var1 and var2 in the example below):

# Reprex 2: codebook built with cb_create_spss() from haven-labelled data.
# Every column carries a variable label (`label =`) on input.
dat_spss <- tibble::tibble(
  # haven_labelled vector: value labels plus a variable label
  var1 = haven::labelled(
    c(1, 2, 3, 1),
    labels = c(a = 1, b = 2, c = 3),
    label = "haven_labelled"
  ),
  # haven_labelled_spss vector: 99 is already declared missing via `na_values`
  var2 = haven::labelled_spss(
    c(1, 2, 3, 99),
    labels = c(a = 1, b = 2, c = 3, decline = 99),
    na_values = 99,
    label = "haven_labelled_spss with pre-defined missing"
  ),
  # copy of var1; not included in `.user_missing` below
  var3 = var1
)

## variable labels are in codebook (see the `label` column in the output)
## `.user_missing` covers var1 and var2 only; var3 is left out
(cb_spss <- dat_spss |>
  cb_create_spss(.user_missing = var1:var2 ~ 99)) 
# # A tibble: 3 × 6
#   name  type        label                           values user_missings missing
#   <chr> <chr>       <chr>                           <chr>  <chr>           <dbl>
# 1 var1  categorical haven_labelled                  [1] a… [99]             0   
# 2 var2  categorical haven_labelled_spss with pre-d… [1] a… [99] decline     0.25
# 3 var3  categorical haven_labelled                  [1] a… NA               0   

## but are stripped from haven dataset for variables in `.user_missing`
## (var1 and var2 come back NULL; var3 keeps its label)
cb_spss |> 
  cb_get_data("haven") |>
  labelled::var_label()
# $var1
# NULL

# $var2
# NULL

# $var3
# [1] "haven_labelled"

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions