Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: TwoSampleMR
Title: Two Sample MR Functions and Interface to MRC Integrative
Epidemiology Unit OpenGWAS Database
Version: 0.7.8
Version: 0.7.9
Authors@R: c(
person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0920-1055")),
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# TwoSampleMR v0.7.9

(Release date 2026-06-24)

* Fixed inflated bootstrap standard errors (and p-values) in `mr_mode()` and `mr_rucker_bootstrap()` introduced in v0.6.30. The pre-generated `rnorm()` matrix was filled column-by-column while the per-SNP means and standard errors recycled element-wise, so each bootstrap draw was taken from the wrong SNP's distribution; the means and SEs are now laid out with `rep(..., each = nboot)` so each column draws from its own SNP. Point estimates were unaffected. (thanks @peterk87 reported in #684)
* Fixed `mr_rucker_bootstrap()` which errored ("values must be length 1") because it accessed the per-combination result (`$rucker`, `$Q`, `$res`, `$selected`) directly while `mr_rucker()` returns a list with one element per exposure-outcome combination; it now unwraps the first element.
* Fixed `mr_rucker_cooksdistance()`, which had the same return-shape problem: `$cooksdistance` was `NULL` so the Cook's distance filtering loop never ran and a malformed object was returned; it now unwraps the first element.
* Added regression tests for the `mr_mode()` and `mr_rucker_bootstrap()` bootstrap standard errors and for `mr_rucker_cooksdistance()`.

# TwoSampleMR v0.7.8

(Release date 2026-06-10)
Expand Down
20 changes: 16 additions & 4 deletions R/mr_mode.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,27 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
n <- length(BetaIV.in)
nphi <- length(phi)

#Pre-generate all random values as matrices (nboot x n)
#mean and sd recycle in lockstep (both length n), filling column-by-column
#Pre-generate all random values as matrices (nboot x n). The matrix is
#filled column-by-column, so each column j (SNP j) must draw from
#N(BetaIV.in[j], seBetaIV.in[j]); rep(..., each = nboot) lays the means and
#sds out so that the first nboot values use SNP 1, the next nboot use SNP 2,
#and so on. Passing the length-n vectors directly would recycle them and
#scramble which SNP each draw comes from.
BetaIV.boot_mat <- matrix(
stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 1]),
stats::rnorm(
nboot * n,
mean = rep(BetaIV.in, each = nboot),
sd = rep(seBetaIV.in[, 1], each = nboot)
),
nrow = nboot,
ncol = n
)
BetaIV.boot_NOME_mat <- matrix(
stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 2]),
stats::rnorm(
nboot * n,
mean = rep(BetaIV.in, each = nboot),
sd = rep(seBetaIV.in[, 2], each = nboot)
),
nrow = nboot,
ncol = n
)
Expand Down
37 changes: 28 additions & 9 deletions R/rucker.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,17 +283,33 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) {
nsnp <- nrow(dat)
Qthresh <- parameters$Qthresh

# Main result
rucker <- mr_rucker(dat, parameters)

# Pre-generate all random values as matrices (nboot x nsnp)
# Main result. mr_rucker() returns a list with one element per
# exposure-outcome combination; the bootstrap operates on a single combination
# (see nsnp/dat2 below), so unwrap the first (only) element here and below so
# the $rucker/$Q/$res/$selected accessors work directly.
rucker <- mr_rucker(dat, parameters)[[1]]

# Pre-generate all random values as matrices (nboot x nsnp). The matrix is
# filled column-by-column, so each column (SNP) must draw from that SNP's
# N(beta, se); rep(..., each = nboot) lays the means and sds out so the first
# nboot values belong to SNP 1, the next nboot to SNP 2, etc. Passing the
# length-nsnp vectors directly would recycle them and scramble which SNP each
# draw comes from.
boot_exp <- matrix(
stats::rnorm(nboot * nsnp, mean = dat$beta.exposure, sd = dat$se.exposure),
stats::rnorm(
nboot * nsnp,
mean = rep(dat$beta.exposure, each = nboot),
sd = rep(dat$se.exposure, each = nboot)
),
nrow = nboot,
ncol = nsnp
)
boot_out <- matrix(
stats::rnorm(nboot * nsnp, mean = dat$beta.outcome, sd = dat$se.outcome),
stats::rnorm(
nboot * nsnp,
mean = rep(dat$beta.outcome, each = nboot),
sd = rep(dat$se.outcome, each = nboot)
),
nrow = nboot,
ncol = nsnp
)
Expand All @@ -303,7 +319,7 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) {
for (i in seq_len(nboot)) {
dat2$beta.exposure <- boot_exp[i, ]
dat2$beta.outcome <- boot_out[i, ]
l[[i]] <- mr_rucker(dat2, parameters)
l[[i]] <- mr_rucker(dat2, parameters)[[1]]
}

modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE)
Expand Down Expand Up @@ -565,7 +581,10 @@ mr_rucker_cooksdistance <- function(dat, parameters = default_parameters()) {
}

dat_orig <- dat
rucker_orig <- mr_rucker(dat_orig, parameters)
# mr_rucker() returns one element per exposure-outcome combination; this
# function operates on a single combination, so unwrap the first element here
# and below so the $cooksdistance/$selected/$rucker accessors work directly.
rucker_orig <- mr_rucker(dat_orig, parameters)[[1]]
rucker <- rucker_orig
cooks_threshold <- 4 / nrow(dat)
index <- rucker_orig$cooksdistance > cooks_threshold
Expand All @@ -575,7 +594,7 @@ mr_rucker_cooksdistance <- function(dat, parameters = default_parameters()) {
while (any(index) && sum(!index) > 3) {
dat <- dat[!index, ]
cooks_threshold <- 4 / nrow(dat)
rucker <- mr_rucker(dat, parameters)
rucker <- mr_rucker(dat, parameters)[[1]]
l[[i]] <- rucker
index <- rucker$cooksdistance > cooks_threshold
i <- i + 1
Expand Down
43 changes: 43 additions & 0 deletions tests/testthat/test_mr_mode.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Regression tests for issue #684.
#
# In v0.6.30-v0.7.8 the mr_mode() bootstrap pre-generated its random draws with
# matrix(rnorm(nboot * n, mean = BetaIV, sd = seBetaIV), nrow = nboot, ncol = n)
# rnorm() recycles the length-n mean/sd vectors element-wise, but matrix() fills
# column-by-column, so each column (SNP) did not keep its own mean/SE - the SNPs'
# parameters were scrambled across the matrix. This left the point estimates
# untouched but inflated the bootstrap standard errors (and hence p-values). The
# fix lays the parameters out with rep(..., each = nboot) so column j draws from
# SNP j. Correct SEs for this data are ~0.1; the bug inflated them ~3x (and much
# more when the number of SNPs divides nboot).

load(system.file("extdata", "test_commondata.RData", package = "TwoSampleMR"))

test_that("mr_mode() weighted/penalised bootstrap SEs are not inflated (issue #684)", {
set.seed(1234)
m <- mr_mode(dat)

# Point estimates are unaffected by the bug.
expect_equal(m$b[m$method == "Simple mode"], 0.340, tolerance = 1e-2)
expect_equal(m$b[m$method == "Weighted mode"], 0.379, tolerance = 1e-2)

# Weighted and penalised modes are corrupted at any number of SNPs because the
# bootstrapped betas get mispaired with the fixed-order weights.
expect_equal(m$se[m$method == "Weighted mode"], 0.105, tolerance = 1e-1)
expect_equal(m$se[m$method == "Penalised mode"], 0.109, tolerance = 1e-1)

# Anti-inflation ceiling: the buggy SEs were ~0.30+, the correct ones ~0.10.
expect_lt(m$se[m$method == "Weighted mode"], 0.2)
expect_lt(m$se[m$method == "Penalised mode"], 0.2)
})

test_that("mr_mode() simple-mode SE is not inflated when nsnp divides nboot (issue #684)", {
# The simple mode is only corrupted when gcd(nsnp, nboot) > 1. Use nsnp = 50,
# which divides the default nboot = 1000 - the worst case, where the buggy
# fill made every bootstrap draw come from a single SNP.
dat50 <- dat[1:50, ]
set.seed(1234)
m <- mr_mode(dat50)

# Correct simple-mode SE is ~0.17; the bug inflated it to ~0.57.
expect_lt(m$se[m$method == "Simple mode"], 0.35)
})
55 changes: 55 additions & 0 deletions tests/testthat/test_rucker.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Tests for mr_rucker_bootstrap().
#
# 1. mr_rucker() returns a list with one element per exposure-outcome combination,
# but mr_rucker_bootstrap() accesses the per-combination result directly
# ($rucker/$Q/$res/$selected). It must unwrap [[1]], otherwise it errors with
# "values must be length 1, but FUN(X[[1]]) result is length 0".
#
# 2. Regression test for issue #684: the bootstrap draws are pre-generated as a
# matrix and were filled in a way that scrambled which SNP each draw came from,
# inflating the bootstrapped standard errors. The effect is worst when the
# number of SNPs divides nboot; with nsnp = 50 and nboot = 1000 the Rucker
# mean/median SE was inflated from ~0.06-0.09 to ~0.50.

load(system.file("extdata", "test_commondata.RData", package = "TwoSampleMR"))

test_that("mr_rucker_bootstrap() runs and returns the expected structure", {
set.seed(1234)
rb <- suppressMessages(mr_rucker_bootstrap(dat))

expect_named(
rb,
c("rucker", "res", "bootstrap_estimates", "boostrap_q", "q_plot", "e_plot")
)
# $rucker is the unwrapped single-combination result, not the combo list.
expect_true(all(c("rucker", "Q", "res", "selected") %in% names(rb$rucker)))
expect_equal(nrow(rb$bootstrap_estimates), 1000L)
expect_true(all(c("Rucker mean", "Rucker median") %in% rb$res$Method))
})

test_that("mr_rucker_bootstrap() SEs are not inflated when nsnp divides nboot (issue #684)", {
# nsnp = 50 divides the default nboot = 1000 - the worst case for the bug.
dat50 <- dat[1:50, ]
set.seed(1234)
rb <- suppressMessages(mr_rucker_bootstrap(dat50))

mean_se <- rb$res$SE[rb$res$Method == "Rucker mean"]
median_se <- rb$res$SE[rb$res$Method == "Rucker median"]

# Correct SEs are ~0.06-0.09; the bug inflated them to ~0.50.
expect_lt(mean_se, 0.25)
expect_lt(median_se, 0.25)
})

test_that("mr_rucker_cooksdistance() runs and returns a well-formed result", {
# Like mr_rucker_bootstrap(), this accessed the per-combination result
# directly ($cooksdistance/$selected/$rucker) while mr_rucker() returns a
# combo list, so the Cook's distance loop silently never ran and a malformed
# object was returned. It must unwrap [[1]].
set.seed(1234)
cd <- suppressMessages(mr_rucker_cooksdistance(dat))

expect_true(is.data.frame(cd$selected))
expect_true(is.data.frame(cd$rucker))
expect_equal(cd$selected$Method, "Rucker (CD)")
})