MRCIEU · remlapmot · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: TwoSampleMR
 Title: Two Sample MR Functions and Interface to MRC Integrative
     Epidemiology Unit OpenGWAS Database
-Version: 0.7.8
+Version: 0.7.9
 Authors@R: c(
     person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-0920-1055")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,12 @@
+# TwoSampleMR v0.7.9
+
+(Release date 2026-06-24)
+
+* Fixed inflated bootstrap standard errors (and p-values) in `mr_mode()` and `mr_rucker_bootstrap()` introduced in v0.6.30. The pre-generated `rnorm()` matrix was filled column-by-column while the per-SNP means and standard errors recycled element-wise, so each bootstrap draw was taken from the wrong SNP's distribution; the means and SEs are now laid out with `rep(..., each = nboot)` so each column draws from its own SNP. Point estimates were unaffected. (thanks @peterk87 reported in #684)
+* Fixed `mr_rucker_bootstrap()` which errored ("values must be length 1") because it accessed the per-combination result (`$rucker`, `$Q`, `$res`, `$selected`) directly while `mr_rucker()` returns a list with one element per exposure-outcome combination; it now unwraps the first element.
+* Fixed `mr_rucker_cooksdistance()`, which had the same return-shape problem: `$cooksdistance` was `NULL` so the Cook's distance filtering loop never ran and a malformed object was returned; it now unwraps the first element.
+* Added regression tests for the `mr_mode()` and `mr_rucker_bootstrap()` bootstrap standard errors and for `mr_rucker_cooksdistance()`.
+
 # TwoSampleMR v0.7.8
 
 (Release date 2026-06-10)

diff --git a/R/mr_mode.R b/R/mr_mode.R
@@ -58,15 +58,27 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
     n <- length(BetaIV.in)
     nphi <- length(phi)
 
-    #Pre-generate all random values as matrices (nboot x n)
-    #mean and sd recycle in lockstep (both length n), filling column-by-column
+    #Pre-generate all random values as matrices (nboot x n). The matrix is
+    #filled column-by-column, so each column j (SNP j) must draw from
+    #N(BetaIV.in[j], seBetaIV.in[j]); rep(..., each = nboot) lays the means and
+    #sds out so that the first nboot values use SNP 1, the next nboot use SNP 2,
+    #and so on. Passing the length-n vectors directly would recycle them and
+    #scramble which SNP each draw comes from.
     BetaIV.boot_mat <- matrix(
-      stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 1]),
+      stats::rnorm(
+        nboot * n,
+        mean = rep(BetaIV.in, each = nboot),
+        sd = rep(seBetaIV.in[, 1], each = nboot)
+      ),
       nrow = nboot,
       ncol = n
     )
     BetaIV.boot_NOME_mat <- matrix(
-      stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 2]),
+      stats::rnorm(
+        nboot * n,
+        mean = rep(BetaIV.in, each = nboot),
+        sd = rep(seBetaIV.in[, 2], each = nboot)
+      ),
       nrow = nboot,
       ncol = n
     )

diff --git a/R/rucker.R b/R/rucker.R
@@ -283,17 +283,33 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) {
   nsnp <- nrow(dat)
   Qthresh <- parameters$Qthresh
 
-  # Main result
-  rucker <- mr_rucker(dat, parameters)
-
-  # Pre-generate all random values as matrices (nboot x nsnp)
+  # Main result. mr_rucker() returns a list with one element per
+  # exposure-outcome combination; the bootstrap operates on a single combination
+  # (see nsnp/dat2 below), so unwrap the first (only) element here and below so
+  # the $rucker/$Q/$res/$selected accessors work directly.
+  rucker <- mr_rucker(dat, parameters)[[1]]
+
+  # Pre-generate all random values as matrices (nboot x nsnp). The matrix is
+  # filled column-by-column, so each column (SNP) must draw from that SNP's
+  # N(beta, se); rep(..., each = nboot) lays the means and sds out so the first
+  # nboot values belong to SNP 1, the next nboot to SNP 2, etc. Passing the
+  # length-nsnp vectors directly would recycle them and scramble which SNP each
+  # draw comes from.
   boot_exp <- matrix(
-    stats::rnorm(nboot * nsnp, mean = dat$beta.exposure, sd = dat$se.exposure),
+    stats::rnorm(
+      nboot * nsnp,
+      mean = rep(dat$beta.exposure, each = nboot),
+      sd = rep(dat$se.exposure, each = nboot)
+    ),
     nrow = nboot,
     ncol = nsnp
   )
   boot_out <- matrix(
-    stats::rnorm(nboot * nsnp, mean = dat$beta.outcome, sd = dat$se.outcome),
+    stats::rnorm(
+      nboot * nsnp,
+      mean = rep(dat$beta.outcome, each = nboot),
+      sd = rep(dat$se.outcome, each = nboot)
+    ),
     nrow = nboot,
     ncol = nsnp
   )
@@ -303,7 +319,7 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) {
   for (i in seq_len(nboot)) {
     dat2$beta.exposure <- boot_exp[i, ]
     dat2$beta.outcome <- boot_out[i, ]
-    l[[i]] <- mr_rucker(dat2, parameters)
+    l[[i]] <- mr_rucker(dat2, parameters)[[1]]
   }
 
   modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE)
@@ -565,7 +581,10 @@ mr_rucker_cooksdistance <- function(dat, parameters = default_parameters()) {
   }
 
   dat_orig <- dat
-  rucker_orig <- mr_rucker(dat_orig, parameters)
+  # mr_rucker() returns one element per exposure-outcome combination; this
+  # function operates on a single combination, so unwrap the first element here
+  # and below so the $cooksdistance/$selected/$rucker accessors work directly.
+  rucker_orig <- mr_rucker(dat_orig, parameters)[[1]]
   rucker <- rucker_orig
   cooks_threshold <- 4 / nrow(dat)
   index <- rucker_orig$cooksdistance > cooks_threshold
@@ -575,7 +594,7 @@ mr_rucker_cooksdistance <- function(dat, parameters = default_parameters()) {
   while (any(index) && sum(!index) > 3) {
     dat <- dat[!index, ]
     cooks_threshold <- 4 / nrow(dat)
-    rucker <- mr_rucker(dat, parameters)
+    rucker <- mr_rucker(dat, parameters)[[1]]
     l[[i]] <- rucker
     index <- rucker$cooksdistance > cooks_threshold
     i <- i + 1

diff --git a/tests/testthat/test_mr_mode.R b/tests/testthat/test_mr_mode.R
@@ -0,0 +1,43 @@
+# Regression tests for issue #684.
+#
+# In v0.6.30-v0.7.8 the mr_mode() bootstrap pre-generated its random draws with
+#   matrix(rnorm(nboot * n, mean = BetaIV, sd = seBetaIV), nrow = nboot, ncol = n)
+# rnorm() recycles the length-n mean/sd vectors element-wise, but matrix() fills
+# column-by-column, so each column (SNP) did not keep its own mean/SE - the SNPs'
+# parameters were scrambled across the matrix. This left the point estimates
+# untouched but inflated the bootstrap standard errors (and hence p-values). The
+# fix lays the parameters out with rep(..., each = nboot) so column j draws from
+# SNP j. Correct SEs for this data are ~0.1; the bug inflated them ~3x (and much
+# more when the number of SNPs divides nboot).
+
+load(system.file("extdata", "test_commondata.RData", package = "TwoSampleMR"))
+
+test_that("mr_mode() weighted/penalised bootstrap SEs are not inflated (issue #684)", {
+  set.seed(1234)
+  m <- mr_mode(dat)
+
+  # Point estimates are unaffected by the bug.
+  expect_equal(m$b[m$method == "Simple mode"], 0.340, tolerance = 1e-2)
+  expect_equal(m$b[m$method == "Weighted mode"], 0.379, tolerance = 1e-2)
+
+  # Weighted and penalised modes are corrupted at any number of SNPs because the
+  # bootstrapped betas get mispaired with the fixed-order weights.
+  expect_equal(m$se[m$method == "Weighted mode"], 0.105, tolerance = 1e-1)
+  expect_equal(m$se[m$method == "Penalised mode"], 0.109, tolerance = 1e-1)
+
+  # Anti-inflation ceiling: the buggy SEs were ~0.30+, the correct ones ~0.10.
+  expect_lt(m$se[m$method == "Weighted mode"], 0.2)
+  expect_lt(m$se[m$method == "Penalised mode"], 0.2)
+})
+
+test_that("mr_mode() simple-mode SE is not inflated when nsnp divides nboot (issue #684)", {
+  # The simple mode is only corrupted when gcd(nsnp, nboot) > 1. Use nsnp = 50,
+  # which divides the default nboot = 1000 - the worst case, where the buggy
+  # fill made every bootstrap draw come from a single SNP.
+  dat50 <- dat[1:50, ]
+  set.seed(1234)
+  m <- mr_mode(dat50)
+
+  # Correct simple-mode SE is ~0.17; the bug inflated it to ~0.57.
+  expect_lt(m$se[m$method == "Simple mode"], 0.35)
+})
diff --git a/tests/testthat/test_rucker.R b/tests/testthat/test_rucker.R
@@ -0,0 +1,55 @@
+# Tests for mr_rucker_bootstrap().
+#
+# 1. mr_rucker() returns a list with one element per exposure-outcome combination,
+#    but mr_rucker_bootstrap() accesses the per-combination result directly
+#    ($rucker/$Q/$res/$selected). It must unwrap [[1]], otherwise it errors with
+#    "values must be length 1, but FUN(X[[1]]) result is length 0".
+#
+# 2. Regression test for issue #684: the bootstrap draws are pre-generated as a
+#    matrix and were filled in a way that scrambled which SNP each draw came from,
+#    inflating the bootstrapped standard errors. The effect is worst when the
+#    number of SNPs divides nboot; with nsnp = 50 and nboot = 1000 the Rucker
+#    mean/median SE was inflated from ~0.06-0.09 to ~0.50.
+
+load(system.file("extdata", "test_commondata.RData", package = "TwoSampleMR"))
+
+test_that("mr_rucker_bootstrap() runs and returns the expected structure", {
+  set.seed(1234)
+  rb <- suppressMessages(mr_rucker_bootstrap(dat))
+
+  expect_named(
+    rb,
+    c("rucker", "res", "bootstrap_estimates", "boostrap_q", "q_plot", "e_plot")
+  )
+  # $rucker is the unwrapped single-combination result, not the combo list.
+  expect_true(all(c("rucker", "Q", "res", "selected") %in% names(rb$rucker)))
+  expect_equal(nrow(rb$bootstrap_estimates), 1000L)
+  expect_true(all(c("Rucker mean", "Rucker median") %in% rb$res$Method))
+})
+
+test_that("mr_rucker_bootstrap() SEs are not inflated when nsnp divides nboot (issue #684)", {
+  # nsnp = 50 divides the default nboot = 1000 - the worst case for the bug.
+  dat50 <- dat[1:50, ]
+  set.seed(1234)
+  rb <- suppressMessages(mr_rucker_bootstrap(dat50))
+
+  mean_se <- rb$res$SE[rb$res$Method == "Rucker mean"]
+  median_se <- rb$res$SE[rb$res$Method == "Rucker median"]
+
+  # Correct SEs are ~0.06-0.09; the bug inflated them to ~0.50.
+  expect_lt(mean_se, 0.25)
+  expect_lt(median_se, 0.25)
+})
+
+test_that("mr_rucker_cooksdistance() runs and returns a well-formed result", {
+  # Like mr_rucker_bootstrap(), this accessed the per-combination result
+  # directly ($cooksdistance/$selected/$rucker) while mr_rucker() returns a
+  # combo list, so the Cook's distance loop silently never ran and a malformed
+  # object was returned. It must unwrap [[1]].
+  set.seed(1234)
+  cd <- suppressMessages(mr_rucker_cooksdistance(dat))
+
+  expect_true(is.data.frame(cd$selected))
+  expect_true(is.data.frame(cd$rucker))
+  expect_equal(cd$selected$Method, "Rucker (CD)")
+})