From 04b85e361f5d81df25940de5cfd8e2c1dc3c6167 Mon Sep 17 00:00:00 2001
From: Yining97 <liyining1997@gmail.com>
Date: Tue, 23 Jun 2026 16:17:21 -0400
Subject: [PATCH 1/3] Export af (not MAF) in top_loci; add medianAbsCorr; PIP
 screen after harmonization

top_loci now exports the effect-allele frequency as an af column instead of
MAF, and the GwasSumStats/QtlSumStats methods actually wire the QC'd entry
frequency into it (it was never passed before, so the old MAF column was
always NA). af is the harmonized, complemented frequency; NA when absent.

fineMappingPipeline gains a medianAbsCorr argument (default NULL), threaded to
susie_get_cs for OR-logic credible-set purity alongside minAbsCorr.

The optional PIP screen now runs after panel-vs-sumstats allele harmonization,
so it operates on the harmonized variant set.

Tests: update the MAF->af schema assertions and add coverage for af passthrough,
medianAbsCorr OR-logic, and the PIP-screen ordering.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 R/FineMappingEntry.R                      | 20 ++++----
 R/fineMappingPipeline.R                   | 31 +++++++++++-
 R/fineMappingWrappers.R                   |  4 +-
 R/sumstatsQc.R                            | 18 +++----
 tests/testthat/test_fineMappingWrappers.R | 59 ++++++++++++++++++++++-
 tests/testthat/test_sumstatsQc.R          | 34 +++++++++++++
 vignettes/rss-qc.Rmd                      |  2 +-
 7 files changed, 144 insertions(+), 24 deletions(-)

diff --git a/R/FineMappingEntry.R b/R/FineMappingEntry.R
index 298a787a..f948d201 100644
--- a/R/FineMappingEntry.R
+++ b/R/FineMappingEntry.R
@@ -80,7 +80,7 @@ setClass("FineMappingEntry",
 #'   the pipeline's \code{trim} parameter).
 #' @param topLoci Per-variant \code{data.frame} in canonical schema:
 #'   identity columns (\code{variant_id, chrom, pos, A1, A2}), context
-#'   (\code{N, MAF}), marginal columns (\code{marginal_beta,
+#'   (\code{N, af}; effect-allele frequency, never MAF), marginal columns (\code{marginal_beta,
 #'   marginal_se, marginal_z, marginal_p}), posterior columns
 #'   (\code{pip, posterior_mean, posterior_sd, cs_*, cs_*_purity}),
 #'   pipeline stamps (\code{method, gene, event, grange_start,
@@ -268,16 +268,17 @@ setMethod("show", "FineMappingEntry", function(object) {
 }
 
 # Project the canonical wide topLoci to the posterior view: identity +
-# N/MAF + (beta=posterior_mean, se=posterior_sd) + pip + cs_* + signal_cluster
+# N/af + (beta=posterior_mean, se=posterior_sd) + pip + cs_* + signal_cluster
 # + pipeline stamps. Renames `posterior_mean`/`posterior_sd` to `beta`/`se`.
-# Missing optional columns are NA-filled.
+# Exports effect-allele frequency as `af` (never MAF). Missing optional
+# columns are NA-filled.
 # @noRd
 .projectPosteriorView <- function(tl) {
   if (nrow(tl) == 0L) {
     return(data.frame(
       variant_id = character(0), chrom = character(0), pos = integer(0),
       A1 = character(0), A2 = character(0),
-      N = numeric(0), MAF = numeric(0),
+      N = numeric(0), af = numeric(0),
       beta = numeric(0), se = numeric(0), pip = numeric(0),
       stringsAsFactors = FALSE))
   }
@@ -288,7 +289,7 @@ setMethod("show", "FineMappingEntry", function(object) {
     A1              = .tlCol(tl, "A1",         "character"),
     A2              = .tlCol(tl, "A2",         "character"),
     N               = .tlCol(tl, "N",          "numeric"),
-    MAF             = .tlCol(tl, "MAF",        "numeric"),
+    af              = .tlCol(tl, "af",         "numeric"),
     beta            = .tlCol(tl, "posterior_mean", "numeric"),
     se              = .tlCol(tl, "posterior_sd",   "numeric"),
     pip             = .tlCol(tl, "pip",        "numeric"),
@@ -304,16 +305,17 @@ setMethod("show", "FineMappingEntry", function(object) {
   out
 }
 
-# Project to the marginal view: identity + N/MAF + (beta, se, z, p) where
+# Project to the marginal view: identity + N/af + (beta, se, z, p) where
 # beta/se/z/p are the marginal univariate columns renamed from their
-# `marginal_*` storage names. Missing optional columns are NA-filled.
+# `marginal_*` storage names. Exports effect-allele frequency as `af`
+# (never MAF). Missing optional columns are NA-filled.
 # @noRd
 .projectMarginalView <- function(tl) {
   if (nrow(tl) == 0L) {
     return(data.frame(
       variant_id = character(0), chrom = character(0), pos = integer(0),
       A1 = character(0), A2 = character(0),
-      N = numeric(0), MAF = numeric(0),
+      N = numeric(0), af = numeric(0),
       beta = numeric(0), se = numeric(0), z = numeric(0), p = numeric(0),
       stringsAsFactors = FALSE))
   }
@@ -324,7 +326,7 @@ setMethod("show", "FineMappingEntry", function(object) {
     A1         = .tlCol(tl, "A1",            "character"),
     A2         = .tlCol(tl, "A2",            "character"),
     N          = .tlCol(tl, "N",             "numeric"),
-    MAF        = .tlCol(tl, "MAF",           "numeric"),
+    af         = .tlCol(tl, "af",            "numeric"),
     beta       = .tlCol(tl, "marginal_beta", "numeric"),
     se         = .tlCol(tl, "marginal_se",   "numeric"),
     z          = .tlCol(tl, "marginal_z",    "numeric"),
diff --git a/R/fineMappingPipeline.R b/R/fineMappingPipeline.R
index 38416366..928b4a06 100644
--- a/R/fineMappingPipeline.R
+++ b/R/fineMappingPipeline.R
@@ -144,6 +144,10 @@
 #'   \code{0.025}.
 #' @param minAbsCorr Minimum absolute correlation for credible-set
 #'   purity. Default \code{0.8}.
+#' @param medianAbsCorr Optional median absolute correlation for
+#'   credible-set purity, routed to \code{susieR::susie_get_cs}. A set is
+#'   kept if it passes either \code{minAbsCorr} or \code{medianAbsCorr}
+#'   (OR-logic). Default \code{NULL} (off).
 #' @param fineMappingResult Optional existing \code{FineMappingResult}
 #'   to use as a resume cache; tuples already present are not refit.
 #' @param jointSpecification Optional joint-fit specification (NULL by
@@ -584,7 +588,8 @@ setGeneric("fineMappingPipeline",
 .fmPostprocessOne <- function(fit, method, dataX, dataY,
                               coverage, secondaryCoverage, signalCutoff,
                               minAbsCorr, csInput = NULL, af = NULL,
-                              region = NULL, trim = NULL) {
+                              region = NULL, trim = NULL,
+                              medianAbsCorr = NULL) {
   # Inherit `trim` from the calling method's frame if not passed in
   # explicitly. The 10 internal call sites don't currently forward it
   # (they predate the trim knob) so we look it up from the caller. This
@@ -594,12 +599,19 @@ setGeneric("fineMappingPipeline",
     trim <- tryCatch(get("trim", envir = parent.frame()),
                      error = function(e) TRUE)
   }
+  # `medianAbsCorr` is inherited the same way (each public setMethod gains a
+  # `medianAbsCorr = NULL` parameter); NULL is a no-op (OR-logic purity off).
+  if (is.null(medianAbsCorr)) {
+    medianAbsCorr <- tryCatch(get("medianAbsCorr", envir = parent.frame()),
+                              error = function(e) NULL)
+  }
   fits <- setNames(list(fit), method)
   post <- postprocessFinemappingFits(
     fits = fits, dataX = dataX, dataY = dataY,
     af = af, coverage = coverage,
     secondaryCoverage = secondaryCoverage,
     signalCutoff = signalCutoff, minAbsCorr = minAbsCorr,
+    medianAbsCorr = medianAbsCorr,
     region = region,
     csInput = csInput, trim = isTRUE(trim))
   out <- formatFinemappingOutput(post, primaryMethod = method)
@@ -738,6 +750,7 @@ setMethod("fineMappingPipeline", "QtlDataset",
            secondaryCoverage  = c(0.7, 0.5),
            signalCutoff       = 0.025,
            minAbsCorr         = 0.8,
+           medianAbsCorr      = NULL,
            fineMappingResult  = NULL,
            naAction           = c("drop", "impute"),
            verbose            = 1,
@@ -1181,6 +1194,7 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset",
            secondaryCoverage  = c(0.7, 0.5),
            signalCutoff       = 0.025,
            minAbsCorr         = 0.8,
+           medianAbsCorr      = NULL,
            fineMappingResult  = NULL,
            naAction           = c("drop", "impute"),
            verbose            = 1,
@@ -1313,6 +1327,7 @@ setMethod("fineMappingPipeline", "QtlSumStats",
            secondaryCoverage  = c(0.7, 0.5),
            signalCutoff       = 0.025,
            minAbsCorr         = 0.8,
+           medianAbsCorr      = NULL,
            fineMappingResult  = NULL,
            verbose            = 1,
            trim               = TRUE,
@@ -1406,6 +1421,11 @@ setMethod("fineMappingPipeline", "QtlSumStats",
         variantIds <- zn$variantIds
         z <- zn$z
         n <- zn$n
+        # Effect-allele frequency for export as `af` (entry MAF mcol, post-QC
+        # harmonized/complemented); aligned to variantIds, NULL -> af NA.
+        .qmc <- S4Vectors::mcols(entry)
+        afByVar <- if ("MAF" %in% colnames(.qmc))
+          setNames(as.numeric(.qmc$MAF), as.character(.qmc$SNP))[variantIds] else NULL
         ldMat <- .fmLdFromSketch(ldSketch, variantIds)
         names(z) <- variantIds
 
@@ -1441,6 +1461,7 @@ setMethod("fineMappingPipeline", "QtlSumStats",
             secondaryCoverage = secondaryCoverage,
             signalCutoff = signalCutoff,
             minAbsCorr = minAbsCorr,
+            af = afByVar,
             csInput = "Xcorr")
           # The method column on the FineMappingResult carries the bare
           # token (susie / susieInf / susieAsh), independent of which
@@ -1552,6 +1573,7 @@ setMethod("fineMappingPipeline", "GwasSumStats",
            secondaryCoverage = c(0.7, 0.5),
            signalCutoff      = 0.025,
            minAbsCorr        = 0.8,
+           medianAbsCorr     = NULL,
            fineMappingResult = NULL,
            verbose           = 1,
            trim              = TRUE,
@@ -1590,6 +1612,12 @@ setMethod("fineMappingPipeline", "GwasSumStats",
       variantIds <- zn$variantIds
       z <- zn$z
       n <- zn$n
+      # Effect-allele frequency for export as the `af` column (post-QC the
+      # entry carries the harmonized, complemented frequency in its MAF mcol).
+      # Aligned to `variantIds`; NULL when absent -> af exported as NA.
+      .gmc <- S4Vectors::mcols(gr)
+      afByVar <- if ("MAF" %in% colnames(.gmc))
+        setNames(as.numeric(.gmc$MAF), as.character(.gmc$SNP))[variantIds] else NULL
       # Derive a region_id from the entry's GRanges so multi-block
       # genome-wide GWAS sweeps can carry one row per block without
       # tripping (study, method, region_id) uniqueness. Format:
@@ -1651,6 +1679,7 @@ setMethod("fineMappingPipeline", "GwasSumStats",
           secondaryCoverage = secondaryCoverage,
           signalCutoff = signalCutoff,
           minAbsCorr = minAbsCorr,
+          af = afByVar,
           csInput = "Xcorr")
         pushRow(st, tk, region_id, ent)
       }
diff --git a/R/fineMappingWrappers.R b/R/fineMappingWrappers.R
index 9fb8462b..3922cbc1 100644
--- a/R/fineMappingWrappers.R
+++ b/R/fineMappingWrappers.R
@@ -672,7 +672,7 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL,
     A1             = parsed$A1,
     A2             = parsed$A2,
     N              = rep(fitN, nV),
-    MAF            = if (is.null(af)) rep(NA_real_, nV) else as.numeric(af),
+    af             = if (is.null(af)) rep(NA_real_, nV) else as.numeric(af),
     marginal_beta  = marginalBeta,
     marginal_se    = marginalSe,
     marginal_z     = marginalZ,
@@ -742,7 +742,7 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL,
     A1             = character(),
     A2             = character(),
     N              = numeric(),
-    MAF            = numeric(),
+    af             = numeric(),
     marginal_beta  = numeric(),
     marginal_se    = numeric(),
     marginal_z     = numeric(),
diff --git a/R/sumstatsQc.R b/R/sumstatsQc.R
index e206731d..dc1ce961 100644
--- a/R/sumstatsQc.R
+++ b/R/sumstatsQc.R
@@ -2989,20 +2989,12 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
     entryAudit$skipRegionDropped <- before - nrow(df)
   }
 
-  # 4. Optional PIP screen.
-  if (opts$pipCutoffToSkip != 0) {
-    pip <- .applyPipScreen(df, n = opts$nForPip, cutoff = opts$pipCutoffToSkip)
-    df <- pip$df
-    entryAudit$pipScreenSkipped <- isTRUE(pip$skipped)
-    if (isTRUE(pip$skipped)) entryAudit$pipScreenReason <- pip$reason
-  }
-
   if (nrow(df) < 2L) {
     entryAudit$earlyExit <- "fewer than two variants after pre-harmonization QC"
     return(list(gr = .dfToEntryGranges(df), audit = entryAudit))
   }
 
-  # 5. Panel-vs-sumstats allele harmonization.
+  # 4. Panel-vs-sumstats allele harmonization.
   nHarmIn <- nrow(df)
   df <- .matchAgainstSketch(
     df, ldSketch,
@@ -3027,6 +3019,14 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
          " variant(s).")
   }
 
+  # 5. Optional PIP screen (after harmonization, on panel-aligned variants).
+  if (opts$pipCutoffToSkip != 0) {
+    pip <- .applyPipScreen(df, n = opts$nForPip, cutoff = opts$pipCutoffToSkip)
+    df <- pip$df
+    entryAudit$pipScreenSkipped <- isTRUE(pip$skipped)
+    if (isTRUE(pip$skipped)) entryAudit$pipScreenReason <- pip$reason
+  }
+
   # 6. Optional kriging prefilter.
   if (isTRUE(opts$alleleFlipKriging) && nrow(df) >= 2L) {
     nKrIn <- nrow(df)
diff --git a/tests/testthat/test_fineMappingWrappers.R b/tests/testthat/test_fineMappingWrappers.R
index 936d050f..fe5cc06b 100644
--- a/tests/testthat/test_fineMappingWrappers.R
+++ b/tests/testthat/test_fineMappingWrappers.R
@@ -509,7 +509,7 @@ if (!exists(".make_univariate_data", inherits = FALSE)) {
 
 .UNIFIED_TOP_LOCI_COLS <- c(
   "variant_id", "chrom", "pos", "A1", "A2",
-  "N", "MAF",
+  "N", "af",
   "marginal_beta", "marginal_se", "marginal_z", "marginal_p",
   "pip", "posterior_mean", "posterior_sd",
   "cs_95", "cs_70", "cs_50", "cs_95_purity",
@@ -610,7 +610,7 @@ test_that("buildTopLoci returns the exact 22-column schema in order with stable
   expect_true(is.character(out$A1))
   expect_true(is.character(out$A2))
   expect_true(is.numeric(out$N))
-  expect_true(is.numeric(out$MAF))
+  expect_true(is.numeric(out$af))
   expect_true(is.numeric(out$marginal_beta))
   expect_true(is.numeric(out$marginal_se))
   expect_true(is.numeric(out$marginal_z))
@@ -652,6 +652,61 @@ test_that("buildTopLoci emits 22 columns in the fixed order on a non-empty fit",
   expect_equal(unique(out$method), "susie")
 })
 
+test_that("buildTopLoci exports af (not MAF) and carries the supplied af values", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
+  inp <- .fake_fit_and_cs(
+    variant_ids,
+    cs_at_cov = list("0.95" = list(c(1L, 2L)), "0.7" = list(c(1L, 2L)),
+                     "0.5" = list(c(1L, 2L))),
+    pip = c(0.9, 0.9))
+  out <- .runBuildTopLoci(inp, method = "susie", af = c(0.12, 0.87))
+  expect_true("af" %in% names(out))
+  expect_false("MAF" %in% names(out))
+  # Directional effect-allele frequency: value passed through verbatim,
+  # not folded to a minor-allele frequency (0.87 retained, not 0.13).
+  expect_equal(out$af, c(0.12, 0.87))
+})
+
+test_that("buildTopLoci sets af = NA when no af is supplied (no silent coercion)", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
+  inp <- .fake_fit_and_cs(
+    variant_ids,
+    cs_at_cov = list("0.95" = list(c(1L, 2L)), "0.7" = list(c(1L, 2L)),
+                     "0.5" = list(c(1L, 2L))),
+    pip = c(0.9, 0.9))
+  out <- .runBuildTopLoci(inp, method = "susie", af = NULL)
+  expect_true("af" %in% names(out))
+  expect_true(all(is.na(out$af)))
+})
+
+.n_cs95 <- function(post)
+  length(setdiff(unique(as.character(post$top_loci$cs_95)), c(NA, "")))
+
+test_that("postprocessFinemappingFits forwards medianAbsCorr to susie_get_cs (OR-logic admits >= sets)", {
+  d <- .make_univariate_data(seed = 11, effect_idx = c(10, 35))
+  fit <- susieR::susie(d$X, d$y, L = 5)
+  # A very strict min_abs_corr alone vs the same min_abs_corr OR a lenient
+  # median_abs_corr: OR-logic keeps at least as many credible sets.
+  pStrict <- postprocessFinemappingFits(list(susie = fit), dataX = d$X, dataY = d$y,
+                                        coverage = 0.95, minAbsCorr = 0.999,
+                                        medianAbsCorr = NULL)
+  pOr     <- postprocessFinemappingFits(list(susie = fit), dataX = d$X, dataY = d$y,
+                                        coverage = 0.95, minAbsCorr = 0.999,
+                                        medianAbsCorr = 0.1)
+  expect_gte(.n_cs95(pOr), .n_cs95(pStrict))
+})
+
+test_that("postprocessFinemappingFits with medianAbsCorr = NULL is a no-op", {
+  d <- .make_univariate_data(seed = 7, effect_idx = c(20))
+  fit <- susieR::susie(d$X, d$y, L = 5)
+  p1 <- postprocessFinemappingFits(list(susie = fit), dataX = d$X, dataY = d$y,
+                                   coverage = 0.95)
+  p2 <- postprocessFinemappingFits(list(susie = fit), dataX = d$X, dataY = d$y,
+                                   coverage = 0.95, medianAbsCorr = NULL)
+  expect_equal(p1$top_loci$cs_95, p2$top_loci$cs_95)
+  expect_equal(p1$top_loci$af, p2$top_loci$af)
+})
+
 test_that("cs_95 / cs_70 / cs_50 are character strings of the form '<method>_<idx>'", {
   variant_ids <- c("chr1:100:A:G", "chr1:200:C:T", "chr1:300:T:C")
   # Variants 1 and 2 in CS 1 (at 95), variant 3 in CS 2 (at 95). All three
diff --git a/tests/testthat/test_sumstatsQc.R b/tests/testthat/test_sumstatsQc.R
index a7ce10c8..d3b07edd 100644
--- a/tests/testthat/test_sumstatsQc.R
+++ b/tests/testthat/test_sumstatsQc.R
@@ -2648,6 +2648,40 @@ test_that("summaryStatsQc: infoCutoff > 0 with no INFO column errors", {
                "infoCutoff > 0 requires every entry to carry an INFO column")
 })
 
+test_that("summaryStatsQc: PIP screen runs AFTER allele harmonization", {
+  # Entry: three weak panel-matched variants plus one STRONG signal whose
+  # position (9999) is absent from the LD sketch (panel BP = 100..800), so
+  # harmonization drops it. With the screen running *after* harmonization it
+  # never sees the strong off-panel signal, so the remaining weak variants
+  # fail the PIP screen and the region is skipped (empty). (Pre-harmonization
+  # screening would have kept the region on the strength of the off-panel hit.)
+  gr <- .ssQ_makeEntryGr(c("rs1", "rs2", "rs3", "rsX"),
+                         positions = c(100L, 200L, 300L, 9999L))
+  mc <- S4Vectors::mcols(gr)
+  mc$Z <- c(0.2, 0.3, 0.1, 12)   # only the off-panel rsX carries signal
+  S4Vectors::mcols(gr) <- mc
+  ss <- GwasSumStats(study = "g1", entry = list(gr),
+                     genome = "hg19", ldSketch = .ssQ_makeHandle())
+  out <- summaryStatsQc(ss, pipCutoffToSkip = 0.5, nCutoff = 0)
+  snps <- as.character(S4Vectors::mcols(out$entry[[1L]])$SNP)
+  expect_false("rsX" %in% snps)            # dropped by harmonization
+  expect_equal(length(out$entry[[1L]]), 0L) # screen (post-harmonization) skips the region
+})
+
+test_that("summaryStatsQc: PIP screen off leaves the harmonized set intact", {
+  # Same harmonized variants, screen disabled (pipCutoffToSkip = 0): the three
+  # panel-matched variants survive (behavior-invariance for the screen-off path).
+  gr <- .ssQ_makeEntryGr(c("rs1", "rs2", "rs3", "rsX"),
+                         positions = c(100L, 200L, 300L, 9999L))
+  mc <- S4Vectors::mcols(gr); mc$Z <- c(0.2, 0.3, 0.1, 12)
+  S4Vectors::mcols(gr) <- mc
+  ss <- GwasSumStats(study = "g1", entry = list(gr),
+                     genome = "hg19", ldSketch = .ssQ_makeHandle())
+  out <- summaryStatsQc(ss, pipCutoffToSkip = 0, nCutoff = 0)
+  snps <- as.character(S4Vectors::mcols(out$entry[[1L]])$SNP)
+  expect_setequal(snps, c("rs1", "rs2", "rs3"))
+})
+
 test_that(".deriveBetaSeFromZ: derives BETA+SE when entry has Z+MAF+N only", {
   df <- data.frame(
     SNP = paste0("rs", 1:3),
diff --git a/vignettes/rss-qc.Rmd b/vignettes/rss-qc.Rmd
index b73cb79f..f52b020e 100644
--- a/vignettes/rss-qc.Rmd
+++ b/vignettes/rss-qc.Rmd
@@ -35,7 +35,7 @@ entry point:
 | Stage | Mechanism |
 |---|---|
 | Variant-content QC (column standardization, indels, strand-ambiguous, MAF/INFO/N filters, p-value & effect-size sanity checks, optional dbSNP / liftover) | Delegated to `MungeSumstats::format_sumstats()` from inside `summaryStatsQc()`. |
-| Pecotmr-specific QC (`skipRegion`, optional PIP screen, panel harmonization against the `ldSketch`, optional SLALOM/DENTIST LD-mismatch QC, optional RAISS imputation) | Implemented inside `summaryStatsQc()`. |
+| Pecotmr-specific QC (`skipRegion`, panel harmonization against the `ldSketch`, optional PIP screen on the harmonized variants, optional SLALOM/DENTIST LD-mismatch QC, optional RAISS imputation) | Implemented inside `summaryStatsQc()`. |
 
 `summaryStatsQc()` is the **only** QC entry point. It takes a
 `GwasSumStats` or `QtlSumStats` (a DFrame collection that already carries

From 5990a5ab8b50f1ac3c203d7151d1b369ab586800 Mon Sep 17 00:00:00 2001
From: Yining97 <liyining1997@gmail.com>
Date: Tue, 23 Jun 2026 16:42:18 -0400
Subject: [PATCH 2/3] Skip medianAbsCorr test when susieR lacks median_abs_corr

The CI's conda-forge susieR (CRAN build) has no median_abs_corr in
susie_get_cs; guard the new OR-logic test so it runs only where susieR
supports it (GitHub-HEAD) and skips otherwise.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/testthat/test_fineMappingWrappers.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/testthat/test_fineMappingWrappers.R b/tests/testthat/test_fineMappingWrappers.R
index fe5cc06b..7c3e6560 100644
--- a/tests/testthat/test_fineMappingWrappers.R
+++ b/tests/testthat/test_fineMappingWrappers.R
@@ -683,6 +683,11 @@ test_that("buildTopLoci sets af = NA when no af is supplied (no silent coercion)
   length(setdiff(unique(as.character(post$top_loci$cs_95)), c(NA, "")))
 
 test_that("postprocessFinemappingFits forwards medianAbsCorr to susie_get_cs (OR-logic admits >= sets)", {
+  # medianAbsCorr is only meaningful when the installed susieR's susie_get_cs
+  # accepts median_abs_corr (GitHub-HEAD susieR; the CRAN/conda-forge build
+  # does not yet). Skip rather than error where it is unavailable.
+  skip_if_not("median_abs_corr" %in% names(formals(susieR::susie_get_cs)),
+              "installed susieR has no median_abs_corr support")
   d <- .make_univariate_data(seed = 11, effect_idx = c(10, 35))
   fit <- susieR::susie(d$X, d$y, L = 5)
   # A very strict min_abs_corr alone vs the same min_abs_corr OR a lenient

From 80d0fe30147173637b53ceaf5b0649fedf513674 Mon Sep 17 00:00:00 2001
From: Yining97 <liyining1997@gmail.com>
Date: Tue, 23 Jun 2026 17:41:42 -0400
Subject: [PATCH 3/3] Reimplement krigingOutlierQc on susieR::kriging_rss

Replace the hand-rolled fixed-ridge conditional with susieR's kriging RSS
diagnostic: estimate the LD-mismatch scale via estimate_s_rss() and take the
per-variant conditional distribution from kriging_rss(), flagging outliers on
its standardized residual. krigingOutlierQc now takes n (threaded from the QC
call site) and errors clearly when susieR lacks the diagnostic; ridge is gone.

Better calibrated than the fixed ridge, which over-flagged in the tails (chr21
reference: 28 -> 8 flagged, the 8 a strict subset).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 R/sumstatsQc.R                   | 59 +++++++++++++++++++-------------
 tests/testthat/test_sumstatsQc.R | 23 ++++++++++++-
 2 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/R/sumstatsQc.R b/R/sumstatsQc.R
index dc1ce961..7db0e417 100644
--- a/R/sumstatsQc.R
+++ b/R/sumstatsQc.R
@@ -2331,45 +2331,56 @@ ldMismatchQc <- function(zScore, R = NULL, X = NULL, nSample = NULL,
 #' Kriging-style LD-consistency outlier QC
 #'
 #' Flags variants whose observed z-score is inconsistent with the value
-#' predicted from its LD neighbours. For \code{z ~ N(0, R)} the leave-one-out
-#' conditional distribution of \code{z_i} given the rest has mean
-#' \code{-(1/Omega_ii) * Omega_{i,-i} z_{-i}} and variance \code{1/Omega_ii},
-#' where \code{Omega = R^{-1}}. The standardized residual is ~\code{N(0,1)} when
-#' the z-scores and LD are mutually consistent, so a large residual marks an
-#' allele-flip / LD-mismatch outlier. RSS-only helper, opt-in via
-#' \code{alleleFlipKriging}; never wired into \code{alleleQc()} /
-#' \code{matchRefPanel()}.
+#' predicted from its LD neighbours, using susieR's kriging diagnostic.
+#' \code{susieR::kriging_rss()} computes the leave-one-out conditional
+#' distribution of each \code{z_i} given the rest, with the LD-mismatch scale
+#' \code{s} estimated by \code{susieR::estimate_s_rss()}; its standardized
+#' residual (\code{z_std_diff}) is ~\code{N(0,1)} when z-scores and LD agree, so
+#' a large residual marks an allele-flip / LD-mismatch outlier. RSS-only helper,
+#' opt-in via \code{alleleFlipKriging}; never wired into \code{alleleQc()} /
+#' \code{matchRefPanel()}. Requires a susieR that provides \code{kriging_rss()}
+#' and \code{estimate_s_rss()}.
 #'
 #' @param zScore Numeric vector of harmonized z-scores.
 #' @param R Square LD correlation matrix aligned to \code{zScore}.
+#' @param n Sample size, forwarded to \code{susieR::estimate_s_rss()} and
+#'   \code{susieR::kriging_rss()}.
 #' @param variantIds Optional variant IDs for the diagnostics table.
 #' @param pThreshold Two-sided p-value cutoff for flagging an outlier
 #'   (default \code{5e-8}).
-#' @param ridge Small diagonal added to \code{R} before inversion for numerical
-#'   stability (default \code{1e-3}).
 #' @return A list with \code{outlier} (logical vector) and \code{diagnostics}
 #'   (data frame of per-variant predicted z, residual, statistic, p-value, and
 #'   outlier flag).
 #' @importFrom stats pnorm
 #' @export
-krigingOutlierQc <- function(zScore, R, variantIds = NULL,
-                             pThreshold = 5e-8, ridge = 1e-3) {
+krigingOutlierQc <- function(zScore, R, n, variantIds = NULL,
+                             pThreshold = 5e-8) {
   zScore <- as.numeric(zScore)
   m <- length(zScore)
   if (is.null(R) || !is.matrix(R) || nrow(R) != m || ncol(R) != m) {
     stop("krigingOutlierQc requires a square LD matrix aligned to zScore.")
   }
+  if (missing(n) || length(n) != 1L || is.na(n) || !is.finite(n) || n <= 0) {
+    stop("krigingOutlierQc requires a single positive sample size 'n'.")
+  }
+  if (!requireNamespace("susieR", quietly = TRUE) ||
+      !all(c("estimate_s_rss", "kriging_rss") %in% getNamespaceExports("susieR"))) {
+    stop("krigingOutlierQc requires a susieR that provides estimate_s_rss() and ",
+         "kriging_rss(); the installed susieR does not. Install a susieR with the ",
+         "kriging RSS diagnostic, or disable alleleFlipKriging.")
+  }
   if (is.null(variantIds)) variantIds <- rownames(R)
-  # Regularize so the precision matrix is well-defined for collinear panels.
-  Omega <- solve(R + diag(ridge, m))
-  d <- diag(Omega)
-  omegaZ <- as.numeric(Omega %*% zScore)
-  condMean <- -(omegaZ - d * zScore) / d
-  condVar <- 1 / d
-  residual <- zScore - condMean
-  statistic <- residual / sqrt(condVar)
-  pValue <- 2 * pnorm(-abs(statistic))
-  outlier <- !is.na(pValue) & pValue < pThreshold
+  # susieR's kriging RSS diagnostic: estimate the LD-mismatch scale, then take
+  # the per-variant conditional distribution. `z_std_diff` is the standardized
+  # residual we threshold (same p-value rule as before).
+  s  <- tryCatch(susieR::estimate_s_rss(z = zScore, R = R, n = n),
+                 error = function(e) 0)
+  cd <- susieR::kriging_rss(z = zScore, R = R, n = n, s = s)$conditional_dist
+  condMean  <- as.numeric(cd$condmean)
+  statistic <- as.numeric(cd$z_std_diff)
+  residual  <- zScore - condMean
+  pValue    <- 2 * stats::pnorm(-abs(statistic))
+  outlier   <- !is.na(pValue) & pValue < pThreshold
   list(
     outlier = outlier,
     diagnostics = data.frame(
@@ -3035,7 +3046,9 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
     dosage <- t(SummarizedExperiment::assay(block, "dosage"))
     colnames(dosage) <- df$SNP
     R <- computeLd(dosage, method = "sample")
-    kr <- krigingOutlierQc(df$Z, R, variantIds = df$SNP)
+    nKrig <- if (!is.null(opts$nForPip) && is.finite(opts$nForPip)) opts$nForPip
+             else stats::median(as.numeric(df$N), na.rm = TRUE)
+    kr <- krigingOutlierQc(df$Z, R, n = nKrig, variantIds = df$SNP)
     nKr <- sum(kr$outlier)
     if (nKr > 0L) df <- df[!kr$outlier, , drop = FALSE]
     entryAudit$krigingOutliersDropped <- nKr
diff --git a/tests/testthat/test_sumstatsQc.R b/tests/testthat/test_sumstatsQc.R
index d3b07edd..74bdac47 100644
--- a/tests/testthat/test_sumstatsQc.R
+++ b/tests/testthat/test_sumstatsQc.R
@@ -67,6 +67,8 @@ test_that(".resolveZMismatchQc rejects stale rss_qc and other invalid tokens", {
 # ===========================================================================
 
 test_that("krigingOutlierQc flags an LD-inconsistent variant and spares the rest", {
+  skip_if_not("kriging_rss" %in% getNamespaceExports("susieR"),
+              "installed susieR has no kriging_rss")
   m <- 6
   rho <- 0.7
   R <- matrix(rho, m, m); diag(R) <- 1
@@ -74,7 +76,7 @@ test_that("krigingOutlierQc flags an LD-inconsistent variant and spares the rest
   rownames(R) <- colnames(R) <- ids
   z <- rep(3, m)
   z[3] <- -8                       # strongly inconsistent with its neighbours
-  kr <- krigingOutlierQc(z, R, variantIds = ids)
+  kr <- krigingOutlierQc(z, R, n = 1000, variantIds = ids)
   expect_true(kr$outlier[3])
   expect_false(any(kr$outlier[-3]))
   expect_equal(nrow(kr$diagnostics), m)
@@ -82,6 +84,25 @@ test_that("krigingOutlierQc flags an LD-inconsistent variant and spares the rest
                     colnames(kr$diagnostics)))
 })
 
+test_that("krigingOutlierQc statistic matches susieR::kriging_rss z_std_diff", {
+  skip_if_not("kriging_rss" %in% getNamespaceExports("susieR"),
+              "installed susieR has no kriging_rss")
+  set.seed(7); m <- 10
+  R <- cov2cor(crossprod(matrix(rnorm(m * m), m)))
+  ids <- paste0("1:", seq_len(m) * 100, ":A:G")
+  rownames(R) <- colnames(R) <- ids
+  z <- as.numeric(R %*% rnorm(m)); z[4] <- 9
+  kr  <- krigingOutlierQc(z, R, n = 5000, variantIds = ids)
+  s   <- susieR::estimate_s_rss(z = z, R = R, n = 5000)
+  ref <- susieR::kriging_rss(z = z, R = R, n = 5000, s = s)$conditional_dist
+  expect_equal(kr$diagnostics$statistic, as.numeric(ref$z_std_diff), tolerance = 1e-8)
+  expect_equal(kr$diagnostics$predicted, as.numeric(ref$condmean),   tolerance = 1e-8)
+})
+
+test_that("krigingOutlierQc requires a positive sample size n", {
+  expect_error(krigingOutlierQc(c(1, 2, 3), diag(3)), "positive sample size")
+})
+
 
 test_that("alignVariantNames correctly aligns variant names", {
   # Test case 1: Matching variant names