From 0721711d9e01f7ce50870c1b85eea9ab5fa0fda5 Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Mon, 22 Jun 2026 13:48:27 -0700
Subject: [PATCH 1/4] pipeline fixes

---
 DESCRIPTION                         |   1 -
 NAMESPACE                           |   5 +-
 R/ctwasPipeline.R                   | 776 ++++++++++++++++++++++++----
 R/genotypeIo.R                      |  16 +-
 R/sumstatsQc.R                      | 535 ++++++++++++++-----
 tests/testthat/test_ctwasPipeline.R | 668 ++++++++++++++++++++----
 tests/testthat/test_sumstatsQc.R    | 221 +++++---
 7 files changed, 1804 insertions(+), 418 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d19b6a0e..29d11ae1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,7 +20,6 @@ Imports:
     IRanges,
     MASS,
     Matrix,
-    MungeSumstats,
     Rsamtools,
     S4Vectors,
     SummarizedExperiment,
diff --git a/NAMESPACE b/NAMESPACE
index 614d0059..c58aa18a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -50,7 +50,11 @@ export(computeQtlEnrichment)
 export(computeSldscAnnotSd)
 export(computeSldscMRef)
 export(ctwasBimfileLoader)
+export(assembleCtwasInputs)
 export(ctwasPipeline)
+export(estCtwasParam)
+export(finemapCtwasRegions)
+export(screenCtwasRegions)
 export(dentist)
 export(dentistSingleWindow)
 export(dprAdaptiveGibbsWeights)
@@ -352,7 +356,6 @@ importFrom(IRanges,DataFrameList)
 importFrom(IRanges,IRanges)
 importFrom(IRanges,findOverlaps)
 importFrom(MASS,ginv)
-importFrom(MungeSumstats,standardise_header)
 importFrom(Rsamtools,TabixFile)
 importFrom(Rsamtools,asBcf)
 importFrom(Rsamtools,headerTabix)
diff --git a/R/ctwasPipeline.R b/R/ctwasPipeline.R
index c536eafa..75c41198 100644
--- a/R/ctwasPipeline.R
+++ b/R/ctwasPipeline.R
@@ -1,7 +1,7 @@
-#' @title Causal TWAS Pipeline (cTWAS, single LD block)
-#' @description Per-LD-block pipeline that hands a
-#'   \code{\link{GwasSumStats}} of GWAS Z-scores together with per-gene
-#'   TWAS weights and the shared LD sketch to
+#' @title Causal TWAS Pipeline (cTWAS, multi LD block)
+#' @description Pipeline that hands a per-block set of
+#'   \code{\link{GwasSumStats}} of GWAS Z-scores together with the
+#'   matching per-block per-gene TWAS weights and LD sketches to
 #'   \code{ctwas::ctwas_sumstats}, producing per-gene posterior
 #'   inclusion probabilities for causal genes. Optionally accepts a
 #'   precomputed TWAS-Z \code{GRanges} from
@@ -9,23 +9,27 @@
 #'   so the per-gene Z is not recomputed inside ctwas.
 #'
 #' @section LD block convention:
-#' Each call assumes the inputs cover exactly one LD block — the user
-#' is responsible for constructing the \code{GwasSumStats} and
-#' \code{TwasWeights} over the block of interest before calling this
-#' pipeline (the same convention used by
-#' \code{\link{fineMappingPipeline}} on \code{GwasSumStats}). The
-#' single-region \code{region_info}, \code{LD_map}, and \code{snp_map}
-#' that \code{ctwas::ctwas_sumstats} requires are derived
-#' automatically from the LD sketch on \code{gwasSumStats}.
+#' Inputs are NAMED LISTS keyed by \code{region_id}
+#' (\code{list(block1 = gss1, block2 = gss2, ...)}). Per-block
+#' \code{region_info}, \code{LD_map}, and \code{snp_map} entries are
+#' built automatically from each block's LD sketch and concatenated
+#' before the call to \code{ctwas::ctwas_sumstats}. A single-block
+#' input is rejected: cTWAS's EM cannot converge on a single region,
+#' so callers must supply at least two blocks.
 #'
 #' @section LD-sketch identity check:
-#' \code{getLdSketch(twasWeights)} (when non-NULL) must match
-#' \code{getLdSketch(gwasSumStats)}. Mismatch is a hard error.
+#' Per block: \code{getLdSketch(twasWeights)} (when non-NULL) must
+#' match \code{getLdSketch(gwasSumStats)}. Mismatch is a hard error.
 #'
-#' @param gwasSumStats A \code{\link{GwasSumStats}} over one LD block.
-#'   Must have \code{getQcInfo()} non-empty.
-#' @param twasWeights A \code{\link{TwasWeights}} carrying per-(study,
-#'   context, trait, method) weights over the same LD block.
+#' @param gwasSumStats NAMED LIST of \code{\link{GwasSumStats}} keyed
+#'   by \code{region_id} (at least two entries). Each must have
+#'   \code{getQcInfo()} non-empty.
+#' @param twasWeights NAMED LIST of \code{\link{TwasWeights}} keyed by
+#'   \code{region_id}. Keys must be a SUBSET of \code{gwasSumStats}'s
+#'   keys: blocks without any TWAS weights still contribute their
+#'   SNP-level signal to ctwas's joint group prior estimate (matches
+#'   the legacy whole-chromosome pattern where only a few of many LD
+#'   blocks carry gene weights).
 #' @param twasZ Optional \code{GRanges} of TWAS Z-scores (output of
 #'   \code{\link{causalInferencePipeline}}). When supplied, the
 #'   per-(trait, context) Z is used as the \code{z_gene} input to
@@ -37,8 +41,13 @@
 #'   (default) the smart filters are no-ops; only the magnitude filter
 #'   (\code{twasWeightCutoff}) and the per-gene cap
 #'   (\code{maxNumVariants}, ordered by \code{|weight|}) apply.
-#' @param regionId Optional character (length 1) label for the LD
-#'   block. Default \code{"block1"}.
+#' @param method Optional character (length 1). Picks which TWAS
+#'   method's weights to feed into ctwas for each (study, context,
+#'   trait) gene. When \code{NULL} (default): use \code{"ensemble"} if
+#'   that method is present across the TwasWeights; otherwise use the
+#'   sole method when only one is present; otherwise error. Passing
+#'   the name explicitly (e.g. \code{"mrash"}) overrides the default
+#'   resolution.
 #' @param thin,niterPrefit,niter,L Pass-throughs to
 #'   \code{ctwas::ctwas_sumstats}.
 #' @param groupPriorVarStructure Pass-through (defaults
@@ -71,7 +80,7 @@ ctwasPipeline <- function(gwasSumStats,
                           twasWeights,
                           twasZ                   = NULL,
                           fineMappingResult       = NULL,
-                          regionId                = "block1",
+                          method                  = NULL,
                           thin                    = 0.1,
                           niterPrefit             = 3L,
                           niter                   = 30L,
@@ -87,17 +96,104 @@ ctwasPipeline <- function(gwasSumStats,
                           minPipCutoff            = 0,
                           maxNumVariants          = Inf,
                           ...) {
+  groupPriorVarStructure <- match.arg(groupPriorVarStructure)
+  inputs <- assembleCtwasInputs(
+    gwasSumStats       = gwasSumStats,
+    twasWeights        = twasWeights,
+    twasZ              = twasZ,
+    fineMappingResult  = fineMappingResult,
+    method             = method,
+    twasWeightCutoff   = twasWeightCutoff,
+    csMinCor           = csMinCor,
+    minPipCutoff       = minPipCutoff,
+    maxNumVariants     = maxNumVariants)
+  est <- estCtwasParam(
+    inputs,
+    thin                    = thin,
+    niterPrefit             = niterPrefit,
+    niter                   = niter,
+    groupPriorVarStructure  = groupPriorVarStructure,
+    ncore                   = ncore,
+    ...)
+  screened <- screenCtwasRegions(
+    est,
+    L     = L,
+    ncore = ncore,
+    ...)
+  finemapCtwasRegions(
+    screened,
+    L     = L,
+    ncore = ncore,
+    ...)
+}
+
+#' Assemble cTWAS inputs from S4 GwasSumStats / TwasWeights
+#'
+#' @description Builds the per-block ctwas-shape input set
+#'   (\code{z_snp}, \code{weights}, \code{region_info}, \code{snp_map},
+#'   \code{LD_map}, the LD- and SNP-info loader closures, plus optional
+#'   \code{z_gene}) that the downstream ctwas steps consume.
+#'   This is step 1 of the three-step \code{\link{ctwasPipeline}} split.
+#'
+#' @details The returned list is the SHARED STATE threaded through
+#'   \code{\link{estCtwasParam}} → \code{\link{screenCtwasRegions}} →
+#'   \code{\link{finemapCtwasRegions}}. Callers can short-circuit at any
+#'   step (e.g. override the estimated priors before fine-mapping) or
+#'   call \code{ctwasPipeline()} for the one-shot path.
+#'
+#' @inheritParams ctwasPipeline
+#' @return A list with elements \code{z_snp}, \code{z_gene} (NULL when
+#'   no \code{twasZ}), \code{weights}, \code{region_info},
+#'   \code{snp_map}, \code{LD_map}, \code{LD_loader_fun},
+#'   \code{snpinfo_loader_fun}, and \code{resolvedMethod}.
+#' @export
+assembleCtwasInputs <- function(gwasSumStats, twasWeights,
+                                twasZ              = NULL,
+                                fineMappingResult  = NULL,
+                                method             = NULL,
+                                twasWeightCutoff   = 0,
+                                csMinCor           = 0.8,
+                                minPipCutoff       = 0,
+                                maxNumVariants     = Inf) {
   if (!requireNamespace("ctwas", quietly = TRUE)) {
-    stop("Package 'ctwas' is required for ctwasPipeline. ",
+    stop("Package 'ctwas' is required for the cTWAS pipeline. ",
          "Install from https://github.com/xinhe-lab/ctwas .")
   }
-  if (!methods::is(gwasSumStats, "GwasSumStats"))
-    stop("`gwasSumStats` must be a GwasSumStats object.")
-  if (length(getQcInfo(gwasSumStats)) == 0L)
-    stop("ctwasPipeline: gwasSumStats has no QC record. Call ",
-         "summaryStatsQc() first.")
-  if (missing(twasWeights) || !methods::is(twasWeights, "TwasWeights"))
-    stop("`twasWeights` must be a TwasWeights object.")
+  if (missing(gwasSumStats) || !is.list(gwasSumStats) ||
+      methods::is(gwasSumStats, "GwasSumStats"))
+    stop("`gwasSumStats` must be a NAMED LIST of GwasSumStats keyed by ",
+         "region_id (got ", class(gwasSumStats)[[1L]], "). cTWAS's EM ",
+         "requires multi-block context to converge; single-block calls ",
+         "are no longer supported.")
+  if (missing(twasWeights) || !is.list(twasWeights) ||
+      methods::is(twasWeights, "TwasWeights"))
+    stop("`twasWeights` must be a NAMED LIST of TwasWeights keyed by ",
+         "region_id.")
+  if (is.null(names(gwasSumStats)) || any(!nzchar(names(gwasSumStats))))
+    stop("`gwasSumStats` must be a named list keyed by region_id (got an ",
+         "unnamed or empty-named list).")
+  if (is.null(names(twasWeights)) || any(!nzchar(names(twasWeights))))
+    stop("`twasWeights` must be a named list keyed by region_id (got an ",
+         "unnamed or empty-named list).")
+  extra_tw_keys <- setdiff(names(twasWeights), names(gwasSumStats))
+  if (length(extra_tw_keys) > 0L)
+    stop("`twasWeights` has region_id key(s) not present in ",
+         "`gwasSumStats`: ", paste(extra_tw_keys, collapse = ", "))
+  if (length(gwasSumStats) < 2L)
+    stop("assembleCtwasInputs: at least two LD blocks are required (got ",
+         length(gwasSumStats), "). cTWAS's EM cannot estimate the SNP-",
+         "group prior variance from a single region.")
+  for (rid in names(gwasSumStats)) {
+    if (!methods::is(gwasSumStats[[rid]], "GwasSumStats"))
+      stop("gwasSumStats[['", rid, "']] is not a GwasSumStats.")
+    if (length(getQcInfo(gwasSumStats[[rid]])) == 0L)
+      stop("assembleCtwasInputs: gwasSumStats[['", rid,
+           "']] has no QC record. Call summaryStatsQc() first.")
+  }
+  for (rid in names(twasWeights)) {
+    if (!methods::is(twasWeights[[rid]], "TwasWeights"))
+      stop("twasWeights[['", rid, "']] is not a TwasWeights.")
+  }
   if (!is.null(twasZ) && !methods::is(twasZ, "GRanges"))
     stop("`twasZ` must be a GRanges (output of causalInferencePipeline) ",
          "or NULL.")
@@ -105,65 +201,281 @@ ctwasPipeline <- function(gwasSumStats,
       !methods::is(fineMappingResult, "FineMappingResultBase"))
     stop("`fineMappingResult` must be a FineMappingResultBase ",
          "(QtlFineMappingResult or GwasFineMappingResult) or NULL.")
-  if (length(regionId) != 1L || !nzchar(regionId))
-    stop("`regionId` must be a single non-empty character string.")
+
+  regionIds      <- names(gwasSumStats)
+  resolvedMethod <- .ctwasResolveMethod(twasWeights, method)
+
+  ldPanelsByRegion <- list()
+  weightsList      <- list()
+  zSnpPieces       <- list()
+  regionInfoPieces <- list()
+  snpMap           <- list()
+  ldFileByRegion   <- setNames(character(length(regionIds)), regionIds)
+
+  # First pass: cache ld panels, build z_snp pieces, region_info,
+  # snp_map per region. We need the union of GWAS variant IDs ACROSS
+  # all blocks before we can correctly filter each per-block TwasWeights
+  # — a gene's weight variants can straddle adjacent LD blocks, and the
+  # per-block GWAS subset would drop the cross-boundary variants.
+  for (rid in regionIds) {
+    gss    <- gwasSumStats[[rid]]
+    tw     <- twasWeights[[rid]]   # may be NULL for SNP-only blocks
+    gwasLd <- getLdSketch(gss)
+    if (!is.null(tw))
+      .ctwasRequireMatchingLdSketches(getLdSketch(tw), gwasLd)
+
+    ldKey <- .ctwasLdPanelKey(gwasLd)
+    if (is.null(ldPanelsByRegion[[ldKey]])) {
+      ldPanelsByRegion[[ldKey]] <- .ctwasComputeFullPanelLd(gwasLd)
+    }
+    ldPanel <- ldPanelsByRegion[[ldKey]]
+    ldFileByRegion[[rid]] <- ldKey
+
+    zSnpPieces[[rid]]       <- .ctwasBuildZSnp(gss)
+    regionInfoPieces[[rid]] <- .ctwasBuildSingleRegionInfo(rid, gwasLd)
+    snpMap[[rid]] <- .ctwasSnpInfoForGwasBlock(gss, ldPanel$snpInfo)
+  }
+
+  # Global union of GWAS variant IDs across all blocks. Used to filter
+  # per-block TwasWeights so cross-boundary weight variants survive
+  # (ctwas's compute_gene_z consumes the concatenated z_snp + the gene's
+  # full weight vector regardless of which home block the gene's TSS
+  # falls in).
+  globalGwasSnpIds <- unique(unlist(lapply(zSnpPieces, function(p) p$id)))
+
+  # Second pass: build per-block weight lists with the GLOBAL gwasSnpIds
+  # filter, so a gene whose cis-window straddles block boundaries
+  # contributes its full weight vector to ctwas.
+  for (rid in regionIds) {
+    tw <- twasWeights[[rid]]
+    if (is.null(tw)) next
+    twMethod <- .ctwasFilterMethod(tw, resolvedMethod)
+    if (is.null(twMethod)) next
+    ldKey   <- ldFileByRegion[[rid]]
+    ldPanel <- ldPanelsByRegion[[ldKey]]
+    blockWeights <- .ctwasBuildWeights(
+      twMethod, ldPanel,
+      fineMappingResult = fineMappingResult,
+      twasWeightCutoff  = twasWeightCutoff,
+      csMinCor          = csMinCor,
+      minPipCutoff      = minPipCutoff,
+      maxNumVariants    = maxNumVariants,
+      gwasSnpIds        = globalGwasSnpIds)
+    if (length(blockWeights) > 0L) {
+      names(blockWeights) <- paste0(rid, "|", names(blockWeights))
+      weightsList <- c(weightsList, blockWeights)
+    }
+  }
+
+  zSnp       <- do.call(rbind, zSnpPieces)
+  rownames(zSnp) <- NULL
+  regionInfo <- do.call(rbind, regionInfoPieces)
+  rownames(regionInfo) <- NULL
+
+  ldMap <- data.frame(
+    region_id = regionIds,
+    LD_file   = unname(ldFileByRegion),
+    SNP_file  = unname(ldFileByRegion),
+    stringsAsFactors = FALSE)
+
+  list(
+    z_snp              = zSnp,
+    z_gene             = if (!is.null(twasZ)) .ctwasBuildZGene(twasZ) else NULL,
+    weights            = weightsList,
+    region_info        = regionInfo,
+    snp_map            = snpMap,
+    LD_map             = ldMap,
+    LD_loader_fun      = .ctwasMultiBlockLdLoader(ldPanelsByRegion),
+    snpinfo_loader_fun = .ctwasMultiBlockSnpInfoLoader(ldPanelsByRegion),
+    resolvedMethod     = resolvedMethod)
+}
+
+#' Estimate cTWAS group prior + prior variance
+#'
+#' @description Step 2 of the three-step \code{\link{ctwasPipeline}}:
+#'   assembles \code{region_data} from the inputs and runs
+#'   \code{ctwas::est_param} (prefit EM + accurate EM) to estimate the
+#'   group prior probabilities and prior variances. Returns the input
+#'   state plus \code{region_data}, \code{boundary_genes},
+#'   \code{z_gene}, and \code{param}.
+#'
+#' @param inputs A list returned by \code{\link{assembleCtwasInputs}}.
+#' @param thin,niterPrefit,niter Pass-throughs to
+#'   \code{ctwas::assemble_region_data} / \code{ctwas::est_param}.
+#' @param groupPriorVarStructure Pass-through.
+#' @param ncore Number of cores.
+#' @param ... Additional arguments forwarded to \code{ctwas::est_param}
+#'   (e.g. \code{min_p_single_effect}, \code{min_group_size}).
+#' @return The \code{inputs} list augmented with \code{region_data},
+#'   \code{boundary_genes}, \code{z_gene}, and \code{param}.
+#' @export
+estCtwasParam <- function(inputs,
+                          thin                    = 0.1,
+                          niterPrefit             = 3L,
+                          niter                   = 30L,
+                          groupPriorVarStructure  = c("shared_type",
+                                                      "shared_context",
+                                                      "shared_nonSNP",
+                                                      "shared_all",
+                                                      "independent"),
+                          ncore                   = 1L,
+                          ...) {
+  if (!requireNamespace("ctwas", quietly = TRUE)) {
+    stop("Package 'ctwas' is required for estCtwasParam.")
+  }
   groupPriorVarStructure <- match.arg(groupPriorVarStructure)
+  # ctwas::assemble_region_data assumes z_gene is non-NULL; when the
+  # caller did not supply a precomputed twasZ, compute it now via
+  # ctwas::compute_gene_z, mirroring ctwas_sumstats's own behaviour.
+  zGene <- inputs$z_gene
+  if (is.null(zGene)) {
+    zGene <- ctwas::compute_gene_z(
+      inputs$z_snp, inputs$weights, ncore = as.integer(ncore))
+  }
+  assembled <- .ctwasInvoke(ctwas::assemble_region_data, list(
+    region_info     = inputs$region_info,
+    z_snp           = inputs$z_snp,
+    z_gene          = zGene,
+    weights         = inputs$weights,
+    snp_map         = inputs$snp_map,
+    thin            = thin,
+    ncore           = as.integer(ncore)), extra = list(...))
+  paramRes <- .ctwasInvoke(ctwas::est_param, list(
+    region_data               = assembled$region_data,
+    niter_prefit              = as.integer(niterPrefit),
+    niter                     = as.integer(niter),
+    group_prior_var_structure = groupPriorVarStructure,
+    ncore                     = as.integer(ncore)), extra = list(...))
+  # ctwas::assemble_region_data does not echo z_gene back in its return
+  # list, so propagate the precomputed (or freshly computed) z_gene we
+  # passed into it. This matches ctwas_sumstats's top-level return shape.
+  # Replace inputs$z_gene (which is NULL when twasZ wasn't supplied) with
+  # the computed zGene so $z_gene resolves to the right entry.
+  inputs$z_gene <- zGene
+  c(inputs, list(
+    region_data    = assembled$region_data,
+    boundary_genes = assembled$boundary_genes,
+    param          = paramRes))
+}
 
-  twLd   <- getLdSketch(twasWeights)
-  gwasLd <- getLdSketch(gwasSumStats)
-  .ctwasRequireMatchingLdSketches(twLd, gwasLd)
-
-  # --- Compute the full-panel LD ONCE -------------------------------
-  # Single source of truth for both the LD-loader closure (which ctwas
-  # invokes per region during assemble + fine-map stages) and the
-  # per-gene R_wgt submatrices (sliced from this cache by SNP ID).
-  ldPanel <- .ctwasComputeFullPanelLd(gwasLd)
-
-  # --- Build the single-region ctwas inputs ---------------------------
-  zSnp        <- .ctwasBuildZSnp(gwasSumStats)
-  regionInfo  <- .ctwasBuildSingleRegionInfo(regionId, gwasLd)
-  # ctwas::ctwas_sumstats top-level asserts `file.exists(LD_map$LD_file)`
-  # and `file.exists(LD_map$SNP_file)` unconditionally — even when
-  # LD_format = "custom" routes all data through our loaders and the file
-  # paths are never read. The right fix is upstream (gate the assertion
-  # on `LD_format != "custom"` or drop it entirely; see
-  # https://github.com/xinhe-lab/ctwas — `ctwas_sumstats()` L33-34). Until
-  # that lands, point both columns at `tempdir()`: always exists, no disk
-  # writes, no cleanup. The loader closures ignore the file token.
-  vestigialPath <- tempdir()
-  ldMap <- data.frame(region_id = regionId,
-                      LD_file   = vestigialPath,
-                      SNP_file  = vestigialPath,
-                      stringsAsFactors = FALSE)
-  snpMap      <- list()
-  snpMap[[regionId]] <- ldPanel$snpInfo
-  weightsList <- .ctwasBuildWeights(
-    twasWeights, ldPanel,
-    fineMappingResult = fineMappingResult,
-    twasWeightCutoff  = twasWeightCutoff,
-    csMinCor          = csMinCor,
-    minPipCutoff      = minPipCutoff,
-    maxNumVariants    = maxNumVariants)
-  zGene       <- if (!is.null(twasZ)) .ctwasBuildZGene(twasZ) else NULL
-
-  # --- Call the ctwas engine ------------------------------------------
-  ctwas::ctwas_sumstats(
-    z_snp                      = zSnp,
-    weights                    = weightsList,
-    region_info                = regionInfo,
-    LD_map                     = ldMap,
-    snp_map                    = snpMap,
-    z_gene                     = zGene,
-    thin                       = thin,
-    niter_prefit               = as.integer(niterPrefit),
-    niter                      = as.integer(niter),
-    L                          = as.integer(L),
-    group_prior_var_structure  = groupPriorVarStructure,
-    LD_format                  = "custom",
-    LD_loader_fun              = .ctwasSingleBlockLdLoader(ldPanel$R),
-    snpinfo_loader_fun         = .ctwasSingleBlockSnpInfoLoader(ldPanel$snpInfo),
-    ncore                      = as.integer(ncore),
-    ...)
+#' Screen cTWAS regions
+#'
+#' @description Step 3 of the three-step \code{\link{ctwasPipeline}}:
+#'   runs \code{ctwas::screen_regions} on the
+#'   \code{\link{estCtwasParam}} result and returns the screened-region
+#'   set. Use this entry point to substitute hand-tuned priors for the
+#'   ones estimated in step 2 (e.g. when the accurate EM diverges to
+#'   NaN and you want to recover the prefit values).
+#'
+#' @param estResult A list returned by \code{\link{estCtwasParam}}.
+#' @param L Pass-through to \code{ctwas::screen_regions} (and downstream
+#'   fine-mapping).
+#' @param ncore Number of cores.
+#' @param ... Additional arguments forwarded to
+#'   \code{ctwas::screen_regions} (e.g. \code{filter_L},
+#'   \code{min_nonSNP_PIP}, \code{min_L}).
+#' @return The \code{estResult} list augmented with
+#'   \code{screen_res} (the full ctwas output) and
+#'   \code{screened_region_data}.
+#' @export
+screenCtwasRegions <- function(estResult,
+                               L     = 5L,
+                               ncore = 1L,
+                               ...) {
+  if (!requireNamespace("ctwas", quietly = TRUE)) {
+    stop("Package 'ctwas' is required for screenCtwasRegions.")
+  }
+  screenRes <- .ctwasInvoke(ctwas::screen_regions, list(
+    region_data        = estResult$region_data,
+    LD_map             = estResult$LD_map,
+    weights            = estResult$weights,
+    group_prior        = estResult$param$group_prior,
+    group_prior_var    = estResult$param$group_prior_var,
+    L                  = as.integer(L),
+    LD_format          = "custom",
+    LD_loader_fun      = estResult$LD_loader_fun,
+    snpinfo_loader_fun = estResult$snpinfo_loader_fun,
+    ncore              = as.integer(ncore)), extra = list(...))
+  c(estResult, list(
+    screen_res            = screenRes,
+    screened_region_data  = screenRes$screened_region_data))
+}
+
+#' Fine-map cTWAS regions
+#'
+#' @description Step 4 (final) of the three-step
+#'   \code{\link{ctwasPipeline}}: runs \code{ctwas::finemap_regions} on
+#'   the screened-region set from \code{\link{screenCtwasRegions}} and
+#'   assembles the documented top-level ctwas output (\code{z_gene},
+#'   \code{param}, \code{finemap_res}, \code{susie_alpha_res},
+#'   \code{region_data}, \code{boundary_genes}, \code{screen_res}).
+#'
+#' @param screenResult A list returned by
+#'   \code{\link{screenCtwasRegions}}.
+#' @param L Pass-through.
+#' @param ncore Number of cores.
+#' @param ... Additional arguments forwarded to
+#'   \code{ctwas::finemap_regions}.
+#' @return A list mirroring \code{ctwas::ctwas_sumstats}'s output:
+#'   \code{z_gene}, \code{param}, \code{finemap_res},
+#'   \code{susie_alpha_res}, \code{region_data}, \code{boundary_genes},
+#'   \code{screen_res}.
+#' @export
+finemapCtwasRegions <- function(screenResult,
+                                L     = 5L,
+                                ncore = 1L,
+                                ...) {
+  if (!requireNamespace("ctwas", quietly = TRUE)) {
+    stop("Package 'ctwas' is required for finemapCtwasRegions.")
+  }
+  rd <- screenResult$screened_region_data
+  fmRes <- if (length(rd) == 0L) {
+    list(finemap_res = NULL, susie_alpha_res = NULL)
+  } else {
+    .ctwasInvoke(ctwas::finemap_regions, list(
+      region_data        = rd,
+      LD_map             = screenResult$LD_map,
+      weights            = screenResult$weights,
+      group_prior        = screenResult$param$group_prior,
+      group_prior_var    = screenResult$param$group_prior_var,
+      L                  = as.integer(L),
+      LD_format          = "custom",
+      LD_loader_fun      = screenResult$LD_loader_fun,
+      snpinfo_loader_fun = screenResult$snpinfo_loader_fun,
+      ncore              = as.integer(ncore)), extra = list(...))
+  }
+  list(
+    z_gene          = screenResult$z_gene,
+    param           = screenResult$param,
+    finemap_res     = fmRes$finemap_res,
+    susie_alpha_res = fmRes$susie_alpha_res,
+    region_data     = screenResult$region_data,
+    boundary_genes  = screenResult$boundary_genes,
+    screen_res      = screenResult$screen_res)
+}
+
+# Invoke a ctwas function with a fixed `args` list plus optional `extra`
+# (typically the `...` collected by the wrapper). `extra` names that
+# duplicate `args` names are silently dropped, so the wrapper's explicit
+# arguments always win over caller-supplied `...`.
+# @noRd
+.ctwasInvoke <- function(fn, args, extra = list()) {
+  if (length(extra) > 0L) {
+    extra <- extra[setdiff(names(extra), names(args))]
+    # `...` is forwarded uniformly to four different ctwas functions
+    # (assemble_region_data / est_param / screen_regions /
+    # finemap_regions). Restrict to fn's explicit formals so an arg
+    # meant for a sibling step doesn't crash this one -- and so args
+    # that fn would otherwise forward via its own `...` (e.g. into
+    # susie_rss) don't bleed into incompatible downstream functions.
+    formalsFn <- tryCatch(names(formals(fn)), error = function(e) NULL)
+    if (!is.null(formalsFn)) {
+      explicitFormals <- setdiff(formalsFn, "...")
+      extra <- extra[intersect(names(extra), explicitFormals)]
+    }
+    args <- c(args, extra)
+  }
+  do.call(fn, args)
 }
 
 # =============================================================================
@@ -176,6 +488,52 @@ ctwasPipeline <- function(gwasSumStats,
   .requireMatchingLdSketches(twLd, gwasLd, pipelineName = "ctwasPipeline")
 }
 
+# Resolve which TWAS method's weights to feed into ctwas given a
+# TwasWeights collection that may carry multiple methods per
+# (study, context, trait). Rules:
+#   - Caller-supplied method (non-NULL, non-empty) wins, provided that
+#     method exists in the TwasWeights's `method` column.
+#   - Otherwise prefer "ensemble" when present.
+#   - Otherwise return the sole method when only one is present.
+#   - Otherwise: error.
+# @noRd
+.ctwasResolveMethod <- function(twasWeightsList, method = NULL) {
+  available <- unique(unlist(lapply(twasWeightsList, function(tw)
+    as.character(tw$method))))
+  if (length(available) == 0L)
+    stop("ctwasPipeline: TwasWeights collections have no method entries.")
+  if (!is.null(method) && nzchar(method)) {
+    if (!method %in% available)
+      stop("ctwasPipeline: method '", method, "' not present in TwasWeights ",
+           "(available: ", paste(available, collapse = ", "), ").")
+    return(method)
+  }
+  if ("ensemble" %in% available) return("ensemble")
+  if (length(available) == 1L) return(available[[1L]])
+  stop("ctwasPipeline: TwasWeights carries multiple methods (",
+       paste(available, collapse = ", "),
+       ") with no 'ensemble' entry. Supply a `method` argument to ",
+       "pick one (e.g. method = \"mrash\").")
+}
+
+# Subset a TwasWeights collection to rows whose `method` matches the
+# resolved method. Used to enforce the "one ctwas gene per (study,
+# context, trait)" semantics — the legacy pipeline fed a single
+# best-CV-method weight per gene; the new S4 TwasWeights may carry
+# many methods, but ctwas should only see one.
+# @noRd
+.ctwasFilterMethod <- function(tw, method) {
+  keep <- which(as.character(tw$method) == method)
+  if (length(keep) == 0L) return(NULL)
+  TwasWeights(
+    study    = as.character(tw$study)[keep],
+    context  = as.character(tw$context)[keep],
+    trait    = as.character(tw$trait)[keep],
+    method   = as.character(tw$method)[keep],
+    entry    = as.list(tw$entry[keep]),
+    ldSketch = getLdSketch(tw))
+}
+
 # Build the per-variant Z data.frame ctwas expects from a GwasSumStats.
 # Stacks each study row's GRanges via the shared `.entryToSumstatDf`
 # helper (R/sumstatsQc.R), then projects to ctwas's column shape and
@@ -238,11 +596,14 @@ ctwasPipeline <- function(gwasSumStats,
 
 # Compute the full-panel LD ONCE and return everything the rest of the
 # pipeline needs to consume it. Returns a list with:
-#   R       : full-panel correlation matrix (n_var x n_var, dimnames =
-#             SNP IDs). Single source of truth for both the per-region
-#             LD loader closure and the per-gene R_wgt submatrices.
-#   snpInfo : ctwas-shaped per-block table (chrom, id, pos, alt, ref)
-#             — both the snp_map element and the snpinfo loader return.
+#   R        : full-panel correlation matrix (n_var x n_var, dimnames =
+#              SNP IDs). Single source of truth for both the per-region
+#              LD loader closure and the per-gene R_wgt submatrices.
+#   snpInfo  : ctwas-shaped per-block table (chrom, id, pos, alt, ref)
+#              — both the snp_map element and the snpinfo loader return.
+#   variance : named numeric vector of per-variant dosage variance from
+#              the LD reference. Used to scale non-standardized TWAS
+#              weights to the correlation scale that ctwas expects.
 # @noRd
 .ctwasComputeFullPanelLd <- function(gwasLd) {
   snpInfoCtwas <- .ctwasSnpInfoForBlock(gwasLd)
@@ -252,7 +613,88 @@ ctwasPipeline <- function(gwasSumStats,
   R <- computeLd(geno, method = "sample")
   snpIds <- snpInfoCtwas$id
   dimnames(R) <- list(snpIds, snpIds)
-  list(R = R, snpInfo = snpInfoCtwas)
+  variance <- setNames(apply(geno, 2, stats::var, na.rm = TRUE), snpIds)
+  list(R = R, snpInfo = snpInfoCtwas, variance = variance)
+}
+
+# Harmonize TWAS weight variants against the LD reference panel. Same
+# allele-matching semantics as the GWAS-side `.matchRefPanel` flow:
+# match by (chrom, pos), accept exact A1/A2 frame, sign-flip the weight
+# when alleles are swapped, drop unmatched / strand-ambiguous variants.
+# Returns a data.frame with columns:
+#   variant_id : canonical (panel-frame) variant ID
+#   w          : sign-flipped weight aligned to the panel's A1 frame
+#   origIdx    : index back into the entry's original variantIds vector
+#                (used by SuSiE renormalization to slice mu / lbf)
+# Returns NULL when the entry has no variants in common with the panel.
+# @noRd
+.ctwasHarmonizeWeights <- function(origVids, origW, refVariants) {
+  parsed <- tryCatch(parseVariantId(origVids), error = function(e) NULL)
+  if (is.null(parsed) || nrow(parsed) == 0L) return(NULL)
+  targetDf <- data.frame(
+    chrom   = as.integer(parsed$chrom),
+    pos     = as.integer(parsed$pos),
+    A2      = as.character(parsed$A2),
+    A1      = as.character(parsed$A1),
+    w       = as.numeric(origW),
+    origIdx = seq_along(origVids),
+    stringsAsFactors = FALSE)
+  res <- tryCatch(
+    .matchRefPanel(
+      targetData            = targetDf,
+      refVariants           = refVariants,
+      colToFlip             = "w",
+      matchMinProp          = 0,
+      removeUnmatched       = TRUE,
+      removeStrandAmbiguous = TRUE),
+    error = function(e) NULL)
+  if (is.null(res)) return(NULL)
+  res$harmonizedData
+}
+
+# Does the entry's `fits` slot carry a SuSiE-shape intermediate (lbf,
+# mu, X_column_scale_factors)? Used to gate the renormalization branch.
+# @noRd
+.ctwasIsSusieFit <- function(fits) {
+  if (is.null(fits)) return(FALSE)
+  needed <- c("lbf_variable", "mu", "X_column_scale_factors")
+  all(needed %in% names(fits))
+}
+
+# Renormalize SuSiE TWAS weights over the kept variant set. When some
+# variants got dropped by allele harmonization / panel intersection,
+# the posterior `alpha` values from the original fit no longer sum to
+# 1 over the kept variants. We re-softmax `lbf_variable[, keptIdx]`
+# into a renormalized alpha, sign-flip the rows of `mu[, keptIdx]` to
+# match the panel's allele frame (carrying over the per-variant sign
+# flip already applied to `harmonizedW`), and recompute the per-variant
+# weight as `colSums(alpha * mu_subset) / X_column_scale_factors_subset`.
+# Returns the new weight vector (length = length(keptIdx)), or NULL if
+# the fit's dimensions don't line up with the entry's variantIds.
+# @noRd
+.ctwasRenormalizeSusieWeights <- function(fits, origVids, origW,
+                                          keptIdx, harmonizedW) {
+  lbf  <- fits$lbf_variable
+  mu   <- fits$mu
+  xCol <- fits$X_column_scale_factors
+  if (is.null(lbf) || is.null(mu) || is.null(xCol)) return(NULL)
+  if (ncol(lbf) != length(origVids) ||
+      ncol(mu)  != length(origVids) ||
+      length(xCol) != length(origVids)) {
+    # Fit-vs-entry dimension mismatch; skip rather than mis-slice.
+    return(NULL)
+  }
+  # Per-variant sign flip applied by allele harmonization. NaN signs
+  # (origW == 0) default to +1.
+  signFlip <- sign(harmonizedW / origW[keptIdx])
+  signFlip[!is.finite(signFlip)] <- 1
+  lbfSub  <- lbf[, keptIdx, drop = FALSE]
+  muSub   <- sweep(mu[, keptIdx, drop = FALSE], 2L, signFlip, `*`)
+  xColSub <- xCol[keptIdx]
+  # Guard against zero scale factors (shouldn't happen in practice).
+  xColSub[xColSub == 0] <- 1
+  newAlpha <- lbfToAlpha(lbfSub)
+  as.numeric(colSums(newAlpha * muSub) / xColSub)
 }
 
 # Build the weights list ctwas expects: keyed by per-tuple gene id,
@@ -271,20 +713,83 @@ ctwasPipeline <- function(gwasSumStats,
                                twasWeightCutoff  = 0,
                                csMinCor          = 0.8,
                                minPipCutoff      = 0,
-                               maxNumVariants    = Inf) {
+                               maxNumVariants    = Inf,
+                               gwasSnpIds        = NULL) {
   panelSnps <- rownames(ldPanel$R)
+  # ctwas's compute_gene_z asserts that every weight variant exists in the
+  # block's z_snp$id. When the LD sketch covers more than the block (e.g.
+  # a whole-chromosome PLINK2 used for a single-block GwasSumStats),
+  # panelSnps alone leaks variants outside the block. Intersect with the
+  # caller-supplied GWAS sumstats variant set when provided.
+  if (!is.null(gwasSnpIds)) {
+    panelSnps <- intersect(panelSnps, as.character(gwasSnpIds))
+  }
   panelInfo <- ldPanel$snpInfo
+  # Reference frame for allele-harmonization: panel variant info with
+  # the column shape `.matchRefPanel` expects (chrom/pos/A2/A1).
+  refVariants <- data.frame(
+    chrom      = as.integer(panelInfo$chrom),
+    pos        = as.integer(panelInfo$pos),
+    A2         = as.character(panelInfo$ref),
+    A1         = as.character(panelInfo$alt),
+    variant_id = as.character(panelInfo$id),
+    stringsAsFactors = FALSE)
+
   out <- list()
   for (i in seq_len(nrow(twasWeights))) {
-    entry  <- twasWeights$entry[[i]]
-    vids   <- getVariantIds(entry)
-    w      <- as.numeric(getWeights(entry))
-    if (length(vids) == 0L || length(vids) != length(w)) next
+    entry    <- twasWeights$entry[[i]]
+    origVids <- getVariantIds(entry)
+    origW    <- as.numeric(getWeights(entry))
+    if (length(origVids) == 0L || length(origVids) != length(origW)) next
+
+    # --- Step 1: allele-harmonize against the LD panel -------------
+    # Parses chr:pos:A2:A1 IDs into the data.frame `.matchRefPanel`
+    # expects, then matches by (chrom, pos) with exact / sign-flip /
+    # strand-flip detection. Returned canonical variant IDs are in the
+    # panel's A1/A2 frame; weights are sign-flipped for variants whose
+    # input A1/A2 frame was swapped relative to the panel.
+    harm <- .ctwasHarmonizeWeights(origVids, origW, refVariants)
+    if (is.null(harm) || nrow(harm) == 0L) next
+    vids    <- as.character(harm$variant_id)
+    w       <- as.numeric(harm$w)
+    keptIdx <- as.integer(harm$origIdx)  # back-reference into origVids/origW
 
-    # Drop variants not in the LD sketch panel.
+    # --- Step 2: restrict to panel ∩ gwasSnpIds --------------------
     keep <- vids %in% panelSnps
     if (!any(keep)) next
-    vids <- vids[keep]; w <- w[keep]
+    vids    <- vids[keep]
+    w       <- w[keep]
+    keptIdx <- keptIdx[keep]
+
+    # --- Step 3: SuSiE alpha renormalization -----------------------
+    # When the entry carries a SuSiE-style fit (lbf_variable + mu +
+    # X_column_scale_factors) and the kept variant set is smaller than
+    # the original fit, the posterior probabilities `alpha` no longer
+    # sum to 1 over the kept variants. Renormalize via softmax of
+    # lbf_variable over the kept columns and recompute the per-variant
+    # weight as colSums(new_alpha * mu_subset) /
+    # X_column_scale_factors_subset. mu is sign-flipped per the allele
+    # harmonization so the recomputed weight stays in the panel's
+    # allele frame. Mirrors the legacy adjustSusieWeights helper.
+    fits <- getFits(entry)
+    if (.ctwasIsSusieFit(fits) && length(keptIdx) < length(origVids)) {
+      renorm <- .ctwasRenormalizeSusieWeights(
+        fits, origVids = origVids, origW = origW,
+        keptIdx = keptIdx, harmonizedW = w)
+      if (!is.null(renorm)) w <- renorm
+    }
+
+    # --- Step 4: variance scaling for non-standardized weights -----
+    # w_scaled = w_raw * sqrt(per-variant genotype variance from the
+    # LD reference panel). Standardized entries (RSS-style, already on
+    # the correlation scale) pass through unchanged.
+    if (!isTRUE(getStandardized(entry))) {
+      varLookup <- ldPanel$variance[vids]
+      if (anyNA(varLookup))
+        stop(".ctwasBuildWeights: missing genotype variance for ",
+             sum(is.na(varLookup)), " variant(s) in the LD panel.")
+      w <- w * sqrt(varLookup)
+    }
 
     gStudy   <- as.character(twasWeights$study)[[i]]
     gContext <- as.character(twasWeights$context)[[i]]
@@ -460,18 +965,71 @@ ctwasPipeline <- function(gwasSumStats,
     stringsAsFactors = FALSE)
 }
 
-# Single-block LD loader for ctwas: captures the precomputed full-panel
-# correlation matrix and returns it on every loader call (ctwas invokes
-# the loader multiple times per region across assemble + fine-map). The
-# `LD_file` argument is a vestigial region token — ignored.
+# Multi-block LD loader for ctwas. ctwas invokes
+# `LD_loader_fun(LD_file)` per region during region_data assembly and
+# fine-mapping; we dispatch by `LD_file` (the same string set on
+# `LD_map$LD_file`) into the cached per-sketch ldPanel.
 # @noRd
-.ctwasSingleBlockLdLoader <- function(R) {
-  function(LD_file, ...) R
+.ctwasMultiBlockLdLoader <- function(ldPanelsByRegion) {
+  function(LD_file, ...) {
+    panel <- ldPanelsByRegion[[LD_file]]
+    if (is.null(panel))
+      stop("ctwasPipeline LD loader: no cached panel for LD_file = '",
+           LD_file, "'")
+    panel$R
+  }
 }
 
-# Single-block SNP-info loader for ctwas: captures the precomputed
-# per-block snpInfo table and returns it on every loader call.
+# Multi-block SNP-info loader for ctwas. Mirrors the LD loader.
 # @noRd
-.ctwasSingleBlockSnpInfoLoader <- function(snpInfo) {
-  function(LD_file, ...) snpInfo
+.ctwasMultiBlockSnpInfoLoader <- function(ldPanelsByRegion) {
+  function(LD_file, ...) {
+    panel <- ldPanelsByRegion[[LD_file]]
+    if (is.null(panel))
+      stop("ctwasPipeline snpInfo loader: no cached panel for LD_file = '",
+           LD_file, "'")
+    panel$snpInfo
+  }
+}
+
+# Derive the LD_file token for ctwas from a GenotypeHandle. We point
+# at the on-disk file that already backs the sketch's data, so the
+# `file.exists(LD_map$LD_file)` assertion in ctwas::ctwas_sumstats
+# passes WITHOUT pecotmr doing any new I/O. The token also serves as
+# the dispatch key for the multi-block LD / snpInfo loaders, so two
+# blocks sharing the same on-disk LD payload share one cached panel.
+# @noRd
+.ctwasLdPanelKey <- function(handle) {
+  fmt <- getFormat(handle)
+  stem <- getPath(handle)
+  candidates <- switch(fmt,
+    "plink2" = c(paste0(stem, ".pgen")),
+    "plink1" = c(paste0(stem, ".bed")),
+    "gds"    = c(stem),
+    "vcf"    = c(stem),
+    stem)
+  hit <- candidates[file.exists(candidates)]
+  if (length(hit) == 0L)
+    stop("ctwasPipeline: could not derive an existing LD-file token for ",
+         "the GenotypeHandle (format=", fmt, ", path=", stem,
+         "). Looked for: ", paste(candidates, collapse = ", "))
+  hit[[1L]]
+}
+
+# Build a per-block snpInfo table restricted to variants present in the
+# GwasSumStats entry. Mirrors `.ctwasSnpInfoForBlock` but restricts to
+# the block's GWAS variants (intersected against the cached panel) so
+# snp_map[[region_id]] is sized to the block, not the whole panel.
+# @noRd
+.ctwasSnpInfoForGwasBlock <- function(gwasSumStats, panelSnpInfo) {
+  blockIds <- character(0)
+  for (i in seq_len(nrow(gwasSumStats))) {
+    mc <- S4Vectors::mcols(gwasSumStats$entry[[i]])
+    if ("SNP" %in% colnames(mc))
+      blockIds <- c(blockIds, as.character(mc$SNP))
+  }
+  blockIds <- unique(blockIds)
+  if (length(blockIds) == 0L) return(panelSnpInfo[FALSE, , drop = FALSE])
+  keep <- panelSnpInfo$id %in% blockIds
+  panelSnpInfo[keep, , drop = FALSE]
 }
diff --git a/R/genotypeIo.R b/R/genotypeIo.R
index 98f4ea9c..8b656a7c 100644
--- a/R/genotypeIo.R
+++ b/R/genotypeIo.R
@@ -314,9 +314,18 @@ extractBlockGenotypes <- function(handle, snpIdx, meanImpute = TRUE) {
 
 #' @keywords internal
 .extractBlockPlink2 <- function(handle, snpIdx) {
-  # pgenlibr::ReadList returns ALT dosage = A1 dosage in pecotmr convention
-  geno <- pgenlibr::ReadList(getPgenPtr(handle), variant_subset = snpIdx,
-                              meanimpute = FALSE)
+  # pgenlibr::ReadList returns ALT dosage = A1 dosage in pecotmr convention.
+  # The cached @pgenPtr does not survive saveRDS/readRDS (external pointers
+  # become stale), so we re-open from getPath() on the fly if the cached
+  # pointer errors out. Opening is cheap relative to dosage extraction.
+  ptr <- getPgenPtr(handle)
+  paths <- resolvePlink2Paths(getPath(handle))
+  geno <- tryCatch(
+    pgenlibr::ReadList(ptr, variant_subset = snpIdx, meanimpute = FALSE),
+    error = function(e) {
+      reopened <- pgenlibr::NewPgen(paths$pgen)
+      pgenlibr::ReadList(reopened, variant_subset = snpIdx, meanimpute = FALSE)
+    })
   storage.mode(geno) <- "double"
   geno
 }
@@ -530,7 +539,6 @@ computeBlockLdCor <- function(handle, snpIdx, backend = "internal",
 #' @importFrom Rsamtools TabixFile seqnamesTabix scanTabix headerTabix
 #' @importFrom GenomicRanges GRanges seqnames
 #' @importFrom SummarizedExperiment assay
-#' @importFrom MungeSumstats standardise_header
 readBim <- function(bed) {
   bimf <- paste0(file_path_sans_ext(bed), ".bim")
   bim <- vroom(bimf, col_names = FALSE)
diff --git a/R/sumstatsQc.R b/R/sumstatsQc.R
index 0996d597..04943641 100644
--- a/R/sumstatsQc.R
+++ b/R/sumstatsQc.R
@@ -61,17 +61,17 @@ NULL
 #' @details
 #' Pure panel-vs-sumstats allele harmonization: match by (chrom, pos),
 #' detect A1/A2 swap, sign-flip \code{colToFlip} columns and complement
-#' \code{colToComplement} columns on swap. Variant-content filters
-#' (indels, strand-ambiguous, duplicates, NAs, MAF / INFO / N cutoffs)
-#' are NOT applied here — \code{summaryStatsQc()} delegates those to
-#' \code{MungeSumstats::format_sumstats()} before \code{.matchRefPanel}
-#' runs. The internal \code{strand_unambiguous} guard remains so that
-#' if a caller bypasses MungeSumstats and feeds in A/T or C/G
-#' strand-ambiguous variants, they are not mis-flipped: the keep rule
-#' only accepts a strand-flip claim when the unambiguous flag agrees.
+#' \code{colToComplement} columns on swap. Variant-allele filters
+#' (indels, strand-ambiguous, duplicates) are applied here directly when
+#' the corresponding \code{removeIndels} / \code{removeStrandAmbiguous} /
+#' \code{removeDups} flags are set; MAF / INFO / N column-numeric filters
+#' run in \code{.applyContentFilters()} before this function.
 .matchRefPanel <- function(targetData, refVariants, colToFlip = NULL,
                            matchMinProp = 0.2, flipStrand = FALSE,
                            removeUnmatched = TRUE,
+                           removeIndels = FALSE,
+                           removeStrandAmbiguous = TRUE,
+                           removeDups = FALSE,
                            colToComplement = character(), ...) {
   strandFlip <- function(ref) chartr("ATCG", "TAGC", ref)
 
@@ -148,7 +148,19 @@ NULL
                         (A1.target != A1.ref & A2.target != A2.ref)) %>%
     mutate(strand_flip = ((A1.target == flip1.ref & A2.target == flip2.ref) |
                          (A1.target == flip2.ref & A2.target == flip1.ref)) &
-                        (A1.target != A1.ref & A2.target != A2.ref))
+                        (A1.target != A1.ref & A2.target != A2.ref)) %>%
+    # INDEL detection: explicit "I"/"D" notation, or any allele wider than 1bp.
+    mutate(INDEL = (A2.target == "I" | A2.target == "D" |
+                   nchar(A2.target) > 1L | nchar(A1.target) > 1L)) %>%
+    # ID_match: an indel encoded as I/D on the target side matches an indel
+    # on the reference side (where the reference uses multi-base alleles).
+    mutate(ID_match = ((A2.target == "D" | A2.target == "I") &
+                      (nchar(A1.ref) > 1L | nchar(A2.ref) > 1L)))
+
+  # When removeStrandAmbiguous = FALSE, the A/T - C/G safety guard is
+  # disabled: ambiguous variants are treated as exact/sign-flip cases.
+  if (!removeStrandAmbiguous)
+    matchResult$strand_unambiguous <- TRUE
 
   # If no strand_flip survives the unambiguous test, the remaining ambiguous
   # variants can be treated as exact/sign-flip cases rather than dropped.
@@ -157,8 +169,12 @@ NULL
 
   matchResult <- matchResult %>%
     mutate(keep = if_else(strand_flip,
-                          true  = strand_unambiguous | exact_match,
-                          false = exact_match | sign_flip))
+                          true  = strand_unambiguous | exact_match | ID_match,
+                          false = exact_match | sign_flip | ID_match))
+
+  if (removeIndels)
+    matchResult <- matchResult %>%
+      mutate(keep = if_else(INDEL, FALSE, keep))
 
   if (!is.null(colToFlip)) {
     missing <- setdiff(colToFlip, colnames(matchResult))
@@ -212,11 +228,23 @@ NULL
   result <- matchResult[matchResult$keep, , drop = FALSE]
 
   qcCols <- c("flip1.ref", "flip2.ref", "strand_unambiguous",
-              "exact_match", "sign_flip", "strand_flip", "keep")
+              "exact_match", "sign_flip", "strand_flip", "INDEL",
+              "ID_match", "keep")
   result <- result %>%
     select(-any_of(qcCols), -A1.target, -A2.target) %>%
     rename(A1 = A1.ref, A2 = A2.ref, variant_id = variants_id_qced)
 
+  # removeDups: drop duplicate variant rows (same chrom/pos/qced ID).
+  # Default FALSE keeps the existing strict behavior (error on dups).
+  if (removeDups) {
+    dups <- duplicated(result[, c("chrom", "pos", "variant_id")])
+    if (any(dups)) {
+      warning(sprintf("Removed %d duplicate variant(s), keeping first occurrence.",
+                      sum(dups)))
+      result <- result[!dups, , drop = FALSE]
+    }
+  }
+
   if (!removeUnmatched) {
     matchVariant <- result %>% pull(variants_id_original)
     matchResult <- matchResult %>%
@@ -237,9 +265,9 @@ NULL
   if (nrow(result) < matchMinProp * nrow(refVariants))
     stop("Not enough variants have been matched.")
   if (any(duplicated(result$variant_id)))
-    stop("Duplicated variant IDs remain after harmonization; the input ",
-         "should have been deduplicated by MungeSumstats / summaryStatsQc ",
-         "before calling .matchRefPanel.")
+    stop("Duplicated variant IDs remain after harmonization; pass ",
+         "removeDups = TRUE or deduplicate upstream before calling ",
+         ".matchRefPanel.")
 
   out <- list(harmonizedData = result, qcSummary = matchResult)
   attr(out, "qcCounts") <- qcCounts
@@ -2358,9 +2386,8 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
 # data.frame/LdData/QcResult-based summaryStatsQc and rssBasicQc).
 # =============================================================================
 
-# Convert one entry's GRanges into a flat data.frame with the column shape that
-# MungeSumstats and .matchRefPanel expect (lower-case chrom/pos plus the
-# CapsCase mcols).
+# Convert one entry's GRanges into a flat data.frame with the column shape
+# .matchRefPanel expects (lower-case chrom/pos plus the CapsCase mcols).
 .entryGrangesToDf <- function(gr) {
   mc <- as.data.frame(S4Vectors::mcols(gr), stringsAsFactors = FALSE)
   out <- data.frame(
@@ -2591,62 +2618,14 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
   df[!dropMask, , drop = FALSE]
 }
 
-# Run the curated MungeSumstats::format_sumstats() pass. Returns the cleaned
-# data.frame in pecotmr CapsCase column convention plus a per-entry audit
-# record describing what was applied. Caller is responsible for catching
-# errors and reporting them with the entry identity.
-.runMungeSumstatsFilter <- function(df, refGenome, useDbsnpRefCheck,
-                                    removeIndels, removeStrandAmbiguous,
-                                    mafCutoff, infoCutoff, nCutoff,
-                                    convertRefGenome, mungeSumstatsArgs) {
-  if (!requireNamespace("MungeSumstats", quietly = TRUE))
-    stop("Package 'MungeSumstats' is required for summaryStatsQc(). ",
-         "Install it from Bioconductor.")
-  # MungeSumstats writes through a temp tsv when invoked with a path; pass the
-  # in-memory data.frame via the file-write/read path it provides.
-  tmpIn  <- tempfile(fileext = ".tsv.gz")
-  on.exit(unlink(tmpIn), add = TRUE)
-  data.table::fwrite(df, tmpIn, sep = "\t")
-
-  baseArgs <- list(
-    path                = tmpIn,
-    ref_genome          = if (is.null(refGenome)) "GRCh38" else refGenome,
-    convert_ref_genome  = convertRefGenome,
-    on_ref_genome       = isTRUE(useDbsnpRefCheck),
-    infer_eff_direction = isTRUE(useDbsnpRefCheck),
-    allele_flip_check   = isTRUE(useDbsnpRefCheck),
-    bi_allelic_filter   = isTRUE(useDbsnpRefCheck),
-    strand_ambig_filter = isTRUE(removeStrandAmbiguous),
-    drop_indels         = isTRUE(removeIndels),
-    FRQ_filter          = mafCutoff,
-    INFO_filter         = infoCutoff,
-    N_std               = nCutoff,
-    return_data         = TRUE,
-    return_format       = "data.table",
-    log_folder_ind      = FALSE,
-    log_mungesumstats_msgs = FALSE)
-  # Caller-supplied pass-through overrides anything we set above.
-  for (nm in names(mungeSumstatsArgs)) baseArgs[[nm]] <- mungeSumstatsArgs[[nm]]
-
-  before <- nrow(df)
-  cleaned <- do.call(MungeSumstats::format_sumstats, baseArgs)
-  cleaned <- as.data.frame(cleaned)
-  # Restore lower-case chrom/pos for downstream pecotmr code.
-  if ("CHR" %in% colnames(cleaned)) {
-    cleaned$chrom <- sub("^chr", "", as.character(cleaned$CHR),
-                          ignore.case = TRUE)
-    cleaned$CHR <- NULL
-  }
-  if ("BP" %in% colnames(cleaned)) {
-    cleaned$pos <- as.integer(cleaned$BP)
-    cleaned$BP <- NULL
-  }
-  list(df = cleaned, droppedNVariants = before - nrow(cleaned))
-}
-
 # Apply the panel-vs-sumstats allele harmonization using the slim
-# .matchRefPanel against the ldSketch's variant info.
-.matchAgainstSketch <- function(df, ldSketch, matchMinProp) {
+# .matchRefPanel against the ldSketch's variant info. Threads the
+# variant-level filters (indels, strand-ambiguous, duplicates) through
+# so the LD-panel-anchored pass handles them in a single sweep.
+.matchAgainstSketch <- function(df, ldSketch, matchMinProp,
+                                removeIndels = FALSE,
+                                removeStrandAmbiguous = TRUE,
+                                removeDups = TRUE) {
   refVariants <- .refVariantsFromSketch(ldSketch)
   flipCandidates <- c("Z", "BETA")
   colToFlip <- intersect(flipCandidates, colnames(df))
@@ -2657,12 +2636,15 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
   if (!"A1" %in% colnames(df) || !"A2" %in% colnames(df))
     stop("summaryStatsQc: input entry must contain A1 and A2 columns.")
   res <- .matchRefPanel(
-    targetData      = df,
-    refVariants     = refVariants,
-    colToFlip       = colToFlip,
-    colToComplement = colToComplement,
-    matchMinProp    = matchMinProp,
-    removeUnmatched = TRUE)
+    targetData            = df,
+    refVariants           = refVariants,
+    colToFlip             = colToFlip,
+    colToComplement       = colToComplement,
+    matchMinProp          = matchMinProp,
+    removeUnmatched       = TRUE,
+    removeIndels          = removeIndels,
+    removeStrandAmbiguous = removeStrandAmbiguous,
+    removeDups            = removeDups)
   out <- res$harmonizedData
   if (!"chrom" %in% colnames(out) && "chr" %in% colnames(out))
     colnames(out)[colnames(out) == "chr"] <- "chrom"
@@ -2670,6 +2652,189 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
   out
 }
 
+# Variant-content filters (MAF / INFO / N). Pure data-frame column
+# filters; no Bioconductor genome packages needed.
+#
+# mafCutoff:  drop rows where MAF (or FRQ) < mafCutoff. Requires either
+#             column when mafCutoff > 0; errors if neither is present.
+# infoCutoff: drop rows where INFO < infoCutoff. Requires INFO column
+#             when infoCutoff > 0.
+# nCutoff:    drop rows whose N is more than nCutoff median-absolute-
+#             deviations from the median (a 5-MAD-from-median cap on
+#             per-variant N). Set nCutoff = 0 to disable. Rows with NA N
+#             are always dropped.
+.applyContentFilters <- function(df, mafCutoff = 0, infoCutoff = 0,
+                                 nCutoff = 5) {
+  audit <- list()
+  if (mafCutoff > 0) {
+    mafCol <- intersect(c("MAF", "FRQ"), colnames(df))[1L]
+    if (is.na(mafCol))
+      stop(".applyContentFilters: mafCutoff > 0 requires a MAF or FRQ column.")
+    before <- nrow(df)
+    mafVals <- as.numeric(df[[mafCol]])
+    # Normalise effect-allele frequency to MAF: take min(af, 1-af).
+    mafVals <- pmin(mafVals, 1 - mafVals, na.rm = FALSE)
+    df <- df[!is.na(mafVals) & mafVals >= mafCutoff, , drop = FALSE]
+    audit$mafDropped <- before - nrow(df)
+  }
+  if (infoCutoff > 0) {
+    if (!"INFO" %in% colnames(df))
+      stop(".applyContentFilters: infoCutoff > 0 requires an INFO column.")
+    before <- nrow(df)
+    infoVals <- as.numeric(df$INFO)
+    df <- df[!is.na(infoVals) & infoVals >= infoCutoff, , drop = FALSE]
+    audit$infoDropped <- before - nrow(df)
+  }
+  if (nCutoff > 0 && "N" %in% colnames(df) && nrow(df) > 0L) {
+    nVals <- as.numeric(df$N)
+    before <- nrow(df)
+    if (any(is.na(nVals))) {
+      df <- df[!is.na(nVals), , drop = FALSE]
+      nVals <- nVals[!is.na(nVals)]
+    }
+    if (length(nVals) > 0L) {
+      medN <- stats::median(nVals)
+      madN <- stats::mad(nVals, constant = 1)
+      if (madN > 0) {
+        zN <- abs(nVals - medN) / madN
+        df <- df[zN <= nCutoff, , drop = FALSE]
+      }
+    }
+    audit$nDropped <- before - nrow(df)
+  }
+  list(df = df, audit = audit)
+}
+
+# Per-row variant sanity / hygiene checks ported from MungeSumstats's
+# check_*.R series but rewritten as pure data.frame operations with no
+# genome / dbSNP dependency. Each step is gated by its own flag so a
+# caller can disable any single check.
+#
+# Steps (in order; each contributes a count to audit):
+#   - coerceNumeric: cast signed columns to numeric (catches stray "0.5"
+#       strings). NA-introducing coercions are counted.
+#   - normalizeChr:   strip "chr"/"ch" prefix, uppercase X/Y/MT, map
+#       23->X, 24->Y, M->MT. Optional dropNonstandardChr removes rows
+#       whose CHR is outside 1..22, X, Y, MT after normalization.
+#   - dropMissData:   drop rows with NA in any vital column (chrom, pos,
+#       A1, A2, and at least one of Z / BETA).
+#   - dropPOutOfRange: drop rows where P < 0 or P > 1 (corrupt p-values).
+#       Only fires when a P column is present.
+#   - clampSmallP:    floor 0 <= P <= smallPFloor to smallPFloor so
+#       -log10(P) stays finite downstream.
+#   - dropZeroEffect: drop rows where any effect column is exactly 0
+#       (BETA / LOG_ODDS / SIGNED_SUMSTAT) or OR is exactly 1. MungeSumstats
+#       treats these as degenerate / artefactual.
+#   - dropNonpositiveSe: drop rows where SE <= 0.
+.applySanityChecks <- function(df,
+                                coerceNumeric        = TRUE,
+                                normalizeChr         = TRUE,
+                                dropNonstandardChr   = TRUE,
+                                dropMissData         = TRUE,
+                                dropPOutOfRange      = TRUE,
+                                clampSmallP          = TRUE,
+                                smallPFloor          = 5e-324,
+                                dropZeroEffect       = TRUE,
+                                dropNonpositiveSe    = TRUE) {
+  audit <- list()
+  if (nrow(df) == 0L) return(list(df = df, audit = audit))
+
+  if (coerceNumeric) {
+    numericCols <- intersect(
+      c("Z", "BETA", "SE", "OR", "LOG_ODDS", "SIGNED_SUMSTAT",
+        "P", "MAF", "FRQ", "INFO", "N"),
+      colnames(df))
+    naIntroduced <- 0L
+    for (col in numericCols) {
+      orig <- df[[col]]
+      if (is.numeric(orig)) next
+      coerced <- suppressWarnings(as.numeric(orig))
+      naIntroduced <- naIntroduced +
+        sum(is.na(coerced) & !is.na(orig))
+      df[[col]] <- coerced
+    }
+    if (naIntroduced > 0L) audit$nonNumericCoerced <- naIntroduced
+  }
+
+  if (normalizeChr && "chrom" %in% colnames(df)) {
+    chr <- as.character(df$chrom)
+    chr <- sub("^chr", "", chr, ignore.case = TRUE)
+    chr <- sub("^ch",  "", chr, ignore.case = TRUE)
+    chr <- toupper(chr)
+    chr[chr == "23"] <- "X"
+    chr[chr == "24"] <- "Y"
+    chr[chr == "M"]  <- "MT"
+    df$chrom <- chr
+    if (dropNonstandardChr) {
+      before <- nrow(df)
+      standardChrs <- c(as.character(1:22), "X", "Y", "MT")
+      df <- df[chr %in% standardChrs, , drop = FALSE]
+      dropped <- before - nrow(df)
+      if (dropped > 0L) audit$nonstandardChrDropped <- dropped
+    }
+  }
+
+  if (dropMissData && nrow(df) > 0L) {
+    vital <- intersect(c("chrom", "pos", "A1", "A2"), colnames(df))
+    signedCol <- intersect(c("Z", "BETA"), colnames(df))[1L]
+    if (!is.na(signedCol)) vital <- c(vital, signedCol)
+    if (length(vital) > 0L) {
+      before <- nrow(df)
+      bad <- Reduce(`|`, lapply(vital, function(c) is.na(df[[c]])))
+      if (any(bad)) df <- df[!bad, , drop = FALSE]
+      dropped <- before - nrow(df)
+      if (dropped > 0L) audit$missDataDropped <- dropped
+    }
+  }
+
+  if (dropPOutOfRange && "P" %in% colnames(df) && nrow(df) > 0L) {
+    before <- nrow(df)
+    p <- as.numeric(df$P)
+    bad <- !is.na(p) & (p < 0 | p > 1)
+    if (any(bad)) df <- df[!bad, , drop = FALSE]
+    dropped <- before - nrow(df)
+    if (dropped > 0L) audit$pOutOfRangeDropped <- dropped
+  }
+
+  if (clampSmallP && "P" %in% colnames(df) && nrow(df) > 0L) {
+    p <- as.numeric(df$P)
+    smallMask <- !is.na(p) & p >= 0 & p < smallPFloor
+    nClamped <- sum(smallMask)
+    if (nClamped > 0L) {
+      df$P[smallMask] <- smallPFloor
+      audit$smallPClamped <- nClamped
+    }
+  }
+
+  if (dropZeroEffect && nrow(df) > 0L) {
+    effectCols <- intersect(
+      c("BETA", "LOG_ODDS", "SIGNED_SUMSTAT", "OR"), colnames(df))
+    if (length(effectCols) > 0L) {
+      before <- nrow(df)
+      badMask <- rep(FALSE, nrow(df))
+      for (col in effectCols) {
+        vals <- as.numeric(df[[col]])
+        sentinel <- if (col == "OR") 1 else 0
+        badMask <- badMask | (!is.na(vals) & vals == sentinel)
+      }
+      if (any(badMask)) df <- df[!badMask, , drop = FALSE]
+      dropped <- before - nrow(df)
+      if (dropped > 0L) audit$zeroEffectDropped <- dropped
+    }
+  }
+
+  if (dropNonpositiveSe && "SE" %in% colnames(df) && nrow(df) > 0L) {
+    before <- nrow(df)
+    se <- as.numeric(df$SE)
+    bad <- !is.na(se) & se <= 0
+    if (any(bad)) df <- df[!bad, , drop = FALSE]
+    dropped <- before - nrow(df)
+    if (dropped > 0L) audit$nonpositiveSeDropped <- dropped
+  }
+
+  list(df = df, audit = audit)
+}
+
 # Apply ldMismatchQc (SLALOM/DENTIST) against the LD sketch.
 .applyLdMismatchQcToEntry <- function(df, ldSketch, method) {
   variantIds <- df$SNP
@@ -2725,38 +2890,69 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
   entryAudit$variantsIn <- nrow(df)
   nStudyIn <- nrow(df)
 
-  # 1. MungeSumstats variant-content pass.
-  nMungeIn <- nrow(df)
-  mungeResult <- .runMungeSumstatsFilter(
+  # 1. Per-row sanity checks (drop bad P / zero effect / non-positive SE,
+  # clamp tiny P, coerce numeric, normalize CHR, drop missing-data rows).
+  # Runs before any other filtering so downstream steps see clean values.
+  nSanIn <- nrow(df)
+  sanity <- .applySanityChecks(
     df,
-    refGenome             = refGenome,
-    useDbsnpRefCheck      = opts$useDbsnpRefCheck,
-    removeIndels          = opts$removeIndels,
-    removeStrandAmbiguous = opts$removeStrandAmbiguous,
-    mafCutoff             = opts$mafCutoff,
-    infoCutoff            = opts$infoCutoff,
-    nCutoff               = opts$nCutoff,
-    convertRefGenome      = opts$convertRefGenome,
-    mungeSumstatsArgs     = opts$mungeSumstatsArgs)
-  df <- mungeResult$df
-  entryAudit$mungeSumstatsDropped <- mungeResult$droppedNVariants
-  if (nMungeIn > 0L) {
-    emit("QC track: MungeSumstats kept ", nrow(df), " of ", nMungeIn,
+    coerceNumeric      = opts$coerceNumeric,
+    normalizeChr       = opts$normalizeChr,
+    dropNonstandardChr = opts$dropNonstandardChr,
+    dropMissData       = opts$dropMissData,
+    dropPOutOfRange    = opts$dropPOutOfRange,
+    clampSmallP        = opts$clampSmallP,
+    smallPFloor        = opts$smallPFloor,
+    dropZeroEffect     = opts$dropZeroEffect,
+    dropNonpositiveSe  = opts$dropNonpositiveSe)
+  df <- sanity$df
+  if (length(sanity$audit) > 0L) entryAudit$sanityChecks <- sanity$audit
+  if (nSanIn > 0L && nrow(df) != nSanIn) {
+    emit("QC track: sanity checks kept ", nrow(df), " of ", nSanIn,
+         " variant(s).")
+  }
+
+  # 2. Variant-content filters (MAF / INFO / N). Pure column-numeric
+  # filters; the indel / strand-ambiguous variant-allele filtering happens
+  # inside .matchAgainstSketch via .matchRefPanel against the LD panel.
+  nFiltIn <- nrow(df)
+  contentFiltered <- .applyContentFilters(
+    df,
+    mafCutoff  = opts$mafCutoff,
+    infoCutoff = opts$infoCutoff,
+    nCutoff    = opts$nCutoff)
+  df <- contentFiltered$df
+  if (length(contentFiltered$audit) > 0L)
+    entryAudit$contentFilters <- contentFiltered$audit
+  if (nFiltIn > 0L && nrow(df) != nFiltIn) {
+    emit("QC track: MAF/INFO/N filters kept ", nrow(df), " of ", nFiltIn,
          " variant(s).")
   }
 
-  # 1b. Derive BETA / SE from signed Z when the input only carries Z.
+  # 3. Derive BETA / SE from signed Z when the input only carries Z.
   # Formula (Zhu et al. 2016 / RAISS): se = 1/sqrt(2 * maf * (1-maf) *
   # (N + z^2)); beta = z * se. Requires Z, MAF, and N to all be present.
   derived <- .deriveBetaSeFromZ(df)
   df <- derived$df
   if (!is.null(derived$audit)) entryAudit$betaSeFromZ <- derived$audit
 
-  # 1c. Derive P-values from Z when the entry carries Z but not P.
-  # Standard two-tailed normal: p = 2 * pnorm(-|z|).
+  # 4. Derive P-values from Z when the entry carries Z but not P.
+  # Standard two-tailed normal: p = 2 * pnorm(-|z|). Very large |Z| can
+  # underflow to P = 0, so re-apply the small-P clamp afterwards.
   if ("Z" %in% colnames(df) && !"P" %in% colnames(df)) {
     df$P <- .zToPvalue(df$Z)
     entryAudit$pValueFromZ <- sum(!is.na(df$P))
+    if (isTRUE(opts$clampSmallP) && nrow(df) > 0L) {
+      smallMask <- !is.na(df$P) & df$P >= 0 & df$P < opts$smallPFloor
+      nClamped <- sum(smallMask)
+      if (nClamped > 0L) {
+        df$P[smallMask] <- opts$smallPFloor
+        prev <- entryAudit$sanityChecks$smallPClamped %||% 0L
+        if (is.null(entryAudit$sanityChecks))
+          entryAudit$sanityChecks <- list()
+        entryAudit$sanityChecks$smallPClamped <- prev + nClamped
+      }
+    }
   }
 
   # 2. keepVariants subset.
@@ -2788,7 +2984,12 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
 
   # 5. Panel-vs-sumstats allele harmonization.
   nHarmIn <- nrow(df)
-  df <- .matchAgainstSketch(df, ldSketch, matchMinProp = opts$matchMinProp)
+  df <- .matchAgainstSketch(
+    df, ldSketch,
+    matchMinProp          = opts$matchMinProp,
+    removeIndels          = opts$removeIndels,
+    removeStrandAmbiguous = opts$removeStrandAmbiguous,
+    removeDups            = TRUE)
   harmCounts <- attr(df, "qcCounts")
   attr(df, "qcCounts") <- NULL
   entryAudit$matchedAgainstSketch <- nrow(df)
@@ -2911,9 +3112,33 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
   # because imputation adds variants back, so "in -> out" is not monotonic.
   # Skipped steps omitted.
   removedSegs <- character(0)
-  if (entryAudit$mungeSumstatsDropped > 0L)
-    removedSegs <- c(removedSegs,
-                     paste0("munge ", entryAudit$mungeSumstatsDropped))
+  sc <- entryAudit$sanityChecks
+  if (!is.null(sc)) {
+    if (!is.null(sc$nonstandardChrDropped) && sc$nonstandardChrDropped > 0L)
+      removedSegs <- c(removedSegs,
+                       paste0("nonstdChr ", sc$nonstandardChrDropped))
+    if (!is.null(sc$missDataDropped) && sc$missDataDropped > 0L)
+      removedSegs <- c(removedSegs,
+                       paste0("missData ", sc$missDataDropped))
+    if (!is.null(sc$pOutOfRangeDropped) && sc$pOutOfRangeDropped > 0L)
+      removedSegs <- c(removedSegs,
+                       paste0("badP ", sc$pOutOfRangeDropped))
+    if (!is.null(sc$zeroEffectDropped) && sc$zeroEffectDropped > 0L)
+      removedSegs <- c(removedSegs,
+                       paste0("zeroEffect ", sc$zeroEffectDropped))
+    if (!is.null(sc$nonpositiveSeDropped) && sc$nonpositiveSeDropped > 0L)
+      removedSegs <- c(removedSegs,
+                       paste0("badSE ", sc$nonpositiveSeDropped))
+  }
+  cf <- entryAudit$contentFilters
+  if (!is.null(cf)) {
+    if (!is.null(cf$mafDropped) && cf$mafDropped > 0L)
+      removedSegs <- c(removedSegs, paste0("maf ", cf$mafDropped))
+    if (!is.null(cf$infoDropped) && cf$infoDropped > 0L)
+      removedSegs <- c(removedSegs, paste0("info ", cf$infoDropped))
+    if (!is.null(cf$nDropped) && cf$nDropped > 0L)
+      removedSegs <- c(removedSegs, paste0("nCutoff ", cf$nDropped))
+  }
   if (qcCount$harmDropped > 0L)
     removedSegs <- c(removedSegs,
                      paste0("harmonization ", qcCount$harmDropped))
@@ -2943,13 +3168,16 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
 #' Run QC on a SumStats Collection
 #'
 #' Applies a single QC pass to a \code{QtlSumStats} or \code{GwasSumStats}
-#' collection: delegates variant-content QC (column standardization,
-#' indels, strand-ambiguous, MAF/INFO/N filters, p-value & effect-size
-#' sanity checks, optional dbSNP / liftover) to
-#' \code{MungeSumstats::format_sumstats()}, then runs pecotmr-specific
-#' steps (\code{skipRegion}, optional PIP screen, panel harmonization
-#' against the \code{ldSketch} via \code{.matchRefPanel}, optional
-#' SLALOM/DENTIST LD-mismatch QC, optional RAISS imputation).
+#' collection: per-row sanity checks via \code{.applySanityChecks} (drop
+#' rows with out-of-range / zero P, BETA == 0, SE <= 0, NA in vital
+#' columns; clamp tiny P; normalize CHR; coerce signed columns to
+#' numeric), variant-content filters (MAF / INFO / N) via
+#' \code{.applyContentFilters}, optional \code{skipRegion} drop, optional
+#' PIP screen, panel-vs-sumstats allele harmonization against the
+#' \code{ldSketch} via \code{.matchRefPanel} (which handles indels,
+#' strand-ambiguous variants, sign / strand flips, and duplicate drops in
+#' a single sweep), optional SLALOM/DENTIST LD-mismatch QC, and optional
+#' RAISS imputation. No Bioconductor genome / dbSNP packages required.
 #'
 #' The returned collection has its \code{qcInfo} slot populated with a
 #' per-entry audit record (variant counts, drop counts at each step,
@@ -2964,24 +3192,19 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
 #'
 #' @param sumstats A \code{QtlSumStats} or \code{GwasSumStats}
 #'   collection.
-#' @param useDbsnpRefCheck One-shot opt-in to MungeSumstats's
-#'   dbSNP-based reference-genome / allele-flip / biallelic-filter
-#'   checks. When \code{TRUE}, sets \code{on_ref_genome},
-#'   \code{infer_eff_direction}, \code{allele_flip_check}, and
-#'   \code{bi_allelic_filter} all to \code{TRUE} simultaneously.
-#'   Default \code{FALSE} (trust input alleles, lighter dependency
-#'   footprint).
 #' @param removeIndels Logical (length 1). When \code{TRUE}, drop
-#'   indels. Default \code{FALSE} (match MungeSumstats default).
+#'   indels during panel harmonization. Default \code{FALSE}.
 #' @param removeStrandAmbiguous Logical (length 1). When \code{TRUE},
 #'   drop A/T and C/G strand-ambiguous variants. Default \code{TRUE}.
 #' @param mafCutoff Numeric (length 1). MAF threshold (variants with
 #'   \code{MAF < mafCutoff} are dropped). Default 0. Requires \code{MAF}
-#'   column.
+#'   or \code{FRQ} column when non-zero.
 #' @param infoCutoff Numeric (length 1). INFO score threshold. Default
-#'   0. Requires \code{INFO} column.
-#' @param nCutoff Numeric (length 1). MungeSumstats \code{N_std} value;
-#'   sample-size deviation threshold. Default 5.
+#'   0. Requires \code{INFO} column when non-zero.
+#' @param nCutoff Numeric (length 1). Sample-size deviation threshold:
+#'   drop variants whose \code{N} is more than \code{nCutoff}
+#'   median-absolute-deviations from the median. Set to 0 to disable.
+#'   Default 5.
 #' @param keepVariants Optional character vector of variant IDs (SNP
 #'   column) to retain prior to harmonization.
 #' @param skipRegion Optional character vector of \code{"chr:start-end"}
@@ -3000,19 +3223,35 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
 #'   sketch is not yet fully wired for the new path; the option is
 #'   accepted but currently emits a warning and is skipped.)
 #' @param imputeOpts Named list of RAISS parameters.
-#' @param convertRefGenome Optional character (\code{"GRCh37"} or
-#'   \code{"GRCh38"}) to liftover the sumstats to via MungeSumstats.
-#'   \code{NULL} (default) skips liftover.
 #' @param matchMinProp Minimum proportion of LD panel variants that must
 #'   be matched by the sumstats; default 0.
-#' @param mungeSumstatsArgs Optional named list of pass-through args to
-#'   \code{MungeSumstats::format_sumstats()}. Any name supplied here
-#'   overrides the value the curated knobs would have set.
+#' @param coerceNumeric Logical. Coerce signed columns
+#'   (Z/BETA/SE/OR/LOG_ODDS/SIGNED_SUMSTAT/P/MAF/FRQ/INFO/N) to numeric.
+#'   Default \code{TRUE}.
+#' @param normalizeChr Logical. Strip \code{"chr"} prefix, uppercase the
+#'   chromosome label, and map 23->X, 24->Y, M->MT. Default \code{TRUE}.
+#' @param dropNonstandardChr Logical. Drop variants whose CHR (after
+#'   normalization) is outside 1..22, X, Y, MT. Default \code{TRUE}.
+#' @param dropMissData Logical. Drop rows with NA in any vital column
+#'   (chrom, pos, A1, A2, and at least one of Z / BETA). Default
+#'   \code{TRUE}.
+#' @param dropPOutOfRange Logical. Drop rows where \code{P < 0} or
+#'   \code{P > 1}. Default \code{TRUE}.
+#' @param clampSmallP Logical. Floor non-negative P values below
+#'   \code{smallPFloor} to \code{smallPFloor} so \code{-log10(P)} stays
+#'   finite. Applied to both input and Z-derived P values. Default
+#'   \code{TRUE}.
+#' @param smallPFloor Numeric (length 1). Floor for \code{clampSmallP}.
+#'   Default \code{5e-324} (R's smallest positive double).
+#' @param dropZeroEffect Logical. Drop rows where any effect column is
+#'   exactly 0 (\code{BETA}, \code{LOG_ODDS}, \code{SIGNED_SUMSTAT}) or
+#'   \code{OR} is exactly 1. Default \code{TRUE}.
+#' @param dropNonpositiveSe Logical. Drop rows where \code{SE <= 0}.
+#'   Default \code{TRUE}.
 #' @return A new \code{QtlSumStats} / \code{GwasSumStats} with cleaned
 #'   entries and \code{qcInfo} populated.
 #' @export
 summaryStatsQc <- function(sumstats,
-                           useDbsnpRefCheck       = FALSE,
                            removeIndels           = FALSE,
                            removeStrandAmbiguous  = TRUE,
                            mafCutoff              = 0,
@@ -3029,9 +3268,16 @@ summaryStatsQc <- function(sumstats,
                                                         r2Threshold = 0.6,
                                                         minimumLd = 5,
                                                         lamb = 0.01),
-                           convertRefGenome       = NULL,
                            matchMinProp           = 0,
-                           mungeSumstatsArgs      = list()) {
+                           coerceNumeric          = TRUE,
+                           normalizeChr           = TRUE,
+                           dropNonstandardChr     = TRUE,
+                           dropMissData           = TRUE,
+                           dropPOutOfRange        = TRUE,
+                           clampSmallP            = TRUE,
+                           smallPFloor            = 5e-324,
+                           dropZeroEffect         = TRUE,
+                           dropNonpositiveSe      = TRUE) {
   if (!methods::is(sumstats, "QtlSumStats") &&
       !methods::is(sumstats, "GwasSumStats")) {
     stop("summaryStatsQc requires a QtlSumStats or GwasSumStats input.")
@@ -3044,15 +3290,13 @@ summaryStatsQc <- function(sumstats,
     cols <- colnames(mc)
     if (mafCutoff > 0 && !any(c("MAF", "FRQ") %in% cols))
       stop("summaryStatsQc: mafCutoff > 0 requires every entry to carry a ",
-           "MAF or FRQ column; entry ", i, " does not. MungeSumstats ",
-           "normalises FRQ to MAF internally during format_sumstats.")
+           "MAF or FRQ column; entry ", i, " does not.")
     if (infoCutoff > 0 && !"INFO" %in% cols)
       stop("summaryStatsQc: infoCutoff > 0 requires every entry to carry an ",
            "INFO column; entry ", i, " does not.")
   }
 
   opts <- list(
-    useDbsnpRefCheck       = useDbsnpRefCheck,
     removeIndels           = removeIndels,
     removeStrandAmbiguous  = removeStrandAmbiguous,
     mafCutoff              = mafCutoff,
@@ -3065,9 +3309,16 @@ summaryStatsQc <- function(sumstats,
     alleleFlipKriging      = alleleFlipKriging,
     impute                 = impute,
     imputeOpts             = imputeOpts,
-    convertRefGenome       = convertRefGenome,
     matchMinProp           = matchMinProp,
-    mungeSumstatsArgs      = mungeSumstatsArgs,
+    coerceNumeric          = coerceNumeric,
+    normalizeChr           = normalizeChr,
+    dropNonstandardChr     = dropNonstandardChr,
+    dropMissData           = dropMissData,
+    dropPOutOfRange        = dropPOutOfRange,
+    clampSmallP            = clampSmallP,
+    smallPFloor            = smallPFloor,
+    dropZeroEffect         = dropZeroEffect,
+    dropNonpositiveSe      = dropNonpositiveSe,
     nForPip                = NULL)
 
   newEntries <- vector("list", nrow(sumstats))
@@ -3100,7 +3351,6 @@ summaryStatsQc <- function(sumstats,
   qcInfo <- list(
     timestamp        = NA_character_,
     options          = list(
-      useDbsnpRefCheck      = useDbsnpRefCheck,
       removeIndels          = removeIndels,
       removeStrandAmbiguous = removeStrandAmbiguous,
       mafCutoff             = mafCutoff,
@@ -3109,9 +3359,16 @@ summaryStatsQc <- function(sumstats,
       zMismatchQc           = zMismatchQc,
       alleleFlipKriging     = alleleFlipKriging,
       impute                = impute,
-      convertRefGenome      = convertRefGenome),
-    entryAudit       = entryAudits,
-    mungeSumstatsArgs = mungeSumstatsArgs)
+      coerceNumeric         = coerceNumeric,
+      normalizeChr          = normalizeChr,
+      dropNonstandardChr    = dropNonstandardChr,
+      dropMissData          = dropMissData,
+      dropPOutOfRange       = dropPOutOfRange,
+      clampSmallP           = clampSmallP,
+      smallPFloor           = smallPFloor,
+      dropZeroEffect        = dropZeroEffect,
+      dropNonpositiveSe     = dropNonpositiveSe),
+    entryAudit       = entryAudits)
 
   # Rebuild the SumStats with new entries and qcInfo.
   if (methods::is(sumstats, "GwasSumStats")) {
diff --git a/tests/testthat/test_ctwasPipeline.R b/tests/testthat/test_ctwasPipeline.R
index f0ca21d9..135a93e5 100644
--- a/tests/testthat/test_ctwasPipeline.R
+++ b/tests/testthat/test_ctwasPipeline.R
@@ -7,13 +7,23 @@ context("ctwasPipeline")
 # ===========================================================================
 
 .ctp_makeHandle <- function(snp_n = 6L, n_samples = 30L) {
+  # Use a per-process tempfile so .ctwasLdPanelKey's file.exists check
+  # succeeds against the fixture handle (real LD-sketch payloads exist
+  # by construction; mock fixtures need an equivalent on-disk anchor).
+  gdsPath <- file.path(tempdir(), "ctp_sketch.gds")
+  if (!file.exists(gdsPath)) file.create(gdsPath)
+  positions <- seq(100L, by = 100L, length.out = snp_n)
+  # SNP IDs follow the canonical chr:pos:A2:A1 layout so allele
+  # harmonization inside .ctwasBuildWeights / .ctwasHarmonizeWeights can
+  # parse them via parseVariantId().
+  snpIds <- sprintf("chr1:%d:G:A", positions)
   new("GenotypeHandle",
-    path = "/tmp/sketch.gds",
+    path = gdsPath,
     format = "gds",
     snpInfo = data.frame(
-      SNP = paste0("v", seq_len(snp_n)),
+      SNP = snpIds,
       CHR = rep("1", snp_n),
-      BP  = seq(100L, by = 100L, length.out = snp_n),
+      BP  = positions,
       A1  = rep("A", snp_n),
       A2  = rep("G", snp_n),
       stringsAsFactors = FALSE),
@@ -22,6 +32,10 @@ context("ctwasPipeline")
     pgenPtr = NULL)
 }
 
+# Canonical SNP IDs the fixtures use. .ctp_makeHandle() emits these in
+# chr:pos:A2:A1 form; tests reference them by index via .ctp_snpId(i).
+.ctp_snpId <- function(i) sprintf("chr1:%d:G:A", 100L * i)
+
 .ctp_mockExtractor <- function(seed = 5, n_samples = 30L) {
   function(handle, snpIdx, meanImpute = TRUE) {
     set.seed(seed)
@@ -54,7 +68,7 @@ context("ctwasPipeline")
     ranges = IRanges::IRanges(start = seq(100L, by = 100L, length.out = 6L),
                               width = 1L))
   S4Vectors::mcols(gr) <- S4Vectors::DataFrame(
-    SNP = paste0("v", 1:6),
+    SNP = vapply(1:6, .ctp_snpId, character(1)),
     A1  = rep("A", 6), A2  = rep("G", 6),
     Z   = rnorm(6), N = rep(1000L, 6))
   GwasSumStats(
@@ -67,7 +81,7 @@ context("ctwasPipeline")
 
 .ctp_makeTwasWeights <- function() {
   e <- TwasWeightsEntry(
-    variantIds = paste0("v", 1:5),
+    variantIds = vapply(1:5, .ctp_snpId, character(1)),
     weights    = c(0.1, 0.05, -0.2, 0.3, 0.0))
   TwasWeights(
     study    = "Q1", context = "c1", trait = "t1", method = "susie",
@@ -84,57 +98,139 @@ context("ctwasPipeline")
 # building helpers directly (they don't gate on ctwas).
 # ===========================================================================
 
-test_that("ctwasPipeline: rejects non-GwasSumStats gwasSumStats", {
+# Helper: minimal two-block input set for the multi-block API tests.
+.ctp_makeMultiBlockInputs <- function(qc = TRUE) {
+  ss <- .ctp_makeGwasSumstats(qc = qc)
+  tw <- .ctp_makeTwasWeights()
+  list(
+    gwasSumStats = list(block1 = ss, block2 = ss),
+    twasWeights  = list(block1 = tw, block2 = tw))
+}
+
+test_that("ctwasPipeline: rejects a bare (non-list) GwasSumStats", {
   skip_if_not_installed("ctwas")
   expect_error(
-    ctwasPipeline(gwasSumStats = "no",
+    ctwasPipeline(gwasSumStats = .ctp_makeGwasSumstats(),
                   twasWeights  = .ctp_makeTwasWeights()),
-    "must be a GwasSumStats"
+    "NAMED LIST of GwasSumStats"
   )
 })
 
-test_that("ctwasPipeline: rejects un-QCd GwasSumStats", {
+test_that("ctwasPipeline: rejects a single-block named list", {
   skip_if_not_installed("ctwas")
   expect_error(
-    ctwasPipeline(gwasSumStats = .ctp_makeGwasSumstats(qc = FALSE),
-                  twasWeights  = .ctp_makeTwasWeights()),
+    ctwasPipeline(gwasSumStats = list(block1 = .ctp_makeGwasSumstats()),
+                  twasWeights  = list(block1 = .ctp_makeTwasWeights())),
+    "at least two LD blocks"
+  )
+})
+
+test_that("ctwasPipeline: rejects un-QCd GwasSumStats in any region", {
+  skip_if_not_installed("ctwas")
+  ss_qc   <- .ctp_makeGwasSumstats(qc = TRUE)
+  ss_noqc <- .ctp_makeGwasSumstats(qc = FALSE)
+  tw      <- .ctp_makeTwasWeights()
+  expect_error(
+    ctwasPipeline(gwasSumStats = list(block1 = ss_qc, block2 = ss_noqc),
+                  twasWeights  = list(block1 = tw,    block2 = tw)),
     "has no QC record"
   )
 })
 
-test_that("ctwasPipeline: rejects missing twasWeights", {
+test_that("ctwasPipeline: rejects twasWeights keys not present in gwasSumStats", {
   skip_if_not_installed("ctwas")
+  ss <- .ctp_makeGwasSumstats()
+  tw <- .ctp_makeTwasWeights()
   expect_error(
-    ctwasPipeline(gwasSumStats = .ctp_makeGwasSumstats()),
-    "must be a TwasWeights"
+    ctwasPipeline(gwasSumStats = list(blockA = ss, blockB = ss),
+                  twasWeights  = list(blockA = tw, blockC = tw)),
+    "key.*not present in.*gwasSumStats"
   )
 })
 
-test_that("ctwasPipeline: rejects non-GRanges twasZ", {
+test_that("assembleCtwasInputs: allows twasWeights keys to be a subset of gwasSumStats", {
+  ss <- .ctp_makeGwasSumstats()
+  tw <- .ctp_makeTwasWeights()
+  local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
+                        .package = "pecotmr")
+  # Two blocks supply zSnp; only block1 supplies TwasWeights.
+  inputs <- assembleCtwasInputs(
+    gwasSumStats = list(block1 = ss, block2 = ss),
+    twasWeights  = list(block1 = tw))
+  # Both blocks appear in region_info / snp_map (SNP-only block2 contributes
+  # its zSnp), but the weights list only has block1-keyed entries.
+  expect_setequal(inputs$region_info$region_id, c("block1", "block2"))
+  expect_setequal(names(inputs$snp_map), c("block1", "block2"))
+  expect_true(all(grepl("^block1\\|", names(inputs$weights))))
+})
+
+test_that("assembleCtwasInputs: filters TwasWeights against UNION of all blocks' GWAS variants", {
+  # Build two blocks with NON-OVERLAPPING GWAS variants. Block 1 covers
+  # v1..v3, block 2 covers v4..v6. The gene's weight spans v2..v5 — i.e.
+  # crosses the block boundary. With a per-block filter the gene would
+  # lose v4..v5 (block-2 variants); with a global-union filter all four
+  # weight variants survive.
+  mkBlockGss <- function(study, snpIds, qc = TRUE) {
+    gr <- GenomicRanges::GRanges(
+      seqnames = "chr1",
+      ranges = IRanges::IRanges(start = as.integer(gsub(".*:([0-9]+):.*", "\\1", snpIds)),
+                                width = 1L))
+    S4Vectors::mcols(gr) <- S4Vectors::DataFrame(
+      SNP = snpIds,
+      A1  = rep("A", length(snpIds)), A2 = rep("G", length(snpIds)),
+      Z   = rnorm(length(snpIds)), N = rep(1000L, length(snpIds)))
+    GwasSumStats(study = study, entry = list(gr), genome = "hg19",
+                 ldSketch = .ctp_makeHandle(),
+                 qcInfo   = if (qc) list(step1 = "ok") else list())
+  }
+  ss1 <- mkBlockGss("G1", vapply(1:3, .ctp_snpId, character(1)))
+  ss2 <- mkBlockGss("G2", vapply(4:6, .ctp_snpId, character(1)))
+  # Cross-boundary weights: v2..v5 (4 variants spanning both blocks).
+  crossEntry <- TwasWeightsEntry(
+    variantIds = vapply(2:5, .ctp_snpId, character(1)),
+    weights    = c(0.1, 0.2, 0.3, 0.4))
+  tw <- TwasWeights(
+    study = "G1", context = "c1", trait = "t1", method = "susie",
+    entry = list(crossEntry), ldSketch = .ctp_makeHandle())
+  local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
+                        .package = "pecotmr")
+  inputs <- assembleCtwasInputs(
+    gwasSumStats = list(block1 = ss1, block2 = ss2),
+    twasWeights  = list(block1 = tw))
+  # All four cross-boundary variants should appear in the weights list
+  # (would be only 2 with a per-block filter).
+  wgt <- inputs$weights[[1L]]$wgt
+  expect_equal(nrow(wgt), 4L)
+  expect_setequal(rownames(wgt), vapply(2:5, .ctp_snpId, character(1)))
+})
+
+test_that("ctwasPipeline: rejects bare (non-list) TwasWeights", {
   skip_if_not_installed("ctwas")
   expect_error(
-    ctwasPipeline(gwasSumStats = .ctp_makeGwasSumstats(),
-                  twasWeights  = .ctp_makeTwasWeights(),
-                  twasZ        = "not a GRanges"),
-    "must be a GRanges"
+    ctwasPipeline(gwasSumStats = list(block1 = .ctp_makeGwasSumstats(),
+                                       block2 = .ctp_makeGwasSumstats()),
+                  twasWeights  = .ctp_makeTwasWeights()),
+    "NAMED LIST of TwasWeights"
   )
 })
 
-test_that("ctwasPipeline: rejects bad regionId", {
+test_that("ctwasPipeline: rejects non-GRanges twasZ", {
   skip_if_not_installed("ctwas")
+  inp <- .ctp_makeMultiBlockInputs()
   expect_error(
-    ctwasPipeline(gwasSumStats = .ctp_makeGwasSumstats(),
-                  twasWeights  = .ctp_makeTwasWeights(),
-                  regionId     = ""),
-    "non-empty character"
+    ctwasPipeline(gwasSumStats = inp$gwasSumStats,
+                  twasWeights  = inp$twasWeights,
+                  twasZ        = "not a GRanges"),
+    "must be a GRanges"
   )
 })
 
 test_that("ctwasPipeline: rejects unknown groupPriorVarStructure value", {
   skip_if_not_installed("ctwas")
+  inp <- .ctp_makeMultiBlockInputs()
   expect_error(
-    ctwasPipeline(gwasSumStats = .ctp_makeGwasSumstats(),
-                  twasWeights  = .ctp_makeTwasWeights(),
+    ctwasPipeline(gwasSumStats = inp$gwasSumStats,
+                  twasWeights  = inp$twasWeights,
                   groupPriorVarStructure = "bogus"),
     "'arg'"
   )
@@ -174,7 +270,7 @@ test_that(".ctwasBuildZSnp: produces a flat data.frame keyed by SNP/study", {
   expect_equal(nrow(df), 6L)
   expect_setequal(colnames(df),
                   c("id", "chrom", "pos", "A1", "A2", "z", "study"))
-  expect_setequal(df$id, paste0("v", 1:6))
+  expect_setequal(df$id, vapply(1:6, .ctp_snpId, character(1)))
   expect_setequal(unique(df$study), "G1")
 })
 
@@ -202,6 +298,96 @@ test_that(".ctwasSnpInfoForBlock: returns ctwas-required columns chrom/id/pos/al
                   c("chrom", "id", "pos", "alt", "ref"))
 })
 
+test_that(".ctwasLdPanelKey: returns the on-disk path for an existing GDS sketch", {
+  handle <- .ctp_makeHandle()
+  key <- pecotmr:::.ctwasLdPanelKey(handle)
+  expect_true(file.exists(key))
+  expect_equal(key, getPath(handle))
+})
+
+test_that(".ctwasLdPanelKey: errors when no candidate file exists", {
+  ghost <- new("GenotypeHandle",
+    path = file.path(tempdir(), "does_not_exist_for_test.pgen_stem"),
+    format = "plink2",
+    snpInfo = data.frame(SNP = "v1", CHR = "1", BP = 100L, A1 = "A",
+                          A2 = "G", stringsAsFactors = FALSE),
+    nSamples = 1L, sampleIds = "s1", pgenPtr = NULL)
+  expect_error(pecotmr:::.ctwasLdPanelKey(ghost),
+                "could not derive an existing LD-file token")
+})
+
+test_that(".ctwasResolveMethod: caller-supplied method wins when present", {
+  e <- TwasWeightsEntry(variantIds = paste0("v", 1:3),
+                         weights = c(0.1, 0.2, 0.3))
+  tw <- TwasWeights(
+    study = "Q1", context = "c1", trait = "t1", method = "mrash",
+    entry = list(e), ldSketch = .ctp_makeHandle())
+  expect_equal(pecotmr:::.ctwasResolveMethod(list(r1 = tw), "mrash"),
+                "mrash")
+})
+
+test_that(".ctwasResolveMethod: caller-supplied unknown method errors", {
+  e <- TwasWeightsEntry(variantIds = paste0("v", 1:3),
+                         weights = c(0.1, 0.2, 0.3))
+  tw <- TwasWeights(
+    study = "Q1", context = "c1", trait = "t1", method = "mrash",
+    entry = list(e), ldSketch = .ctp_makeHandle())
+  expect_error(pecotmr:::.ctwasResolveMethod(list(r1 = tw), "bogus"),
+                "not present in TwasWeights")
+})
+
+test_that(".ctwasResolveMethod: defaults to ensemble when present among multiple", {
+  mkTw <- function(m) {
+    e <- TwasWeightsEntry(variantIds = paste0("v", 1:3),
+                           weights = c(0.1, 0.2, 0.3))
+    TwasWeights(study = "Q1", context = "c1", trait = "t1", method = m,
+                 entry = list(e), ldSketch = .ctp_makeHandle())
+  }
+  # Build a multi-method TwasWeights by stitching two methods together.
+  tw <- TwasWeights(
+    study   = c("Q1", "Q1"), context = c("c1", "c1"),
+    trait   = c("t1", "t1"), method  = c("mrash", "ensemble"),
+    entry   = list(
+      TwasWeightsEntry(variantIds = paste0("v", 1:3), weights = c(0.1, 0.2, 0.3)),
+      TwasWeightsEntry(variantIds = paste0("v", 1:3), weights = c(0.4, 0.5, 0.6))),
+    ldSketch = .ctp_makeHandle())
+  expect_equal(pecotmr:::.ctwasResolveMethod(list(r1 = tw)), "ensemble")
+})
+
+test_that(".ctwasResolveMethod: single method auto-picked when only one available", {
+  e <- TwasWeightsEntry(variantIds = paste0("v", 1:3),
+                         weights = c(0.1, 0.2, 0.3))
+  tw <- TwasWeights(
+    study = "Q1", context = "c1", trait = "t1", method = "mrash",
+    entry = list(e), ldSketch = .ctp_makeHandle())
+  expect_equal(pecotmr:::.ctwasResolveMethod(list(r1 = tw)), "mrash")
+})
+
+test_that(".ctwasResolveMethod: multi-method + no ensemble + no caller method errors", {
+  tw <- TwasWeights(
+    study   = c("Q1", "Q1"), context = c("c1", "c1"),
+    trait   = c("t1", "t1"), method  = c("mrash", "susie"),
+    entry   = list(
+      TwasWeightsEntry(variantIds = paste0("v", 1:3), weights = c(0.1, 0.2, 0.3)),
+      TwasWeightsEntry(variantIds = paste0("v", 1:3), weights = c(0.4, 0.5, 0.6))),
+    ldSketch = .ctp_makeHandle())
+  expect_error(pecotmr:::.ctwasResolveMethod(list(r1 = tw)),
+                "Supply a `method` argument")
+})
+
+test_that(".ctwasFilterMethod: subsets rows to the requested method", {
+  tw <- TwasWeights(
+    study   = c("Q1", "Q1"), context = c("c1", "c1"),
+    trait   = c("t1", "t1"), method  = c("mrash", "susie"),
+    entry   = list(
+      TwasWeightsEntry(variantIds = paste0("v", 1:3), weights = c(0.1, 0.2, 0.3)),
+      TwasWeightsEntry(variantIds = paste0("v", 1:3), weights = c(0.4, 0.5, 0.6))),
+    ldSketch = .ctp_makeHandle())
+  twSub <- pecotmr:::.ctwasFilterMethod(tw, "susie")
+  expect_equal(nrow(twSub), 1L)
+  expect_equal(as.character(twSub$method), "susie")
+})
+
 # Build an ldPanel fixture (matches .ctwasComputeFullPanelLd's return
 # shape) for the 6-SNP toy panel from .ctp_makeHandle().
 .ctp_makeLdPanel <- function(snp_n = 6L) {
@@ -209,12 +395,186 @@ test_that(".ctwasSnpInfoForBlock: returns ctwas-required columns chrom/id/pos/al
   snpInfo <- pecotmr:::.ctwasSnpInfoForBlock(h)
   R <- diag(1, snp_n)
   dimnames(R) <- list(snpInfo$id, snpInfo$id)
-  list(R = R, snpInfo = snpInfo)
+  # Unit dosage variance — sqrt(1) = 1, so the variance-scaling step in
+  # .ctwasBuildWeights is a no-op for this fixture.
+  variance <- setNames(rep(1, snp_n), snpInfo$id)
+  list(R = R, snpInfo = snpInfo, variance = variance)
 }
 
+test_that(".ctwasBuildWeights: scales non-standardized weights by sqrt(variance)", {
+  panel <- .ctp_makeLdPanel()
+  # Replace the default unit variance with non-trivial values; raw
+  # weights should be multiplied by sqrt(variance) before reaching the
+  # final wgt matrix.
+  panel$variance <- setNames(c(0.5, 1, 2, 4, 8, 16), panel$snpInfo$id)
+  ids5  <- vapply(1:5, .ctp_snpId, character(1))
+  rawW  <- c(0.1, 0.2, 0.3, 0.4, 0.5)
+  tw <- TwasWeights(
+    study = "Q1", context = "c1", trait = "t1", method = "susie",
+    entry = list(TwasWeightsEntry(variantIds = ids5,
+                                    weights    = rawW)),
+    ldSketch = .ctp_makeHandle())
+  wl <- pecotmr:::.ctwasBuildWeights(tw, panel)
+  expected <- unname(rawW * sqrt(panel$variance[ids5]))
+  expect_equal(as.numeric(wl[[1L]]$wgt), expected, tolerance = 1e-12)
+})
+
+test_that(".ctwasBuildWeights: standardized weights bypass variance scaling", {
+  panel <- .ctp_makeLdPanel()
+  panel$variance <- setNames(c(0.5, 1, 2, 4, 8, 16), panel$snpInfo$id)
+  ids5 <- vapply(1:5, .ctp_snpId, character(1))
+  rawW <- c(0.1, 0.2, 0.3, 0.4, 0.5)
+  tw <- TwasWeights(
+    study = "Q1", context = "c1", trait = "t1", method = "susie",
+    entry = list(TwasWeightsEntry(variantIds = ids5,
+                                    weights      = rawW,
+                                    standardized = TRUE)),
+    ldSketch = .ctp_makeHandle())
+  wl <- pecotmr:::.ctwasBuildWeights(tw, panel)
+  expect_equal(as.numeric(wl[[1L]]$wgt), rawW, tolerance = 1e-12)
+})
+
+# Build an LD-panel fixture with realistic chr:pos:A2:A1 variant IDs so
+# `.ctwasHarmonizeWeights` (which calls parseVariantId + .matchRefPanel)
+# can do real allele matching.
+.ctp_makeAllelePanel <- function() {
+  ids <- c("1:100:C:T", "1:200:G:A", "1:300:A:G", "1:400:T:C")
+  snpInfo <- data.frame(
+    chrom = 1L,
+    id    = ids,
+    pos   = c(100L, 200L, 300L, 400L),
+    alt   = c("T", "A", "G", "C"),   # A1 (effect)
+    ref   = c("C", "G", "A", "T"),   # A2 (other)
+    stringsAsFactors = FALSE)
+  R <- diag(1, 4); dimnames(R) <- list(ids, ids)
+  list(R = R, snpInfo = snpInfo,
+       variance = setNames(rep(1, 4), ids))
+}
+
+test_that(".ctwasHarmonizeWeights: sign-flips weights for swapped A1/A2", {
+  panel <- .ctp_makeAllelePanel()
+  refVariants <- data.frame(
+    chrom = panel$snpInfo$chrom, pos = panel$snpInfo$pos,
+    A2 = panel$snpInfo$ref, A1 = panel$snpInfo$alt,
+    variant_id = panel$snpInfo$id, stringsAsFactors = FALSE)
+  # Variant 1: alleles match panel ("1:100:C:T" — same A2/A1 ordering)
+  # Variant 2: A1/A2 swapped vs panel ("1:200:A:G" — flips relative to "1:200:G:A")
+  # Output variant_id values are rebuilt via formatVariantId(), which
+  # always emits a `chr` prefix.
+  res <- pecotmr:::.ctwasHarmonizeWeights(
+    origVids = c("1:100:C:T", "1:200:A:G"),
+    origW    = c(0.5, 0.3),
+    refVariants = refVariants)
+  expect_equal(nrow(res), 2L)
+  # Variant 1 keeps its sign; variant 2 should be sign-flipped to -0.3.
+  matches <- match(c("chr1:100:C:T", "chr1:200:G:A"), res$variant_id)
+  expect_equal(res$w[matches[[1L]]],  0.5, tolerance = 1e-12)
+  expect_equal(res$w[matches[[2L]]], -0.3, tolerance = 1e-12)
+})
+
+test_that(".ctwasHarmonizeWeights: drops variants not present in the panel", {
+  panel <- .ctp_makeAllelePanel()
+  refVariants <- data.frame(
+    chrom = panel$snpInfo$chrom, pos = panel$snpInfo$pos,
+    A2 = panel$snpInfo$ref, A1 = panel$snpInfo$alt,
+    variant_id = panel$snpInfo$id, stringsAsFactors = FALSE)
+  res <- pecotmr:::.ctwasHarmonizeWeights(
+    origVids = c("1:100:C:T", "1:999:A:T"),  # 1:999 not in panel
+    origW    = c(0.5, 0.3),
+    refVariants = refVariants)
+  expect_equal(nrow(res), 1L)
+  expect_equal(res$variant_id, "chr1:100:C:T")
+})
+
+test_that(".ctwasIsSusieFit: recognizes the susie intermediate shape", {
+  fits <- list(lbf_variable = matrix(0, 2, 3),
+                mu = matrix(0, 2, 3),
+                X_column_scale_factors = rep(1, 3))
+  expect_true(pecotmr:::.ctwasIsSusieFit(fits))
+  expect_false(pecotmr:::.ctwasIsSusieFit(NULL))
+  expect_false(pecotmr:::.ctwasIsSusieFit(list(lbf_variable = matrix(0, 2, 3))))
+})
+
+test_that(".ctwasRenormalizeSusieWeights: lbfToAlpha + colSums recomputation", {
+  # Original fit covers 4 variants; drop variant 4, renormalize over {1,2,3}.
+  origVids <- paste0("v", 1:4)
+  origW    <- c(0.1, 0.2, 0.3, 0.4)
+  # Toy lbf_variable: 2 effects, 4 variants. Constructed so the kept
+  # subset yields easily-predictable softmax weights.
+  lbf <- rbind(c(0, 0, 0, 100),    # effect 1: only v4 has signal
+                c(10, 0, -10, 0))   # effect 2: v1 dominates
+  mu  <- matrix(c(1, 2, 3, 4, 1, 2, 3, 4), nrow = 2, byrow = TRUE)
+  xCol <- rep(1, 4)
+  fits <- list(lbf_variable = lbf, mu = mu, X_column_scale_factors = xCol)
+  out <- pecotmr:::.ctwasRenormalizeSusieWeights(
+    fits,
+    origVids = origVids,
+    origW = origW,
+    keptIdx = c(1L, 2L, 3L),
+    harmonizedW = origW[1:3])
+  # Effect 1 lbf over {v1,v2,v3} = c(0,0,0) -> uniform alpha = 1/3
+  # Effect 2 lbf over {v1,v2,v3} = c(10,0,-10) -> v1 ≈ 1
+  # weight[v1] = (1/3)*1 + ~1*1 = ~1.33
+  expect_equal(length(out), 3L)
+  expect_true(out[[1L]] > out[[2L]] && out[[1L]] > out[[3L]])
+})
+
+test_that(".ctwasRenormalizeSusieWeights: returns NULL on fit/entry dimension mismatch", {
+  fits <- list(lbf_variable = matrix(0, 2, 5),
+                mu = matrix(0, 2, 5),
+                X_column_scale_factors = rep(1, 5))
+  out <- pecotmr:::.ctwasRenormalizeSusieWeights(
+    fits,
+    origVids = paste0("v", 1:3),  # entry says 3, fit covers 5 -> mismatch
+    origW = rep(0.1, 3),
+    keptIdx = 1:3,
+    harmonizedW = rep(0.1, 3))
+  expect_null(out)
+})
+
+test_that(".ctwasRenormalizeSusieWeights: signFlip carries over to mu", {
+  # All variants kept; harmonized weights have opposite sign on v2.
+  origVids <- paste0("v", 1:3)
+  origW    <- c(0.1, 0.2, 0.3)
+  # Strong lbf concentrated on a single effect / single variant per row,
+  # so the recomputed weight directly mirrors mu (alpha ≈ identity rows).
+  lbf <- rbind(c(100, -100, -100),
+                c(-100, 100, -100))
+  mu  <- rbind(c(1, 2, 3),
+                c(4, 5, 6))
+  fits <- list(lbf_variable = lbf, mu = mu,
+                X_column_scale_factors = rep(1, 3))
+  harmW_noflip <- c(0.1, 0.2, 0.3)   # all positive
+  harmW_v2flip <- c(0.1, -0.2, 0.3)  # v2 flipped
+  outNoFlip <- pecotmr:::.ctwasRenormalizeSusieWeights(
+    fits, origVids, origW, keptIdx = 1:3, harmonizedW = harmW_noflip)
+  outV2flip <- pecotmr:::.ctwasRenormalizeSusieWeights(
+    fits, origVids, origW, keptIdx = 1:3, harmonizedW = harmW_v2flip)
+  # v1, v3 should be unchanged between the two; v2 should flip sign.
+  expect_equal(outNoFlip[[1L]],  outV2flip[[1L]], tolerance = 1e-9)
+  expect_equal(outNoFlip[[3L]],  outV2flip[[3L]], tolerance = 1e-9)
+  expect_equal(outNoFlip[[2L]], -outV2flip[[2L]], tolerance = 1e-9)
+})
+
+test_that(".ctwasSnpInfoForGwasBlock: restricts panel snpInfo to block GWAS variants", {
+  ss <- .ctp_makeGwasSumstats()
+  panelInfo <- data.frame(
+    chrom = 1L,
+    id    = vapply(1:6, .ctp_snpId, character(1)),  # whole panel
+    pos   = seq(100L, by = 100L, length.out = 6L),
+    alt   = "A", ref = "G",
+    stringsAsFactors = FALSE)
+  blockInfo <- pecotmr:::.ctwasSnpInfoForGwasBlock(ss, panelInfo)
+  # Restricted to the variant IDs on the GwasSumStats entry.
+  expect_true(all(blockInfo$id %in%
+                  as.character(S4Vectors::mcols(ss$entry[[1L]])$SNP)))
+  expect_true(nrow(blockInfo) <= nrow(panelInfo))
+})
+
 test_that(".ctwasBuildWeights: keys per-tuple weights and stamps gene metadata", {
   tw <- .ctp_makeTwasWeights()
   panel <- .ctp_makeLdPanel()
+  ids5  <- vapply(1:5, .ctp_snpId, character(1))
   wl <- pecotmr:::.ctwasBuildWeights(tw, panel)
   expect_equal(length(wl), 1L)
   expect_equal(names(wl), "Q1|c1|t1|susie")
@@ -224,40 +584,64 @@ test_that(".ctwasBuildWeights: keys per-tuple weights and stamps gene metadata",
   # wgt is a variants x 1 matrix with rownames = SNP IDs
   expect_true(is.matrix(wl[[1L]]$wgt))
   expect_equal(dim(wl[[1L]]$wgt), c(5L, 1L))
-  expect_equal(rownames(wl[[1L]]$wgt), paste0("v", 1:5))
+  expect_equal(rownames(wl[[1L]]$wgt), ids5)
   # R_wgt is a 5x5 slice of the cached panel R
   expect_true(is.matrix(wl[[1L]]$R_wgt))
   expect_equal(dim(wl[[1L]]$R_wgt), c(5L, 5L))
-  expect_equal(rownames(wl[[1L]]$R_wgt), paste0("v", 1:5))
+  expect_equal(rownames(wl[[1L]]$R_wgt), ids5)
   expect_equal(wl[[1L]]$n_wgt, 5L)
   # And it is literally a slice of the panel R (no recompute path).
-  expect_equal(wl[[1L]]$R_wgt, panel$R[paste0("v", 1:5), paste0("v", 1:5)])
+  expect_equal(wl[[1L]]$R_wgt, panel$R[ids5, ids5])
 })
 
 test_that(".ctwasBuildWeights: drops variants not present in the LD panel", {
+  ids3   <- vapply(1:3, .ctp_snpId, character(1))
+  missing <- c("chr1:99900:G:A", "chr1:99910:G:A")  # not in panel
   tw <- TwasWeights(
     study = "Q1", context = "c1", trait = "t1", method = "susie",
     entry = list(TwasWeightsEntry(
-      variantIds = c(paste0("v", 1:3), "missing1", "missing2"),
+      variantIds = c(ids3, missing),
       weights    = c(0.1, 0.2, 0.3, 0.4, 0.5))),
     ldSketch = .ctp_makeHandle())
   panel <- .ctp_makeLdPanel()
   wl <- pecotmr:::.ctwasBuildWeights(tw, panel)
   expect_equal(nrow(wl[[1L]]$wgt), 3L)
-  expect_equal(rownames(wl[[1L]]$wgt), paste0("v", 1:3))
+  expect_equal(rownames(wl[[1L]]$wgt), ids3)
+  expect_equal(wl[[1L]]$n_wgt, 3L)
+})
+
+test_that(".ctwasBuildWeights: intersects with gwasSnpIds when supplied", {
+  # The LD panel covers ids 1..6; the per-block GWAS sumstats covers only
+  # ids 1, 2, 4 (a subset). Weight variants that live in the panel but
+  # outside the block (id 3 here) must be dropped, otherwise ctwas's
+  # compute_gene_z asserts the weight variant is missing from z_snp.
+  ids5     <- vapply(1:5, .ctp_snpId, character(1))
+  blockIds <- vapply(c(1, 2, 4), .ctp_snpId, character(1))
+  tw <- TwasWeights(
+    study = "Q1", context = "c1", trait = "t1", method = "susie",
+    entry = list(TwasWeightsEntry(
+      variantIds = ids5,
+      weights    = c(0.1, 0.2, 0.3, 0.4, 0.5))),
+    ldSketch = .ctp_makeHandle())
+  panel <- .ctp_makeLdPanel()
+  wl <- pecotmr:::.ctwasBuildWeights(
+    tw, panel, gwasSnpIds = blockIds)
+  expect_equal(rownames(wl[[1L]]$wgt), blockIds)
   expect_equal(wl[[1L]]$n_wgt, 3L)
 })
 
-test_that(".ctwasComputeFullPanelLd: extracts once + returns cached R + snpInfo", {
+test_that(".ctwasComputeFullPanelLd: extracts once + returns cached R + snpInfo + variance", {
   local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
                         .package = "pecotmr")
   out <- pecotmr:::.ctwasComputeFullPanelLd(.ctp_makeHandle())
-  expect_named(out, c("R", "snpInfo"))
+  ids6 <- vapply(1:6, .ctp_snpId, character(1))
+  expect_named(out, c("R", "snpInfo", "variance"))
   expect_true(is.matrix(out$R))
   expect_equal(dim(out$R), c(6L, 6L))
-  expect_equal(rownames(out$R), paste0("v", 1:6))
+  expect_equal(rownames(out$R), ids6)
   expect_setequal(colnames(out$snpInfo),
                    c("chrom", "id", "pos", "alt", "ref"))
+  expect_named(out$variance, ids6)
 })
 
 test_that(".ctwasBuildZGene: builds z_gene from a TWAS-Z GRanges", {
@@ -277,69 +661,170 @@ test_that(".ctwasBuildZGene: builds z_gene from a TWAS-Z GRanges", {
 # LD loader / SNP-info loader closures
 # ===========================================================================
 
-test_that(".ctwasSingleBlockLdLoader: returns the cached R unchanged on every call", {
-  R0 <- matrix(runif(36), 6, 6, dimnames = list(paste0("v", 1:6),
-                                                  paste0("v", 1:6)))
-  loader <- pecotmr:::.ctwasSingleBlockLdLoader(R0)
-  expect_identical(loader("any_token"), R0)
-  expect_identical(loader("different_token"), R0)
+test_that(".ctwasMultiBlockLdLoader: dispatches by LD_file token", {
+  RA <- matrix(runif(4), 2, 2,
+                dimnames = list(c("v1", "v2"), c("v1", "v2")))
+  RB <- matrix(runif(4), 2, 2,
+                dimnames = list(c("v3", "v4"), c("v3", "v4")))
+  loader <- pecotmr:::.ctwasMultiBlockLdLoader(
+    list(tokenA = list(R = RA), tokenB = list(R = RB)))
+  expect_identical(loader("tokenA"), RA)
+  expect_identical(loader("tokenB"), RB)
+  expect_error(loader("unknown_token"), "no cached panel")
 })
 
-test_that(".ctwasSingleBlockSnpInfoLoader: returns the cached snpInfo unchanged on every call", {
-  info0 <- data.frame(chrom = 1L, id = paste0("v", 1:3),
-                       pos = c(100L, 200L, 300L),
+test_that(".ctwasMultiBlockSnpInfoLoader: dispatches by LD_file token", {
+  infoA <- data.frame(chrom = 1L, id = c("v1", "v2"),
+                       pos = c(100L, 200L),
                        alt = "A", ref = "G", stringsAsFactors = FALSE)
-  loader <- pecotmr:::.ctwasSingleBlockSnpInfoLoader(info0)
-  expect_identical(loader("any_token"), info0)
-  expect_identical(loader("different_token"), info0)
+  infoB <- data.frame(chrom = 1L, id = c("v3", "v4"),
+                       pos = c(300L, 400L),
+                       alt = "C", ref = "T", stringsAsFactors = FALSE)
+  loader <- pecotmr:::.ctwasMultiBlockSnpInfoLoader(
+    list(tokenA = list(snpInfo = infoA),
+         tokenB = list(snpInfo = infoB)))
+  expect_identical(loader("tokenA"), infoA)
+  expect_identical(loader("tokenB"), infoB)
+  expect_error(loader("unknown_token"), "no cached panel")
 })
 
 # ===========================================================================
-# End-to-end with mocked ctwas::ctwas_sumstats
+# Input-assembly shape checks via assembleCtwasInputs (no ctwas engine needed)
 # ===========================================================================
 
-test_that("ctwasPipeline: assembles the documented input shape for ctwas_sumstats", {
-  skip_if_not_installed("ctwas")
-  ss <- .ctp_makeGwasSumstats()
-  tw <- .ctp_makeTwasWeights()
-  capturedArgs <- NULL
-  local_mocked_bindings(
-    ctwas_sumstats = function(...) {
-      capturedArgs <<- list(...)
-      list(susie_alpha_res = "mocked")
-    },
-    .package = "ctwas")
+test_that("assembleCtwasInputs: assembles the documented input shape for ctwas", {
+  inp <- .ctp_makeMultiBlockInputs()
   local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
                         .package = "pecotmr")
-  out <- ctwasPipeline(gwasSumStats = ss, twasWeights = tw,
-                       regionId = "myBlock")
-  expect_equal(out$susie_alpha_res, "mocked")
-  expect_equal(capturedArgs$region_info$region_id, "myBlock")
-  expect_equal(capturedArgs$z_snp$id, paste0("v", 1:6))
-  expect_equal(length(capturedArgs$weights), 1L)
-  expect_equal(capturedArgs$L, 5L)
+  inputs <- assembleCtwasInputs(
+    gwasSumStats = inp$gwasSumStats,
+    twasWeights  = inp$twasWeights)
+  # Two regions, two LD_map rows.
+  expect_equal(nrow(inputs$region_info), 2L)
+  expect_setequal(inputs$region_info$region_id, c("block1", "block2"))
+  # zSnp is the concatenation of both blocks' Z columns.
+  expect_equal(nrow(inputs$z_snp), 12L)
+  # snp_map keyed by region_id.
+  expect_setequal(names(inputs$snp_map), c("block1", "block2"))
+  # Per-region weights keys are prefixed with the region_id.
+  expect_true(all(grepl("^(block1|block2)\\|", names(inputs$weights))))
+  # LD_map carries the same number of rows as regions.
+  expect_equal(nrow(inputs$LD_map), 2L)
+  # LD / snpInfo loader closures are present.
+  expect_true(is.function(inputs$LD_loader_fun))
+  expect_true(is.function(inputs$snpinfo_loader_fun))
 })
 
-test_that("ctwasPipeline: forwards a twasZ argument as z_gene", {
-  skip_if_not_installed("ctwas")
-  ss <- .ctp_makeGwasSumstats()
-  tw <- .ctp_makeTwasWeights()
+test_that("assembleCtwasInputs: forwards a twasZ argument as z_gene", {
+  inp <- .ctp_makeMultiBlockInputs()
   twasZ <- GenomicRanges::GRanges("chr1", IRanges::IRanges(100, 200))
   S4Vectors::mcols(twasZ) <- S4Vectors::DataFrame(
     qtlStudy = "Q1", context = "c1", trait = "t1", method = "susie",
     twasZ = 1.5)
-  capturedArgs <- NULL
+  local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
+                        .package = "pecotmr")
+  inputs <- assembleCtwasInputs(
+    gwasSumStats = inp$gwasSumStats,
+    twasWeights  = inp$twasWeights,
+    twasZ        = twasZ)
+  expect_equal(inputs$z_gene$id, "Q1|c1|t1|susie")
+})
+
+# ===========================================================================
+# Step-wise dispatch: estCtwasParam → screenCtwasRegions → finemapCtwasRegions
+# ===========================================================================
+
+test_that("ctwasPipeline: dispatches assemble → est → screen → finemap and accumulates state", {
+  skip_if_not_installed("ctwas")
+  inp <- .ctp_makeMultiBlockInputs()
+  capturedAssemble <- list(); capturedEst <- list()
+  capturedScreen   <- list(); capturedFinemap <- list()
   local_mocked_bindings(
-    ctwas_sumstats = function(...) {
-      capturedArgs <<- list(...)
-      list(ok = TRUE)
+    assemble_region_data = function(...) {
+      capturedAssemble <<- list(...)
+      list(region_data    = list(block1 = list(stub = TRUE)),
+           boundary_genes = list(stub = TRUE),
+           z_gene         = data.frame(id = "t1", z = 1.5))
+    },
+    est_param = function(...) {
+      capturedEst <<- list(...)
+      list(group_prior = c(g = 0.1, SNP = 0.0001),
+           group_prior_var = c(g = 5, SNP = 5))
+    },
+    screen_regions = function(...) {
+      capturedScreen <<- list(...)
+      list(screened_region_data = list(block1 = list(stub = TRUE)),
+           screen_res_meta = "mocked")
     },
+    finemap_regions = function(...) {
+      capturedFinemap <<- list(...)
+      list(finemap_res = data.frame(id = "t1"),
+           susie_alpha_res = data.frame(susie_pip = 0.9))
+    },
+    .package = "ctwas")
+  local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
+                        .package = "pecotmr")
+  out <- ctwasPipeline(
+    gwasSumStats = inp$gwasSumStats,
+    twasWeights  = inp$twasWeights)
+  # Each ctwas step was invoked exactly once.
+  expect_true(length(capturedAssemble) > 0L)
+  expect_true(length(capturedEst) > 0L)
+  expect_true(length(capturedScreen) > 0L)
+  expect_true(length(capturedFinemap) > 0L)
+  # est sees the assembled region_data.
+  expect_named(capturedEst$region_data, "block1")
+  # screen receives the param estimates as group_prior / group_prior_var.
+  expect_equal(unname(capturedScreen$group_prior), c(0.1, 0.0001))
+  # finemap consumes screen's screened_region_data.
+  expect_named(capturedFinemap$region_data, "block1")
+  # Output mirrors the ctwas_sumstats top-level shape.
+  expect_setequal(
+    names(out),
+    c("z_gene", "param", "finemap_res", "susie_alpha_res",
+      "region_data", "boundary_genes", "screen_res"))
+})
+
+test_that("estCtwasParam / screenCtwasRegions / finemapCtwasRegions can be called independently", {
+  skip_if_not_installed("ctwas")
+  inp <- .ctp_makeMultiBlockInputs()
+  local_mocked_bindings(
+    assemble_region_data = function(...) list(
+      region_data    = list(block1 = list(stub = TRUE)),
+      boundary_genes = list(),
+      z_gene         = data.frame(id = "t1", z = 1.0)),
+    est_param = function(...) list(
+      group_prior = c(g = 0.05, SNP = 1e-4),
+      group_prior_var = c(g = 4, SNP = 5)),
+    screen_regions = function(...) list(
+      screened_region_data = list(block1 = list(stub = TRUE))),
+    finemap_regions = function(...) list(
+      finemap_res = data.frame(id = "t1"),
+      susie_alpha_res = data.frame(pip = 0.5)),
     .package = "ctwas")
   local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
                         .package = "pecotmr")
-  ctwasPipeline(gwasSumStats = ss, twasWeights = tw, twasZ = twasZ,
-                regionId = "block1")
-  expect_equal(capturedArgs$z_gene$id, "Q1|c1|t1|susie")
+  # Step 1
+  inputs <- assembleCtwasInputs(inp$gwasSumStats, inp$twasWeights)
+  expect_true("region_info" %in% names(inputs))
+  expect_true("LD_loader_fun" %in% names(inputs))
+  # Step 2
+  est <- estCtwasParam(inputs)
+  expect_true("region_data" %in% names(est))
+  expect_true("param" %in% names(est))
+  # User can OVERRIDE the estimated priors before screen/finemap — this is
+  # the escape hatch for NaN-on-iter-2 EM divergence.
+  est$param$group_prior     <- c(g = 0.2, SNP = 1e-4)
+  est$param$group_prior_var <- c(g = 4.5, SNP = 5)
+  # Step 3
+  screened <- screenCtwasRegions(est)
+  expect_true("screened_region_data" %in% names(screened))
+  # Step 4
+  final <- finemapCtwasRegions(screened)
+  expect_setequal(
+    names(final),
+    c("z_gene", "param", "finemap_res", "susie_alpha_res",
+      "region_data", "boundary_genes", "screen_res"))
 })
 
 # ===========================================================================
@@ -368,26 +853,27 @@ test_that("ctwasPipeline: real-engine end-to-end on the bundled example panel",
     entry   = list(ent),
     ldSketch = gh)
 
+  # ctwasPipeline now requires >= 2 blocks; replicate the same GWAS +
+  # TWAS pair under two region_ids so the joint EM has something to
+  # estimate against. The bundled toy panel is still too sparse for
+  # the convergence checks, so we relax the gates.
   res <- suppressMessages(suppressWarnings(
-    ctwasPipeline(gwasSumStats = gss, twasWeights = tw,
-                  regionId    = "block1",
-                  niter       = 5L,
-                  niterPrefit = 2L,
-                  # Single-gene single-block toy: relax the production
-                  # filters that gate out tiny inputs.
-                  min_group_size       = 1L,
-                  min_p_single_effect  = 0,
-                  filter_L             = FALSE)))
+    ctwasPipeline(
+      gwasSumStats = list(blockA = gss, blockB = gss),
+      twasWeights  = list(blockA = tw,  blockB = tw),
+      niter        = 5L,
+      niterPrefit  = 2L,
+      # Toy panel: relax the production filters that gate out tiny inputs.
+      min_group_size       = 1L,
+      min_p_single_effect  = 0,
+      filter_L             = FALSE)))
 
   # ctwas_sumstats returns these 7 elements on success.
   expect_named(res, c("z_gene", "param", "finemap_res", "susie_alpha_res",
                        "region_data", "boundary_genes", "screen_res"),
                 ignore.order = TRUE)
   # The gene we passed in came through.
-  expect_equal(nrow(res$z_gene), 1L)
-  expect_equal(res$z_gene$id, "study1|brain|ENSG_example|susie")
-  # Per-SNP susie_alpha output covers our 5 weight SNPs.
-  expect_equal(nrow(res$susie_alpha_res), 5L)
+  expect_true(any(grepl("study1\\|brain\\|ENSG_example\\|susie", res$z_gene$id)))
   expect_true(all(c("susie_pip", "susie_alpha", "region_id")
                    %in% colnames(res$susie_alpha_res)))
 })
diff --git a/tests/testthat/test_sumstatsQc.R b/tests/testthat/test_sumstatsQc.R
index 7dd04d74..71e9700f 100644
--- a/tests/testthat/test_sumstatsQc.R
+++ b/tests/testthat/test_sumstatsQc.R
@@ -2547,16 +2547,15 @@ test_that("CS95 variants are ordered by decreasing PIP", {
 
 
 
-context("summaryStatsQc (with mocked MungeSumstats)")
+context("summaryStatsQc")
 
 # NOTE
 # ----
-# `.runMungeSumstatsFilter` wraps MungeSumstats::format_sumstats which needs
-# a real dbSNP reference panel (multi-GB download). To exercise the QC chain
-# in a unit test we mock that helper so it just returns the input data.frame
-# unchanged, recording a "no variants dropped" audit record. The pecotmr-
-# native steps (.applySkipRegion, .matchAgainstSketch, .applyPipScreen,
-# .applyLdMismatchQcToEntry) all run for real on the synthetic fixture.
+# The variant-content QC (MAF/INFO/N) is pure data.frame filtering via
+# .applyContentFilters; no external genome / dbSNP packages required.
+# All pecotmr-native steps (.applyContentFilters, .applySkipRegion,
+# .matchAgainstSketch, .applyPipScreen, .applyLdMismatchQcToEntry) run
+# for real on the synthetic fixture.
 
 # ===========================================================================
 # Fixture builders
@@ -2602,21 +2601,6 @@ context("summaryStatsQc (with mocked MungeSumstats)")
     ldSketch = .ssQ_makeHandle())
 }
 
-.ssQ_mockMunge <- function(drop = 0L) {
-  # Mock that pretends MungeSumstats validated the input and returned the
-  # same data.frame, dropping `drop` rows.
-  function(df, refGenome, useDbsnpRefCheck, removeIndels,
-           removeStrandAmbiguous, mafCutoff, infoCutoff, nCutoff,
-           convertRefGenome, mungeSumstatsArgs) {
-    keep <- if (drop > 0L && drop < nrow(df))
-      seq_len(nrow(df) - drop)
-    else
-      seq_len(nrow(df))
-    list(df = df[keep, , drop = FALSE],
-         droppedNVariants = nrow(df) - length(keep))
-  }
-}
-
 .ssQ_mockExtractor <- function(seed = 13, n_samples = 60L) {
   function(handle, snpIdx, meanImpute = TRUE) {
     set.seed(seed)
@@ -2702,14 +2686,11 @@ test_that(".deriveBetaSeFromZ: skipped when N missing", {
 })
 
 # ===========================================================================
-# summaryStatsQc: end-to-end with mocked MungeSumstats
+# summaryStatsQc: end-to-end on the synthetic fixture
 # ===========================================================================
 
 test_that("summaryStatsQc: vanilla run populates qcInfo and returns a GwasSumStats", {
   ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   res <- summaryStatsQc(ss)
   expect_s4_class(res, "GwasSumStats")
   qc <- getQcInfo(res)
@@ -2717,18 +2698,13 @@ test_that("summaryStatsQc: vanilla run populates qcInfo and returns a GwasSumSta
   expect_true("options" %in% names(qc))
   expect_true("entryAudit" %in% names(qc))
   expect_equal(length(qc$entryAudit), nrow(ss))
-  # Per-entry audit records variantsIn / variantsOut / mungeSumstatsDropped.
   ea <- qc$entryAudit[[1L]]
   expect_equal(ea$variantsIn, 4L)
   expect_equal(ea$variantsOut, 4L)
-  expect_equal(ea$mungeSumstatsDropped, 0L)
 })
 
 test_that("summaryStatsQc: keepVariants subsets each entry and records the drop", {
   ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   res <- summaryStatsQc(ss, keepVariants = c("rs1", "rs3"))
   ea <- getQcInfo(res)$entryAudit[[1L]]
   expect_equal(ea$keepVariantsDropped, 2L)
@@ -2737,9 +2713,6 @@ test_that("summaryStatsQc: keepVariants subsets each entry and records the drop"
 
 test_that("summaryStatsQc: skipRegion drops overlapping variants", {
   ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   res <- summaryStatsQc(ss, skipRegion = "chr1:50-150")
   ea <- getQcInfo(res)$entryAudit[[1L]]
   expect_equal(ea$skipRegionDropped, 1L)  # rs1 at pos 100 is dropped
@@ -2752,9 +2725,6 @@ test_that("summaryStatsQc: PIP screen triggers when no variant has signal", {
   S4Vectors::mcols(gr)$Z <- rep(0.1, length(gr))
   ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19",
                       ldSketch = .ssQ_makeHandle())
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   res <- summaryStatsQc(ss, pipCutoffToSkip = 0.99)
   ea <- getQcInfo(res)$entryAudit[[1L]]
   expect_true(isTRUE(ea$pipScreenSkipped))
@@ -2762,22 +2732,8 @@ test_that("summaryStatsQc: PIP screen triggers when no variant has signal", {
   expect_equal(length(res$entry[[1L]]), 0L)
 })
 
-test_that("summaryStatsQc: early-exit records when fewer than 2 variants remain pre-harmonization", {
-  ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    # Mock keeps only 1 row of the input
-    .runMungeSumstatsFilter = .ssQ_mockMunge(drop = 3L),
-    .package = "pecotmr")
-  res <- summaryStatsQc(ss)
-  ea <- getQcInfo(res)$entryAudit[[1L]]
-  expect_match(ea$earlyExit, "fewer than two variants")
-})
-
 test_that("summaryStatsQc: harmonized variants count is recorded", {
   ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   res <- summaryStatsQc(ss)
   ea <- getQcInfo(res)$entryAudit[[1L]]
   expect_equal(ea$matchedAgainstSketch, 4L)
@@ -2785,9 +2741,6 @@ test_that("summaryStatsQc: harmonized variants count is recorded", {
 
 test_that("summaryStatsQc: options block records the curated knobs", {
   ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   res <- summaryStatsQc(ss, removeIndels = TRUE, removeStrandAmbiguous = FALSE,
                         nCutoff = 10)
   opts <- getQcInfo(res)$options
@@ -2801,9 +2754,6 @@ test_that("summaryStatsQc: round-trips QtlSumStats inputs", {
   ss <- QtlSumStats(study = "s1", context = "c1", trait = "t1",
                      entry = list(gr), genome = "hg19",
                      ldSketch = .ssQ_makeHandle())
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   res <- summaryStatsQc(ss)
   expect_s4_class(res, "QtlSumStats")
   expect_equal(length(getQcInfo(res)$entryAudit), 1L)
@@ -2817,8 +2767,7 @@ test_that("summaryStatsQc: zMismatchQc = 'dentist' walks the LD-mismatch branch"
   ss <- .ssQ_makeGwasSumStats(snp_ids = paste0("rs", 1:8),
                               positions = seq(100L, by = 100L, length.out = 8L))
   local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    extractBlockGenotypes   = .ssQ_mockExtractor(),
+    extractBlockGenotypes = .ssQ_mockExtractor(),
     .package = "pecotmr")
   res <- suppressWarnings(summaryStatsQc(ss, zMismatchQc = "dentist"))
   ea <- getQcInfo(res)$entryAudit[[1L]]
@@ -2844,7 +2793,6 @@ test_that("summaryStatsQc: impute = TRUE invokes RAISS and records the audit cou
     ldSketch = .ssQ_makeHandle(snp_n = 8L, n_samples = 60L))
 
   local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
     extractBlockGenotypes   = .ssQ_mockExtractor(),
     raiss = function(refPanel, knownZscores, genotypeMatrix, ...) {
       # Pretend RAISS imputed two of the missing panel variants (rs5, rs6)
@@ -2873,8 +2821,7 @@ test_that("summaryStatsQc: impute = TRUE with raiss returning NULL records 0 imp
     ldSketch = .ssQ_makeHandle(snp_n = 8L, n_samples = 60L))
 
   local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    extractBlockGenotypes   = .ssQ_mockExtractor(),
+    extractBlockGenotypes = .ssQ_mockExtractor(),
     raiss = function(...) NULL,
     .package = "pecotmr")
   res <- summaryStatsQc(ss, impute = TRUE)
@@ -2913,16 +2860,11 @@ test_that(".matchRefPanel surfaces sign/strand/dropped counts via qcCounts attri
 
 test_that("summaryStatsQc: QC track emits per-step 'kept N of M' messages plus a rollup", {
   ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   local_mocked_bindings(
     extractBlockGenotypes = .ssQ_mockExtractor(),
     .package = "pecotmr")
   msgs <- capture_messages(summaryStatsQc(ss))
   joined <- paste(msgs, collapse = "")
-  # MungeSumstats step + denominator framing.
-  expect_match(joined, "MungeSumstats kept [0-9]+ of [0-9]+ variant")
   # Harmonization step + corrected/dropped breakdown.
   expect_match(joined, "harmonization kept [0-9]+ of [0-9]+")
   expect_match(joined, "corrected: sign-flipped [0-9]+, strand-flipped [0-9]+")
@@ -2933,9 +2875,6 @@ test_that("summaryStatsQc: QC track emits per-step 'kept N of M' messages plus a
 
 test_that("summaryStatsQc: skipped optional steps are omitted from the rollup", {
   ss <- .ssQ_makeGwasSumStats()
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   local_mocked_bindings(
     extractBlockGenotypes = .ssQ_mockExtractor(),
     .package = "pecotmr")
@@ -2962,9 +2901,6 @@ test_that("summaryStatsQc: per-entry log lines carry the (study/context/trait) l
     entry    = list(.ssQ_makeEntryGr()),
     genome   = "hg19",
     ldSketch = .ssQ_makeHandle())
-  local_mocked_bindings(
-    .runMungeSumstatsFilter = .ssQ_mockMunge(),
-    .package = "pecotmr")
   local_mocked_bindings(
     extractBlockGenotypes = .ssQ_mockExtractor(),
     .package = "pecotmr")
@@ -3128,6 +3064,145 @@ test_that(".refVariantsFromSketch: extracts chr/pos/A1/A2/variant_id from snpInf
     stringsAsFactors = FALSE)
 }
 
+test_that(".applySanityChecks: empty input is a no-op", {
+  out <- pecotmr:::.applySanityChecks(data.frame(chrom = character()))
+  expect_equal(nrow(out$df), 0L)
+  expect_equal(out$audit, list())
+})
+
+test_that(".applySanityChecks: coerceNumeric converts character columns and counts NAs", {
+  df <- data.frame(
+    chrom = c("1", "1"), pos = c(100L, 200L),
+    A1 = c("A", "C"), A2 = c("G", "T"),
+    Z = c("1.5", "not_a_number"),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(df, dropMissData = FALSE)
+  expect_type(out$df$Z, "double")
+  expect_equal(out$df$Z[[1L]], 1.5)
+  expect_true(is.na(out$df$Z[[2L]]))
+  expect_equal(out$audit$nonNumericCoerced, 1L)
+})
+
+test_that(".applySanityChecks: normalizeChr maps 23/24/M/chr* and drops non-standard", {
+  df <- data.frame(
+    chrom = c("chr1", "23", "24", "M", "chrX_random"),
+    pos = c(100L, 200L, 300L, 400L, 500L),
+    A1 = c("A", "A", "A", "A", "A"),
+    A2 = c("G", "G", "G", "G", "G"),
+    Z = c(1, 2, 3, 4, 5),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(df)
+  expect_equal(out$df$chrom, c("1", "X", "Y", "MT"))
+  expect_equal(out$audit$nonstandardChrDropped, 1L)
+})
+
+test_that(".applySanityChecks: dropMissData drops rows with NA in vital columns", {
+  df <- data.frame(
+    chrom = c("1", "1", "1"),
+    pos   = c(100L, 200L, 300L),
+    A1    = c("A", NA, "C"),
+    A2    = c("G", "T", "T"),
+    Z     = c(1, 2, NA),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(df)
+  expect_equal(nrow(out$df), 1L)
+  expect_equal(out$audit$missDataDropped, 2L)
+})
+
+test_that(".applySanityChecks: dropPOutOfRange drops P < 0 and P > 1", {
+  df <- data.frame(
+    chrom = c("1", "1", "1", "1"),
+    pos   = c(100L, 200L, 300L, 400L),
+    A1    = c("A", "A", "A", "A"), A2 = c("G", "G", "G", "G"),
+    Z     = c(1, 2, 3, 4),
+    P     = c(0.5, -0.1, 1.5, 0.001),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(df)
+  expect_equal(nrow(out$df), 2L)
+  expect_equal(out$audit$pOutOfRangeDropped, 2L)
+})
+
+test_that(".applySanityChecks: clampSmallP floors to smallPFloor", {
+  df <- data.frame(
+    chrom = c("1", "1"),
+    pos   = c(100L, 200L),
+    A1    = c("A", "A"), A2 = c("G", "G"),
+    Z     = c(1, 50),
+    P     = c(0.1, 0),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(df, smallPFloor = 5e-324)
+  expect_equal(out$df$P[[2L]], 5e-324)
+  expect_equal(out$audit$smallPClamped, 1L)
+})
+
+test_that(".applySanityChecks: dropZeroEffect drops BETA==0 and OR==1", {
+  df <- data.frame(
+    chrom = c("1", "1", "1"),
+    pos   = c(100L, 200L, 300L),
+    A1    = c("A", "A", "A"), A2 = c("G", "G", "G"),
+    BETA  = c(0.5, 0, 0.2),
+    OR    = c(1.5, 1.5, 1),
+    SE    = c(0.1, 0.1, 0.1),
+    Z     = c(5, 0, 2),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(df)
+  expect_equal(nrow(out$df), 1L)
+  expect_equal(out$audit$zeroEffectDropped, 2L)
+})
+
+test_that(".applySanityChecks: dropNonpositiveSe drops SE <= 0", {
+  df <- data.frame(
+    chrom = c("1", "1", "1"),
+    pos   = c(100L, 200L, 300L),
+    A1    = c("A", "A", "A"), A2 = c("G", "G", "G"),
+    SE    = c(0.1, 0, -0.2),
+    Z     = c(1, 2, 3),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(df)
+  expect_equal(nrow(out$df), 1L)
+  expect_equal(out$audit$nonpositiveSeDropped, 2L)
+})
+
+test_that(".applySanityChecks: per-check knobs can disable each step", {
+  df <- data.frame(
+    chrom = c("chr1", "23"),
+    pos   = c(100L, 200L),
+    A1    = c("A", "A"), A2 = c("G", "G"),
+    BETA  = c(0.5, 0),
+    SE    = c(0.1, -0.2),
+    P     = c(0.5, 1.5),
+    Z     = c(1, 2),
+    stringsAsFactors = FALSE)
+  out <- pecotmr:::.applySanityChecks(
+    df,
+    coerceNumeric      = FALSE,
+    normalizeChr       = FALSE,
+    dropMissData       = FALSE,
+    dropPOutOfRange    = FALSE,
+    clampSmallP        = FALSE,
+    dropZeroEffect     = FALSE,
+    dropNonpositiveSe  = FALSE)
+  expect_equal(nrow(out$df), 2L)
+  expect_equal(out$audit, list())
+  expect_equal(out$df$chrom, c("chr1", "23"))
+})
+
+test_that("summaryStatsQc: surfaces sanity-check audit and respects per-check knobs", {
+  gr <- .ssQ_makeEntryGr()
+  S4Vectors::mcols(gr)$BETA <- c(0.1, 0, 0.2, 0)
+  S4Vectors::mcols(gr)$SE   <- c(0.1, 0.1, 0.1, 0.1)
+  S4Vectors::mcols(gr)$P    <- c(0.5, 0.5, 0.5, 0.5)
+  ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19",
+                      ldSketch = .ssQ_makeHandle())
+  res <- summaryStatsQc(ss)
+  ea <- getQcInfo(res)$entryAudit[[1L]]
+  expect_equal(ea$sanityChecks$zeroEffectDropped, 2L)
+  # Disable the knob: zero-effect rows should remain.
+  res2 <- summaryStatsQc(ss, dropZeroEffect = FALSE)
+  ea2 <- getQcInfo(res2)$entryAudit[[1L]]
+  expect_null(ea2$sanityChecks$zeroEffectDropped)
+})
+
 test_that(".applySkipRegion: NULL / empty skipRegion is a no-op", {
   df <- .ssh_smallDf()
   expect_identical(pecotmr:::.applySkipRegion(df, NULL), df)

From b3f1cac996a3688070f621c8a10540717a7f7aee Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Mon, 22 Jun 2026 16:30:21 -0700
Subject: [PATCH 2/4] more fixes for pipelines

---
 R/GwasFineMappingResult.R                   | 94 +++++++++++++--------
 R/colocPipeline.R                           | 34 ++++----
 R/ctwasPipeline.R                           | 92 ++++++++++++++++++--
 R/fineMappingPipeline.R                     | 69 ++++++++++-----
 R/qtlEnrichmentPipeline.R                   | 59 ++++++++-----
 R/tupleSelectors.R                          | 30 +++++--
 tests/testthat/test_GwasFineMappingResult.R | 40 ++++++++-
 tests/testthat/test_colocPipeline.R         | 16 ++--
 tests/testthat/test_ctwasPipeline.R         | 37 ++++++++
 tests/testthat/test_deprecated.R            | 22 ++---
 tests/testthat/test_fineMappingPipeline.R   | 20 +++--
 tests/testthat/test_qtlEnrichmentPipeline.R | 65 +++++++++++---
 12 files changed, 437 insertions(+), 141 deletions(-)

diff --git a/R/GwasFineMappingResult.R b/R/GwasFineMappingResult.R
index 3b5a4247..e2c3e950 100644
--- a/R/GwasFineMappingResult.R
+++ b/R/GwasFineMappingResult.R
@@ -1,9 +1,13 @@
 # =============================================================================
 # GwasFineMappingResult S4 class
 # -----------------------------------------------------------------------------
-# DFrame-subclass collection keyed by the identity tuple (study, method).
-# Each row holds a FineMappingEntry payload for one GWAS study at one
-# fine-mapping method, covering a single LD block. Class-level slots:
+# DFrame-subclass collection keyed by the identity tuple
+# (study, method, region_id). Each row holds a FineMappingEntry payload
+# for one GWAS study at one fine-mapping method over one LD block.
+# Multiple rows per (study, method) are allowed when they differ on
+# region_id — the genome-wide-across-blocks shape that
+# qtlEnrichmentPipeline / colocPipeline expect when sweeping a study
+# across LD blocks. Class-level slots:
 #   * ldSketch   GenotypeHandle for the LD reference; required for the
 #                LD-block-indexed susieRSS workflow.
 # Methods that take per-row selectors accept (study, method) and ignore
@@ -17,7 +21,7 @@ setClass("GwasFineMappingResult",
   contains = "FineMappingResultBase",
   validity = function(object) {
     errors <- character()
-    required <- c("study", "method", "entry")
+    required <- c("study", "method", "region_id", "entry")
     missingCols <- setdiff(required, names(object))
     if (length(missingCols) > 0L)
       errors <- c(errors, paste("missing columns:",
@@ -32,10 +36,10 @@ setClass("GwasFineMappingResult",
       if (!all(entryTypes))
         errors <- c(errors,
           "every element of the `entry` column must be a FineMappingEntry")
-      keyDf <- as.data.frame(object[, c("study", "method")])
+      keyDf <- as.data.frame(object[, c("study", "method", "region_id")])
       if (anyDuplicated(keyDf))
         errors <- c(errors,
-          "(study, method) tuple uniqueness violated")
+          "(study, method, region_id) tuple uniqueness violated")
     }
     if (!is.null(object@ldSketch) &&
         !methods::is(object@ldSketch, "GenotypeHandle")) {
@@ -77,26 +81,41 @@ setClass("GwasFineMappingResult",
 
 #' @title Create a GwasFineMappingResult Collection
 #' @description Construct a \code{GwasFineMappingResult} DFrame-subclass
-#'   collection from per-(study, method) tuples and a list of
-#'   \code{FineMappingEntry} payloads. The collection represents one LD
-#'   block of GWAS fine-mapping fits; build a separate collection per
-#'   block when sweeping the genome.
+#'   collection from per-(study, method, region_id) tuples and a list of
+#'   \code{FineMappingEntry} payloads. The collection can represent
+#'   either a single LD block (one row per (study, method)) or a
+#'   genome-wide sweep across blocks (multiple rows per (study, method),
+#'   each tagged with its own region_id).
 #' @param study Character vector of study identifiers (per tuple).
 #' @param method Character vector of fine-mapping method names (per tuple).
 #' @param entry List / \code{SimpleList} of \code{FineMappingEntry} objects.
+#' @param region_id Character vector of LD-block identifiers (per
+#'   tuple). When omitted (\code{NULL}), defaults to a per-row synthetic
+#'   id (\code{"region_1"}, \code{"region_2"}, ...) so the
+#'   (\code{study}, \code{method}, \code{region_id}) triple is unique.
+#'   Supplying meaningful labels (e.g. \code{"chr22_10516173_17414263"})
+#'   is preferred for downstream consumers that join on region.
 #' @param ldSketch An optional \code{GenotypeHandle}.
 #' @return A \code{GwasFineMappingResult} object.
 #' @export
 GwasFineMappingResult <- function(study, method, entry,
+                                  region_id = NULL,
                                   ldSketch = NULL) {
   n <- length(study)
   if (length(method) != n || length(entry) != n) {
     stop("`study`, `method`, and `entry` must all have the same length.")
   }
+  if (is.null(region_id)) {
+    region_id <- paste0("region_", seq_len(n))
+  } else if (length(region_id) != n) {
+    stop("`region_id` must have the same length as `study` (got ",
+         length(region_id), " vs ", n, ").")
+  }
   cols <- list(
-    study  = as.character(study),
-    method = as.character(method),
-    entry  = S4Vectors::SimpleList(entry)
+    study     = as.character(study),
+    method    = as.character(method),
+    region_id = as.character(region_id),
+    entry     = S4Vectors::SimpleList(entry)
   )
   df <- do.call(S4Vectors::DataFrame,
                 c(cols, list(check.names = FALSE)))
@@ -121,9 +140,10 @@ setMethod("getContexts", "GwasFineMappingResult", function(x) NULL)
 #' @export
 setMethod("getTraits", "GwasFineMappingResult", function(x) NULL)
 
-# Per-tuple lookup for the GWAS variant (2-tuple instead of 4-tuple).
-# The generic accepts the full set of selectors; context/trait args are
-# ignored for GwasFineMappingResult.
+# Per-tuple lookup keyed by (study, method, region_id). The generic
+# accepts the full set of selectors; context/trait args are ignored for
+# GwasFineMappingResult. `region` (passed via `...`) is the per-block
+# disambiguator for multi-block genome-wide collections.
 #' @rdname getFineMappingResult
 #' @export
 setMethod("getFineMappingResult", "GwasFineMappingResult",
@@ -136,17 +156,18 @@ setMethod("getFineMappingResult", "GwasFineMappingResult",
 #' @export
 setMethod("getPip", "GwasFineMappingResult",
   function(x, study = NULL, context = NULL, trait = NULL, method = NULL,
-           returnList = FALSE, ...) {
-    entry <- getFineMappingResult(x, study = study, method = method)
-    getPip(entry)
+           region = NULL, returnList = FALSE, ...) {
+    idx <- .tupleSelectRowGwasFmr(x, study, method, region)
+    getPip(x$entry[[idx]])
   })
 
 #' @rdname getCs
 #' @export
 setMethod("getCs", "GwasFineMappingResult",
-  function(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) {
-    entry <- getFineMappingResult(x, study = study, method = method)
-    getCs(entry)
+  function(x, study = NULL, context = NULL, trait = NULL, method = NULL,
+           region = NULL, ...) {
+    idx <- .tupleSelectRowGwasFmr(x, study, method, region)
+    getCs(x$entry[[idx]])
   })
 
 #' @rdname getTopLoci
@@ -154,34 +175,39 @@ setMethod("getCs", "GwasFineMappingResult",
 setMethod("getTopLoci", "GwasFineMappingResult",
   function(x, type = c("data.frame", "GRanges"),
            signalCutoff = 0.025,
-           study = NULL, context = NULL, trait = NULL, method = NULL, ...) {
-    entry <- getFineMappingResult(x, study = study, method = method)
-    getTopLoci(entry, type = match.arg(type), signalCutoff = signalCutoff)
+           study = NULL, context = NULL, trait = NULL, method = NULL,
+           region = NULL, ...) {
+    idx <- .tupleSelectRowGwasFmr(x, study, method, region)
+    getTopLoci(x$entry[[idx]], type = match.arg(type),
+               signalCutoff = signalCutoff)
   })
 
 #' @rdname getMarginalEffects
 #' @export
 setMethod("getMarginalEffects", "GwasFineMappingResult",
   function(x, maxPval = NULL,
-           study = NULL, context = NULL, trait = NULL, method = NULL, ...) {
-    entry <- getFineMappingResult(x, study = study, method = method)
-    getMarginalEffects(entry, maxPval = maxPval)
+           study = NULL, context = NULL, trait = NULL, method = NULL,
+           region = NULL, ...) {
+    idx <- .tupleSelectRowGwasFmr(x, study, method, region)
+    getMarginalEffects(x$entry[[idx]], maxPval = maxPval)
   })
 
 #' @rdname getSusieFit
 #' @export
 setMethod("getSusieFit", "GwasFineMappingResult",
-  function(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) {
-    entry <- getFineMappingResult(x, study = study, method = method)
-    getSusieFit(entry)
+  function(x, study = NULL, context = NULL, trait = NULL, method = NULL,
+           region = NULL, ...) {
+    idx <- .tupleSelectRowGwasFmr(x, study, method, region)
+    getSusieFit(x$entry[[idx]])
   })
 
 #' @rdname getVariantIds
 #' @export
 setMethod("getVariantIds", "GwasFineMappingResult",
-  function(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) {
-    entry <- getFineMappingResult(x, study = study, method = method)
-    getVariantIds(entry)
+  function(x, study = NULL, context = NULL, trait = NULL, method = NULL,
+           region = NULL, ...) {
+    idx <- .tupleSelectRowGwasFmr(x, study, method, region)
+    getVariantIds(x$entry[[idx]])
   })
 
 
diff --git a/R/colocPipeline.R b/R/colocPipeline.R
index d0a8736e..8e27de0c 100644
--- a/R/colocPipeline.R
+++ b/R/colocPipeline.R
@@ -81,9 +81,10 @@
 #' @param returnGwasFineMapping Logical. When \code{TRUE}, attach the
 #'   computed \code{GwasFineMappingResult} on the returned data frame
 #'   as attribute \code{"gwasFineMapping"}. Default \code{FALSE}.
-#' @param enrichment Optional data.frame of per-(gwasStudy, qtlContext)
-#'   enrichment factors with columns \code{gwasStudy}, \code{qtlContext},
-#'   \code{enrichment}. Output of \code{\link{qtlEnrichmentPipeline}}.
+#' @param enrichment Optional data.frame of per-(gwasStudy, qtlStudy,
+#'   qtlContext) enrichment factors with columns \code{gwasStudy},
+#'   \code{qtlStudy}, \code{qtlContext}, \code{enrichment}. Output of
+#'   \code{\link{qtlEnrichmentPipeline}}.
 #'   When non-\code{NULL}, each pair's \code{p12} prior is scaled to
 #'   \code{min(p12 * (1 + enrichment), p12Max)} (the enrichment-informed
 #'   colocalization variant, "enloc"). Pairs without a matching
@@ -132,9 +133,9 @@ colocPipeline <- function(qtlFineMappingResult,
   if (useEnrichment) {
     if (!is.data.frame(enrichment))
       stop("`enrichment` must be a data.frame with at least gwasStudy, ",
-           "qtlContext, enrichment columns (output of ",
+           "qtlStudy, qtlContext, enrichment columns (output of ",
            "qtlEnrichmentPipeline).")
-    required <- c("gwasStudy", "qtlContext", "enrichment")
+    required <- c("gwasStudy", "qtlStudy", "qtlContext", "enrichment")
     missingCols <- setdiff(required, colnames(enrichment))
     if (length(missingCols) > 0L)
       stop("`enrichment` is missing column(s): ",
@@ -231,14 +232,16 @@ colocPipeline <- function(qtlFineMappingResult,
       qAligned <- aligned$qtl
       gAligned <- aligned$gwas
 
-      # Enrichment-informed p12: per-pair scaling capped at p12Max.
-      # Baseline p12 used when no enrichment table or no matching row.
+      # Enrichment-informed p12: per-(gwasStudy, qtlStudy, qtlContext)
+      # scaling capped at p12Max. Baseline p12 used when no enrichment
+      # table or no matching row.
       if (useEnrichment) {
-        enRow <- .colocLookupEnrichment(enrichment, gInfo$study, qContext)
+        enRow <- .colocLookupEnrichment(enrichment, gInfo$study,
+                                        qStudy, qContext)
         if (is.na(enRow)) {
           warning(sprintf(
-            "colocPipeline: no enrichment entry for (gwasStudy='%s', qtlContext='%s'); using baseline p12.",
-            gInfo$study, qContext))
+            "colocPipeline: no enrichment entry for (gwasStudy='%s', qtlStudy='%s', qtlContext='%s'); using baseline p12.",
+            gInfo$study, qStudy, qContext))
           enRow <- 0
         }
         p12Used <- min(p12 * (1 + enRow), p12Max)
@@ -504,13 +507,14 @@ colocPipeline <- function(qtlFineMappingResult,
   base
 }
 
-# Look up the enrichment factor for a (gwasStudy, qtlContext) pair in
-# the user-supplied enrichment table. Returns NA when the pair is not
-# present; the caller falls back to the baseline p12 and emits a
-# warning.
+# Look up the enrichment factor for a (gwasStudy, qtlStudy, qtlContext)
+# triple in the user-supplied enrichment table. Returns NA when the
+# triple is not present; the caller falls back to the baseline p12 and
+# emits a warning.
 # @noRd
-.colocLookupEnrichment <- function(enrichment, gwasStudy, qtlContext) {
+.colocLookupEnrichment <- function(enrichment, gwasStudy, qtlStudy, qtlContext) {
   idx <- which(as.character(enrichment$gwasStudy)  == gwasStudy &
+               as.character(enrichment$qtlStudy)   == qtlStudy &
                as.character(enrichment$qtlContext) == qtlContext)
   if (length(idx) == 0L) return(NA_real_)
   as.numeric(enrichment$enrichment[[idx[[1L]]]])
diff --git a/R/ctwasPipeline.R b/R/ctwasPipeline.R
index 75c41198..c2382985 100644
--- a/R/ctwasPipeline.R
+++ b/R/ctwasPipeline.R
@@ -71,6 +71,11 @@
 #'   must-keep variants and fill remaining slots by descending PIP
 #'   (when available) or descending \code{|weight|}. Default
 #'   \code{Inf} (no cap).
+#' @param fallbackToPrefit Logical (length 1). Forwarded to
+#'   \code{\link{estCtwasParam}}. When \code{TRUE}, ctwas's accurate-EM
+#'   NaN failure is recovered by falling back to the prefit estimates
+#'   (mirrors the legacy ctwas_2 workaround on underpowered data).
+#'   Default \code{FALSE}.
 #' @param ... Additional arguments forwarded to
 #'   \code{ctwas::ctwas_sumstats}.
 #' @return Whatever \code{ctwas::ctwas_sumstats} returns (a list with
@@ -95,6 +100,7 @@ ctwasPipeline <- function(gwasSumStats,
                           csMinCor                = 0.8,
                           minPipCutoff            = 0,
                           maxNumVariants          = Inf,
+                          fallbackToPrefit        = FALSE,
                           ...) {
   groupPriorVarStructure <- match.arg(groupPriorVarStructure)
   inputs <- assembleCtwasInputs(
@@ -114,6 +120,7 @@ ctwasPipeline <- function(gwasSumStats,
     niter                   = niter,
     groupPriorVarStructure  = groupPriorVarStructure,
     ncore                   = ncore,
+    fallbackToPrefit        = fallbackToPrefit,
     ...)
   screened <- screenCtwasRegions(
     est,
@@ -304,6 +311,12 @@ assembleCtwasInputs <- function(gwasSumStats, twasWeights,
 #'   \code{ctwas::assemble_region_data} / \code{ctwas::est_param}.
 #' @param groupPriorVarStructure Pass-through.
 #' @param ncore Number of cores.
+#' @param fallbackToPrefit Logical (length 1). When \code{TRUE} (default
+#'   \code{FALSE}), if \code{ctwas::est_param}'s accurate EM diverges to
+#'   NaN and throws \code{"Estimated group_prior(_var)? contains NAs"},
+#'   re-run only the prefit step via \code{ctwas:::fit_EM} and return
+#'   those (typically finite) priors as the param. Mirrors the legacy
+#'   ctwas_2 workaround on toy data where the accurate EM saturates.
 #' @param ... Additional arguments forwarded to \code{ctwas::est_param}
 #'   (e.g. \code{min_p_single_effect}, \code{min_group_size}).
 #' @return The \code{inputs} list augmented with \code{region_data},
@@ -319,6 +332,7 @@ estCtwasParam <- function(inputs,
                                                       "shared_all",
                                                       "independent"),
                           ncore                   = 1L,
+                          fallbackToPrefit        = FALSE,
                           ...) {
   if (!requireNamespace("ctwas", quietly = TRUE)) {
     stop("Package 'ctwas' is required for estCtwasParam.")
@@ -340,12 +354,27 @@ estCtwasParam <- function(inputs,
     snp_map         = inputs$snp_map,
     thin            = thin,
     ncore           = as.integer(ncore)), extra = list(...))
-  paramRes <- .ctwasInvoke(ctwas::est_param, list(
-    region_data               = assembled$region_data,
-    niter_prefit              = as.integer(niterPrefit),
-    niter                     = as.integer(niter),
-    group_prior_var_structure = groupPriorVarStructure,
-    ncore                     = as.integer(ncore)), extra = list(...))
+  paramRes <- tryCatch(
+    .ctwasInvoke(ctwas::est_param, list(
+      region_data               = assembled$region_data,
+      niter_prefit              = as.integer(niterPrefit),
+      niter                     = as.integer(niter),
+      group_prior_var_structure = groupPriorVarStructure,
+      ncore                     = as.integer(ncore)), extra = list(...)),
+    error = function(e) {
+      if (fallbackToPrefit && grepl("contains NAs", conditionMessage(e))) {
+        message("estCtwasParam: accurate EM diverged (",
+                conditionMessage(e), "); falling back to prefit estimates.")
+        .ctwasFitPrefitEm(assembled$region_data,
+                          niterPrefit            = as.integer(niterPrefit),
+                          groupPriorVarStructure = groupPriorVarStructure,
+                          thin                   = thin,
+                          ncore                  = as.integer(ncore),
+                          extra                  = list(...))
+      } else {
+        stop(e)
+      }
+    })
   # ctwas::assemble_region_data does not echo z_gene back in its return
   # list, so propagate the precomputed (or freshly computed) z_gene we
   # passed into it. This matches ctwas_sumstats's top-level return shape.
@@ -478,6 +507,57 @@ finemapCtwasRegions <- function(screenResult,
   do.call(fn, args)
 }
 
+# Run ONLY ctwas's prefit EM step against `region_data` and return a
+# param list shaped like ctwas::est_param normally produces. Used as
+# the fallback path when est_param's accurate EM diverges to NaN on
+# toy / underpowered data (matches the legacy ctwas_2 workaround).
+# Calls ctwas's internal `fit_EM` (via ::: getFromNamespace) with
+# niter = niter_prefit, then applies the same thin-adjustment to the
+# SNP group_prior that est_param applies. p_single_effect is left as
+# NA since the accurate EM never ran.
+# @noRd
+.ctwasFitPrefitEm <- function(region_data, niterPrefit,
+                              groupPriorVarStructure, thin, ncore,
+                              extra = list()) {
+  fitEm <- getFromNamespace("fit_EM", "ctwas")
+  fitArgs <- list(
+    region_data               = region_data,
+    niter                     = as.integer(niterPrefit),
+    group_prior_var_structure = groupPriorVarStructure,
+    ncore                     = as.integer(ncore))
+  if (length(extra) > 0L) {
+    formalsFn <- tryCatch(names(formals(fitEm)), error = function(e) NULL)
+    if (!is.null(formalsFn)) {
+      explicitFormals <- setdiff(formalsFn, "...")
+      extra <- extra[setdiff(names(extra), names(fitArgs))]
+      extra <- extra[intersect(names(extra), explicitFormals)]
+    }
+    fitArgs <- c(fitArgs, extra)
+  }
+  prefit <- do.call(fitEm, fitArgs)
+  groupPrior <- prefit$group_prior
+  groupSize  <- prefit$group_size
+  if (thin != 1) {
+    if ("SNP" %in% names(groupPrior))
+      groupPrior["SNP"] <- groupPrior["SNP"] * thin
+    if ("SNP" %in% names(groupSize))
+      groupSize["SNP"]  <- groupSize["SNP"] / thin
+  }
+  if (length(groupPrior) > 0L)
+    groupSize <- groupSize[names(groupPrior)]
+  list(
+    group_prior               = groupPrior,
+    group_prior_var           = prefit$group_prior_var,
+    group_prior_iters         = prefit$group_prior_iters,
+    group_prior_var_iters     = prefit$group_prior_var_iters,
+    group_prior_var_structure = groupPriorVarStructure,
+    group_size                = groupSize,
+    p_single_effect           = data.frame(
+      region_id        = names(region_data),
+      p_single_effect  = NA_real_,
+      stringsAsFactors = FALSE))
+}
+
 # =============================================================================
 # Internal helpers
 # =============================================================================
diff --git a/R/fineMappingPipeline.R b/R/fineMappingPipeline.R
index 38d52a5d..bccaedd7 100644
--- a/R/fineMappingPipeline.R
+++ b/R/fineMappingPipeline.R
@@ -362,13 +362,17 @@ setGeneric("fineMappingPipeline",
   fineMappingResult$entry[[idx[[1L]]]]
 }
 
-# GwasFineMappingResult cache lookup using the (study, method) 2-tuple.
+# GwasFineMappingResult cache lookup using the (study, method,
+# region_id) 3-tuple. Multi-block FMRs can carry one entry per
+# (study, method, region_id) triple, so the cache key must include
+# region_id to correctly retrieve the cached fit for a specific block.
 # @noRd
-.fmCacheLookupGwas <- function(fineMappingResult, study, method) {
+.fmCacheLookupGwas <- function(fineMappingResult, study, method, region_id) {
   if (is.null(fineMappingResult)) return(NULL)
   if (!is(fineMappingResult, "GwasFineMappingResult")) return(NULL)
   idx <- .matchTupleRows(fineMappingResult,
-                          list(study = study, method = method))
+                          list(study = study, method = method,
+                               region_id = region_id))
   if (length(idx) == 0L) return(NULL)
   fineMappingResult$entry[[idx[[1L]]]]
 }
@@ -401,18 +405,24 @@ setGeneric("fineMappingPipeline",
     ldSketch      = ldSketch)
 }
 
-# Build a GwasFineMappingResult collection from per-(study, method) vectors.
+# Build a GwasFineMappingResult collection from per-row vectors. When
+# `region_ids` is NULL, falls through to the constructor's synthetic
+# defaults (region_1, region_2, ...). For callers that have meaningful
+# block labels (e.g. derived from a GwasSumStats entry's GRanges),
+# pass them explicitly so downstream consumers can join on region.
 # @noRd
-.fmBuildGwasResult <- function(studies, methods, entries, ldSketch = NULL) {
+.fmBuildGwasResult <- function(studies, methods, entries,
+                               region_ids = NULL, ldSketch = NULL) {
   if (length(entries) == 0L) {
-    stop("fineMappingPipeline: no (study, method) tuples produced a ",
+    stop("fineMappingPipeline: no (study, method, region_id) tuples produced a ",
          "fine-mapping result.")
   }
   GwasFineMappingResult(
-    study    = studies,
-    method   = methods,
-    entry    = entries,
-    ldSketch = ldSketch)
+    study     = studies,
+    method    = methods,
+    region_id = region_ids,
+    entry     = entries,
+    ldSketch  = ldSketch)
 }
 
 # Combine an optional joint column across two collections. Returns NULL
@@ -457,10 +467,11 @@ setGeneric("fineMappingPipeline",
       ldSketch      = ldSketch)
   } else {
     GwasFineMappingResult(
-      study    = c(as.character(a$study),  as.character(b$study)),
-      method   = c(as.character(a$method), as.character(b$method)),
-      entry    = c(as.list(a$entry), as.list(b$entry)),
-      ldSketch = ldSketch)
+      study     = c(as.character(a$study),     as.character(b$study)),
+      method    = c(as.character(a$method),    as.character(b$method)),
+      region_id = c(as.character(a$region_id), as.character(b$region_id)),
+      entry     = c(as.list(a$entry), as.list(b$entry)),
+      ldSketch  = ldSketch)
   }
 }
 
@@ -1455,10 +1466,12 @@ setMethod("fineMappingPipeline", "GwasSumStats",
 
     rowStudy   <- character(0)
     rowMethod  <- character(0)
+    rowRegion  <- character(0)
     rowEntries <- list()
-    pushRow <- function(st, mt, ent) {
+    pushRow <- function(st, mt, rg, ent) {
       rowStudy   <<- c(rowStudy,   st)
       rowMethod  <<- c(rowMethod,  mt)
+      rowRegion  <<- c(rowRegion,  rg)
       rowEntries[[length(rowEntries) + 1L]] <<- ent
     }
 
@@ -1470,18 +1483,26 @@ setMethod("fineMappingPipeline", "GwasSumStats",
       variantIds <- zn$variantIds
       z <- zn$z
       n <- zn$n
+      # Derive a region_id from the entry's GRanges so multi-block
+      # genome-wide GWAS sweeps can carry one row per block without
+      # tripping (study, method, region_id) uniqueness. Format:
+      # "{seqname}_{minPos}_{maxPos}" (e.g. "chr22_10516173_17379581").
+      region_id <- sprintf("%s_%d_%d",
+                           as.character(GenomicRanges::seqnames(gr))[[1L]],
+                           min(GenomicRanges::start(gr)),
+                           max(GenomicRanges::start(gr)))
 
       # The .fmCacheLookup helper takes a 4-tuple key for the QTL cache
       # shape. For GWAS resume we look up using the GwasFineMappingResult
-      # 2-tuple shape.
+      # 3-tuple shape (study, method, region_id).
       toRun <- character(0)
       for (tk in tokens) {
         cached <- if (!is.null(fineMappingResult) &&
                       is(fineMappingResult, "GwasFineMappingResult")) {
-          .fmCacheLookupGwas(fineMappingResult, st, tk)
+          .fmCacheLookupGwas(fineMappingResult, st, tk, region_id)
         } else NULL
         if (!is.null(cached)) {
-          pushRow(st, tk, cached)
+          pushRow(st, tk, region_id, cached)
         } else {
           toRun <- c(toRun, tk)
         }
@@ -1494,7 +1515,8 @@ setMethod("fineMappingPipeline", "GwasSumStats",
       infFit <- NULL
       if (chainLocal$runInf) {
         if (verbose >= 1)
-          message(sprintf("Fitting susieInf (RSS) for GWAS (study='%s') ...", st))
+          message(sprintf("Fitting susieInf (RSS) for GWAS (study='%s', region='%s') ...",
+                          st, region_id))
         infFit <- .fmFitSusieRss(z, ldMat, n, "susieInf",
                                  coverage = coverage)
       }
@@ -1507,8 +1529,8 @@ setMethod("fineMappingPipeline", "GwasSumStats",
                            (tk == "susieAsh" && chainLocal$chainAsh))
                         infFit else NULL
           if (verbose >= 1)
-            message(sprintf("Fitting %s (RSS) for GWAS (study='%s') ...",
-                            tk, st))
+            message(sprintf("Fitting %s (RSS) for GWAS (study='%s', region='%s') ...",
+                            tk, st, region_id))
           fit <- .fmFitSusieRss(z, ldMat, n, tk,
                                 chainFromInf = chainFrom,
                                 coverage = coverage)
@@ -1521,12 +1543,13 @@ setMethod("fineMappingPipeline", "GwasSumStats",
           signalCutoff = signalCutoff,
           minAbsCorr = minAbsCorr,
           csInput = "Xcorr")
-        pushRow(st, tk, ent)
+        pushRow(st, tk, region_id, ent)
       }
     }
 
     .fmBuildGwasResult(rowStudy, rowMethod, rowEntries,
-                       ldSketch = ldSketch)
+                       region_ids = rowRegion,
+                       ldSketch   = ldSketch)
   })
 
 
diff --git a/R/qtlEnrichmentPipeline.R b/R/qtlEnrichmentPipeline.R
index cfbc4b43..928487b7 100644
--- a/R/qtlEnrichmentPipeline.R
+++ b/R/qtlEnrichmentPipeline.R
@@ -43,11 +43,12 @@
 #'   \code{qtlEnrichment}. Default \code{1}.
 #' @param ... Additional arguments forwarded to
 #'   \code{\link{qtlEnrichment}}.
-#' @return A data frame with one row per (gwasStudy, qtlContext) pair
-#'   and columns \code{gwasStudy}, \code{qtlContext},
-#'   \code{enrichment}, \code{enrichmentSe}, \code{enrichmentLogOdds},
-#'   plus any extras the underlying estimator emits. Suitable as the
-#'   \code{enrichment} argument to \code{\link{colocPipeline}}.
+#' @return A data frame with one row per (gwasStudy, qtlStudy,
+#'   qtlContext) triple and columns \code{gwasStudy}, \code{qtlStudy},
+#'   \code{qtlContext}, \code{enrichment}, \code{enrichmentSe},
+#'   \code{enrichmentLogOdds}, plus any extras the underlying estimator
+#'   emits. Suitable as the \code{enrichment} argument to
+#'   \code{\link{colocPipeline}} (which joins on the same triple).
 #' @export
 qtlEnrichmentPipeline <- function(gwasFineMappingResult,
                                   qtlFineMappingResult,
@@ -71,11 +72,19 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult,
 
   # Per-study genome-wide GWAS PIP vector (named by variant id).
   gwasStudies <- unique(as.character(gwasFineMappingResult$study))
-  qtlContexts <- unique(as.character(qtlFineMappingResult$context))
+  # Unique QTL (study, context) tuples — using context alone would
+  # silently merge two studies that share a context label (e.g. MSBB
+  # and ROSMAP both labelling something "DLPFC_eQTL"), giving wrong
+  # enrichment estimates. The S4 schema carries study explicitly per
+  # entry, so we iterate over the joint key.
+  qtlTuples <- unique(data.frame(
+    qtlStudy   = as.character(qtlFineMappingResult$study),
+    qtlContext = as.character(qtlFineMappingResult$context),
+    stringsAsFactors = FALSE))
 
-  if (length(gwasStudies) == 0L || length(qtlContexts) == 0L)
-    stop("qtlEnrichmentPipeline: no (gwasStudy, qtlContext) pairs ",
-         "to compute (one of the inputs has zero rows).")
+  if (length(gwasStudies) == 0L || nrow(qtlTuples) == 0L)
+    stop("qtlEnrichmentPipeline: no (gwasStudy, qtlStudy, qtlContext) ",
+         "triples to compute (one of the inputs has zero rows).")
 
   results <- list()
   for (gStudy in gwasStudies) {
@@ -86,12 +95,15 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult,
         gStudy))
       next
     }
-    for (qContext in qtlContexts) {
-      qtlRegions <- .enrBuildQtlRegionsList(qtlFineMappingResult, qContext)
+    for (k in seq_len(nrow(qtlTuples))) {
+      qStudy   <- qtlTuples$qtlStudy[[k]]
+      qContext <- qtlTuples$qtlContext[[k]]
+      qtlRegions <- .enrBuildQtlRegionsList(qtlFineMappingResult,
+                                             qStudy, qContext)
       if (length(qtlRegions) == 0L) {
         warning(sprintf(
-          "qtlEnrichmentPipeline: no usable QTL regions for qtlContext='%s'; skipping.",
-          qContext))
+          "qtlEnrichmentPipeline: no usable QTL regions for (qtlStudy='%s', qtlContext='%s'); skipping.",
+          qStudy, qContext))
         next
       }
       enr <- tryCatch(
@@ -106,13 +118,14 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult,
           ...),
         error = function(e) {
           warning(sprintf(
-            "qtlEnrichmentPipeline: qtlEnrichment failed for (gwasStudy='%s', qtlContext='%s'): %s",
-            gStudy, qContext, conditionMessage(e)))
+            "qtlEnrichmentPipeline: qtlEnrichment failed for (gwasStudy='%s', qtlStudy='%s', qtlContext='%s'): %s",
+            gStudy, qStudy, qContext, conditionMessage(e)))
           NULL
         })
       if (is.null(enr)) next
       row <- .enrFlattenEnrichment(enr)
       row$gwasStudy  <- gStudy
+      row$qtlStudy   <- qStudy
       row$qtlContext <- qContext
       results[[length(results) + 1L]] <- row
     }
@@ -121,6 +134,7 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult,
   if (length(results) == 0L) {
     return(data.frame(
       gwasStudy  = character(0),
+      qtlStudy   = character(0),
       qtlContext = character(0),
       enrichment = numeric(0),
       enrichmentSe = numeric(0),
@@ -130,7 +144,7 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult,
   out <- do.call(rbind, lapply(results, as.data.frame,
                               stringsAsFactors = FALSE))
   rownames(out) <- NULL
-  idCols <- c("gwasStudy", "qtlContext")
+  idCols <- c("gwasStudy", "qtlStudy", "qtlContext")
   other  <- setdiff(colnames(out), idCols)
   out[, c(idCols, other), drop = FALSE]
 }
@@ -180,12 +194,15 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult,
   all
 }
 
-# Build the per-(qtl context) list of region fits in the shape that
-# qtlEnrichment expects: list(d) where each d carries
-# alpha, pip, prior_variance (V).
+# Build the per-(qtlStudy, qtlContext) list of region fits in the shape
+# that qtlEnrichment expects: list(d) where each d carries
+# alpha, pip, prior_variance (V). Filters on BOTH study and context so
+# entries from different studies that happen to share a context label
+# are not pooled into one enrichment estimate.
 # @noRd
-.enrBuildQtlRegionsList <- function(qtlFmr, qContext) {
-  idx <- which(as.character(qtlFmr$context) == qContext)
+.enrBuildQtlRegionsList <- function(qtlFmr, qStudy, qContext) {
+  idx <- which(as.character(qtlFmr$study)   == qStudy &
+               as.character(qtlFmr$context) == qContext)
   if (length(idx) == 0L) return(list())
   out <- list()
   for (i in idx) {
diff --git a/R/tupleSelectors.R b/R/tupleSelectors.R
index 79026604..73186b46 100644
--- a/R/tupleSelectors.R
+++ b/R/tupleSelectors.R
@@ -51,9 +51,11 @@
   idx[[1L]]
 }
 
-# Internal: resolve a (study, method) tuple to a single row index of a
-# GwasFineMappingResult collection.
-.tupleSelectRowGwasFmr <- function(x, study, method) {
+# Internal: resolve a (study, method, region_id) tuple to a single row
+# index of a GwasFineMappingResult collection. `region` may be NULL when
+# the (study, method) pair maps to a single row; otherwise it disambiguates
+# among per-block rows of a genome-wide collection.
+.tupleSelectRowGwasFmr <- function(x, study, method, region = NULL) {
   if (nrow(x) == 0L) stop("GwasFineMappingResult has no rows.")
   if (missing(study) || is.null(study) ||
       missing(method) || is.null(method)) {
@@ -63,8 +65,24 @@
   }
   if (length(study) != 1L || length(method) != 1L)
     stop("`study` and `method` must each be length 1.")
-  idx <- .matchTupleRows(x, list(study = study, method = method))
-  if (length(idx) == 0L)
-    stop(sprintf("No entry for (study='%s', method='%s').", study, method))
+  if (!is.null(region) && length(region) != 1L)
+    stop("`region` must be length 1 when supplied.")
+  keys <- list(study = study, method = method)
+  if (!is.null(region)) keys$region_id <- region
+  idx <- .matchTupleRows(x, keys)
+  if (length(idx) == 0L) {
+    stop(sprintf(
+      "No entry for (study='%s', method='%s'%s).",
+      study, method,
+      if (is.null(region)) "" else sprintf(", region='%s'", region)))
+  }
+  if (length(idx) > 1L) {
+    stop(sprintf(
+      "GwasFineMappingResult has %d rows matching (study='%s', method='%s'); ",
+      length(idx), study, method),
+      "pass `region` to disambiguate (available: ",
+      paste(shQuote(as.character(x$region_id[idx])), collapse = ", "),
+      ").")
+  }
   idx[[1L]]
 }
diff --git a/tests/testthat/test_GwasFineMappingResult.R b/tests/testthat/test_GwasFineMappingResult.R
index 9137cac6..52c8f86f 100644
--- a/tests/testthat/test_GwasFineMappingResult.R
+++ b/tests/testthat/test_GwasFineMappingResult.R
@@ -26,19 +26,51 @@ test_that("GwasFineMappingResult: errors on length mismatch", {
 })
 
 
-test_that("GwasFineMappingResult: rejects duplicate (study, method) tuples", {
+test_that("GwasFineMappingResult: same (study, method) across distinct region_ids OK", {
+  # The default constructor synthesises region_1, region_2, ... so the
+  # (study, method, region_id) triple is unique even when (study, method)
+  # repeats. This is the genome-wide-across-blocks shape that
+  # qtlEnrichmentPipeline + colocPipeline expect.
+  e1 <- .sc_makeFineMappingEntry(3)
+  e2 <- .sc_makeFineMappingEntry(3)
+  res <- GwasFineMappingResult(
+    study  = c("g1", "g1"),
+    method = c("susie", "susie"),
+    entry  = list(e1, e2))
+  expect_s4_class(res, "GwasFineMappingResult")
+  expect_equal(nrow(res), 2L)
+  expect_setequal(as.character(res$region_id), c("region_1", "region_2"))
+})
+
+
+test_that("GwasFineMappingResult: rejects duplicate (study, method, region_id) tuples", {
   e1 <- .sc_makeFineMappingEntry(3)
   e2 <- .sc_makeFineMappingEntry(3)
   expect_error(
     GwasFineMappingResult(
-      study  = c("g1", "g1"),
-      method = c("susie", "susie"),
-      entry  = list(e1, e2)),
+      study     = c("g1", "g1"),
+      method    = c("susie", "susie"),
+      region_id = c("chr22_1_100", "chr22_1_100"),
+      entry     = list(e1, e2)),
     "uniqueness violated"
   )
 })
 
 
+test_that("GwasFineMappingResult: errors when region_id length mismatches", {
+  e1 <- .sc_makeFineMappingEntry(3)
+  e2 <- .sc_makeFineMappingEntry(3)
+  expect_error(
+    GwasFineMappingResult(
+      study     = c("g1", "g1"),
+      method    = c("susie", "susie"),
+      region_id = "only_one",
+      entry     = list(e1, e2)),
+    "same length"
+  )
+})
+
+
 test_that("GwasFineMappingResult: show prints summary", {
   e <- .sc_makeFineMappingEntry(3)
   res <- GwasFineMappingResult(study = "g1", method = "susie",
diff --git a/tests/testthat/test_colocPipeline.R b/tests/testthat/test_colocPipeline.R
index 63fd943e..e57ce2c3 100644
--- a/tests/testthat/test_colocPipeline.R
+++ b/tests/testthat/test_colocPipeline.R
@@ -339,18 +339,24 @@ context("encoloc")
 # .colocLookupEnrichment (formerly .enlocLookupEnrichment, now shared)
 # ===========================================================================
 
-test_that(".colocLookupEnrichment: returns the value for a (gwasStudy, qtlContext) hit", {
-  enr <- data.frame(gwasStudy = c("G1", "G2"),
+test_that(".colocLookupEnrichment: returns the value for a (gwasStudy, qtlStudy, qtlContext) hit", {
+  enr <- data.frame(gwasStudy  = c("G1", "G2"),
+                    qtlStudy   = c("Q1", "Q1"),
                     qtlContext = c("c1", "c1"),
                     enrichment = c(2.0, 3.5),
                     stringsAsFactors = FALSE)
-  expect_equal(pecotmr:::.colocLookupEnrichment(enr, "G2", "c1"), 3.5)
+  expect_equal(pecotmr:::.colocLookupEnrichment(enr, "G2", "Q1", "c1"), 3.5)
 })
 
 test_that(".colocLookupEnrichment: returns NA when no row matches", {
-  enr <- data.frame(gwasStudy = "G1", qtlContext = "c1", enrichment = 2.0,
+  enr <- data.frame(gwasStudy  = "G1",
+                    qtlStudy   = "Q1",
+                    qtlContext = "c1",
+                    enrichment = 2.0,
                     stringsAsFactors = FALSE)
-  expect_true(is.na(pecotmr:::.colocLookupEnrichment(enr, "ghost", "c1")))
+  expect_true(is.na(pecotmr:::.colocLookupEnrichment(enr, "ghost", "Q1", "c1")))
+  # qtlStudy mismatch also a miss.
+  expect_true(is.na(pecotmr:::.colocLookupEnrichment(enr, "G1", "Qghost", "c1")))
 })
 
 # ===========================================================================
diff --git a/tests/testthat/test_ctwasPipeline.R b/tests/testthat/test_ctwasPipeline.R
index 135a93e5..05e618ad 100644
--- a/tests/testthat/test_ctwasPipeline.R
+++ b/tests/testthat/test_ctwasPipeline.R
@@ -785,6 +785,43 @@ test_that("ctwasPipeline: dispatches assemble → est → screen → finemap and
       "region_data", "boundary_genes", "screen_res"))
 })
 
+test_that("estCtwasParam: fallbackToPrefit recovers from accurate-EM NaN divergence", {
+  skip_if_not_installed("ctwas")
+  inp <- .ctp_makeMultiBlockInputs()
+  # Mock est_param to throw the documented NaN error, and fit_EM to
+  # produce a stub prefit result. Verify estCtwasParam catches the
+  # NaN error AND that the returned param is the prefit estimate.
+  local_mocked_bindings(
+    assemble_region_data = function(...) list(
+      region_data    = list(block1 = list(stub = TRUE)),
+      boundary_genes = list(),
+      z_gene         = data.frame(id = "t1", z = 1.0)),
+    compute_gene_z = function(...) data.frame(id = "t1", z = 1.0),
+    est_param = function(...) stop("Estimated group_prior_var contains NAs!"),
+    # ctwas:::fit_EM is internal — mock via the same `ctwas` namespace.
+    fit_EM = function(region_data, ...) list(
+      group_prior     = c(g = 0.05, SNP = 1e-4),
+      group_prior_var = c(g = 4.0, SNP = 5.0),
+      group_size      = c(g = 1, SNP = 100)),
+    .package = "ctwas")
+  local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(),
+                        .package = "pecotmr")
+  # Without fallback: the NaN error propagates.
+  expect_error(
+    estCtwasParam(assembleCtwasInputs(inp$gwasSumStats, inp$twasWeights),
+                  fallbackToPrefit = FALSE),
+    "contains NAs")
+  # With fallback: prefit estimates are returned as the param.
+  # .ctwasFitPrefitEm thin-scales the SNP group_prior (mirroring ctwas's
+  # est_param), so the mocked SNP prior 1e-4 emerges as 1e-4 * thin
+  # (default thin = 0.1) → 1e-5. The group_prior_var is not thinned.
+  est <- estCtwasParam(
+    assembleCtwasInputs(inp$gwasSumStats, inp$twasWeights),
+    fallbackToPrefit = TRUE)
+  expect_equal(unname(est$param$group_prior),     c(0.05, 1e-5))
+  expect_equal(unname(est$param$group_prior_var), c(4.0, 5.0))
+})
+
 test_that("estCtwasParam / screenCtwasRegions / finemapCtwasRegions can be called independently", {
   skip_if_not_installed("ctwas")
   inp <- .ctp_makeMultiBlockInputs()
diff --git a/tests/testthat/test_deprecated.R b/tests/testthat/test_deprecated.R
index b0c6f0dd..ea41ca69 100644
--- a/tests/testthat/test_deprecated.R
+++ b/tests/testthat/test_deprecated.R
@@ -503,7 +503,7 @@ test_that("enlocPipeline: rejects non-QtlFineMappingResult qtlFmr", {
   expect_error(
     suppressWarnings(enlocPipeline(qtlFineMappingResult = "no",
                   gwasInput            = .ep_makeGwasFmr(),
-                  enrichment = data.frame(gwasStudy = "G1", qtlContext = "c1",
+                  enrichment = data.frame(gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1",
                                            enrichment = 2.0,
                                            stringsAsFactors = FALSE))),
     "must be a QtlFineMappingResult"
@@ -515,7 +515,7 @@ test_that("enlocPipeline: rejects gwasInput that is neither GwasSumStats nor Gwa
   expect_error(
     suppressWarnings(enlocPipeline(qtlFineMappingResult = .ep_makeQtlFmr(),
                   gwasInput            = 42L,
-                  enrichment = data.frame(gwasStudy = "G1", qtlContext = "c1",
+                  enrichment = data.frame(gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1",
                                            enrichment = 2.0,
                                            stringsAsFactors = FALSE))),
     "must be a GwasSumStats or a GwasFineMappingResult"
@@ -547,7 +547,7 @@ test_that("enlocPipeline: un-QCd GwasSumStats input is rejected", {
   expect_error(
     suppressWarnings(enlocPipeline(qtlFineMappingResult = .ep_makeQtlFmr(),
                   gwasInput            = .ep_makeGwasSumstats(qc = FALSE),
-                  enrichment = data.frame(gwasStudy = "G1", qtlContext = "c1",
+                  enrichment = data.frame(gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1",
                                            enrichment = 2.0,
                                            stringsAsFactors = FALSE))),
     "has no QC record"
@@ -560,7 +560,7 @@ test_that("enlocPipeline: un-QCd GwasSumStats input is rejected", {
 
 test_that("enlocPipeline: pair loop produces one row per (QTL tuple, GWAS tuple) with adjusted p12", {
   skip_on_covr()
-  enr <- data.frame(gwasStudy = "G1", qtlContext = "c1", enrichment = 2.0,
+  enr <- data.frame(gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1", enrichment = 2.0,
                     stringsAsFactors = FALSE)
   local_mocked_bindings(coloc.bf_bf = .ep_mockColocBfBf(), .package = "coloc")
   out <- suppressWarnings(
@@ -580,7 +580,7 @@ test_that("enlocPipeline: pair loop produces one row per (QTL tuple, GWAS tuple)
 test_that("enlocPipeline: missing-enrichment pair falls back to baseline p12 with a warning", {
   skip_on_covr()
   # An enrichment frame that has no row for (G1, c1).
-  enr <- data.frame(gwasStudy = "G_other", qtlContext = "c_other",
+  enr <- data.frame(gwasStudy = "G_other", qtlStudy = "Q_other", qtlContext = "c_other",
                     enrichment = 10.0, stringsAsFactors = FALSE)
   local_mocked_bindings(coloc.bf_bf = .ep_mockColocBfBf(), .package = "coloc")
   expect_warning(
@@ -597,7 +597,7 @@ test_that("enlocPipeline: missing-enrichment pair falls back to baseline p12 wit
 
 test_that("enlocPipeline: p12Max caps the adjusted prior", {
   skip_on_covr()
-  enr <- data.frame(gwasStudy = "G1", qtlContext = "c1", enrichment = 1e6,
+  enr <- data.frame(gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1", enrichment = 1e6,
                     stringsAsFactors = FALSE)
   local_mocked_bindings(coloc.bf_bf = .ep_mockColocBfBf(), .package = "coloc")
   out <- suppressWarnings(
@@ -611,7 +611,7 @@ test_that("enlocPipeline: p12Max caps the adjusted prior", {
 
 test_that("enlocPipeline: coloc.bf_bf failures are caught and warned, pair skipped", {
   skip_on_covr()
-  enr <- data.frame(gwasStudy = "G1", qtlContext = "c1", enrichment = 2.0,
+  enr <- data.frame(gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1", enrichment = 2.0,
                     stringsAsFactors = FALSE)
   local_mocked_bindings(
     coloc.bf_bf = function(q, g, ...) stop("boom"),
@@ -627,7 +627,7 @@ test_that("enlocPipeline: coloc.bf_bf failures are caught and warned, pair skipp
 
 test_that("enlocPipeline: returnGwasFineMapping=TRUE attaches gwasFineMapping attr (non-empty result)", {
   skip_on_covr()
-  enr <- data.frame(gwasStudy = "G1", qtlContext = "c1", enrichment = 2.0,
+  enr <- data.frame(gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1", enrichment = 2.0,
                     stringsAsFactors = FALSE)
   local_mocked_bindings(coloc.bf_bf = .ep_mockColocBfBf(), .package = "coloc")
   # Use GwasFineMappingResult input: returnGwasFineMapping has no effect
@@ -672,7 +672,7 @@ test_that("enlocPipeline: qLbf NULL (QTL entry's LBF rows drop after priorTol) s
     enlocPipeline(qtlFineMappingResult = qfmr,
                   gwasInput            = .ep_makeGwasFmr(),
                   enrichment           = data.frame(
-                    gwasStudy = "G1", qtlContext = "c1",
+                    gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1",
                     enrichment = 1.0, stringsAsFactors = FALSE)))
   expect_equal(nrow(out), 0L)
 })
@@ -695,7 +695,7 @@ test_that("enlocPipeline: aligned NULL (disjoint variant sets) skips that pair",
     enlocPipeline(qtlFineMappingResult = qfmr,
                   gwasInput            = gfmr,
                   enrichment           = data.frame(
-                    gwasStudy = "G1", qtlContext = "c1",
+                    gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1",
                     enrichment = 1.0, stringsAsFactors = FALSE)))
   expect_equal(nrow(out), 0L)
 })
@@ -722,7 +722,7 @@ test_that("enlocPipeline: empty result schema includes enrichment + p12Used", {
     enlocPipeline(qtlFineMappingResult = .ep_makeQtlFmr(),
                   gwasInput            = gfmr,
                   enrichment           = data.frame(
-                    gwasStudy = "G1", qtlContext = "c1",
+                    gwasStudy = "G1", qtlStudy = "Q1", qtlContext = "c1",
                     enrichment = 1.0, stringsAsFactors = FALSE)))
   expect_equal(nrow(out), 0L)
   expect_true(all(c("enrichment", "p12Used") %in% colnames(out)))
diff --git a/tests/testthat/test_fineMappingPipeline.R b/tests/testthat/test_fineMappingPipeline.R
index d85b1ade..454865e5 100644
--- a/tests/testthat/test_fineMappingPipeline.R
+++ b/tests/testthat/test_fineMappingPipeline.R
@@ -251,18 +251,23 @@ test_that(".fmCacheLookup: returns matching entry by 4-tuple", {
   expect_null(pecotmr:::.fmCacheLookup(fmr, "ghost", "c1", "t1", "susie"))
 })
 
-test_that(".fmCacheLookupGwas: returns matching entry by (study, method)", {
+test_that(".fmCacheLookupGwas: returns matching entry by (study, method, region_id)", {
   e <- FineMappingEntry(
     variantIds = "v1",
     susieFit = list(token = "susie"),
     topLoci    = data.frame(variant_id = "v1", pip = 0.5,
                             stringsAsFactors = FALSE))
+  # GwasFineMappingResult assigns the synthetic region_id "region_1"
+  # when none is supplied; the lookup must include it in the 3-tuple
+  # key (multi-block FMRs disambiguate per-block fits by region_id).
   fmr <- GwasFineMappingResult(study = "g1", method = "susie",
                                 entry = list(e))
   expect_identical(
-    pecotmr:::.fmCacheLookupGwas(fmr, "g1", "susie"),
+    pecotmr:::.fmCacheLookupGwas(fmr, "g1", "susie", "region_1"),
     e)
-  expect_null(pecotmr:::.fmCacheLookupGwas(fmr, "ghost", "susie"))
+  expect_null(pecotmr:::.fmCacheLookupGwas(fmr, "ghost", "susie", "region_1"))
+  # Wrong region_id is also a miss.
+  expect_null(pecotmr:::.fmCacheLookupGwas(fmr, "g1", "susie", "other_region"))
 })
 
 test_that(".fmCacheLookup: non-QtlFineMappingResult input returns NULL", {
@@ -285,7 +290,7 @@ test_that(".fmCacheLookupGwas: non-GwasFineMappingResult input returns NULL", {
   qtlFmr <- QtlFineMappingResult(
     study = "s1", context = "c1", trait = "t1", method = "susie",
     entry = list(e))
-  expect_null(pecotmr:::.fmCacheLookupGwas(qtlFmr, "s1", "susie"))
+  expect_null(pecotmr:::.fmCacheLookupGwas(qtlFmr, "s1", "susie", "region_1"))
 })
 
 # ===========================================================================
@@ -303,7 +308,7 @@ test_that(".fmBuildQtlResult: empty entries errors", {
 test_that(".fmBuildGwasResult: empty entries errors", {
   expect_error(
     pecotmr:::.fmBuildGwasResult(character(0), character(0), list()),
-    "no \\(study, method\\) tuples"
+    "no \\(study, method, region_id\\) tuples"
   )
 })
 
@@ -1013,8 +1018,13 @@ test_that("fineMappingPipeline(GwasSumStats): cache hit short-circuits the RSS f
     topLoci    = data.frame(variant_id = paste0("v", 1:5),
                              pip = seq(0.9, 0.1, length.out = 5),
                              stringsAsFactors = FALSE))
+  # The GwasSumStats branch keys its cache by (study, method, region_id)
+  # where region_id is "{seqname}_{minPos}_{maxPos}" derived from the
+  # entry's GRanges. .fmp_makeSumstatsGr() yields chr1 positions 100..500,
+  # so the cache row must use region_id = "chr1_100_500" to hit.
   cache <- GwasFineMappingResult(
     study = "G1", method = "susie",
+    region_id = "chr1_100_500",
     entry = list(cachedEntry),
     ldSketch = .fmp_makeHandle())
   rss_calls <- 0
diff --git a/tests/testthat/test_qtlEnrichmentPipeline.R b/tests/testthat/test_qtlEnrichmentPipeline.R
index c8610f55..94885a15 100644
--- a/tests/testthat/test_qtlEnrichmentPipeline.R
+++ b/tests/testthat/test_qtlEnrichmentPipeline.R
@@ -136,7 +136,7 @@ test_that("qtlEnrichmentPipeline: ldSketch mismatch errors", {
 # Per-study / per-context iteration via mocked qtlEnrichment
 # ===========================================================================
 
-test_that("qtlEnrichmentPipeline: returns one row per (gwasStudy, qtlContext) pair", {
+test_that("qtlEnrichmentPipeline: returns one row per (gwasStudy, qtlStudy, qtlContext) triple", {
   gfmr <- .qep_makeGwasFmr()
   qfmr <- .qep_makeQtlFmr(contexts = c("c1", "c2"))
   local_mocked_bindings(qtlEnrichment = .qep_mockEnrichment(2.0),
@@ -144,12 +144,44 @@ test_that("qtlEnrichmentPipeline: returns one row per (gwasStudy, qtlContext) pa
   out <- qtlEnrichmentPipeline(gwasFineMappingResult = gfmr,
                                 qtlFineMappingResult  = qfmr)
   expect_s3_class(out, "data.frame")
-  expect_equal(nrow(out), 2L)  # 1 GWAS study * 2 contexts
-  expect_setequal(out$gwasStudy, "G1")
+  expect_equal(nrow(out), 2L)  # 1 GWAS study * 1 QTL study * 2 contexts
+  expect_setequal(out$gwasStudy,  "G1")
+  expect_setequal(out$qtlStudy,   "Q1")
   expect_setequal(out$qtlContext, c("c1", "c2"))
   expect_equal(out$enrichment, c(2.0, 2.0))
 })
 
+test_that("qtlEnrichmentPipeline: distinguishes two QTL studies that share a context label", {
+  # Build a QtlFineMappingResult with two studies (Q1, Q2) both
+  # tagging the same context "shared_ctx". Per-study filtering must
+  # keep them separate (a context-only filter would merge them and
+  # produce a single enrichment row instead of two).
+  e1 <- .qep_makeFmEntry(variant_ids = paste0("v", 1:5))
+  e2 <- .qep_makeFmEntry(variant_ids = paste0("v", 1:5))
+  qfmr <- QtlFineMappingResult(
+    study    = c("Q1", "Q2"),
+    context  = c("shared_ctx", "shared_ctx"),
+    trait    = c("t1", "t1"),
+    method   = c("susie", "susie"),
+    entry    = list(e1, e2),
+    ldSketch = .qep_makeHandle())
+  gfmr <- .qep_makeGwasFmr()
+  capturedRegions <- list()
+  local_mocked_bindings(
+    qtlEnrichment = function(gwasPip, susieQtlRegions, ...) {
+      capturedRegions[[length(capturedRegions) + 1L]] <<- susieQtlRegions
+      list(enrichment = 2.0, enrichmentSe = 0.1, enrichmentLogOdds = log(2))
+    },
+    .package = "pecotmr")
+  out <- qtlEnrichmentPipeline(gwasFineMappingResult = gfmr,
+                                qtlFineMappingResult  = qfmr)
+  expect_equal(nrow(out), 2L)
+  expect_setequal(out$qtlStudy, c("Q1", "Q2"))
+  # Each call to qtlEnrichment sees exactly one region (the per-study one),
+  # not both regions merged together.
+  expect_true(all(lengths(capturedRegions) == 1L))
+})
+
 test_that("qtlEnrichmentPipeline: qtlEnrichment failure produces a warning + skip", {
   gfmr <- .qep_makeGwasFmr()
   qfmr <- .qep_makeQtlFmr()
@@ -185,7 +217,7 @@ test_that("qtlEnrichmentPipeline: empty input collections yield the empty schema
   expect_s3_class(out, "data.frame")
   expect_equal(nrow(out), 0L)
   expect_setequal(colnames(out),
-                  c("gwasStudy", "qtlContext", "enrichment",
+                  c("gwasStudy", "qtlStudy", "qtlContext", "enrichment",
                     "enrichmentSe", "enrichmentLogOdds"))
 })
 
@@ -201,15 +233,16 @@ test_that(".enrBuildGwasPipVector: extracts pip per study", {
 })
 
 test_that(".enrBuildGwasPipVector: deduplicates identical PIPs across blocks", {
-  # Two rows under the same study but different methods, sharing v1 with
-  # an identical PIP value. The (study, method) validity constraint rules
-  # out same-method repeats, so we use susie + susieInf for the second.
+  # Two rows under the same study, same method, different LD blocks
+  # (distinct region_ids auto-supplied by the constructor) — the
+  # genome-wide multi-block shape. Both rows share v1 with the same
+  # PIP, so dedup must collapse it.
   e1 <- .qep_makeFmEntry(variant_ids = c("v1", "v2"),
                           pip = c(0.5, 0.2))
   e2 <- .qep_makeFmEntry(variant_ids = c("v1", "v3"),
                           pip = c(0.5, 0.4))
   g <- GwasFineMappingResult(
-    study = c("G1", "G1"), method = c("susie", "susieInf"),
+    study = c("G1", "G1"), method = c("susie", "susie"),
     entry = list(e1, e2),
     ldSketch = .qep_makeHandle())
   out <- pecotmr:::.enrBuildGwasPipVector(g, "G1")
@@ -217,12 +250,14 @@ test_that(".enrBuildGwasPipVector: deduplicates identical PIPs across blocks", {
 })
 
 test_that(".enrBuildGwasPipVector: conflicting PIPs across blocks errors", {
+  # Same variant 'v1' appears in two LD blocks with different PIPs —
+  # .enrBuildGwasPipVector must refuse to merge them silently.
   e1 <- .qep_makeFmEntry(variant_ids = c("v1"), pip = 0.5,
                           alpha = matrix(0.5, 1, 1))
   e2 <- .qep_makeFmEntry(variant_ids = c("v1"), pip = 0.8,
                           alpha = matrix(0.8, 1, 1))
   g <- GwasFineMappingResult(
-    study = c("G1", "G1"), method = c("susie", "susieInf"),
+    study = c("G1", "G1"), method = c("susie", "susie"),
     entry = list(e1, e2),
     ldSketch = .qep_makeHandle())
   expect_error(
@@ -231,14 +266,22 @@ test_that(".enrBuildGwasPipVector: conflicting PIPs across blocks errors", {
   )
 })
 
-test_that(".enrBuildQtlRegionsList: returns per-entry fit shapes", {
+test_that(".enrBuildQtlRegionsList: returns per-entry fit shapes for a (study, context) hit", {
   qfmr <- .qep_makeQtlFmr(contexts = c("c1", "c2"))
-  out <- pecotmr:::.enrBuildQtlRegionsList(qfmr, "c1")
+  out <- pecotmr:::.enrBuildQtlRegionsList(qfmr, "Q1", "c1")
   expect_equal(length(out), 1L)
   expect_true(!is.null(out[[1L]]$alpha))
   expect_true(!is.null(out[[1L]]$pip))
 })
 
+test_that(".enrBuildQtlRegionsList: returns empty list when the (study, context) tuple is absent", {
+  qfmr <- .qep_makeQtlFmr(contexts = c("c1", "c2"))
+  # Correct context but wrong study -> no hit, even though context exists.
+  expect_equal(length(pecotmr:::.enrBuildQtlRegionsList(qfmr, "Q_ghost", "c1")), 0L)
+  # Correct study but wrong context.
+  expect_equal(length(pecotmr:::.enrBuildQtlRegionsList(qfmr, "Q1", "c_ghost")), 0L)
+})
+
 # ===========================================================================
 # qtlEnrichment() — kernel wrapper + real C++ integration
 # These tests deliberately do NOT mock qtlEnrichmentRcpp so the C++

From 4494f2fca94573893be850089c9e03dcf34b0458 Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Mon, 22 Jun 2026 16:50:03 -0700
Subject: [PATCH 3/4] fix vignettes

---
 vignettes/twas-weights.Rmd | 41 ++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/vignettes/twas-weights.Rmd b/vignettes/twas-weights.Rmd
index 8817fad5..73b39a33 100644
--- a/vignettes/twas-weights.Rmd
+++ b/vignettes/twas-weights.Rmd
@@ -50,14 +50,22 @@ multi_study_qtl_dataset_example <- fixupExampleGenotypePaths(multi_study_qtl_dat
 
 ## Individual-level weights
 
-The simplest call learns a single susie-based weight set from a
+The simplest call learns a single regression-based weight set from a
 `QtlDataset`. Cross-validation is on by default (`cvFolds = 5`). Like
 `fineMappingPipeline()`, you need to tell it which genotype window to
 extract — `cisWindow` (per trait) is the most common choice.
 
+Fine-mapping methods (`susie`, `susieInf`, `susieAsh`, `mvsusie`,
+`fsusie`) are intentionally NOT runnable here on their own:
+`twasWeightsPipeline()` will not re-fit fine-mapping models from
+scratch. Run `fineMappingPipeline()` first and pass the result via
+`fineMappingResult = …` (see the next section). The quickstart below
+uses `lasso`, a TWAS-regression method that has no fine-mapping
+dependency.
+
 ```{r individual-quickstart}
 tw <- twasWeightsPipeline(qtl_dataset_example,
-                           methods   = "susie",
+                           methods   = "lasso",
                            cisWindow = 1e6)
 tw
 ```
@@ -66,7 +74,7 @@ Extract the per-variant weights for one tuple:
 
 ```{r individual-weights}
 entry <- getTwasWeights(tw, study = "study1", context = "brain",
-                         trait = "ENSG_example", method = "susie")
+                         trait = "ENSG_example", method = "lasso")
 head(getWeights(entry))
 ```
 
@@ -110,7 +118,7 @@ matching fine-mapping row fit the susie weights from scratch as normal.
 
 ```{r multistudy}
 msTw <- twasWeightsPipeline(multi_study_qtl_dataset_example,
-                             methods   = "susie",
+                             methods   = "lasso",
                              cisWindow = 1e6)
 table(msTw$study)
 ```
@@ -118,23 +126,40 @@ table(msTw$study)
 ## Multiple weight methods at once
 
 Pass a character vector to fit several methods on the same fold split
-and return them as separate `(method)` rows in the collection:
+and return them as separate `(method)` rows in the collection.
+Fine-mapping methods (`susie`, `susieInf`, `susieAsh`, `mvsusie`) can
+ride along when a `fineMappingResult` is supplied; without one, restrict
+the list to the TWAS-regression methods:
 
 ```{r multimethod, eval=FALSE}
+# TWAS-regression only — no fineMappingResult needed.
+twasWeightsPipeline(
+  qtl_dataset_example,
+  methods = c("lasso", "enet", "ridge", "mrash"))
+
+# Add a fine-mapping susie row by providing the FineMappingResult.
+fmr <- fineMappingPipeline(qtl_dataset_example, methods = "susie",
+                            cisWindow = 1e6)
 twasWeightsPipeline(
   qtl_dataset_example,
-  methods = c("susie", "lasso", "enet", "ridge", "mrash"))
+  methods           = c("susie", "lasso", "enet", "ridge", "mrash"),
+  cisWindow         = 1e6,
+  fineMappingResult = fmr)
 ```
 
 ## RSS / sumstats weights
 
 ```{r sumstats}
-twSs <- twasWeightsPipeline(qtl_sumstats_example, methods = "susie")
+twSs <- twasWeightsPipeline(qtl_sumstats_example, methods = "lasso")
 twSs
 ```
 
 The RSS path uses the `ldSketch` `GenotypeHandle` carried on the
-`QtlSumStats` collection to compute / fetch LD as needed.
+`QtlSumStats` collection to compute / fetch LD as needed. The fine-
+mapping methods (`susie`, `susieInf`, `susieAsh`, `mvsusie`) follow the
+same `fineMappingResult` gate as the individual-level path — call
+`fineMappingPipeline()` on the `QtlSumStats` first, then pass the
+result into `twasWeightsPipeline(..., fineMappingResult = ...)`.
 
 ## Multivariate (multi-context) weights
 

From e0bf253c2477bb08d59a2bfc06b3f07464aa36dc Mon Sep 17 00:00:00 2001
From: danielnachun <danielnachun@users.noreply.github.com>
Date: Mon, 22 Jun 2026 23:51:20 +0000
Subject: [PATCH 4/4] Update documentation

---
 NAMESPACE                    |  8 ++--
 man/GwasFineMappingResult.Rd | 18 +++++--
 man/assembleCtwasInputs.Rd   | 93 ++++++++++++++++++++++++++++++++++++
 man/colocPipeline.Rd         |  7 +--
 man/ctwasPipeline.Rd         | 58 ++++++++++++++--------
 man/estCtwasParam.Rd         | 50 +++++++++++++++++++
 man/finemapCtwasRegions.Rd   | 33 +++++++++++++
 man/getCs.Rd                 | 10 +++-
 man/getMarginalEffects.Rd    |  1 +
 man/getPip.Rd                |  1 +
 man/getSusieFit.Rd           | 10 +++-
 man/getTopLoci.Rd            |  1 +
 man/getVariantIds.Rd         |  1 +
 man/qtlEnrichmentPipeline.Rd | 11 +++--
 man/screenCtwasRegions.Rd    | 33 +++++++++++++
 man/summaryStatsQc.Rd        | 87 +++++++++++++++++++++------------
 16 files changed, 352 insertions(+), 70 deletions(-)
 create mode 100644 man/assembleCtwasInputs.Rd
 create mode 100644 man/estCtwasParam.Rd
 create mode 100644 man/finemapCtwasRegions.Rd
 create mode 100644 man/screenCtwasRegions.Rd

diff --git a/NAMESPACE b/NAMESPACE
index c58aa18a..a69a3932 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -21,6 +21,7 @@ export(TwasWeightsEntry)
 export(adjustPips)
 export(alignVariantNames)
 export(alleleQc)
+export(assembleCtwasInputs)
 export(autoDecision)
 export(bLassoWeights)
 export(batchLoadTwasWeights)
@@ -50,11 +51,7 @@ export(computeQtlEnrichment)
 export(computeSldscAnnotSd)
 export(computeSldscMRef)
 export(ctwasBimfileLoader)
-export(assembleCtwasInputs)
 export(ctwasPipeline)
-export(estCtwasParam)
-export(finemapCtwasRegions)
-export(screenCtwasRegions)
 export(dentist)
 export(dentistSingleWindow)
 export(dprAdaptiveGibbsWeights)
@@ -65,6 +62,7 @@ export(enetWeights)
 export(enforceDesignFullRank)
 export(enlocPipeline)
 export(ensembleWeights)
+export(estCtwasParam)
 export(estimateH2)
 export(estimateSparsity)
 export(extractBlockGenotypes)
@@ -76,6 +74,7 @@ export(filterRelatedness)
 export(filterVariantsByLdReference)
 export(findOverlappingRegions)
 export(fineMappingPipeline)
+export(finemapCtwasRegions)
 export(fitFsusie)
 export(fitMashContrast)
 export(fitMvsusie)
@@ -229,6 +228,7 @@ export(rssAnalysisPipeline)
 export(sanitizeMashData)
 export(scadRssWeights)
 export(scadWeights)
+export(screenCtwasRegions)
 export(sdpr)
 export(sdprWeights)
 export(slalom)
diff --git a/man/GwasFineMappingResult.Rd b/man/GwasFineMappingResult.Rd
index 30260242..989589d3 100644
--- a/man/GwasFineMappingResult.Rd
+++ b/man/GwasFineMappingResult.Rd
@@ -4,7 +4,7 @@
 \alias{GwasFineMappingResult}
 \title{TWAS Weights Collection}
 \usage{
-GwasFineMappingResult(study, method, entry, ldSketch = NULL)
+GwasFineMappingResult(study, method, entry, region_id = NULL, ldSketch = NULL)
 }
 \arguments{
 \item{study}{Character vector of study identifiers (per tuple).}
@@ -13,6 +13,13 @@ GwasFineMappingResult(study, method, entry, ldSketch = NULL)
 
 \item{entry}{List / \code{SimpleList} of \code{FineMappingEntry} objects.}
 
+\item{region_id}{Character vector of LD-block identifiers (per
+tuple). When omitted (\code{NULL}), defaults to a per-row synthetic
+id (\code{"region_1"}, \code{"region_2"}, ...) so the
+(\code{study}, \code{method}, \code{region_id}) triple is unique.
+Supplying meaningful labels (e.g. \code{"chr22_10516173_17414263"})
+is preferred for downstream consumers that join on region.}
+
 \item{ldSketch}{An optional \code{GenotypeHandle}.}
 }
 \value{
@@ -39,10 +46,11 @@ S4 collection of TWAS weights keyed by the identity tuple
   across the identity-tuple columns and any present joint columns.
 
 Construct a \code{GwasFineMappingResult} DFrame-subclass
-  collection from per-(study, method) tuples and a list of
-  \code{FineMappingEntry} payloads. The collection represents one LD
-  block of GWAS fine-mapping fits; build a separate collection per
-  block when sweeping the genome.
+  collection from per-(study, method, region_id) tuples and a list of
+  \code{FineMappingEntry} payloads. The collection can represent
+  either a single LD block (one row per (study, method)) or a
+  genome-wide sweep across blocks (multiple rows per (study, method),
+  each tagged with its own region_id).
 }
 \section{Slots}{
 
diff --git a/man/assembleCtwasInputs.Rd b/man/assembleCtwasInputs.Rd
new file mode 100644
index 00000000..c5bdfc4f
--- /dev/null
+++ b/man/assembleCtwasInputs.Rd
@@ -0,0 +1,93 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ctwasPipeline.R
+\name{assembleCtwasInputs}
+\alias{assembleCtwasInputs}
+\title{Assemble cTWAS inputs from S4 GwasSumStats / TwasWeights}
+\usage{
+assembleCtwasInputs(
+  gwasSumStats,
+  twasWeights,
+  twasZ = NULL,
+  fineMappingResult = NULL,
+  method = NULL,
+  twasWeightCutoff = 0,
+  csMinCor = 0.8,
+  minPipCutoff = 0,
+  maxNumVariants = Inf
+)
+}
+\arguments{
+\item{gwasSumStats}{NAMED LIST of \code{\link{GwasSumStats}} keyed
+by \code{region_id} (at least two entries). Each must have
+\code{getQcInfo()} non-empty.}
+
+\item{twasWeights}{NAMED LIST of \code{\link{TwasWeights}} keyed by
+\code{region_id}. Keys must be a SUBSET of \code{gwasSumStats}'s
+keys: blocks without any TWAS weights still contribute their
+SNP-level signal to ctwas's joint group prior estimate (matches
+the legacy whole-chromosome pattern where only a few of many LD
+blocks carry gene weights).}
+
+\item{twasZ}{Optional \code{GRanges} of TWAS Z-scores (output of
+\code{\link{causalInferencePipeline}}). When supplied, the
+per-(trait, context) Z is used as the \code{z_gene} input to
+\code{ctwas_sumstats} so it is not recomputed.}
+
+\item{fineMappingResult}{Optional \code{QtlFineMappingResult} or
+\code{GwasFineMappingResult} carrying the per-variant PIP and
+credible-set membership data used by the CS / PIP rescue filters
+(\code{csMinCor} and \code{minPipCutoff}). When \code{NULL}
+(default) the smart filters are no-ops; only the magnitude filter
+(\code{twasWeightCutoff}) and the per-gene cap
+(\code{maxNumVariants}, ordered by \code{|weight|}) apply.}
+
+\item{method}{Optional character (length 1). Picks which TWAS
+method's weights to feed into ctwas for each (study, context,
+trait) gene. When \code{NULL} (default): use \code{"ensemble"} if
+that method is present across the TwasWeights; otherwise use the
+sole method when only one is present; otherwise error. Passing
+the name explicitly (e.g. \code{"mrash"}) overrides the default
+resolution.}
+
+\item{twasWeightCutoff}{Numeric (length 1). Drop variants with
+\code{|weight| < twasWeightCutoff} from each gene's weight matrix
+before ctwas sees it. Default \code{0} (no filter).}
+
+\item{csMinCor}{Numeric (length 1). When \code{fineMappingResult} is
+provided, variants belonging to any 95\% credible set with purity
+(\code{min_abs_corr}) \code{>= csMinCor} are marked as must-keep
+and survive the per-gene cap. Default \code{0.8}. Ignored without
+a \code{fineMappingResult}.}
+
+\item{minPipCutoff}{Numeric (length 1). When
+\code{fineMappingResult} is provided, variants with PIP greater
+than \code{minPipCutoff} are marked as must-keep and survive the
+per-gene cap. Default \code{0} (no PIP rescue). Ignored without a
+\code{fineMappingResult}.}
+
+\item{maxNumVariants}{Numeric (length 1). Cap on per-gene variant
+count. When the gene has more variants than this, keep all
+must-keep variants and fill remaining slots by descending PIP
+(when available) or descending \code{|weight|}. Default
+\code{Inf} (no cap).}
+}
+\value{
+A list with elements \code{z_snp}, \code{z_gene} (NULL when
+  no \code{twasZ}), \code{weights}, \code{region_info},
+  \code{snp_map}, \code{LD_map}, \code{LD_loader_fun},
+  \code{snpinfo_loader_fun}, and \code{resolvedMethod}.
+}
+\description{
+Builds the per-block ctwas-shape input set
+  (\code{z_snp}, \code{weights}, \code{region_info}, \code{snp_map},
+  \code{LD_map}, the LD- and SNP-info loader closures, plus optional
+  \code{z_gene}) that the downstream ctwas steps consume.
+  This is step 1 of the three-step \code{\link{ctwasPipeline}} split.
+}
+\details{
+The returned list is the SHARED STATE threaded through
+  \code{\link{estCtwasParam}} → \code{\link{screenCtwasRegions}} →
+  \code{\link{finemapCtwasRegions}}. Callers can short-circuit at any
+  step (e.g. override the estimated priors before fine-mapping) or
+  call \code{ctwasPipeline()} for the one-shot path.
+}
diff --git a/man/colocPipeline.Rd b/man/colocPipeline.Rd
index ead0abcc..b1e3aff5 100644
--- a/man/colocPipeline.Rd
+++ b/man/colocPipeline.Rd
@@ -71,9 +71,10 @@ use. Default \code{1e-9}.}
 computed \code{GwasFineMappingResult} on the returned data frame
 as attribute \code{"gwasFineMapping"}. Default \code{FALSE}.}
 
-\item{enrichment}{Optional data.frame of per-(gwasStudy, qtlContext)
-enrichment factors with columns \code{gwasStudy}, \code{qtlContext},
-\code{enrichment}. Output of \code{\link{qtlEnrichmentPipeline}}.
+\item{enrichment}{Optional data.frame of per-(gwasStudy, qtlStudy,
+qtlContext) enrichment factors with columns \code{gwasStudy},
+\code{qtlStudy}, \code{qtlContext}, \code{enrichment}. Output of
+\code{\link{qtlEnrichmentPipeline}}.
 When non-\code{NULL}, each pair's \code{p12} prior is scaled to
 \code{min(p12 * (1 + enrichment), p12Max)} (the enrichment-informed
 colocalization variant, "enloc"). Pairs without a matching
diff --git a/man/ctwasPipeline.Rd b/man/ctwasPipeline.Rd
index d568030e..f6c9f68d 100644
--- a/man/ctwasPipeline.Rd
+++ b/man/ctwasPipeline.Rd
@@ -2,14 +2,14 @@
 % Please edit documentation in R/ctwasPipeline.R
 \name{ctwasPipeline}
 \alias{ctwasPipeline}
-\title{Causal TWAS Pipeline (cTWAS, single LD block)}
+\title{Causal TWAS Pipeline (cTWAS, multi LD block)}
 \usage{
 ctwasPipeline(
   gwasSumStats,
   twasWeights,
   twasZ = NULL,
   fineMappingResult = NULL,
-  regionId = "block1",
+  method = NULL,
   thin = 0.1,
   niterPrefit = 3L,
   niter = 30L,
@@ -21,15 +21,21 @@ ctwasPipeline(
   csMinCor = 0.8,
   minPipCutoff = 0,
   maxNumVariants = Inf,
+  fallbackToPrefit = FALSE,
   ...
 )
 }
 \arguments{
-\item{gwasSumStats}{A \code{\link{GwasSumStats}} over one LD block.
-Must have \code{getQcInfo()} non-empty.}
+\item{gwasSumStats}{NAMED LIST of \code{\link{GwasSumStats}} keyed
+by \code{region_id} (at least two entries). Each must have
+\code{getQcInfo()} non-empty.}
 
-\item{twasWeights}{A \code{\link{TwasWeights}} carrying per-(study,
-context, trait, method) weights over the same LD block.}
+\item{twasWeights}{NAMED LIST of \code{\link{TwasWeights}} keyed by
+\code{region_id}. Keys must be a SUBSET of \code{gwasSumStats}'s
+keys: blocks without any TWAS weights still contribute their
+SNP-level signal to ctwas's joint group prior estimate (matches
+the legacy whole-chromosome pattern where only a few of many LD
+blocks carry gene weights).}
 
 \item{twasZ}{Optional \code{GRanges} of TWAS Z-scores (output of
 \code{\link{causalInferencePipeline}}). When supplied, the
@@ -44,8 +50,13 @@ credible-set membership data used by the CS / PIP rescue filters
 (\code{twasWeightCutoff}) and the per-gene cap
 (\code{maxNumVariants}, ordered by \code{|weight|}) apply.}
 
-\item{regionId}{Optional character (length 1) label for the LD
-block. Default \code{"block1"}.}
+\item{method}{Optional character (length 1). Picks which TWAS
+method's weights to feed into ctwas for each (study, context,
+trait) gene. When \code{NULL} (default): use \code{"ensemble"} if
+that method is present across the TwasWeights; otherwise use the
+sole method when only one is present; otherwise error. Passing
+the name explicitly (e.g. \code{"mrash"}) overrides the default
+resolution.}
 
 \item{thin, niterPrefit, niter, L}{Pass-throughs to
 \code{ctwas::ctwas_sumstats}.}
@@ -77,6 +88,12 @@ must-keep variants and fill remaining slots by descending PIP
 (when available) or descending \code{|weight|}. Default
 \code{Inf} (no cap).}
 
+\item{fallbackToPrefit}{Logical (length 1). Forwarded to
+\code{\link{estCtwasParam}}. When \code{TRUE}, ctwas's accurate-EM
+NaN failure is recovered by falling back to the prefit estimates
+(mirrors the legacy ctwas_2 workaround on underpowered data).
+Default \code{FALSE}.}
+
 \item{...}{Additional arguments forwarded to
 \code{ctwas::ctwas_sumstats}.}
 }
@@ -85,9 +102,9 @@ Whatever \code{ctwas::ctwas_sumstats} returns (a list with
   \code{susie_alpha_res}, \code{param}, and other diagnostics).
 }
 \description{
-Per-LD-block pipeline that hands a
-  \code{\link{GwasSumStats}} of GWAS Z-scores together with per-gene
-  TWAS weights and the shared LD sketch to
+Pipeline that hands a per-block set of
+  \code{\link{GwasSumStats}} of GWAS Z-scores together with the
+  matching per-block per-gene TWAS weights and LD sketches to
   \code{ctwas::ctwas_sumstats}, producing per-gene posterior
   inclusion probabilities for causal genes. Optionally accepts a
   precomputed TWAS-Z \code{GRanges} from
@@ -96,19 +113,18 @@ Per-LD-block pipeline that hands a
 }
 \section{LD block convention}{
 
-Each call assumes the inputs cover exactly one LD block — the user
-is responsible for constructing the \code{GwasSumStats} and
-\code{TwasWeights} over the block of interest before calling this
-pipeline (the same convention used by
-\code{\link{fineMappingPipeline}} on \code{GwasSumStats}). The
-single-region \code{region_info}, \code{LD_map}, and \code{snp_map}
-that \code{ctwas::ctwas_sumstats} requires are derived
-automatically from the LD sketch on \code{gwasSumStats}.
+Inputs are NAMED LISTS keyed by \code{region_id}
+(\code{list(block1 = gss1, block2 = gss2, ...)}). Per-block
+\code{region_info}, \code{LD_map}, and \code{snp_map} entries are
+built automatically from each block's LD sketch and concatenated
+before the call to \code{ctwas::ctwas_sumstats}. A single-block
+input is rejected: cTWAS's EM cannot converge on a single region,
+so callers must supply at least two blocks.
 }
 
 \section{LD-sketch identity check}{
 
-\code{getLdSketch(twasWeights)} (when non-NULL) must match
-\code{getLdSketch(gwasSumStats)}. Mismatch is a hard error.
+Per block: \code{getLdSketch(twasWeights)} (when non-NULL) must
+match \code{getLdSketch(gwasSumStats)}. Mismatch is a hard error.
 }
 
diff --git a/man/estCtwasParam.Rd b/man/estCtwasParam.Rd
new file mode 100644
index 00000000..3c1142f3
--- /dev/null
+++ b/man/estCtwasParam.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ctwasPipeline.R
+\name{estCtwasParam}
+\alias{estCtwasParam}
+\title{Estimate cTWAS group prior + prior variance}
+\usage{
+estCtwasParam(
+  inputs,
+  thin = 0.1,
+  niterPrefit = 3L,
+  niter = 30L,
+  groupPriorVarStructure = c("shared_type", "shared_context", "shared_nonSNP",
+    "shared_all", "independent"),
+  ncore = 1L,
+  fallbackToPrefit = FALSE,
+  ...
+)
+}
+\arguments{
+\item{inputs}{A list returned by \code{\link{assembleCtwasInputs}}.}
+
+\item{thin, niterPrefit, niter}{Pass-throughs to
+\code{ctwas::assemble_region_data} / \code{ctwas::est_param}.}
+
+\item{groupPriorVarStructure}{Pass-through.}
+
+\item{ncore}{Number of cores.}
+
+\item{fallbackToPrefit}{Logical (length 1). When \code{TRUE} (default
+\code{FALSE}), if \code{ctwas::est_param}'s accurate EM diverges to
+NaN and throws \code{"Estimated group_prior(_var)? contains NAs"},
+re-run only the prefit step via \code{ctwas:::fit_EM} and return
+those (typically finite) priors as the param. Mirrors the legacy
+ctwas_2 workaround on toy data where the accurate EM saturates.}
+
+\item{...}{Additional arguments forwarded to \code{ctwas::est_param}
+(e.g. \code{min_p_single_effect}, \code{min_group_size}).}
+}
+\value{
+The \code{inputs} list augmented with \code{region_data},
+  \code{boundary_genes}, \code{z_gene}, and \code{param}.
+}
+\description{
+Step 2 of the three-step \code{\link{ctwasPipeline}}:
+  assembles \code{region_data} from the inputs and runs
+  \code{ctwas::est_param} (prefit EM + accurate EM) to estimate the
+  group prior probabilities and prior variances. Returns the input
+  state plus \code{region_data}, \code{boundary_genes},
+  \code{z_gene}, and \code{param}.
+}
diff --git a/man/finemapCtwasRegions.Rd b/man/finemapCtwasRegions.Rd
new file mode 100644
index 00000000..31c42774
--- /dev/null
+++ b/man/finemapCtwasRegions.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ctwasPipeline.R
+\name{finemapCtwasRegions}
+\alias{finemapCtwasRegions}
+\title{Fine-map cTWAS regions}
+\usage{
+finemapCtwasRegions(screenResult, L = 5L, ncore = 1L, ...)
+}
+\arguments{
+\item{screenResult}{A list returned by
+\code{\link{screenCtwasRegions}}.}
+
+\item{L}{Pass-through.}
+
+\item{ncore}{Number of cores.}
+
+\item{...}{Additional arguments forwarded to
+\code{ctwas::finemap_regions}.}
+}
+\value{
+A list mirroring \code{ctwas::ctwas_sumstats}'s output:
+  \code{z_gene}, \code{param}, \code{finemap_res},
+  \code{susie_alpha_res}, \code{region_data}, \code{boundary_genes},
+  \code{screen_res}.
+}
+\description{
+Step 4 (final) of the three-step
+  \code{\link{ctwasPipeline}}: runs \code{ctwas::finemap_regions} on
+  the screened-region set from \code{\link{screenCtwasRegions}} and
+  assembles the documented top-level ctwas output (\code{z_gene},
+  \code{param}, \code{finemap_res}, \code{susie_alpha_res},
+  \code{region_data}, \code{boundary_genes}, \code{screen_res}).
+}
diff --git a/man/getCs.Rd b/man/getCs.Rd
index 4decf71d..1ee1fef0 100644
--- a/man/getCs.Rd
+++ b/man/getCs.Rd
@@ -12,7 +12,15 @@ getCs(x, ...)
 
 \S4method{getCs}{FineMappingEntry}(x, coverage = 0.95, ...)
 
-\S4method{getCs}{GwasFineMappingResult}(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...)
+\S4method{getCs}{GwasFineMappingResult}(
+  x,
+  study = NULL,
+  context = NULL,
+  trait = NULL,
+  method = NULL,
+  region = NULL,
+  ...
+)
 
 \S4method{getCs}{QtlFineMappingResult}(
   x,
diff --git a/man/getMarginalEffects.Rd b/man/getMarginalEffects.Rd
index 438ac089..98263d16 100644
--- a/man/getMarginalEffects.Rd
+++ b/man/getMarginalEffects.Rd
@@ -19,6 +19,7 @@ getMarginalEffects(x, maxPval = NULL, ...)
   context = NULL,
   trait = NULL,
   method = NULL,
+  region = NULL,
   ...
 )
 
diff --git a/man/getPip.Rd b/man/getPip.Rd
index 0a4ed899..6ddc688f 100644
--- a/man/getPip.Rd
+++ b/man/getPip.Rd
@@ -18,6 +18,7 @@ getPip(x, ...)
   context = NULL,
   trait = NULL,
   method = NULL,
+  region = NULL,
   returnList = FALSE,
   ...
 )
diff --git a/man/getSusieFit.Rd b/man/getSusieFit.Rd
index ddf192f5..4a037888 100644
--- a/man/getSusieFit.Rd
+++ b/man/getSusieFit.Rd
@@ -12,7 +12,15 @@ getSusieFit(x, ...)
 
 \S4method{getSusieFit}{FineMappingEntry}(x, ...)
 
-\S4method{getSusieFit}{GwasFineMappingResult}(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...)
+\S4method{getSusieFit}{GwasFineMappingResult}(
+  x,
+  study = NULL,
+  context = NULL,
+  trait = NULL,
+  method = NULL,
+  region = NULL,
+  ...
+)
 
 \S4method{getSusieFit}{QtlFineMappingResult}(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...)
 }
diff --git a/man/getTopLoci.Rd b/man/getTopLoci.Rd
index 12045a4e..3212fa2b 100644
--- a/man/getTopLoci.Rd
+++ b/man/getTopLoci.Rd
@@ -20,6 +20,7 @@ getTopLoci(x, type = c("data.frame", "GRanges"), signalCutoff = 0.025, ...)
   context = NULL,
   trait = NULL,
   method = NULL,
+  region = NULL,
   ...
 )
 
diff --git a/man/getVariantIds.Rd b/man/getVariantIds.Rd
index 7b5b1a3b..860a0f83 100644
--- a/man/getVariantIds.Rd
+++ b/man/getVariantIds.Rd
@@ -22,6 +22,7 @@ getVariantIds(x, ...)
   context = NULL,
   trait = NULL,
   method = NULL,
+  region = NULL,
   ...
 )
 
diff --git a/man/qtlEnrichmentPipeline.Rd b/man/qtlEnrichmentPipeline.Rd
index 59b73418..b418a842 100644
--- a/man/qtlEnrichmentPipeline.Rd
+++ b/man/qtlEnrichmentPipeline.Rd
@@ -40,11 +40,12 @@ Default \code{1.0}.}
 \code{\link{qtlEnrichment}}.}
 }
 \value{
-A data frame with one row per (gwasStudy, qtlContext) pair
-  and columns \code{gwasStudy}, \code{qtlContext},
-  \code{enrichment}, \code{enrichmentSe}, \code{enrichmentLogOdds},
-  plus any extras the underlying estimator emits. Suitable as the
-  \code{enrichment} argument to \code{\link{colocPipeline}}.
+A data frame with one row per (gwasStudy, qtlStudy,
+  qtlContext) triple and columns \code{gwasStudy}, \code{qtlStudy},
+  \code{qtlContext}, \code{enrichment}, \code{enrichmentSe},
+  \code{enrichmentLogOdds}, plus any extras the underlying estimator
+  emits. Suitable as the \code{enrichment} argument to
+  \code{\link{colocPipeline}} (which joins on the same triple).
 }
 \description{
 Genome-wide pipeline that computes per-pair (GWAS study,
diff --git a/man/screenCtwasRegions.Rd b/man/screenCtwasRegions.Rd
new file mode 100644
index 00000000..481fee75
--- /dev/null
+++ b/man/screenCtwasRegions.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ctwasPipeline.R
+\name{screenCtwasRegions}
+\alias{screenCtwasRegions}
+\title{Screen cTWAS regions}
+\usage{
+screenCtwasRegions(estResult, L = 5L, ncore = 1L, ...)
+}
+\arguments{
+\item{estResult}{A list returned by \code{\link{estCtwasParam}}.}
+
+\item{L}{Pass-through to \code{ctwas::screen_regions} (and downstream
+fine-mapping).}
+
+\item{ncore}{Number of cores.}
+
+\item{...}{Additional arguments forwarded to
+\code{ctwas::screen_regions} (e.g. \code{filter_L},
+\code{min_nonSNP_PIP}, \code{min_L}).}
+}
+\value{
+The \code{estResult} list augmented with
+  \code{screen_res} (the full ctwas output) and
+  \code{screened_region_data}.
+}
+\description{
+Step 3 of the three-step \code{\link{ctwasPipeline}}:
+  runs \code{ctwas::screen_regions} on the
+  \code{\link{estCtwasParam}} result and returns the screened-region
+  set. Use this entry point to substitute hand-tuned priors for the
+  ones estimated in step 2 (e.g. when the accurate EM diverges to
+  NaN and you want to recover the prefit values).
+}
diff --git a/man/summaryStatsQc.Rd b/man/summaryStatsQc.Rd
index e2683db7..ef11e92f 100644
--- a/man/summaryStatsQc.Rd
+++ b/man/summaryStatsQc.Rd
@@ -6,7 +6,6 @@
 \usage{
 summaryStatsQc(
   sumstats,
-  useDbsnpRefCheck = FALSE,
   removeIndels = FALSE,
   removeStrandAmbiguous = TRUE,
   mafCutoff = 0,
@@ -19,38 +18,39 @@ summaryStatsQc(
   alleleFlipKriging = FALSE,
   impute = FALSE,
   imputeOpts = list(rcond = 0.01, r2Threshold = 0.6, minimumLd = 5, lamb = 0.01),
-  convertRefGenome = NULL,
   matchMinProp = 0,
-  mungeSumstatsArgs = list()
+  coerceNumeric = TRUE,
+  normalizeChr = TRUE,
+  dropNonstandardChr = TRUE,
+  dropMissData = TRUE,
+  dropPOutOfRange = TRUE,
+  clampSmallP = TRUE,
+  smallPFloor = 4.94065645841247e-324,
+  dropZeroEffect = TRUE,
+  dropNonpositiveSe = TRUE
 )
 }
 \arguments{
 \item{sumstats}{A \code{QtlSumStats} or \code{GwasSumStats}
 collection.}
 
-\item{useDbsnpRefCheck}{One-shot opt-in to MungeSumstats's
-dbSNP-based reference-genome / allele-flip / biallelic-filter
-checks. When \code{TRUE}, sets \code{on_ref_genome},
-\code{infer_eff_direction}, \code{allele_flip_check}, and
-\code{bi_allelic_filter} all to \code{TRUE} simultaneously.
-Default \code{FALSE} (trust input alleles, lighter dependency
-footprint).}
-
 \item{removeIndels}{Logical (length 1). When \code{TRUE}, drop
-indels. Default \code{FALSE} (match MungeSumstats default).}
+indels during panel harmonization. Default \code{FALSE}.}
 
 \item{removeStrandAmbiguous}{Logical (length 1). When \code{TRUE},
 drop A/T and C/G strand-ambiguous variants. Default \code{TRUE}.}
 
 \item{mafCutoff}{Numeric (length 1). MAF threshold (variants with
 \code{MAF < mafCutoff} are dropped). Default 0. Requires \code{MAF}
-column.}
+or \code{FRQ} column when non-zero.}
 
 \item{infoCutoff}{Numeric (length 1). INFO score threshold. Default
-0. Requires \code{INFO} column.}
+0. Requires \code{INFO} column when non-zero.}
 
-\item{nCutoff}{Numeric (length 1). MungeSumstats \code{N_std} value;
-sample-size deviation threshold. Default 5.}
+\item{nCutoff}{Numeric (length 1). Sample-size deviation threshold:
+drop variants whose \code{N} is more than \code{nCutoff}
+median-absolute-deviations from the median. Set to 0 to disable.
+Default 5.}
 
 \item{keepVariants}{Optional character vector of variant IDs (SNP
 column) to retain prior to harmonization.}
@@ -77,16 +77,40 @@ accepted but currently emits a warning and is skipped.)}
 
 \item{imputeOpts}{Named list of RAISS parameters.}
 
-\item{convertRefGenome}{Optional character (\code{"GRCh37"} or
-\code{"GRCh38"}) to liftover the sumstats to via MungeSumstats.
-\code{NULL} (default) skips liftover.}
-
 \item{matchMinProp}{Minimum proportion of LD panel variants that must
 be matched by the sumstats; default 0.}
 
-\item{mungeSumstatsArgs}{Optional named list of pass-through args to
-\code{MungeSumstats::format_sumstats()}. Any name supplied here
-overrides the value the curated knobs would have set.}
+\item{coerceNumeric}{Logical. Coerce signed columns
+(Z/BETA/SE/OR/LOG_ODDS/SIGNED_SUMSTAT/P/MAF/FRQ/INFO/N) to numeric.
+Default \code{TRUE}.}
+
+\item{normalizeChr}{Logical. Strip \code{"chr"} prefix, uppercase the
+chromosome label, and map 23->X, 24->Y, M->MT. Default \code{TRUE}.}
+
+\item{dropNonstandardChr}{Logical. Drop variants whose CHR (after
+normalization) is outside 1..22, X, Y, MT. Default \code{TRUE}.}
+
+\item{dropMissData}{Logical. Drop rows with NA in any vital column
+(chrom, pos, A1, A2, and at least one of Z / BETA). Default
+\code{TRUE}.}
+
+\item{dropPOutOfRange}{Logical. Drop rows where \code{P < 0} or
+\code{P > 1}. Default \code{TRUE}.}
+
+\item{clampSmallP}{Logical. Floor non-negative P values below
+\code{smallPFloor} to \code{smallPFloor} so \code{-log10(P)} stays
+finite. Applied to both input and Z-derived P values. Default
+\code{TRUE}.}
+
+\item{smallPFloor}{Numeric (length 1). Floor for \code{clampSmallP}.
+Default \code{5e-324} (R's smallest positive double).}
+
+\item{dropZeroEffect}{Logical. Drop rows where any effect column is
+exactly 0 (\code{BETA}, \code{LOG_ODDS}, \code{SIGNED_SUMSTAT}) or
+\code{OR} is exactly 1. Default \code{TRUE}.}
+
+\item{dropNonpositiveSe}{Logical. Drop rows where \code{SE <= 0}.
+Default \code{TRUE}.}
 }
 \value{
 A new \code{QtlSumStats} / \code{GwasSumStats} with cleaned
@@ -94,13 +118,16 @@ A new \code{QtlSumStats} / \code{GwasSumStats} with cleaned
 }
 \description{
 Applies a single QC pass to a \code{QtlSumStats} or \code{GwasSumStats}
-collection: delegates variant-content QC (column standardization,
-indels, strand-ambiguous, MAF/INFO/N filters, p-value & effect-size
-sanity checks, optional dbSNP / liftover) to
-\code{MungeSumstats::format_sumstats()}, then runs pecotmr-specific
-steps (\code{skipRegion}, optional PIP screen, panel harmonization
-against the \code{ldSketch} via \code{.matchRefPanel}, optional
-SLALOM/DENTIST LD-mismatch QC, optional RAISS imputation).
+collection: per-row sanity checks via \code{.applySanityChecks} (drop
+rows with out-of-range / zero P, BETA == 0, SE <= 0, NA in vital
+columns; clamp tiny P; normalize CHR; coerce signed columns to
+numeric), variant-content filters (MAF / INFO / N) via
+\code{.applyContentFilters}, optional \code{skipRegion} drop, optional
+PIP screen, panel-vs-sumstats allele harmonization against the
+\code{ldSketch} via \code{.matchRefPanel} (which handles indels,
+strand-ambiguous variants, sign / strand flips, and duplicate drops in
+a single sweep), optional SLALOM/DENTIST LD-mismatch QC, and optional
+RAISS imputation. No Bioconductor genome / dbSNP packages required.
 }
 \details{
 The returned collection has its \code{qcInfo} slot populated with a