From d5ecea8304a5ac4af332e6dd609dc6a5a173e6cb Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Mon, 22 Jun 2026 20:02:50 -0700 Subject: [PATCH 1/3] RSS fixes --- NAMESPACE | 2 + R/AllClasses.R | 22 +++ R/AllGenerics.R | 20 +++ R/fineMappingPipeline.R | 185 +++++++++++++++++----- R/jointSpecification.R | 96 +++++++---- R/sumstatsQc.R | 30 +++- man/fineMappingPipeline.Rd | 27 +++- man/getQcDiagnostics.Rd | 34 ++++ tests/testthat/test_fineMappingPipeline.R | 57 +++++-- tests/testthat/test_sumstatsQc.R | 36 +++++ 10 files changed, 427 insertions(+), 82 deletions(-) create mode 100644 man/getQcDiagnostics.Rd diff --git a/NAMESPACE b/NAMESPACE index a69a3932..c0565a7d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -125,6 +125,7 @@ export(getPgenPtr) export(getPhenotypeCovariates) export(getPhenotypes) export(getPip) +export(getQcDiagnostics) export(getQcInfo) export(getQtlDatasets) export(getRefPanel) @@ -309,6 +310,7 @@ exportMethods(getPgenPtr) exportMethods(getPhenotypeCovariates) exportMethods(getPhenotypes) exportMethods(getPip) +exportMethods(getQcDiagnostics) exportMethods(getQcInfo) exportMethods(getQtlDatasets) exportMethods(getRefPanel) diff --git a/R/AllClasses.R b/R/AllClasses.R index 6f082d76..da35193c 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -57,6 +57,28 @@ setMethod("getGenome", "SumStatsBase", function(x, ...) x@genome) #' @export setMethod("getQcInfo", "SumStatsBase", function(x, ...) x@qcInfo) +#' @rdname getQcDiagnostics +#' @export +setMethod("getQcDiagnostics", "SumStatsBase", + function(x, entry = 1L, ...) { + qc <- x@qcInfo + if (length(qc) == 0L) return(NULL) + audits <- qc$entryAudit + if (is.null(audits)) return(NULL) + if (is.null(entry)) { + out <- lapply(audits, function(a) a$ldMismatchDiagnostics) + keep <- !vapply(out, is.null, logical(1L)) + if (!any(keep)) return(NULL) + setNames(out[keep], seq_along(audits)[keep]) + } else { + if (!is.numeric(entry) || length(entry) != 1L || + entry < 1L || entry > length(audits)) { + stop("`entry` must be a single integer in 1:", length(audits), ".") + } + audits[[as.integer(entry)]]$ldMismatchDiagnostics + } + }) + #' @rdname getLdSketch #' @export setMethod("getLdSketch", "SumStatsBase", function(x, ...) x@ldSketch) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 6ef0af4d..6b32d210 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -238,6 +238,26 @@ setGeneric("getGenome", function(x, ...) standardGeneric("getGenome")) #' @export setGeneric("getQcInfo", function(x, ...) standardGeneric("getQcInfo")) +#' @title Get SLALOM / DENTIST Diagnostics +#' @description Return the per-variant LD-mismatch diagnostics table +#' (SLALOM or DENTIST output) attached to a sumstats entry by +#' \code{\link{summaryStatsQc}} when \code{zMismatchQc} was not +#' \code{"none"}. Convenience accessor over +#' \code{getQcInfo(x)$entryAudit[[entry]]$ldMismatchDiagnostics}. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param entry Integer index (default 1) of the entry whose diagnostics +#' table to return. When \code{NULL}, returns a named list of every +#' entry's diagnostics keyed by entry index. +#' @param ... Unused. +#' @return A \code{data.frame} of per-variant diagnostics (columns +#' include \code{variant_id} plus the SLALOM / DENTIST output +#' columns), \code{NULL} when no diagnostics were preserved for that +#' entry, or a named list of such data.frames when \code{entry = +#' NULL}. +#' @export +setGeneric("getQcDiagnostics", + function(x, entry = 1L, ...) standardGeneric("getQcDiagnostics")) + #' @title Get LD Sketch #' @description Return the \code{GenotypeHandle} carrying the LD #' reference for this collection. Defined on classes that embed an diff --git a/R/fineMappingPipeline.R b/R/fineMappingPipeline.R index bccaedd7..38416366 100644 --- a/R/fineMappingPipeline.R +++ b/R/fineMappingPipeline.R @@ -87,8 +87,11 @@ #' \code{susieR::susie} check externally if needed. #' \item Diagnostic re-analysis paths #' (\code{singleEffect} / \code{bayesianConditionalRegression} -#' reanalysis on the RSS path): these are not part of the -#' design-doc method menu and are dropped. +#' reanalysis on the RSS path): these are not exposed as +#' dedicated method tokens. Callers who want a single-effect +#' fit can request it via per-method kwargs, e.g. +#' \code{methods = list(susie = list(L = 1))} (see the +#' \code{methods} parameter). #' \item \code{loadRssData} and explicit #' \code{ldReferenceMetaFile} arguments: the new #' \code{QtlSumStats} / \code{GwasSumStats} carry the @@ -103,9 +106,23 @@ #' #' @param data A \code{QtlDataset}, \code{MultiStudyQtlDataset}, #' \code{QtlSumStats}, or \code{GwasSumStats}. -#' @param methods Character vector of method tokens (any subset of -#' \code{c("susie", "susieInf", "susieAsh", "mvsusie", "fsusie")}). -#' See description for per-class compatibility. +#' @param methods Method specification. Accepts either: +#' \itemize{ +#' \item A character vector of method tokens, e.g. +#' \code{c("susie", "susieInf", "mvsusie")} (any subset of +#' \code{c("susie", "susieInf", "susieAsh", "mvsusie", "fsusie")}, +#' subject to per-class compatibility). +#' \item A named list keyed by method token, where each value is a +#' list of per-method kwargs to splice into the underlying +#' fitter, e.g. +#' \code{list(susie = list(L = 1, refine = FALSE), +#' mvsusie = list(max_iter = 500))}. Mirrors the +#' convention of \code{\link{twasWeightsPipeline}}'s +#' \code{methods} argument. User-supplied kwargs override the +#' capability-table defaults and any base / chained args set +#' by the pipeline (e.g. you can override \code{model_init} +#' even when fitting from a susieInf chain). +#' } #' @param contexts Optional character vector of context names. Default #' \code{NULL} (all contexts). #' @param traitId Optional character vector of trait names to restrict @@ -214,52 +231,88 @@ setGeneric("fineMappingPipeline", sumstatImpl = "susieR::susie_rss", multivariate = FALSE, gwasAllowed = TRUE, - unmappableEffects = "none"), + unmappableEffects = "none", + args = list()), susieInf = list( individualImpl = "susieR::susie", sumstatImpl = "susieR::susie_rss", multivariate = FALSE, gwasAllowed = TRUE, - unmappableEffects = "inf"), + unmappableEffects = "inf", + args = list()), susieAsh = list( individualImpl = "susieR::susie", sumstatImpl = "susieR::susie_rss", multivariate = FALSE, gwasAllowed = TRUE, - unmappableEffects = "ash"), + unmappableEffects = "ash", + args = list()), mvsusie = list( individualImpl = "mvsusieR::mvsusie", sumstatImpl = "mvsusieR::mvsusie_rss", multivariate = TRUE, gwasAllowed = FALSE, - unmappableEffects = NA_character_), + unmappableEffects = NA_character_, + args = list()), fsusie = list( individualImpl = "fsusieR::susiF", sumstatImpl = NULL, multivariate = TRUE, gwasAllowed = FALSE, - unmappableEffects = NA_character_), + unmappableEffects = NA_character_, + args = list()), mrmash = list( individualImpl = NULL, sumstatImpl = NULL, multivariate = TRUE, gwasAllowed = FALSE, - unmappableEffects = NA_character_)) + unmappableEffects = NA_character_, + args = list())) # Normalize a user-supplied `methods` argument into a character vector of # canonical tokens. Mirrors `.twasNormalizeMethods` but the fine-mapping # pipeline takes only a character vector (no preset strings, no list form). # @noRd +# Normalize a user-supplied `methods` argument into `(tokens, methodArgs)`. +# +# Accepts: +# * character vector c("susie", "susieInf") -> empty kwargs per token +# * named list list(susie = list(L = 1), ...) -> per-token kwargs +# +# Names of the returned `methodArgs` always equal `tokens` (one entry per +# token, empty list when the user supplied none). The fitters then +# `modifyList`-merge each entry into the base arg list before do.call. +# +# Mirrors the convention of .twasNormalizeMethods so the two pipelines +# expose the same shape on the user side. +# @noRd .fmNormalizeMethods <- function(methods) { if (is.null(methods) || length(methods) == 0L) { - stop("fineMappingPipeline: `methods` must be a non-empty character vector.") + stop("fineMappingPipeline: `methods` must be a non-empty character ", + "vector or named list of = entries.") } - if (!is.character(methods)) { - stop("fineMappingPipeline: `methods` must be a character vector. ", - "Got class '", class(methods)[[1L]], "'.") + if (is.character(methods)) { + tokens <- unique(methods) + methodArgs <- setNames(rep(list(list()), length(tokens)), tokens) + } else if (is.list(methods)) { + if (is.null(names(methods)) || any(names(methods) == "")) { + stop("fineMappingPipeline: when `methods` is a list it must be ", + "named (one entry per method token).") + } + nonListChild <- vapply(methods, function(x) !is.list(x), logical(1)) + if (any(nonListChild)) { + stop("fineMappingPipeline: each entry of the `methods` list must ", + "itself be a list of named kwargs (got non-list value for: ", + paste(names(methods)[nonListChild], collapse = ", "), ").") + } + tokens <- unique(names(methods)) + methodArgs <- methods[tokens] + } else { + stop("fineMappingPipeline: `methods` must be a character vector or ", + "named list. Got class '", class(methods)[[1L]], "'.") } - unique(methods) + list(tokens = tokens, methodArgs = methodArgs) } @@ -560,13 +613,33 @@ setGeneric("fineMappingPipeline", } +# Merge per-method user kwargs onto a base arg list. `userArgs` is the +# per-token kwargs supplied by the caller (e.g. `list(L = 1, refine = +# FALSE)`); the capability table's `args` default fills in any keys the +# user did not set. User-supplied values always win over base, capability +# defaults, and chain-derived args. Returns the merged list. +# @noRd +.fmMergeUserArgs <- function(baseArgs, token, userArgs = NULL) { + if (is.null(userArgs)) userArgs <- list() + info <- .fineMappingMethodCapabilities[[token]] + capDefaults <- if (!is.null(info) && !is.null(info$args)) info$args else list() + # Order matters: base < capability defaults < user overrides. + if (length(capDefaults) > 0L) baseArgs <- modifyList(baseArgs, capDefaults) + if (length(userArgs) > 0L) baseArgs <- modifyList(baseArgs, userArgs) + baseArgs +} + + # Fit one of the SuSiE-family individual-level methods on (X, y). When # `chainFromInf` is non-NULL, the susieInf fit it points at is used as # initialisation (with prepareSusieFromInfArgs); otherwise a plain fit -# with the requested `unmappable_effects` is performed. +# with the requested `unmappable_effects` is performed. `userArgs` are +# spliced via .fmMergeUserArgs (user wins over chain/base/capability +# defaults), so the caller can override things like L, max_iter, +# estimate_residual_method, refine, etc. # @noRd .fmFitSusieIndiv <- function(X, y, token, chainFromInf = NULL, - coverage = 0.95) { + coverage = 0.95, userArgs = NULL) { info <- .fineMappingMethodCapabilities[[token]] if (is.null(info) || identical(info$unmappableEffects, NA_character_)) { stop(".fmFitSusieIndiv: token '", token, "' is not a SuSiE-family method.") @@ -590,16 +663,17 @@ setGeneric("fineMappingPipeline", } else if (token == "susieAsh") { baseArgs$convergence_method <- "pip" } + baseArgs <- .fmMergeUserArgs(baseArgs, token, userArgs) fit <- do.call(susieR::susie, baseArgs) .setFinemappingFitClass(fit, token) } # Sumstat counterpart of .fmFitSusieIndiv. Calls susieR::susie_rss with -# the same unmappable_effects switch and chained init. +# the same unmappable_effects switch, chained init, and userArgs merge. # @noRd .fmFitSusieRss <- function(z, R, n, token, chainFromInf = NULL, - coverage = 0.95) { + coverage = 0.95, userArgs = NULL) { info <- .fineMappingMethodCapabilities[[token]] if (is.null(info) || identical(info$unmappableEffects, NA_character_)) { stop(".fmFitSusieRss: token '", token, "' is not a SuSiE-family method.") @@ -622,6 +696,7 @@ setGeneric("fineMappingPipeline", } else if (token == "susieAsh") { baseArgs$convergence_method <- "pip" } + baseArgs <- .fmMergeUserArgs(baseArgs, token, userArgs) fit <- do.call(susieR::susie_rss, baseArgs) # All susie_rss fits get the "susieRss" S3 class for post-processing # (this drives the Xcorr cs-input mode). Token-level distinction stays @@ -674,7 +749,9 @@ setMethod("fineMappingPipeline", "QtlDataset", ...) { naAction <- match.arg(naAction) parsedJointSpec <- parseJointSpecification(jointSpecification, data) - tokens <- .fmNormalizeMethods(methods) + norm <- .fmNormalizeMethods(methods) + tokens <- norm$tokens + methodArgs <- norm$methodArgs .fmCheckMethodCapabilities(tokens, "QtlDataset") # Explicit jointSpecification path: run the per-spec axis dispatcher for @@ -687,8 +764,10 @@ setMethod("fineMappingPipeline", "QtlDataset", jointResult <- .fmDispatchJointSpecsQtlDataset( parsedJointSpec, data, intersect(tokens, c("mvsusie", "fsusie")), contexts, traitId, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs) tokens <- setdiff(tokens, c("mvsusie", "fsusie")) + methodArgs <- methodArgs[tokens] if (length(tokens) == 0L) { if (is.null(jointResult)) stop("fineMappingPipeline(QtlDataset): no joint fits produced. ", @@ -805,7 +884,8 @@ setMethod("fineMappingPipeline", "QtlDataset", if (verbose >= 1) message(sprintf("Fitting susieInf for (context='%s', trait='%s') ...", ctx, tid)) infFit <- .fmFitSusieIndiv(X, y, "susieInf", - coverage = coverage) + coverage = coverage, + userArgs = methodArgs[["susieInf"]]) } for (tk in toRun) { @@ -821,7 +901,8 @@ setMethod("fineMappingPipeline", "QtlDataset", tk, ctx, tid)) fit <- .fmFitSusieIndiv(X, y, tk, chainFromInf = chainFrom, - coverage = coverage) + coverage = coverage, + userArgs = methodArgs[[tk]]) } entry <- .fmPostprocessOne( fit = fit, method = tk, @@ -913,10 +994,13 @@ setMethod("fineMappingPipeline", "QtlDataset", if (verbose >= 1) message(sprintf("Fitting mvsusie (multi-context) for trait='%s' ...", tid)) - fit <- fitMvsusie( + mvBaseArgs <- list( X = X, Y = Y, prior_variance = mvsusieR::create_mixture_prior(R = ncol(Y)), coverage = coverage) + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -965,10 +1049,13 @@ setMethod("fineMappingPipeline", "QtlDataset", if (verbose >= 1) message(sprintf("Fitting mvsusie (multi-trait) for context='%s' ...", ctx)) - fit <- fitMvsusie( + mvBaseArgs <- list( X = X, Y = Y, prior_variance = mvsusieR::create_mixture_prior(R = ncol(Y)), coverage = coverage) + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -1041,7 +1128,9 @@ setMethod("fineMappingPipeline", "QtlDataset", if (verbose >= 1) message(sprintf("Fitting fsusie for context='%s' (multi-trait, %d traits) ...", ctx, length(traits))) - fit <- fitFsusie(X = X, Y = Y, pos = pos) + fit <- do.call(fitFsusie, + .fmMergeUserArgs(list(X = X, Y = Y, pos = pos), + "fsusie", methodArgs[["fsusie"]])) fit <- .setFinemappingFitClass(fit, "fsusie") entry <- .fmPostprocessOne( fit = fit, method = "fsusie", @@ -1103,7 +1192,9 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset", ...) { naAction <- match.arg(naAction) parsedJointSpec <- parseJointSpecification(jointSpecification, data) - tokens <- .fmNormalizeMethods(methods) + norm <- .fmNormalizeMethods(methods) + tokens <- norm$tokens + methodArgs <- norm$methodArgs .fmCheckMethodCapabilities(tokens, "MultiStudyQtlDataset") # Explicit jointSpecification path: run the per-component, per-spec @@ -1114,9 +1205,14 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset", jointResult <- .fmDispatchJointSpecsMultiStudy( parsedJointSpec, data, intersect(tokens, c("mvsusie", "fsusie")), contexts, traitId, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) - methods <- setdiff(methods, c("mvsusie", "fsusie")) + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs) + # Forward the still-pending (non-joint) tokens + their kwargs to the + # per-QtlDataset recursion below, preserving the list shape so + # methodArgs land on the right tokens. tokens <- setdiff(tokens, c("mvsusie", "fsusie")) + methodArgs <- methodArgs[tokens] + methods <- if (length(methodArgs) > 0L) methodArgs else tokens if (length(tokens) == 0L) { if (is.null(jointResult)) stop("fineMappingPipeline(MultiStudyQtlDataset): no joint fits produced. ", @@ -1223,7 +1319,9 @@ setMethod("fineMappingPipeline", "QtlSumStats", ...) { .fmAssertQcd(data) parsedJointSpec <- parseJointSpecification(jointSpecification, data) - tokens <- .fmNormalizeMethods(methods) + norm <- .fmNormalizeMethods(methods) + tokens <- norm$tokens + methodArgs <- norm$methodArgs .fmCheckMethodCapabilities(tokens, "QtlSumStats") jointResult <- NULL @@ -1231,8 +1329,10 @@ setMethod("fineMappingPipeline", "QtlSumStats", jointResult <- .fmDispatchJointSpecsQtlSumStats( parsedJointSpec, data, intersect(tokens, "mvsusie"), contexts, traitId, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs) tokens <- setdiff(tokens, c("mvsusie", "fsusie")) + methodArgs <- methodArgs[tokens] if (length(tokens) == 0L) { if (is.null(jointResult)) stop("fineMappingPipeline(QtlSumStats): no joint fits produced. ", @@ -1315,7 +1415,8 @@ setMethod("fineMappingPipeline", "QtlSumStats", if (verbose >= 1) message(sprintf("Fitting susieInf (RSS) for (study='%s', context='%s', trait='%s') ...", st, ctx, tr)) infFit <- .fmFitSusieRss(z, ldMat, n, "susieInf", - coverage = coverage) + coverage = coverage, + userArgs = methodArgs[["susieInf"]]) } for (tk in toRun) { if (tk == "susieInf") { @@ -1330,7 +1431,8 @@ setMethod("fineMappingPipeline", "QtlSumStats", tk, st, ctx, tr)) fit <- .fmFitSusieRss(z, ldMat, n, tk, chainFromInf = chainFrom, - coverage = coverage) + coverage = coverage, + userArgs = methodArgs[[tk]]) } ent <- .fmPostprocessOne( fit = fit, method = "susieRss", @@ -1400,10 +1502,13 @@ setMethod("fineMappingPipeline", "QtlSumStats", if (verbose >= 1) message(sprintf("Fitting mvsusie (RSS) for (study='%s', trait='%s', %d contexts) ...", st, tr, length(ctxNames))) - fit <- fitMvsusieRss( + mvBaseArgs <- list( Z = Z, R = ldMat, N = as.numeric(stats::median(nVec)), prior_variance = mvsusieR::create_mixture_prior(R = ncol(Z)), coverage = coverage) + fit <- do.call(fitMvsusieRss, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") ent <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -1452,7 +1557,9 @@ setMethod("fineMappingPipeline", "GwasSumStats", trim = TRUE, ...) { .fmAssertQcd(data) - tokens <- .fmNormalizeMethods(methods) + norm <- .fmNormalizeMethods(methods) + tokens <- norm$tokens + methodArgs <- norm$methodArgs .fmCheckMethodCapabilities(tokens, "GwasSumStats") # Per the design contract, one GwasSumStats represents the GWAS @@ -1518,7 +1625,8 @@ setMethod("fineMappingPipeline", "GwasSumStats", message(sprintf("Fitting susieInf (RSS) for GWAS (study='%s', region='%s') ...", st, region_id)) infFit <- .fmFitSusieRss(z, ldMat, n, "susieInf", - coverage = coverage) + coverage = coverage, + userArgs = methodArgs[["susieInf"]]) } for (tk in toRun) { if (tk == "susieInf") { @@ -1533,7 +1641,8 @@ setMethod("fineMappingPipeline", "GwasSumStats", tk, st, region_id)) fit <- .fmFitSusieRss(z, ldMat, n, tk, chainFromInf = chainFrom, - coverage = coverage) + coverage = coverage, + userArgs = methodArgs[[tk]]) } ent <- .fmPostprocessOne( fit = fit, method = "susieRss", diff --git a/R/jointSpecification.R b/R/jointSpecification.R index 3177c7f4..f4e67230 100644 --- a/R/jointSpecification.R +++ b/R/jointSpecification.R @@ -811,7 +811,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { jointMethods <- intersect(methods, "mvsusie") if (length(jointMethods) == 0L) return(NULL) @@ -843,10 +844,13 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { message(sprintf( "jointCrossContext: fitting mvsusie for (study='%s', trait='%s') across contexts (%s) ...", study, tid, paste(xy$perTraitContexts, collapse = ", "))) - fit <- fitMvsusie( + mvBaseArgs <- list( X = xy$X, Y = xy$Y, prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), coverage = coverage) + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -887,7 +891,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { jointMethods <- intersect(methods, c("mvsusie", "fsusie")) if (length(jointMethods) == 0L) return(NULL) @@ -914,10 +919,13 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { "jointCrossTrait: fitting %s for (study='%s', context='%s') across traits (%s) ...", mm, study, cx, paste(xy$traitsHere, collapse = ", "))) if (mm == "mvsusie") { - fit <- fitMvsusie( + mvBaseArgs <- list( X = xy$X, Y = xy$Y, prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), coverage = coverage) + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -932,7 +940,9 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { ord <- match(colnames(xy$Y), rownames(xy$se)) rr <- rr[ord] pos <- (GenomicRanges::start(rr) + GenomicRanges::end(rr)) / 2 - fit <- fitFsusie(X = xy$X, Y = xy$Y, pos = pos) + fit <- do.call(fitFsusie, + .fmMergeUserArgs(list(X = xy$X, Y = xy$Y, pos = pos), + "fsusie", methodArgs[["fsusie"]])) fit <- .setFinemappingFitClass(fit, "fsusie") entry <- .fmPostprocessOne( fit = fit, method = "fsusie", @@ -974,7 +984,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { contexts, traitIds, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { jointMethods <- intersect(methods, "mvsusie") if (length(jointMethods) == 0L) return(NULL) @@ -1018,10 +1029,13 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { message(sprintf( "jointCrossContext (QtlSumStats): fitting mvsusie_rss for (study='%s', trait='%s', %d contexts) ...", s, tid, length(ctxNames))) - fit <- fitMvsusieRss( + mvBaseArgs <- list( Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), coverage = coverage) + fit <- do.call(fitMvsusieRss, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -1060,7 +1074,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { contexts, traitIds, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { if ("fsusie" %in% methods) stop("jointCrossTrait (QtlSumStats): fsusie has no RSS variant; ", "fsusie cannot participate in sumstats-based joint fits.") @@ -1100,10 +1115,13 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { message(sprintf( "jointCrossTrait (QtlSumStats): fitting mvsusie_rss for (study='%s', context='%s', %d traits) ...", s, cx, length(trNames))) - fit <- fitMvsusieRss( + mvBaseArgs <- list( Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), coverage = coverage) + fit <- do.call(fitMvsusieRss, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -1143,7 +1161,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { contexts, traitIds, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { if ("fsusie" %in% methods) stop("jointCrossStudy: fsusie cannot participate (no RSS variant).") jointMethods <- intersect(methods, "mvsusie") @@ -1190,10 +1209,13 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { message(sprintf( "jointCrossStudy: fitting mvsusie_rss for (context='%s', trait='%s', %d studies) ...", cx, tid, length(stNames))) - fit <- fitMvsusieRss( + mvBaseArgs <- list( Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), coverage = coverage) + fit <- do.call(fitMvsusieRss, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -1234,7 +1256,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { contexts, traitIds, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { axes <- spec$axes if ("study" %in% axes) stop("composed jointSpecification (QtlDataset): axes including 'study' require sumstats input.") @@ -1258,10 +1281,13 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { message(sprintf( "composed joint (QtlDataset): fitting mvsusie for study='%s' over %d (context, trait) columns ...", study, ncol(xy$Y))) - fit <- fitMvsusie( + mvBaseArgs <- list( X = xy$X, Y = xy$Y, prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), coverage = coverage) + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -1294,7 +1320,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { contexts, traitIds, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { if ("fsusie" %in% methods) stop("composed jointSpecification (QtlSumStats): fsusie has no RSS variant.") jointMethods <- intersect(methods, "mvsusie") @@ -1336,10 +1363,13 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { message(sprintf( "composed joint (QtlSumStats): fitting mvsusie_rss for axes=(%s), %d columns ...", paste(axes, collapse = ", "), length(gIdx))) - fit <- fitMvsusieRss( + mvBaseArgs <- list( Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), coverage = coverage) + fit <- do.call(fitMvsusieRss, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) fit <- .setFinemappingFitClass(fit, "mvsusie") entry <- .fmPostprocessOne( fit = fit, method = "mvsusie", @@ -1393,7 +1423,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { out <- NULL for (i in seq_along(parsedJointSpec)) { spec <- parsedJointSpec[[i]] @@ -1401,7 +1432,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { if (length(axes) > 1L) { res <- .fmDispatchComposedQtlDataset( spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs) if (!is.null(res)) out <- if (is.null(out)) res else .rbindFineMappingResult(out, res, ldSketch = NULL) @@ -1411,10 +1443,12 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { res <- switch(axis, context = .fmDispatchCrossContextQtlDataset( spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs), trait = .fmDispatchCrossTraitQtlDataset( spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs), study = stop( "fineMappingPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input. ", "QtlDataset represents a single individual-level study; cross-study joints operate on the sumstats slot of MultiStudyQtlDataset or on QtlSumStats directly."), @@ -1433,7 +1467,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { methods, contexts, traitIds, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { out <- NULL for (i in seq_along(parsedJointSpec)) { spec <- parsedJointSpec[[i]] @@ -1441,7 +1476,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { if (length(axes) > 1L) { res <- .fmDispatchComposedQtlSumStats( spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs) if (!is.null(res)) out <- if (is.null(out)) res else .rbindFineMappingResult(out, res, ldSketch = getLdSketch(data)) @@ -1451,13 +1487,16 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { res <- switch(axis, context = .fmDispatchCrossContextQtlSumStats( spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs), trait = .fmDispatchCrossTraitQtlSumStats( spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs), study = .fmDispatchCrossStudyQtlSumStats( spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs), stop(sprintf("Unsupported axis: %s", axis))) if (!is.null(res)) out <- if (is.null(out)) res @@ -1478,7 +1517,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - verbose) { + verbose, + methodArgs = list()) { out <- NULL embeddedLd <- NULL qtlDatasets <- getQtlDatasets(data) @@ -1500,7 +1540,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { qd <- qtlDatasets[[qdName]] qdRes <- .fmDispatchJointSpecsQtlDataset( nonStudyAxisSpecs, qd, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs) if (!is.null(qdRes)) out <- if (is.null(out)) qdRes else .rbindFineMappingResult(out, qdRes, ldSketch = NULL) @@ -1510,7 +1551,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { if (!is.null(sumStats)) { ssRes <- .fmDispatchJointSpecsQtlSumStats( parsedJointSpec, sumStats, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs) if (!is.null(ssRes)) { embeddedLd <- getLdSketch(ssRes) out <- if (is.null(out)) ssRes diff --git a/R/sumstatsQc.R b/R/sumstatsQc.R index 04943641..e206731d 100644 --- a/R/sumstatsQc.R +++ b/R/sumstatsQc.R @@ -2835,7 +2835,12 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL, list(df = df, audit = audit) } -# Apply ldMismatchQc (SLALOM/DENTIST) against the LD sketch. +# Apply ldMismatchQc (SLALOM/DENTIST) against the LD sketch. Returns the +# filtered df, outlier count, and the full per-variant diagnostics table +# (the data.frame returned by ldMismatchQc(), prepended with a +# variant_id column for downstream joins). Callers stamp `diagnostics` +# into the entry's qcInfo audit so the per-variant detail is available +# for plotting / postprocessing instead of just the outlier count. .applyLdMismatchQcToEntry <- function(df, ldSketch, method) { variantIds <- df$SNP if (is.null(variantIds) || any(is.na(variantIds))) @@ -2852,7 +2857,22 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL, R <- computeLd(dosage, method = "sample") qc <- ldMismatchQc(zScore = df$Z, R = R, nSample = getNSamples(ldSketch), method = method) - list(df = df[!qc$outlier, , drop = FALSE], outliers = sum(qc$outlier)) + # slalom / dentist can leave NA in the outlier column when their + # per-variant statistic is undefined (e.g. a degenerate dentist + # chisq for variants effectively orthogonal to the lead). Treat NA as + # "no evidence of being an outlier" (conservative: keep the variant) + # so the downstream df / sum() / IRanges construction stay finite. + outlierFlags <- qc$outlier + outlierFlags[is.na(outlierFlags)] <- FALSE + # Attach the variant_id column so the diagnostics data.frame stays + # self-describing once it's separated from the input df. + diagnostics <- if (is.data.frame(qc)) { + cbind(variant_id = as.character(variantIds), qc, + stringsAsFactors = FALSE) + } else NULL + list(df = df[!outlierFlags, , drop = FALSE], + outliers = sum(outlierFlags), + diagnostics = diagnostics) } # Per-entry SER-based pip-screen (skip if no signal above the cutoff). @@ -3031,6 +3051,12 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL, df <- ldQc$df entryAudit$ldMismatchOutliersDropped <- ldQc$outliers entryAudit$ldMismatchMethod <- opts$zMismatchQc + # Preserve the full per-variant SLALOM/DENTIST diagnostics table for + # downstream plotting / postprocessing (the outlier-only summary kept + # historically dropped the per-variant detail). Stored as a data.frame + # on the entry's audit; absent when zMismatchQc = "none". + if (!is.null(ldQc$diagnostics)) + entryAudit$ldMismatchDiagnostics <- ldQc$diagnostics qcCount$mismatchRemoved <- ldQc$outliers emit("QC track: ", opts$zMismatchQc, " removed ", ldQc$outliers, " of ", nMmIn, " LD-mismatch outlier(s).") diff --git a/man/fineMappingPipeline.Rd b/man/fineMappingPipeline.Rd index 2ba95d7a..f0938799 100644 --- a/man/fineMappingPipeline.Rd +++ b/man/fineMappingPipeline.Rd @@ -98,9 +98,23 @@ fineMappingPipeline(data, ...) \item{...}{Reserved for future per-method arguments.} -\item{methods}{Character vector of method tokens (any subset of -\code{c("susie", "susieInf", "susieAsh", "mvsusie", "fsusie")}). -See description for per-class compatibility.} +\item{methods}{Method specification. Accepts either: +\itemize{ + \item A character vector of method tokens, e.g. + \code{c("susie", "susieInf", "mvsusie")} (any subset of + \code{c("susie", "susieInf", "susieAsh", "mvsusie", "fsusie")}, + subject to per-class compatibility). + \item A named list keyed by method token, where each value is a + list of per-method kwargs to splice into the underlying + fitter, e.g. + \code{list(susie = list(L = 1, refine = FALSE), + mvsusie = list(max_iter = 500))}. Mirrors the + convention of \code{\link{twasWeightsPipeline}}'s + \code{methods} argument. User-supplied kwargs override the + capability-table defaults and any base / chained args set + by the pipeline (e.g. you can override \code{model_init} + even when fitting from a susieInf chain). +}} \item{contexts}{Optional character vector of context names. Default \code{NULL} (all contexts).} @@ -291,8 +305,11 @@ deliberately not ported here: \code{susieR::susie} check externally if needed. \item Diagnostic re-analysis paths (\code{singleEffect} / \code{bayesianConditionalRegression} - reanalysis on the RSS path): these are not part of the - design-doc method menu and are dropped. + reanalysis on the RSS path): these are not exposed as + dedicated method tokens. Callers who want a single-effect + fit can request it via per-method kwargs, e.g. + \code{methods = list(susie = list(L = 1))} (see the + \code{methods} parameter). \item \code{loadRssData} and explicit \code{ldReferenceMetaFile} arguments: the new \code{QtlSumStats} / \code{GwasSumStats} carry the diff --git a/man/getQcDiagnostics.Rd b/man/getQcDiagnostics.Rd new file mode 100644 index 00000000..680ffc30 --- /dev/null +++ b/man/getQcDiagnostics.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/AllClasses.R +\name{getQcDiagnostics} +\alias{getQcDiagnostics} +\alias{getQcDiagnostics,SumStatsBase-method} +\title{Get SLALOM / DENTIST Diagnostics} +\usage{ +getQcDiagnostics(x, entry = 1L, ...) + +\S4method{getQcDiagnostics}{SumStatsBase}(x, entry = 1L, ...) +} +\arguments{ +\item{x}{A \code{GwasSumStats} or \code{QtlSumStats} object.} + +\item{entry}{Integer index (default 1) of the entry whose diagnostics +table to return. When \code{NULL}, returns a named list of every +entry's diagnostics keyed by entry index.} + +\item{...}{Unused.} +} +\value{ +A \code{data.frame} of per-variant diagnostics (columns + include \code{variant_id} plus the SLALOM / DENTIST output + columns), \code{NULL} when no diagnostics were preserved for that + entry, or a named list of such data.frames when \code{entry = + NULL}. +} +\description{ +Return the per-variant LD-mismatch diagnostics table + (SLALOM or DENTIST output) attached to a sumstats entry by + \code{\link{summaryStatsQc}} when \code{zMismatchQc} was not + \code{"none"}. Convenience accessor over + \code{getQcInfo(x)$entryAudit[[entry]]$ldMismatchDiagnostics}. +} diff --git a/tests/testthat/test_fineMappingPipeline.R b/tests/testthat/test_fineMappingPipeline.R index 454865e5..7bbbd41b 100644 --- a/tests/testthat/test_fineMappingPipeline.R +++ b/tests/testthat/test_fineMappingPipeline.R @@ -118,15 +118,19 @@ context("fineMappingPipeline") } # Mocks for the SuSiE fitters + post-processor. Return tiny payloads keyed -# only by the token so post-process knows what to wrap. +# only by the token so post-process knows what to wrap. The `userArgs` +# parameter (per-method kwargs merged in by .fmMergeUserArgs) is accepted +# but ignored — the mocks don't simulate downstream susie behaviour. .fmp_mockFitIndiv <- function() { - function(X, y, token, chainFromInf = NULL, coverage = 0.95) { + function(X, y, token, chainFromInf = NULL, coverage = 0.95, + userArgs = NULL) { list(token = token, X_cols = ncol(X)) } } .fmp_mockFitRss <- function() { - function(z, R, n, token, chainFromInf = NULL, coverage = 0.95) { + function(z, R, n, token, chainFromInf = NULL, coverage = 0.95, + userArgs = NULL) { list(token = token, n_variants = length(z)) } } @@ -159,18 +163,51 @@ context("fineMappingPipeline") # .fmNormalizeMethods # =========================================================================== -test_that(".fmNormalizeMethods: rejects NULL / empty / non-character", { +test_that(".fmNormalizeMethods: rejects NULL / empty / non-character/list", { expect_error(pecotmr:::.fmNormalizeMethods(NULL), - "non-empty character vector") + "non-empty character") expect_error(pecotmr:::.fmNormalizeMethods(character(0)), - "non-empty character vector") + "non-empty character") expect_error(pecotmr:::.fmNormalizeMethods(42L), - "must be a character vector") + "character vector or") }) -test_that(".fmNormalizeMethods: deduplicates", { - expect_equal(pecotmr:::.fmNormalizeMethods(c("susie", "susie", "susieInf")), - c("susie", "susieInf")) +test_that(".fmNormalizeMethods: char-vector form deduplicates + empty methodArgs", { + res <- pecotmr:::.fmNormalizeMethods(c("susie", "susie", "susieInf")) + expect_equal(res$tokens, c("susie", "susieInf")) + expect_equal(names(res$methodArgs), c("susie", "susieInf")) + expect_true(all(vapply(res$methodArgs, length, integer(1)) == 0L)) +}) + +test_that(".fmNormalizeMethods: named-list form carries per-method kwargs", { + res <- pecotmr:::.fmNormalizeMethods( + list(susie = list(L = 1, refine = FALSE), + susieInf = list())) + expect_equal(res$tokens, c("susie", "susieInf")) + expect_equal(res$methodArgs$susie, list(L = 1, refine = FALSE)) + expect_equal(res$methodArgs$susieInf, list()) +}) + +test_that(".fmNormalizeMethods: list without names errors", { + expect_error(pecotmr:::.fmNormalizeMethods(list(list(L = 1), list())), + "must be named") +}) + +test_that(".fmNormalizeMethods: list with non-list child errors", { + expect_error( + pecotmr:::.fmNormalizeMethods(list(susie = 42, susieInf = list())), + "list of named kwargs") +}) + +test_that(".fmMergeUserArgs: user overrides win over capability defaults + base", { + # base sets convergence_method = "pip"; user overrides to "objective" + out <- pecotmr:::.fmMergeUserArgs( + list(z = 1:3, convergence_method = "pip"), + token = "susie", + userArgs = list(convergence_method = "objective", L = 1)) + expect_equal(out$convergence_method, "objective") + expect_equal(out$L, 1) + expect_identical(out$z, 1:3) }) # =========================================================================== diff --git a/tests/testthat/test_sumstatsQc.R b/tests/testthat/test_sumstatsQc.R index 71e9700f..a7ce10c8 100644 --- a/tests/testthat/test_sumstatsQc.R +++ b/tests/testthat/test_sumstatsQc.R @@ -3297,6 +3297,42 @@ test_that(".applyLdMismatchQcToEntry: errors on variants absent from the sketch" ) }) +test_that(".applyLdMismatchQcToEntry: NA outlier flags from slalom are kept (not dropped)", { + # Regression test: slalom (and dentist on degenerate inputs) can leave + # NA in the `outlier` column for variants whose per-variant statistic + # is undefined. Treating NA as TRUE would silently drop those rows; + # treating it as FALSE (current behavior) keeps them and yields a + # finite outlier count. + handle <- .ssh_makeHandle() + panel_ids <- as.character(getSnpInfo(handle)$SNP) + vids <- panel_ids[seq_len(min(4L, length(panel_ids)))] + df <- data.frame(SNP = vids, Z = c(1.0, 2.0, 0.5, 1.5), + N = rep(1000L, length(vids)), + stringsAsFactors = FALSE) + # Also mock extractBlockGenotypes: the entry-side helper extracts + # dosages from the LD sketch before delegating to ldMismatchQc, and the + # fixture handle points at a fake /tmp/sketch.gds path with no real + # GDS file behind it. + local_mocked_bindings( + extractBlockGenotypes = .ssh_mockExtractor(), + ldMismatchQc = function(zScore, R = NULL, X = NULL, nSample = NULL, + method = c("slalom", "dentist"), + ldMethod = "sample", ...) { + data.frame(original_z = zScore, + outlier = c(FALSE, TRUE, NA, NA), + stringsAsFactors = FALSE) + }, + .package = "pecotmr") + res <- pecotmr:::.applyLdMismatchQcToEntry(df, handle, method = "slalom") + # Only the explicit TRUE row should be dropped; the two NA rows survive. + expect_equal(nrow(res$df), 3L) + expect_false("v2" %in% as.character(res$df$SNP)) # v2 is the TRUE outlier + expect_equal(res$outliers, 1L) + # Diagnostics should preserve every row + add a variant_id column. + expect_equal(nrow(res$diagnostics), length(vids)) + expect_true("variant_id" %in% names(res$diagnostics)) +}) + # =========================================================================== # .applyPipScreen # =========================================================================== From 6268ff33dd23c64abc71cdd1d08941910651111b Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Mon, 22 Jun 2026 22:03:49 -0700 Subject: [PATCH 2/3] more pipeline fixes --- NAMESPACE | 1 + R/mashPipeline.R | 83 ++++++++++--- R/sldscPostprocessingPipeline.R | 34 ++++++ _pkgdown.yml | 23 ++-- tests/testthat/test_AllClasses.R | 152 ++++++++++++++++++++++++ tests/testthat/test_tupleSelectors.R | 169 +++++++++++++++++++++++++++ 6 files changed, 435 insertions(+), 27 deletions(-) create mode 100644 tests/testthat/test_AllClasses.R create mode 100644 tests/testthat/test_tupleSelectors.R diff --git a/NAMESPACE b/NAMESPACE index c0565a7d..db66db93 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -233,6 +233,7 @@ export(screenCtwasRegions) export(sdpr) export(sdprWeights) export(slalom) +export(sldscPostprocessingPipeline) export(sliceMashData) export(standardiseSumstatsColumns) export(standardizeSldscTrait) diff --git a/R/mashPipeline.R b/R/mashPipeline.R index 5e336407..951193b9 100644 --- a/R/mashPipeline.R +++ b/R/mashPipeline.R @@ -14,11 +14,23 @@ #' exponent forwarded to \code{mashr::mash_set_data()}. Use #' \code{alpha = 0} on the BETA scale, \code{alpha = 1} on the Z scale. #' @param residualCorrelation Optional pre-computed residual correlation -#' matrix (\code{Vhat}). Only consulted when \code{sumStatsList$null} -#' is absent; otherwise \code{Vhat} is estimated from the null slice -#' via \code{mashr::estimate_null_correlation_simple()}. +#' matrix (\code{Vhat}). When supplied, replaces the inline +#' \code{mashr::estimate_null_correlation_simple()} call entirely and +#' the \code{"random"} slot of \code{sumStatsList} becomes optional +#' (the function does not need it for anything else). Useful when +#' \code{Vhat} was estimated previously on a larger reference and +#' shipped as a static artefact (the legacy MWE pattern). +#' @param priorCovariances Optional named list of square covariance +#' matrices (the \code{Ulist} \code{mashr::mash()} consumes). When +#' supplied, replaces the canonical + PCA + flash + ED chain +#' (\code{cov_canonical} / \code{cov_pca} / \code{cov_flash} / +#' \code{cov_ed}) entirely; mash sees only the supplied matrices. +#' Every entry must be a \code{ncol(Bhat) x ncol(Bhat)} matrix. +#' Useful when \code{U} was learnt on a larger reference and shipped +#' as a static artefact (the legacy MWE pattern). #' @param nPcs Optional integer; number of principal components seeded #' into \code{mashr::cov_pca()}. Defaults to \code{ncol(Bhat) - 1}. +#' Ignored when \code{priorCovariances} is supplied. #' @param inputScale One of \code{"auto"} (default), \code{"beta"}, #' \code{"z"}. Controls which (Bhat, Shat) pair is extracted from each #' sumstats entry: @@ -42,6 +54,7 @@ #' @export mashPipeline <- function(sumStatsList, alpha, residualCorrelation = NULL, + priorCovariances = NULL, nPcs = NULL, inputScale = c("auto", "beta", "z"), setSeed = 999) { @@ -64,7 +77,10 @@ mashPipeline <- function(sumStatsList, alpha, "of QtlSumStats / GwasSumStats objects, named with at least ", "'strong' and 'random' (optionally 'null').") } - required <- c("strong", "random") + # `random` is required only when we need to derive vhat from data; when + # the caller supplies a pre-computed `residualCorrelation`, the random + # subset is no longer needed. + required <- if (is.null(residualCorrelation)) c("strong", "random") else "strong" missingNames <- setdiff(required, names(sumStatsList)) if (length(missingNames) > 0L) { stop("mashPipeline: `sumStatsList` is missing required entr", @@ -82,8 +98,11 @@ mashPipeline <- function(sumStatsList, alpha, strongMats <- .mashSumStatsToMatrices(sumStatsList$strong, "strong", inputScale = inputScale) - randomMats <- .mashSumStatsToMatrices(sumStatsList$random, "random", - inputScale = inputScale) + randomMats <- if ("random" %in% names(sumStatsList) && + !is.null(sumStatsList$random)) { + .mashSumStatsToMatrices(sumStatsList$random, "random", + inputScale = inputScale) + } else NULL hasNull <- "null" %in% names(sumStatsList) && !is.null(sumStatsList$null) if (hasNull) { @@ -95,7 +114,11 @@ mashPipeline <- function(sumStatsList, alpha, if (!is.null(residualCorrelation)) { vhat <- residualCorrelation } else { - conditionNum <- ncol(randomMats$b) + # Identity-fallback: count conditions off `strong` so this code + # path works even when random is absent (it always was — and is + # required when residualCorrelation is NULL — but we read off + # strong for symmetry with the priorCovariances dimension check). + conditionNum <- ncol(strongMats$b) vhat <- diag(rep(1, conditionNum)) } } else { @@ -110,19 +133,41 @@ mashPipeline <- function(sumStatsList, alpha, V = vhat, alpha, zero_Bhat_Shat_reset = 1000) - # Canonical covariance matrices - U.can <- mashr::cov_canonical(mashData) - # PCA-based covariance matrices - if (is.null(nPcs)) { - nPcs <- ncol(mashData$Bhat) - 1 + if (is.null(priorCovariances)) { + # Canonical covariance matrices + U.can <- mashr::cov_canonical(mashData) + # PCA-based covariance matrices + if (is.null(nPcs)) { + nPcs <- ncol(mashData$Bhat) - 1 + } + U.pca <- mashr::cov_pca(mashData, npc = nPcs) + # Flash-based covariance matrices (factor analysis) + U.flash <- mashr::cov_flash(mashData) + # ED-based covariance matrices (initialized from all others) + U.ed <- mashr::cov_ed(mashData, Ulist_init = c(U.can, U.pca, U.flash)) + # Combine all covariance matrices + U.all <- c(U.can, U.pca, U.flash, U.ed) + } else { + # Caller-supplied prior covariance matrices (e.g. pre-baked U from + # a reference dataset). Bypasses cov_canonical / cov_pca / + # cov_flash / cov_ed entirely; mashr only sees the supplied list. + if (!is.list(priorCovariances) || length(priorCovariances) == 0L || + is.null(names(priorCovariances)) || any(names(priorCovariances) == "")) { + stop("mashPipeline: `priorCovariances` must be a non-empty named ", + "list of square covariance matrices.") + } + nCond <- ncol(mashData$Bhat) + bad <- !vapply(priorCovariances, function(M) { + is.matrix(M) && nrow(M) == nCond && ncol(M) == nCond + }, logical(1L)) + if (any(bad)) { + stop(sprintf(paste0("mashPipeline: every `priorCovariances` entry must be ", + "a %d x %d matrix; offenders: %s."), + nCond, nCond, + paste(names(priorCovariances)[bad], collapse = ", "))) + } + U.all <- priorCovariances } - U.pca <- mashr::cov_pca(mashData, npc = nPcs) - # Flash-based covariance matrices (factor analysis) - U.flash <- mashr::cov_flash(mashData) - # ED-based covariance matrices (initialized from all others) - U.ed <- mashr::cov_ed(mashData, Ulist_init = c(U.can, U.pca, U.flash)) - # Combine all covariance matrices - U.all <- c(U.can, U.pca, U.flash, U.ed) # Fit mash to estimate mixture weights m <- mashr::mash(mashData, Ulist = U.all, outputlevel = 1) diff --git a/R/sldscPostprocessingPipeline.R b/R/sldscPostprocessingPipeline.R index f11e85ff..fbe0a7b3 100644 --- a/R/sldscPostprocessingPipeline.R +++ b/R/sldscPostprocessingPipeline.R @@ -1,3 +1,37 @@ +#' @title sLDSC Postprocessing Pipeline +#' @description Postprocess polyfun's per-trait sLDSC outputs (one +#' single-target run per target annotation, plus an optional joint +#' run) into a single results object with per-trait tau*, EnrichStat +#' with back-solved jackknife SE, and a DerSimonian-Laird random- +#' effects meta-analysis across traits. +#' @param traitSinglePrefixes Named list of file prefixes for the +#' single-target polyfun runs (one entry per trait; each value is a +#' length-N character vector of `/` prefixes, one per +#' target annotation). +#' @param traitJointPrefix Named list of file prefixes for the joint +#' polyfun runs (one entry per trait; each value a `/` +#' prefix into the joint LD-score dir). Pass an empty list to skip +#' the joint branch. +#' @param targetAnnoDir Directory containing the target `.annot.gz` +#' files used for sd_C and binary detection (typically the joint dir). +#' @param frqfileDir Optional directory of `.frq` files for the MAF +#' cutoff. Pass \code{NULL} to skip MAF filtering. +#' @param plinkName File-name prefix of the PLINK reference panel +#' (default \code{"ADSP_chr"}; combined per-chromosome as +#' \code{paste0(plinkName, chrom)}). +#' @param mafCutoff Numeric MAF cutoff applied via the `.frq` files. +#' Default \code{0.05}. Set to \code{0} to opt out. +#' @param targetCategories Optional character vector of target +#' annotation names to retain. Auto-detected from the joint run when +#' \code{NULL}. +#' @param targetLabels Optional display names, same length / order as +#' \code{targetCategories}, applied to every output column / tau* +#' block colname. +#' @return A list with \code{per_trait} (per-trait standardised tables), +#' meta tables (\code{tau_star_meta}, \code{E_meta}, +#' \code{enrich_stat_meta}), and a \code{params} record of the call +#' options. +#' @export sldscPostprocessingPipeline <- function(traitSinglePrefixes, traitJointPrefix, targetAnnoDir, diff --git a/_pkgdown.yml b/_pkgdown.yml index a1463294..002b0b1a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -246,6 +246,7 @@ reference: - subtitle: "SumStatsBase" contents: - getQcInfo + - getQcDiagnostics - subtitle: "TwasWeights" contents: @@ -271,24 +272,30 @@ reference: End-to-end pipeline entry points. Each takes one or more S4 input classes and returns a SumStats, FineMappingResult, TwasWeights collection, or a tabular summary. - `ctwasPipeline` is also exposed as four chainable steps - (`assembleCtwasInputs` → `estCtwasParam` → `screenCtwasRegions` - → `finemapCtwasRegions`) for users who want to override estimated - priors or run only part of the pipeline. contents: - causalInferencePipeline - colocboostPipeline - colocPipeline - ctwasPipeline - - assembleCtwasInputs - - estCtwasParam - - screenCtwasRegions - - finemapCtwasRegions - fineMappingPipeline - mashPipeline - qtlEnrichmentPipeline - twasWeightsPipeline + - title: "cTWAS" + desc: > + The four chainable steps that `ctwasPipeline` is built from: + `assembleCtwasInputs` → `estCtwasParam` → `screenCtwasRegions` + → `finemapCtwasRegions`. Use the steps directly when you want + to override the estimated priors (e.g. fall back to the prefit + EM values after an accurate-EM divergence), inspect the assembled + region data, or run only part of the pipeline. + contents: + - assembleCtwasInputs + - estCtwasParam + - screenCtwasRegions + - finemapCtwasRegions + - title: "Quality control" desc: > Summary-statistics QC orchestrator plus the underlying allele diff --git a/tests/testthat/test_AllClasses.R b/tests/testthat/test_AllClasses.R new file mode 100644 index 00000000..865b0a78 --- /dev/null +++ b/tests/testthat/test_AllClasses.R @@ -0,0 +1,152 @@ +context("AllClasses (virtual base classes)") + +# Most slots / accessors on the concrete subclasses are exercised in their +# own test files; these tests target the *base-class* behaviors that the +# concrete subclasses inherit without overriding (getStudy on SumStatsBase, +# getQcDiagnostics body branches, and the zero-row adjustPips short-circuit +# on FineMappingResultBase). + +# =========================================================================== +# Helpers +# =========================================================================== + +.alc_makeHandle <- function(snp_n = 3L) { + new("GenotypeHandle", + path = "/tmp/test.gds", + format = "gds", + snpInfo = data.frame( + SNP = paste0("rs", seq_len(snp_n)), + CHR = rep("1", snp_n), + BP = seq(100L, by = 100L, length.out = snp_n), + A1 = rep("A", snp_n), + A2 = rep("G", snp_n), + stringsAsFactors = FALSE), + nSamples = 10L, + sampleIds = paste0("s", seq_len(10L)), + pgenPtr = NULL) +} + +.alc_makeGr <- function(n = 3) { + gr <- GenomicRanges::GRanges( + "chr1", + IRanges::IRanges(start = seq(100L, by = 100L, length.out = n), width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = paste0("rs", seq_len(n)), + A1 = rep("A", n), A2 = rep("G", n), + Z = rnorm(n), N = rep(1000L, n)) + gr +} + +.alc_makeGwasSumStats <- function(qcInfo = list()) { + GwasSumStats( + study = "g1", + entry = list(.alc_makeGr()), + genome = "hg19", + ldSketch = .alc_makeHandle(), + qcInfo = qcInfo) +} + +.alc_makeFmEntry <- function(n = 3) { + tl <- data.frame( + variant_id = paste0("chr1:", 100 * seq_len(n), ":A:G"), + chrom = rep("1", n), + pos = as.integer(100 * seq_len(n)), + A1 = rep("G", n), + A2 = rep("A", n), + N = rep(1000, n), + MAF = rep(0.1, n), + marginal_beta = rep(0.1, n), + marginal_se = rep(0.05, n), + marginal_z = rep(2.0, n), + marginal_p = rep(0.05, n), + pip = seq(0.9, by = -0.1, length.out = n), + posterior_mean = rep(0.05, n), + posterior_sd = rep(0.02, n), + stringsAsFactors = FALSE) + FineMappingEntry( + variantIds = tl$variant_id, + susieFit = list(), + topLoci = tl) +} + +# =========================================================================== +# SumStatsBase: getStudy (inherited by QtlSumStats / GwasSumStats) +# =========================================================================== + +test_that("SumStatsBase: getStudy on a GwasSumStats returns unique study names", { + ss <- .alc_makeGwasSumStats() + expect_equal(getStudy(ss), "g1") +}) + +# =========================================================================== +# SumStatsBase: getQcDiagnostics — every branch +# =========================================================================== + +test_that("SumStatsBase: getQcDiagnostics returns NULL on empty qcInfo", { + ss <- .alc_makeGwasSumStats() # qcInfo = list() by default + expect_null(getQcDiagnostics(ss)) +}) + +test_that("SumStatsBase: getQcDiagnostics returns NULL when entryAudit slot is absent", { + # qcInfo has steps but no entryAudit -> nothing to return. + ss <- .alc_makeGwasSumStats(qcInfo = list(step1 = "ok")) + expect_null(getQcDiagnostics(ss)) +}) + +test_that("SumStatsBase: getQcDiagnostics returns the per-entry diagnostics by index", { + diag1 <- data.frame(SNP = "rs1", outlier = FALSE, stringsAsFactors = FALSE) + diag2 <- data.frame(SNP = "rs2", outlier = TRUE, stringsAsFactors = FALSE) + qc <- list(entryAudit = list( + list(ldMismatchDiagnostics = diag1), + list(ldMismatchDiagnostics = diag2))) + ss <- .alc_makeGwasSumStats(qcInfo = qc) + expect_identical(getQcDiagnostics(ss, entry = 1L), diag1) + expect_identical(getQcDiagnostics(ss, entry = 2L), diag2) +}) + +test_that("SumStatsBase: getQcDiagnostics(entry = NULL) returns the populated entries only", { + diag1 <- data.frame(SNP = "rs1", outlier = FALSE, stringsAsFactors = FALSE) + # Entry 2's audit has no ldMismatchDiagnostics field; should be filtered. + qc <- list(entryAudit = list( + list(ldMismatchDiagnostics = diag1), + list(other = "no diagnostics here"))) + ss <- .alc_makeGwasSumStats(qcInfo = qc) + out <- getQcDiagnostics(ss, entry = NULL) + expect_type(out, "list") + expect_equal(length(out), 1L) + expect_named(out, "1") + expect_identical(out[["1"]], diag1) +}) + +test_that("SumStatsBase: getQcDiagnostics(entry = NULL) returns NULL when no entry has diagnostics", { + qc <- list(entryAudit = list(list(other = 1), list(other = 2))) + ss <- .alc_makeGwasSumStats(qcInfo = qc) + expect_null(getQcDiagnostics(ss, entry = NULL)) +}) + +test_that("SumStatsBase: getQcDiagnostics errors on out-of-range entry", { + qc <- list(entryAudit = list(list(ldMismatchDiagnostics = data.frame(z = 1)))) + ss <- .alc_makeGwasSumStats(qcInfo = qc) + expect_error(getQcDiagnostics(ss, entry = 0L), "must be a single integer") + expect_error(getQcDiagnostics(ss, entry = 99L), "must be a single integer") + expect_error(getQcDiagnostics(ss, entry = c(1L, 2L)), + "must be a single integer") + expect_error(getQcDiagnostics(ss, entry = "first"), + "must be a single integer") +}) + +# =========================================================================== +# FineMappingResultBase: adjustPips zero-row short-circuit +# =========================================================================== + +test_that("FineMappingResultBase: adjustPips on a zero-row collection returns the input unchanged", { + e <- .alc_makeFmEntry(3) + res <- GwasFineMappingResult(study = "g1", method = "susie", + entry = list(e)) + empty <- res[integer(0), ] + expect_s4_class(empty, "GwasFineMappingResult") + expect_equal(nrow(empty), 0L) + # Should hit the `if (nrow(x) == 0L) return(x)` early-return. + out <- adjustPips(empty, character(0)) + expect_identical(out, empty) +}) diff --git a/tests/testthat/test_tupleSelectors.R b/tests/testthat/test_tupleSelectors.R new file mode 100644 index 00000000..7764b00b --- /dev/null +++ b/tests/testthat/test_tupleSelectors.R @@ -0,0 +1,169 @@ +context("tupleSelectors (internal row-selector helpers)") + +# These helpers (.matchTupleRows, .tupleSelectRow, .tupleSelectRowGwasFmr) +# work on anything with `nrow(x)` and `x[[col]]` semantics, so the tests +# below use plain base-R data.frames rather than building S4 collections. + +# =========================================================================== +# .matchTupleRows +# =========================================================================== + +test_that(".matchTupleRows: empty keys returns every row index", { + df <- data.frame(study = c("s1", "s2"), method = c("susie", "lasso")) + expect_equal(pecotmr:::.matchTupleRows(df, list()), + c(1L, 2L)) +}) + +test_that(".matchTupleRows: AND-matches across multiple (column, value) pairs", { + df <- data.frame(study = c("s1", "s1", "s2"), + context = c("c1", "c2", "c1"), + stringsAsFactors = FALSE) + expect_equal(pecotmr:::.matchTupleRows(df, list(study = "s1")), + c(1L, 2L)) + expect_equal( + pecotmr:::.matchTupleRows(df, list(study = "s1", context = "c2")), + 2L) + expect_equal( + pecotmr:::.matchTupleRows(df, list(study = "ghost", context = "c1")), + integer(0)) +}) + +# =========================================================================== +# .tupleSelectRow (QtlFineMappingResult / TwasWeights shape) +# =========================================================================== + +test_that(".tupleSelectRow: zero-row input errors with the class label", { + empty <- data.frame(study = character(0), context = character(0), + trait = character(0), method = character(0), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRow(empty, + study = "s1", context = "c1", trait = "t1", method = "susie", + cls = "TwasWeights"), + "TwasWeights has no rows") +}) + +test_that(".tupleSelectRow: single-row collection returns 1L without selectors", { + one <- data.frame(study = "s1", context = "c1", trait = "t1", + method = "susie", stringsAsFactors = FALSE) + expect_equal(pecotmr:::.tupleSelectRow(one), 1L) +}) + +test_that(".tupleSelectRow: multi-row + missing selectors errors with row count", { + multi <- data.frame(study = c("s1", "s1"), + context = c("c1", "c2"), + trait = c("t1", "t1"), + method = c("susie", "susie"), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRow(multi, cls = "QtlFineMappingResult"), + "QtlFineMappingResult has 2 entries") +}) + +test_that(".tupleSelectRow: non-scalar selectors error", { + multi <- data.frame(study = c("s1", "s2"), + context = c("c1", "c2"), + trait = c("t1", "t2"), + method = c("susie", "susie"), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRow(multi, + study = c("s1", "s2"), context = "c1", trait = "t1", method = "susie"), + "must each be length 1") +}) + +test_that(".tupleSelectRow: matching tuple returns first row index", { + multi <- data.frame(study = c("s1", "s1"), + context = c("c1", "c2"), + trait = c("t1", "t1"), + method = c("susie", "susie"), + stringsAsFactors = FALSE) + expect_equal( + pecotmr:::.tupleSelectRow(multi, + study = "s1", context = "c2", trait = "t1", method = "susie"), + 2L) +}) + +test_that(".tupleSelectRow: missing tuple errors with the 4-tuple in the message", { + multi <- data.frame(study = c("s1", "s1"), + context = c("c1", "c2"), + trait = c("t1", "t1"), + method = c("susie", "susie"), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRow(multi, + study = "ghost", context = "c1", trait = "t1", method = "susie"), + "No entry for") +}) + +# =========================================================================== +# .tupleSelectRowGwasFmr +# =========================================================================== + +test_that(".tupleSelectRowGwasFmr: zero-row input errors", { + empty <- data.frame(study = character(0), method = character(0), + region_id = character(0), stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRowGwasFmr(empty, study = "g1", method = "susie"), + "GwasFineMappingResult has no rows") +}) + +test_that(".tupleSelectRowGwasFmr: single-row collection returns 1L", { + one <- data.frame(study = "g1", method = "susie", region_id = "region_1", + stringsAsFactors = FALSE) + expect_equal(pecotmr:::.tupleSelectRowGwasFmr(one), 1L) +}) + +test_that(".tupleSelectRowGwasFmr: missing selectors on multi-row errors", { + multi <- data.frame(study = c("g1", "g2"), + method = c("susie", "susie"), + region_id = c("region_1", "region_1"), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRowGwasFmr(multi), + "Pass `study` and `method`") +}) + +test_that(".tupleSelectRowGwasFmr: non-scalar region errors", { + multi <- data.frame(study = c("g1", "g1"), + method = c("susie", "susie"), + region_id = c("r1", "r2"), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRowGwasFmr(multi, + study = "g1", method = "susie", region = c("r1", "r2")), + "`region` must be length 1") +}) + +test_that(".tupleSelectRowGwasFmr: region disambiguates per-block rows", { + # Same (study, method) across two regions; region picks the right row. + multi <- data.frame(study = c("g1", "g1"), + method = c("susie", "susie"), + region_id = c("chr22_1_100", "chr22_500_600"), + stringsAsFactors = FALSE) + expect_equal( + pecotmr:::.tupleSelectRowGwasFmr(multi, + study = "g1", method = "susie", region = "chr22_500_600"), + 2L) +}) + +test_that(".tupleSelectRowGwasFmr: missing tuple errors and includes region in message", { + one <- data.frame(study = "g1", method = "susie", region_id = "r1", + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRowGwasFmr(one, + study = "g1", method = "susie", region = "ghost"), + "region='ghost'") +}) + +test_that(".tupleSelectRowGwasFmr: ambiguous multi-match (no region) lists candidates", { + # Two rows share (study, method); .tupleSelectRowGwasFmr should error + # listing the available region_ids since the caller didn't disambiguate. + multi <- data.frame(study = c("g1", "g1"), + method = c("susie", "susie"), + region_id = c("region_A", "region_B"), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.tupleSelectRowGwasFmr(multi, study = "g1", method = "susie"), + "pass `region` to disambiguate") +}) From 70033a456894bfe76134f7b370521c1906563e5d Mon Sep 17 00:00:00 2001 From: danielnachun Date: Tue, 23 Jun 2026 05:06:00 +0000 Subject: [PATCH 3/3] Update documentation --- man/mashPipeline.Rd | 22 +++++++++-- man/sldscPostprocessingPipeline.Rd | 62 ++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 man/sldscPostprocessingPipeline.Rd diff --git a/man/mashPipeline.Rd b/man/mashPipeline.Rd index c7eff294..3a350aa8 100644 --- a/man/mashPipeline.Rd +++ b/man/mashPipeline.Rd @@ -8,6 +8,7 @@ mashPipeline( sumStatsList, alpha, residualCorrelation = NULL, + priorCovariances = NULL, nPcs = NULL, inputScale = c("auto", "beta", "z"), setSeed = 999 @@ -25,12 +26,25 @@ exponent forwarded to \code{mashr::mash_set_data()}. Use \code{alpha = 0} on the BETA scale, \code{alpha = 1} on the Z scale.} \item{residualCorrelation}{Optional pre-computed residual correlation -matrix (\code{Vhat}). Only consulted when \code{sumStatsList$null} -is absent; otherwise \code{Vhat} is estimated from the null slice -via \code{mashr::estimate_null_correlation_simple()}.} +matrix (\code{Vhat}). When supplied, replaces the inline +\code{mashr::estimate_null_correlation_simple()} call entirely and +the \code{"random"} slot of \code{sumStatsList} becomes optional +(the function does not need it for anything else). Useful when +\code{Vhat} was estimated previously on a larger reference and +shipped as a static artefact (the legacy MWE pattern).} + +\item{priorCovariances}{Optional named list of square covariance +matrices (the \code{Ulist} \code{mashr::mash()} consumes). When +supplied, replaces the canonical + PCA + flash + ED chain +(\code{cov_canonical} / \code{cov_pca} / \code{cov_flash} / +\code{cov_ed}) entirely; mash sees only the supplied matrices. +Every entry must be a \code{ncol(Bhat) x ncol(Bhat)} matrix. +Useful when \code{U} was learnt on a larger reference and shipped +as a static artefact (the legacy MWE pattern).} \item{nPcs}{Optional integer; number of principal components seeded -into \code{mashr::cov_pca()}. Defaults to \code{ncol(Bhat) - 1}.} +into \code{mashr::cov_pca()}. Defaults to \code{ncol(Bhat) - 1}. +Ignored when \code{priorCovariances} is supplied.} \item{inputScale}{One of \code{"auto"} (default), \code{"beta"}, \code{"z"}. Controls which (Bhat, Shat) pair is extracted from each diff --git a/man/sldscPostprocessingPipeline.Rd b/man/sldscPostprocessingPipeline.Rd new file mode 100644 index 00000000..90aa5919 --- /dev/null +++ b/man/sldscPostprocessingPipeline.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldscPostprocessingPipeline.R +\name{sldscPostprocessingPipeline} +\alias{sldscPostprocessingPipeline} +\title{sLDSC Postprocessing Pipeline} +\usage{ +sldscPostprocessingPipeline( + traitSinglePrefixes, + traitJointPrefix, + targetAnnoDir, + frqfileDir = NULL, + plinkName = "ADSP_chr", + mafCutoff = 0.05, + targetCategories = NULL, + targetLabels = NULL +) +} +\arguments{ +\item{traitSinglePrefixes}{Named list of file prefixes for the +single-target polyfun runs (one entry per trait; each value is a +length-N character vector of `/` prefixes, one per +target annotation).} + +\item{traitJointPrefix}{Named list of file prefixes for the joint +polyfun runs (one entry per trait; each value a `/` +prefix into the joint LD-score dir). Pass an empty list to skip +the joint branch.} + +\item{targetAnnoDir}{Directory containing the target `.annot.gz` +files used for sd_C and binary detection (typically the joint dir).} + +\item{frqfileDir}{Optional directory of `.frq` files for the MAF +cutoff. Pass \code{NULL} to skip MAF filtering.} + +\item{plinkName}{File-name prefix of the PLINK reference panel +(default \code{"ADSP_chr"}; combined per-chromosome as +\code{paste0(plinkName, chrom)}).} + +\item{mafCutoff}{Numeric MAF cutoff applied via the `.frq` files. +Default \code{0.05}. Set to \code{0} to opt out.} + +\item{targetCategories}{Optional character vector of target +annotation names to retain. Auto-detected from the joint run when +\code{NULL}.} + +\item{targetLabels}{Optional display names, same length / order as +\code{targetCategories}, applied to every output column / tau* +block colname.} +} +\value{ +A list with \code{per_trait} (per-trait standardised tables), + meta tables (\code{tau_star_meta}, \code{E_meta}, + \code{enrich_stat_meta}), and a \code{params} record of the call + options. +} +\description{ +Postprocess polyfun's per-trait sLDSC outputs (one + single-target run per target annotation, plus an optional joint + run) into a single results object with per-trait tau*, EnrichStat + with back-solved jackknife SE, and a DerSimonian-Laird random- + effects meta-analysis across traits. +}