From 41f84a703e66d91638a4b218a1574be9ebc2cd58 Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Sun, 21 Jun 2026 21:55:32 -0700 Subject: [PATCH 1/4] improve finemappingresult and website --- DESCRIPTION | 9 +- NAMESPACE | 11 +- R/AllClasses.R | 125 ++ R/AnnotationMatrix.R | 2 +- R/FineMappingEntry.R | 300 ++- R/FineMappingResultBase.R | 61 - R/GenotypeHandle.R | 2 +- R/GwasFineMappingResult.R | 20 +- R/LdBlocks.R | 2 +- R/LdStatistic.R | 2 +- R/QtlDataset.R | 2 +- R/QtlFineMappingResult.R | 20 +- R/SumStatsBase.R | 54 - R/TwasWeightsEntry.R | 2 +- R/allGenerics.R | 97 +- R/colocPipeline.R | 2 +- R/ctwasPipeline.R | 179 +- R/ctwasWrapper.R | 78 - R/fineMappingPipeline.R | 109 +- R/fineMappingWrappers.R | 260 +-- R/genotypeIo.R | 2 +- R/gwasSumStats.R | 4 +- R/h2Annotations.R | 2 +- R/h2EstimationWrappers.R | 2 +- R/jointDispatchers.R | 1601 ----------------- R/jointSpecification.R | 1592 ++++++++++++++++ R/qtlEnrichmentPipeline.R | 4 +- R/qtlSumStats.R | 4 +- R/sumstatsQc.R | 6 +- R/twasWeights.R | 2 +- R/twasWeightsPipeline.R | 33 +- R/vcfWriter.R | 215 ++- _pkgdown.yml | 444 +++++ man/FineMappingEntry.Rd | 47 +- man/FineMappingResultBase-class.Rd | 2 +- man/SumStatsBase-class.Rd | 2 +- man/adjustPips.Rd | 10 +- man/buildTopLoci.Rd | 2 +- man/computeLdScores.Rd | 2 +- man/ctwasPipeline.Rd | 35 + man/estimateH2.Rd | 2 +- man/fineMappingPipeline.Rd | 47 + man/getAnnotationMeta.Rd | 2 +- man/getAnnotations.Rd | 2 +- man/getBlockMetadata.Rd | 2 +- man/getBlocks.Rd | 2 +- man/getContexts.Rd | 2 +- man/getCorrelation.Rd | 2 +- man/getCs.Rd | 2 +- man/getCvPerformance.Rd | 2 +- man/getDataType.Rd | 2 +- man/getEffects.Rd | 19 - man/getEigenList.Rd | 2 +- man/getEnrichment.Rd | 2 +- man/getFineMappingResult.Rd | 2 +- man/getFits.Rd | 2 +- man/getFormat.Rd | 2 +- man/getGenome.Rd | 10 +- man/getGenotypeCovariates.Rd | 2 +- man/getGenotypeHandle.Rd | 2 +- man/getGenotypes.Rd | 2 +- man/getH2.Rd | 2 +- man/getInSample.Rd | 2 +- man/getLbf.Rd | 20 - man/getLdBlocks.Rd | 2 +- man/getLdMatrixList.Rd | 2 +- man/getLdScoreWeights.Rd | 2 +- man/getLdScores.Rd | 2 +- man/getLdSketch.Rd | 9 +- man/getLocal.Rd | 2 +- man/getMaf.Rd | 2 +- man/getMarginalEffects.Rd | 54 + man/getMethodNames.Rd | 3 +- man/getMixtureWeights.Rd | 2 +- man/getMolecularId.Rd | 20 - man/getN.Rd | 2 +- man/getNRef.Rd | 2 +- man/getNSamples.Rd | 2 +- man/getPath.Rd | 2 +- man/getPgenPtr.Rd | 2 +- man/getPhenotypeCovariates.Rd | 2 +- man/getPhenotypes.Rd | 2 +- man/getPip.Rd | 2 +- man/getQcInfo.Rd | 2 +- man/getQtlDatasets.Rd | 2 +- man/getRefPanel.Rd | 2 +- man/getResidualizedGenotypes.Rd | 2 +- man/getResidualizedPhenotypes.Rd | 2 +- man/getSampleIds.Rd | 2 +- man/getScaleResiduals.Rd | 2 +- man/getScoreStats.Rd | 2 +- man/getSnpIdx.Rd | 2 +- man/getSnpInfo.Rd | 2 +- man/getSnpRanges.Rd | 2 +- man/getStandardized.Rd | 2 +- man/getStudy.Rd | 10 +- man/getSumStats.Rd | 2 +- man/getSumstatDf.Rd | 2 +- man/getSusieFit.Rd | 32 + man/getTauBlocks.Rd | 2 +- man/getTopLoci.Rd | 26 +- man/getTraits.Rd | 2 +- man/getTrimmedFit.Rd | 43 - man/getVarY.Rd | 2 +- man/getVariantIds.Rd | 2 +- man/getVariantInfo.Rd | 2 +- man/getVariantNames.Rd | 19 - man/getWeights.Rd | 2 +- man/getZ.Rd | 2 +- man/hasGenotypes.Rd | 2 +- man/nSnps.Rd | 2 +- man/pecotmr-generics.Rd | 2 +- man/postprocessFinemappingFits.Rd | 3 +- man/readAnnotations.Rd | 2 +- man/readGenotypes.Rd | 2 +- man/subsetChr.Rd | 2 +- man/trimCtwasVariants.Rd | 24 - man/twasWeightsPipeline.Rd | 25 +- man/writeSumstatsVcf.Rd | 21 +- tests/testthat/helper-collectionAccessors.R | 20 +- tests/testthat/helper-s4Constructors.R | 20 +- tests/testthat/helper-showMethods.R | 20 +- tests/testthat/test_FineMappingEntry.R | 41 +- tests/testthat/test_GwasFineMappingResult.R | 11 +- tests/testthat/test_QtlFineMappingResult.R | 13 +- tests/testthat/test_causalInferencePipeline.R | 16 +- tests/testthat/test_colocPipeline.R | 27 +- tests/testthat/test_ctwasPipeline.R | 111 ++ tests/testthat/test_ctwasWrapper.R | 296 --- tests/testthat/test_deprecated.R | 43 +- tests/testthat/test_fineMappingPipeline.R | 93 +- tests/testthat/test_fineMappingWrappers.R | 71 +- tests/testthat/test_genotypeIo.R | 2 +- tests/testthat/test_jointDispatchers.R | 462 ----- tests/testthat/test_jointSpecification.R | 467 +++++ tests/testthat/test_mashWrapper.R | 2 +- tests/testthat/test_qtlEnrichmentPipeline.R | 4 +- tests/testthat/test_sumstatsQc.R | 18 +- tests/testthat/test_twasWeightsPipeline.R | 2 +- tests/testthat/test_vcfWriter.R | 144 +- 140 files changed, 4386 insertions(+), 3354 deletions(-) create mode 100644 R/AllClasses.R delete mode 100644 R/FineMappingResultBase.R delete mode 100644 R/SumStatsBase.R delete mode 100644 R/ctwasWrapper.R delete mode 100644 R/jointDispatchers.R delete mode 100644 man/getEffects.Rd delete mode 100644 man/getLbf.Rd create mode 100644 man/getMarginalEffects.Rd delete mode 100644 man/getMolecularId.Rd create mode 100644 man/getSusieFit.Rd delete mode 100644 man/getTrimmedFit.Rd delete mode 100644 man/getVariantNames.Rd delete mode 100644 man/trimCtwasVariants.Rd delete mode 100644 tests/testthat/test_ctwasWrapper.R delete mode 100644 tests/testthat/test_jointDispatchers.R diff --git a/DESCRIPTION b/DESCRIPTION index 1a449237..d19b6a0e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -89,11 +89,11 @@ VignetteBuilder: knitr Config/roxygen2/version: 8.0.0 RoxygenNote: 7.3.3 Collate: - 'allGenerics.R' + 'AllGenerics.R' + 'GenotypeHandle.R' + 'AllClasses.R' 'AnnotationMatrix.R' 'FineMappingEntry.R' - 'FineMappingResultBase.R' - 'GenotypeHandle.R' 'tupleSelectors.R' 'GwasFineMappingResult.R' 'H2Estimate.R' @@ -105,14 +105,12 @@ Collate: 'QtlDataset.R' 'MultiStudyQtlDataset.R' 'QtlFineMappingResult.R' - 'SumStatsBase.R' 'TwasWeightsEntry.R' 'causalInferencePipeline.R' 'colocPipeline.R' 'colocboostPipeline.R' 'cpp11.R' 'ctwasPipeline.R' - 'ctwasWrapper.R' 'deprecated.R' 'exampleData.R' 'fineMappingPipeline.R' @@ -121,7 +119,6 @@ Collate: 'gwasSumStats.R' 'h2Annotations.R' 'h2EstimationWrappers.R' - 'jointDispatchers.R' 'jointSpecification.R' 'ld.R' 'mashPipeline.R' diff --git a/NAMESPACE b/NAMESPACE index f37fb582..614d0059 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -93,7 +93,6 @@ export(getCs) export(getCtwasMetaData) export(getCvPerformance) export(getDataType) -export(getEffects) export(getEigenList) export(getEnrichment) export(getFineMappingResult) @@ -105,7 +104,6 @@ export(getGenotypeHandle) export(getGenotypes) export(getH2) export(getInSample) -export(getLbf) export(getLdBlocks) export(getLdMatrixList) export(getLdScoreWeights) @@ -113,9 +111,9 @@ export(getLdScores) export(getLdSketch) export(getLocal) export(getMaf) +export(getMarginalEffects) export(getMethodNames) export(getMixtureWeights) -export(getMolecularId) export(getN) export(getNRef) export(getNSamples) @@ -140,16 +138,15 @@ export(getStandardized) export(getStudy) export(getSumStats) export(getSumstatDf) +export(getSusieFit) export(getSusieResult) export(getTauBlocks) export(getTopLoci) export(getTraits) -export(getTrimmedFit) export(getTwasWeights) export(getVarY) export(getVariantIds) export(getVariantInfo) -export(getVariantNames) export(getWeights) export(getZ) export(glmnetWeights) @@ -243,7 +240,6 @@ export(susieInfWeights) export(susieRssPipeline) export(susieRssWeights) export(susieWeights) -export(trimCtwasVariants) export(twasMultivariateWeightsPipeline) export(twasPipeline) export(twasPredict) @@ -298,6 +294,7 @@ exportMethods(getLdScores) exportMethods(getLdSketch) exportMethods(getLocal) exportMethods(getMaf) +exportMethods(getMarginalEffects) exportMethods(getMethodNames) exportMethods(getMixtureWeights) exportMethods(getN) @@ -323,10 +320,10 @@ exportMethods(getStandardized) exportMethods(getStudy) exportMethods(getSumStats) exportMethods(getSumstatDf) +exportMethods(getSusieFit) exportMethods(getTauBlocks) exportMethods(getTopLoci) exportMethods(getTraits) -exportMethods(getTrimmedFit) exportMethods(getTwasWeights) exportMethods(getVarY) exportMethods(getVariantIds) diff --git a/R/AllClasses.R b/R/AllClasses.R new file mode 100644 index 00000000..6f082d76 --- /dev/null +++ b/R/AllClasses.R @@ -0,0 +1,125 @@ +# ============================================================================= +# AllClasses.R +# ----------------------------------------------------------------------------- +# Virtual base classes shared across the package. Concrete subclasses live +# in their own per-class files (QtlSumStats.R, GwasSumStats.R, QtlDataset.R, +# QtlFineMappingResult.R, GwasFineMappingResult.R, etc.). +# +# Per Bioconductor convention this file is loaded first in the Collate +# ordering (the "AllClasses.R" filename sorts to the top of the alphabet), +# and every method-bearing file uses `@include AllClasses.R` so roxygen +# topologically orders the Collate field for us. +# ============================================================================= + +#' @include AllGenerics.R GenotypeHandle.R +#' @importFrom methods setClass setMethod new is validObject +NULL + +# ============================================================================= +# SumStatsBase +# ----------------------------------------------------------------------------- +# Shared parent of the QTL and GWAS summary statistics collections. +# Concrete subclasses (QtlSumStats, GwasSumStats) inherit from DFrame and +# share the ldSketch / genome / qcInfo slots. Class-specific accessors +# (getZ / getN / getMaf / nSnps / subsetChr / getVarY / getSumStats) +# stay on the concrete subclass because they rely on the tuple shape +# (3-tuple for QtlSumStats, 1-tuple for GwasSumStats). +# ============================================================================= + +#' @title Summary Statistics Base Class +#' @description Virtual base class for QTL and GWAS summary statistics +#' collections. Concrete subclasses (\code{QtlSumStats}, +#' \code{GwasSumStats}) inherit from \code{DFrame} and share the +#' \code{ldSketch} / \code{genome} / \code{qcInfo} slots. +#' @slot ldSketch The \code{GenotypeHandle} the QC pipeline harmonized +#' against. Required: \code{summaryStatsQc()} sets it. +#' @slot genome Character, genome build label. +#' @slot qcInfo A \code{list} recording which QC steps ran. Empty +#' \code{list()} on construction; populated by \code{summaryStatsQc()} +#' with a per-step audit record (filter names, drop counts, liftover +#' target, RAISS settings, etc.). Fine-mapping and TWAS-weights +#' pipelines reject inputs where \code{length(getQcInfo(x)) == 0L} — the +#' slot serves as both the gating flag and the audit trail. +#' @export +setClass("SumStatsBase", + contains = c("VIRTUAL", "DFrame"), + representation( + ldSketch = "GenotypeHandle", + genome = "character", + qcInfo = "list" + )) + +#' @rdname getGenome +#' @export +setMethod("getGenome", "SumStatsBase", function(x, ...) x@genome) + +#' @rdname getQcInfo +#' @export +setMethod("getQcInfo", "SumStatsBase", function(x, ...) x@qcInfo) + +#' @rdname getLdSketch +#' @export +setMethod("getLdSketch", "SumStatsBase", function(x, ...) x@ldSketch) + +#' @rdname getStudy +#' @export +setMethod("getStudy", "SumStatsBase", + function(x) unique(as.character(x$study))) + +# ============================================================================= +# FineMappingResultBase +# ----------------------------------------------------------------------------- +# Shared parent of the QTL and GWAS fine-mapping result collections. +# Concrete subclasses (QtlFineMappingResult, GwasFineMappingResult) carry +# a DFrame of per-fit rows plus a shared ldSketch slot. Downstream +# pipelines dispatch on FineMappingResultBase for behaviors that apply to +# either flavour, and on the concrete subclass when the tuple shape +# matters. +# ============================================================================= + +#' @title Fine-Mapping Result Base Class +#' @description Virtual base class for fine-mapping result collections. +#' Concrete subclasses (\code{QtlFineMappingResult}, +#' \code{GwasFineMappingResult}) carry a \code{DFrame} of per-fit rows +#' and a shared \code{ldSketch} slot. Downstream pipelines should +#' dispatch on \code{FineMappingResultBase} for behaviors that apply to +#' either flavour, and on the concrete subclass when the tuple shape +#' matters. +#' @slot ldSketch The LD reference \code{GenotypeHandle} the fits were +#' computed against, or \code{NULL} when the fits were derived from +#' individual-level data (no LD reference). Used downstream for +#' cross-pipeline LD-sketch identity validation. +#' @export +setClass("FineMappingResultBase", + contains = c("VIRTUAL", "DFrame"), + representation(ldSketch = "ANY")) + +#' @rdname getStudy +#' @export +setMethod("getStudy", "FineMappingResultBase", + function(x) unique(as.character(x$study))) + +#' @rdname getLdSketch +#' @export +setMethod("getLdSketch", "FineMappingResultBase", + function(x, ...) x@ldSketch) + +#' @rdname getMethodNames +#' @export +setMethod("getMethodNames", "FineMappingResultBase", + function(x) unique(as.character(x$method))) + +#' @rdname adjustPips +#' @export +setMethod("adjustPips", "FineMappingResultBase", + function(x, keepVariants, ...) { + if (nrow(x) == 0L) return(x) + entries <- x@listData$entry + for (i in seq_along(entries)) { + adj <- tryCatch(adjustPips(entries[[i]], keepVariants, ...), + error = function(err) NULL) + if (!is.null(adj)) entries[[i]] <- adj + } + x@listData$entry <- entries + x + }) diff --git a/R/AnnotationMatrix.R b/R/AnnotationMatrix.R index 017788e9..015115d3 100644 --- a/R/AnnotationMatrix.R +++ b/R/AnnotationMatrix.R @@ -6,7 +6,7 @@ # baseline (always jointly fitted) or candidate (score-tested). # ============================================================================= -#' @include allGenerics.R +#' @include AllGenerics.R NULL #' @title Genomic Annotation Matrix diff --git a/R/FineMappingEntry.R b/R/FineMappingEntry.R index ae0e4a10..298a787a 100644 --- a/R/FineMappingEntry.R +++ b/R/FineMappingEntry.R @@ -1,105 +1,156 @@ # ============================================================================= # FineMappingEntry S4 class # ----------------------------------------------------------------------------- -# Per-tuple fine-mapping payload: variant ids + a method-specific trimmed -# fit object + a long-format topLoci data.frame (per-variant PIP, CS -# membership, beta/se). One entry sits in every row of a -# FineMappingResult collection. Accessors read directly from the payload -# slots (no further lookups required). +# Per-tuple fine-mapping payload backing one row of a FineMappingResult +# collection. Three slots: +# +# variantIds : character vector, variant IDs in fit order +# susieFit : the SuSiE fit (full or trimmed; controlled by the +# pipeline's `trim` parameter) +# topLoci : unfiltered per-variant data.frame carrying BOTH marginal +# univariate effects and posterior fine-mapping output in +# a single wide table. Stored in canonical schema (column +# names with `marginal_*` / `posterior_*` prefixes); +# accessors project + rename to user-facing column names. +# +# Accessors: +# getVariantIds(x) +# getSusieFit(x) +# getTopLoci(x, signalCutoff = 0.025) ........... posterior view (PIP filter) +# getMarginalEffects(x, maxPval = NULL) ......... marginal view (p-value filter) +# getPip(x), getCs(x, coverage) +# adjustPips(x, keepVariants) # ============================================================================= -#' @include allGenerics.R +#' @include AllGenerics.R NULL setClass("FineMappingEntry", representation( variantIds = "character", - trimmedFit = "ANY", - topLoci = "data.frame", - sumstats = "ANY" + susieFit = "ANY", + topLoci = "data.frame" ), validity = function(object) { errors <- character() + n <- length(object@variantIds) if (nrow(object@topLoci) > 0L) { + # Minimal contract: variant_id + pip. Canonical projector columns + # (marginal_*, posterior_*, etc.) are pipeline-populated; tests + # and downstream consumers building skeletal entries can omit + # them, in which case accessor projections return NA-filled cols. required <- c("variant_id", "pip") missingCols <- setdiff(required, colnames(object@topLoci)) if (length(missingCols) > 0L) - errors <- c(errors, paste("topLoci missing columns:", - paste(missingCols, collapse = ", "))) + errors <- c(errors, + paste("topLoci missing required columns:", + paste(missingCols, collapse = ", "))) + if (n > 0L && nrow(object@topLoci) != n) + errors <- c(errors, + sprintf("topLoci has %d rows but variantIds has %d entries; ", + nrow(object@topLoci), n)) + if (n > 0L && nrow(object@topLoci) == n && + !identical(as.character(object@topLoci$variant_id), + as.character(object@variantIds))) + errors <- c(errors, + "topLoci$variant_id must equal variantIds in order") + # Drift check: if susieFit carries its own pip vector, it must match + # the topLoci pip column. Catches the case where adjustPips() (or + # any future mutator) updates one and forgets the other. + sf <- object@susieFit + if (!is.null(sf) && is.list(sf) && !is.null(sf$pip) && + length(sf$pip) == n && "pip" %in% colnames(object@topLoci)) { + if (!isTRUE(all.equal(as.numeric(sf$pip), + as.numeric(object@topLoci$pip), + tolerance = 1e-10))) + errors <- c(errors, + "susieFit$pip and topLoci$pip have drifted out of sync") + } } if (length(errors) == 0L) TRUE else errors } ) -#' @title TWAS Weights Entry (per-tuple payload) -#' @description S4 container for one method's TWAS weights, attached to -#' a \code{TwasWeights} row. One entry corresponds to one -#' \code{(study, context, trait, method)} tuple. -#' @slot variantIds Character vector of variant IDs that have weights. -#' @slot weights Numeric vector (single-method, single-outcome) or -#' matrix (multi-outcome). -#' @slot fits Optional method-specific fit object. -#' @slot cvPerformance Optional named list of CV metrics (\code{rsq}, -#' \code{pval}, etc.). -#' @slot standardized Logical (length 1). Whether the weights are on the -#' standardized scale. -#' @slot dataType Data-type tag for downstream usage (e.g., -#' \code{"expression"}, \code{"splicing"}); may be \code{NULL}. -#' @export - - #' @title Create a FineMappingEntry Object #' @description Construct a \code{FineMappingEntry} payload for one #' \code{(study, context, trait, method)} row of a #' \code{FineMappingResult} collection. -#' @param variantIds Character vector of variant IDs. -#' @param trimmedFit Method-specific fit object. -#' @param topLoci Long-format \code{data.frame}. -#' @param sumstats Optional list of summary statistics, or \code{NULL}. +#' @param variantIds Character vector of variant IDs in fit order. +#' @param susieFit The SuSiE fit object (full or trimmed; controlled by +#' the pipeline's \code{trim} parameter). +#' @param topLoci Per-variant \code{data.frame} in canonical schema: +#' identity columns (\code{variant_id, chrom, pos, A1, A2}), context +#' (\code{N, MAF}), marginal columns (\code{marginal_beta, +#' marginal_se, marginal_z, marginal_p}), posterior columns +#' (\code{pip, posterior_mean, posterior_sd, cs_*, cs_*_purity}), +#' pipeline stamps (\code{method, gene, event, grange_start, +#' grange_end}). Unfiltered: one row per variant in the fit. #' @return A \code{FineMappingEntry} object. #' @export -FineMappingEntry <- function(variantIds, trimmedFit, topLoci, - sumstats = NULL) { +FineMappingEntry <- function(variantIds, susieFit, topLoci) { obj <- new("FineMappingEntry", variantIds = as.character(variantIds), - trimmedFit = trimmedFit, - topLoci = as.data.frame(topLoci), - sumstats = sumstats) + susieFit = susieFit, + topLoci = as.data.frame(topLoci)) validObject(obj) obj } - -# Per-entry accessors (reuse the existing generics; these methods read -# slots from the payload classes directly). +# ============================================================================= +# Accessors +# ============================================================================= #' @rdname getVariantIds #' @export setMethod("getVariantIds", "FineMappingEntry", function(x, ...) x@variantIds) -#' @rdname getTrimmedFit +#' @rdname getSusieFit #' @export -setMethod("getTrimmedFit", "FineMappingEntry", - function(x, ...) x@trimmedFit) +setMethod("getSusieFit", "FineMappingEntry", + function(x, ...) x@susieFit) #' @rdname getTopLoci #' @export setMethod("getTopLoci", "FineMappingEntry", - function(x, type = c("data.frame", "GRanges"), ...) { + function(x, type = c("data.frame", "GRanges"), + signalCutoff = 0.025, ...) { type <- match.arg(type) tl <- x@topLoci - if (type == "data.frame") return(tl) - if (is.null(tl) || nrow(tl) == 0L) return(GRanges()) - parsed <- parseVariantId(tl$variant_id) - gr <- GRanges( + if (nrow(tl) == 0L) { + out <- tl + } else { + keep <- if (is.null(signalCutoff) || signalCutoff <= 0) { + rep(TRUE, nrow(tl)) + } else { + !is.na(tl$pip) & tl$pip > signalCutoff + } + out <- .projectPosteriorView(tl[keep, , drop = FALSE]) + } + if (type == "data.frame") return(out) + if (is.null(out) || nrow(out) == 0L) return(GenomicRanges::GRanges()) + parsed <- parseVariantId(out$variant_id) + gr <- GenomicRanges::GRanges( seqnames = paste0("chr", parsed$chrom), - ranges = IRanges(start = parsed$pos, width = 1L) - ) - mcols(gr) <- DataFrame(tl) + ranges = IRanges::IRanges(start = parsed$pos, width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame(out) gr }) +#' @rdname getMarginalEffects +#' @export +setMethod("getMarginalEffects", "FineMappingEntry", + function(x, maxPval = NULL, ...) { + tl <- x@topLoci + if (nrow(tl) == 0L) return(.projectMarginalView(tl)) + out <- .projectMarginalView(tl) + if (!is.null(maxPval) && nrow(out) > 0L) { + keep <- !is.na(out$p) & out$p <= maxPval + out <- out[keep, , drop = FALSE] + } + out + }) + #' @rdname getPip #' @export setMethod("getPip", "FineMappingEntry", function(x, ...) { @@ -113,11 +164,12 @@ setMethod("getPip", "FineMappingEntry", function(x, ...) { setMethod("getCs", "FineMappingEntry", function(x, coverage = 0.95, ...) { tl <- x@topLoci - if (nrow(tl) == 0L) return(data.frame()) - csCol <- grep(paste0("^cs.*", coverage * 100), names(tl), value = TRUE) - if (length(csCol) == 0L && "cs" %in% names(tl)) csCol <- "cs" - if (length(csCol) == 0L) return(data.frame()) - tl[tl[[csCol[1L]]] > 0, , drop = FALSE] + if (nrow(tl) == 0L) return(.projectPosteriorView(tl)) + csCol <- grep(paste0("^cs_", coverage * 100, "$"), names(tl), value = TRUE) + if (length(csCol) == 0L) return(.projectPosteriorView(tl[FALSE, , drop = FALSE])) + keep <- !is.na(tl[[csCol[1L]]]) & nzchar(tl[[csCol[1L]]]) & + !grepl("_0$", tl[[csCol[1L]]]) + .projectPosteriorView(tl[keep, , drop = FALSE]) }) #' @rdname adjustPips @@ -130,10 +182,11 @@ setMethod("adjustPips", "FineMappingEntry", stop("adjustPips: intersection of entry variants with `keepVariants` ", "is empty.") keepIdx <- match(common, x@variantIds) - fit <- x@trimmedFit + fit <- x@susieFit if (is.null(fit$lbf_variable)) - stop("adjustPips: entry's trimmedFit has no `lbf_variable` matrix; ", - "PIP renormalization requires lbf_variable.") + stop("adjustPips: entry's susieFit has no `lbf_variable` matrix; ", + "PIP renormalization requires lbf_variable. Re-run the ", + "pipeline with trim = FALSE to retain it.") lbfSub <- fit$lbf_variable[, keepIdx, drop = FALSE] fit$lbf_variable <- lbfSub fit$alpha <- lbfToAlpha(lbfSub) @@ -148,26 +201,133 @@ setMethod("adjustPips", "FineMappingEntry", else fit$mu2[, keepIdx, drop = FALSE] if (!is.null(fit$X_column_scale_factors)) fit$X_column_scale_factors <- fit$X_column_scale_factors[keepIdx] + # Rebuild topLoci consistently from the new fit + the existing + # marginal columns (which are per-variant and just need subsetting). newTopLoci <- x@topLoci - if (nrow(newTopLoci) > 0L) + if (nrow(newTopLoci) > 0L) { newTopLoci <- newTopLoci[newTopLoci$variant_id %in% common, , - drop = FALSE] - if (nrow(newTopLoci) > 0L && "pip" %in% names(newTopLoci)) { - pipByVid <- setNames(fit$pip, common) - newTopLoci$pip <- unname(pipByVid[newTopLoci$variant_id]) + drop = FALSE] + newTopLoci$pip <- as.numeric(fit$pip) + # Posterior mean / SD computed from the fit when alpha + mu/mu2 + # are matrix-shaped. When either is missing or shapes don't + # match, leave the existing column values in place. + alphaMat <- if (!is.null(fit$alpha)) as.matrix(fit$alpha) else NULL + muMat <- if (!is.null(fit$mu)) as.matrix(fit$mu) else NULL + mu2Mat <- if (!is.null(fit$mu2)) as.matrix(fit$mu2) else NULL + if (!is.null(alphaMat) && !is.null(muMat) && + all(dim(alphaMat) == dim(muMat))) { + newTopLoci$posterior_mean <- as.numeric(colSums(alphaMat * muMat)) + if (!is.null(mu2Mat) && all(dim(alphaMat) == dim(mu2Mat))) { + newTopLoci$posterior_sd <- as.numeric(sqrt(pmax( + colSums(alphaMat * mu2Mat) - newTopLoci$posterior_mean^2, 0))) + } + } } new("FineMappingEntry", variantIds = common, - trimmedFit = fit, - topLoci = newTopLoci, - sumstats = x@sumstats) + susieFit = fit, + topLoci = newTopLoci) }) #' @export setMethod("show", "FineMappingEntry", function(object) { - nCs <- if (nrow(object@topLoci) > 0L && "cs" %in% names(object@topLoci)) - length(unique(object@topLoci$cs[object@topLoci$cs > 0])) - else 0L + tl <- object@topLoci + nCs <- if (nrow(tl) > 0L) { + csCols <- grep("^cs_[0-9]+$", names(tl), value = TRUE) + if (length(csCols) > 0L) { + vals <- unique(unlist(lapply(csCols, function(cc) { + v <- tl[[cc]]; v <- v[!grepl("_0$", v)]; v + }))) + length(vals) + } else 0L + } else 0L cat(sprintf("FineMappingEntry: %d variants, %d credible sets\n", length(object@variantIds), nCs)) }) + +# ============================================================================= +# Internal column projectors used by accessors +# ============================================================================= + +# Read a column from the canonical wide topLoci, returning NAs of the +# given type when the column is absent. Lets accessor projectors tolerate +# skeletal entries that lack optional columns. +# @noRd +.tlCol <- function(tl, name, type = c("character", "integer", "numeric")) { + type <- match.arg(type) + if (name %in% colnames(tl)) { + return(switch(type, + character = as.character(tl[[name]]), + integer = as.integer(tl[[name]]), + numeric = as.numeric(tl[[name]]))) + } + switch(type, + character = rep(NA_character_, nrow(tl)), + integer = rep(NA_integer_, nrow(tl)), + numeric = rep(NA_real_, nrow(tl))) +} + +# Project the canonical wide topLoci to the posterior view: identity + +# N/MAF + (beta=posterior_mean, se=posterior_sd) + pip + cs_* + signal_cluster +# + pipeline stamps. Renames `posterior_mean`/`posterior_sd` to `beta`/`se`. +# Missing optional columns are NA-filled. +# @noRd +.projectPosteriorView <- function(tl) { + if (nrow(tl) == 0L) { + return(data.frame( + variant_id = character(0), chrom = character(0), pos = integer(0), + A1 = character(0), A2 = character(0), + N = numeric(0), MAF = numeric(0), + beta = numeric(0), se = numeric(0), pip = numeric(0), + stringsAsFactors = FALSE)) + } + out <- data.frame( + variant_id = .tlCol(tl, "variant_id", "character"), + chrom = .tlCol(tl, "chrom", "character"), + pos = .tlCol(tl, "pos", "integer"), + A1 = .tlCol(tl, "A1", "character"), + A2 = .tlCol(tl, "A2", "character"), + N = .tlCol(tl, "N", "numeric"), + MAF = .tlCol(tl, "MAF", "numeric"), + beta = .tlCol(tl, "posterior_mean", "numeric"), + se = .tlCol(tl, "posterior_sd", "numeric"), + pip = .tlCol(tl, "pip", "numeric"), + stringsAsFactors = FALSE) + # Pass through CS columns (cs_95, cs_70, cs_50, cs_95_purity) and + # pipeline stamps (method, gene, event, grange_*) when present. + extraCols <- intersect( + c("cs_95", "cs_70", "cs_50", "cs_95_purity", + "method", "gene", "event", "grange_start", "grange_end"), + colnames(tl)) + for (cc in extraCols) out[[cc]] <- tl[[cc]] + rownames(out) <- NULL + out +} + +# Project to the marginal view: identity + N/MAF + (beta, se, z, p) where +# beta/se/z/p are the marginal univariate columns renamed from their +# `marginal_*` storage names. Missing optional columns are NA-filled. +# @noRd +.projectMarginalView <- function(tl) { + if (nrow(tl) == 0L) { + return(data.frame( + variant_id = character(0), chrom = character(0), pos = integer(0), + A1 = character(0), A2 = character(0), + N = numeric(0), MAF = numeric(0), + beta = numeric(0), se = numeric(0), z = numeric(0), p = numeric(0), + stringsAsFactors = FALSE)) + } + data.frame( + variant_id = .tlCol(tl, "variant_id", "character"), + chrom = .tlCol(tl, "chrom", "character"), + pos = .tlCol(tl, "pos", "integer"), + A1 = .tlCol(tl, "A1", "character"), + A2 = .tlCol(tl, "A2", "character"), + N = .tlCol(tl, "N", "numeric"), + MAF = .tlCol(tl, "MAF", "numeric"), + beta = .tlCol(tl, "marginal_beta", "numeric"), + se = .tlCol(tl, "marginal_se", "numeric"), + z = .tlCol(tl, "marginal_z", "numeric"), + p = .tlCol(tl, "marginal_p", "numeric"), + stringsAsFactors = FALSE) +} diff --git a/R/FineMappingResultBase.R b/R/FineMappingResultBase.R deleted file mode 100644 index a2ec3ddf..00000000 --- a/R/FineMappingResultBase.R +++ /dev/null @@ -1,61 +0,0 @@ -# ============================================================================= -# FineMappingResultBase S4 virtual class -# ----------------------------------------------------------------------------- -# Shared parent of the QTL and GWAS fine-mapping result collections. -# Concrete subclasses (QtlFineMappingResult, GwasFineMappingResult) carry -# a DFrame of per-fit rows plus a shared ldSketch slot. Downstream -# pipelines dispatch on FineMappingResultBase for behaviors that apply to -# either flavour, and on the concrete subclass when the tuple shape -# matters. -# ============================================================================= - -#' @include allGenerics.R -#' @importFrom methods setClass setMethod new is validObject -NULL - -#' @title Fine-Mapping Result Base Class -#' @description Virtual base class for fine-mapping result collections. -#' Concrete subclasses (\code{QtlFineMappingResult}, -#' \code{GwasFineMappingResult}) carry a \code{DFrame} of per-fit rows -#' and a shared \code{ldSketch} slot. Downstream pipelines should -#' dispatch on \code{FineMappingResultBase} for behaviors that apply to -#' either flavour, and on the concrete subclass when the tuple shape -#' matters. -#' @slot ldSketch The LD reference \code{GenotypeHandle} the fits were -#' computed against, or \code{NULL} when the fits were derived from -#' individual-level data (no LD reference). Used downstream for -#' cross-pipeline LD-sketch identity validation. -#' @export -setClass("FineMappingResultBase", - contains = c("VIRTUAL", "DFrame"), - representation(ldSketch = "ANY")) - -#' @rdname getStudy -#' @export -setMethod("getStudy", "FineMappingResultBase", - function(x) unique(as.character(x$study))) - -#' @rdname getLdSketch -#' @export -setMethod("getLdSketch", "FineMappingResultBase", - function(x, ...) x@ldSketch) - -#' @rdname getMethodNames -#' @export -setMethod("getMethodNames", "FineMappingResultBase", - function(x) unique(as.character(x$method))) - -#' @rdname adjustPips -#' @export -setMethod("adjustPips", "FineMappingResultBase", - function(x, keepVariants, ...) { - if (nrow(x) == 0L) return(x) - entries <- x@listData$entry - for (i in seq_along(entries)) { - adj <- tryCatch(adjustPips(entries[[i]], keepVariants, ...), - error = function(err) NULL) - if (!is.null(adj)) entries[[i]] <- adj - } - x@listData$entry <- entries - x - }) diff --git a/R/GenotypeHandle.R b/R/GenotypeHandle.R index 286248fe..a184ffbf 100644 --- a/R/GenotypeHandle.R +++ b/R/GenotypeHandle.R @@ -6,7 +6,7 @@ # is deferred until extractBlockGenotypes() is called. # ============================================================================= -#' @include allGenerics.R +#' @include AllGenerics.R NULL #' @title Genotype File Handle diff --git a/R/GwasFineMappingResult.R b/R/GwasFineMappingResult.R index 9568d170..3b5a4247 100644 --- a/R/GwasFineMappingResult.R +++ b/R/GwasFineMappingResult.R @@ -10,7 +10,7 @@ # context/trait (GWAS has no per-tuple context or trait axis). # ============================================================================= -#' @include FineMappingResultBase.R tupleSelectors.R +#' @include AllClasses.R tupleSelectors.R NULL setClass("GwasFineMappingResult", @@ -153,17 +153,27 @@ setMethod("getCs", "GwasFineMappingResult", #' @export setMethod("getTopLoci", "GwasFineMappingResult", function(x, type = c("data.frame", "GRanges"), + signalCutoff = 0.025, study = NULL, context = NULL, trait = NULL, method = NULL, ...) { entry <- getFineMappingResult(x, study = study, method = method) - getTopLoci(entry, type = match.arg(type)) + getTopLoci(entry, type = match.arg(type), signalCutoff = signalCutoff) }) -#' @rdname getTrimmedFit +#' @rdname getMarginalEffects #' @export -setMethod("getTrimmedFit", "GwasFineMappingResult", +setMethod("getMarginalEffects", "GwasFineMappingResult", + function(x, maxPval = NULL, + study = NULL, context = NULL, trait = NULL, method = NULL, ...) { + entry <- getFineMappingResult(x, study = study, method = method) + getMarginalEffects(entry, maxPval = maxPval) + }) + +#' @rdname getSusieFit +#' @export +setMethod("getSusieFit", "GwasFineMappingResult", function(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) { entry <- getFineMappingResult(x, study = study, method = method) - getTrimmedFit(entry) + getSusieFit(entry) }) #' @rdname getVariantIds diff --git a/R/LdBlocks.R b/R/LdBlocks.R index 1ccdda72..1d0abf26 100644 --- a/R/LdBlocks.R +++ b/R/LdBlocks.R @@ -7,7 +7,7 @@ # indexed GWAS fine-mapping. # ============================================================================= -#' @include allGenerics.R +#' @include AllGenerics.R NULL setClass("LdBlocks", diff --git a/R/LdStatistic.R b/R/LdStatistic.R index fed9d99e..7df5c2a3 100644 --- a/R/LdStatistic.R +++ b/R/LdStatistic.R @@ -7,7 +7,7 @@ # LD-score-based methods (S-LDSC/g-LDSC). # ============================================================================= -#' @include allGenerics.R LdBlocks.R +#' @include AllGenerics.R LdBlocks.R NULL #' @title LD Statistic (Virtual Base Class) diff --git a/R/QtlDataset.R b/R/QtlDataset.R index 0e6e0e0e..048c0911 100644 --- a/R/QtlDataset.R +++ b/R/QtlDataset.R @@ -11,7 +11,7 @@ # multi-study composition (MultiStudyQtlDataset). # ============================================================================= -#' @include allGenerics.R +#' @include AllGenerics.R NULL setClass("QtlDataset", diff --git a/R/QtlFineMappingResult.R b/R/QtlFineMappingResult.R index 7c0e7e37..dbb864c6 100644 --- a/R/QtlFineMappingResult.R +++ b/R/QtlFineMappingResult.R @@ -11,7 +11,7 @@ # the joint dispatchers. # ============================================================================= -#' @include FineMappingResultBase.R tupleSelectors.R +#' @include AllClasses.R tupleSelectors.R NULL setClass("QtlFineMappingResult", @@ -169,19 +169,29 @@ setMethod("getCs", "QtlFineMappingResult", #' @export setMethod("getTopLoci", "QtlFineMappingResult", function(x, type = c("data.frame", "GRanges"), + signalCutoff = 0.025, study = NULL, context = NULL, trait = NULL, method = NULL, ...) { entry <- getFineMappingResult(x, study, context, trait, method) - getTopLoci(entry, type = match.arg(type)) + getTopLoci(entry, type = match.arg(type), signalCutoff = signalCutoff) }) -#' @rdname getTrimmedFit +#' @rdname getMarginalEffects #' @export -setMethod("getTrimmedFit", "QtlFineMappingResult", +setMethod("getMarginalEffects", "QtlFineMappingResult", + function(x, maxPval = NULL, + study = NULL, context = NULL, trait = NULL, method = NULL, ...) { + entry <- getFineMappingResult(x, study, context, trait, method) + getMarginalEffects(entry, maxPval = maxPval) + }) + +#' @rdname getSusieFit +#' @export +setMethod("getSusieFit", "QtlFineMappingResult", function(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) { entry <- getFineMappingResult(x, study, context, trait, method) - getTrimmedFit(entry) + getSusieFit(entry) }) #' @rdname getVariantIds diff --git a/R/SumStatsBase.R b/R/SumStatsBase.R deleted file mode 100644 index f2434d1f..00000000 --- a/R/SumStatsBase.R +++ /dev/null @@ -1,54 +0,0 @@ -# ============================================================================= -# SumStatsBase S4 virtual class -# ----------------------------------------------------------------------------- -# Shared parent of the QTL and GWAS summary statistics collections. -# Concrete subclasses (QtlSumStats, GwasSumStats) inherit from DFrame and -# share the ldSketch / genome / qcInfo slots. Class-specific accessors -# (getZ / getN / getMaf / nSnps / subsetChr / getVarY / getSumStats) -# stay on the concrete subclass because they rely on the tuple shape -# (3-tuple for QtlSumStats, 1-tuple for GwasSumStats). -# ============================================================================= - -#' @include allGenerics.R GenotypeHandle.R -#' @importFrom methods setClass setMethod new is validObject -NULL - -#' @title Summary Statistics Base Class -#' @description Virtual base class for QTL and GWAS summary statistics -#' collections. Concrete subclasses (\code{QtlSumStats}, -#' \code{GwasSumStats}) inherit from \code{DFrame} and share the -#' \code{ldSketch} / \code{genome} / \code{qcInfo} slots. -#' @slot ldSketch The \code{GenotypeHandle} the QC pipeline harmonized -#' against. Required: \code{summaryStatsQc()} sets it. -#' @slot genome Character, genome build label. -#' @slot qcInfo A \code{list} recording which QC steps ran. Empty -#' \code{list()} on construction; populated by \code{summaryStatsQc()} -#' with a per-step audit record (filter names, drop counts, liftover -#' target, RAISS settings, etc.). Fine-mapping and TWAS-weights -#' pipelines reject inputs where \code{length(getQcInfo(x)) == 0L} — the -#' slot serves as both the gating flag and the audit trail. -#' @export -setClass("SumStatsBase", - contains = c("VIRTUAL", "DFrame"), - representation( - ldSketch = "GenotypeHandle", - genome = "character", - qcInfo = "list" - )) - -#' @rdname getGenome -#' @export -setMethod("getGenome", "SumStatsBase", function(x, ...) x@genome) - -#' @rdname getQcInfo -#' @export -setMethod("getQcInfo", "SumStatsBase", function(x, ...) x@qcInfo) - -#' @rdname getLdSketch -#' @export -setMethod("getLdSketch", "SumStatsBase", function(x, ...) x@ldSketch) - -#' @rdname getStudy -#' @export -setMethod("getStudy", "SumStatsBase", - function(x) unique(as.character(x$study))) diff --git a/R/TwasWeightsEntry.R b/R/TwasWeightsEntry.R index 498b940c..914a924c 100644 --- a/R/TwasWeightsEntry.R +++ b/R/TwasWeightsEntry.R @@ -7,7 +7,7 @@ # every row of a TwasWeights collection. # ============================================================================= -#' @include allGenerics.R +#' @include AllGenerics.R NULL setClass("TwasWeightsEntry", diff --git a/R/allGenerics.R b/R/allGenerics.R index 9526bbea..0a946dd4 100644 --- a/R/allGenerics.R +++ b/R/allGenerics.R @@ -366,35 +366,54 @@ setGeneric("adjustPips", #' @export setGeneric("getPip", function(x, ...) standardGeneric("getPip")) -#' @title Get Trimmed Fit -#' @description Extract the trimmed SuSiE fit. +#' @title Get SuSiE Fit +#' @description Extract the SuSiE fit object from a fine-mapping entry +#' or result. The fit may be the trimmed view (when the pipeline ran +#' with the default \code{trim = TRUE}) or the full untrimmed +#' \code{susie()} return (when \code{trim = FALSE}). #' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. #' @param ... Class-specific selection arguments. -#' @return A list (trimmed SuSiE fit). +#' @return A list (the SuSiE fit object). #' @export -setGeneric("getTrimmedFit", function(x, ...) standardGeneric("getTrimmedFit")) +setGeneric("getSusieFit", function(x, ...) standardGeneric("getSusieFit")) -#' @title Get Variant Names -#' @description Extract variant names. -#' @param x A \code{FineMappingResult} object. +#' @title Get Marginal Effects +#' @description Extract per-variant marginal univariate effects from a +#' fine-mapping entry or result. Returns a \code{data.frame} with +#' identity columns (\code{variant_id, chrom, pos, A1, A2}), context +#' (\code{N, MAF}), and the marginal effect columns +#' (\code{beta, se, z, p}). Populated uniformly across the +#' individual-level and RSS paths. +#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. +#' @param maxPval Optional numeric (length 1). When non-\code{NULL}, +#' filter rows where \code{p > maxPval}. Default \code{NULL} +#' (no filter — return all variants). #' @param ... Class-specific selection arguments. -#' @return Character vector of variant names. +#' @return A \code{data.frame}. #' @export -setGeneric("getVariantNames", - function(x, ...) standardGeneric("getVariantNames")) - -#' @title Get Top Loci -#' @description Extract the top-loci payload as either a -#' \code{data.frame} (default, the on-disk shape) or a \code{GRanges} -#' (parsed from the \code{variant_id} \code{chr:pos:A2:A1} encoding, -#' with the remaining columns carried into \code{mcols}). +setGeneric("getMarginalEffects", + function(x, maxPval = NULL, ...) standardGeneric("getMarginalEffects")) + +#' @title Get Top Loci (posterior view) +#' @description Extract the per-variant posterior fine-mapping payload +#' as either a \code{data.frame} (default) or a \code{GRanges}. +#' Returns identity columns (\code{variant_id, chrom, pos, A1, A2}), +#' context (\code{N, MAF}), the posterior effect columns +#' (\code{beta = posterior_mean, se = posterior_sd}), \code{pip}, +#' and credible-set membership columns (\code{cs_95}, etc.). +#' Rows are filtered by PIP by default — set \code{signalCutoff = 0} +#' to return every variant. #' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. #' @param type One of \code{"data.frame"} (default) or \code{"GRanges"}. +#' @param signalCutoff Numeric (length 1). Drop rows where +#' \code{pip <= signalCutoff}. Default \code{0.025}. Use +#' \code{signalCutoff = 0} to keep every variant. #' @param ... Class-specific selection arguments. #' @return A \code{data.frame} or a \code{GRanges}. #' @export setGeneric("getTopLoci", - function(x, type = c("data.frame", "GRanges"), ...) + function(x, type = c("data.frame", "GRanges"), + signalCutoff = 0.025, ...) standardGeneric("getTopLoci")) #' @title Get Credible Sets @@ -405,23 +424,6 @@ setGeneric("getTopLoci", #' @export setGeneric("getCs", function(x, ...) standardGeneric("getCs")) -#' @title Get Log Bayes Factors -#' @description Extract per-variant log Bayes factors from a fine-mapping result. -#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. -#' @param ... Class-specific selection arguments. -#' @return A data.frame with columns \code{variant_id} and one numeric column -#' per effect. -#' @export -setGeneric("getLbf", function(x, ...) standardGeneric("getLbf")) - -#' @title Get Per-Effect Fine-Mapping Summary -#' @description Extract per-effect information from a fine-mapping result. -#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. -#' @param ... Class-specific selection arguments. -#' @return A data.frame with one row per effect. -#' @export -setGeneric("getEffects", function(x, ...) standardGeneric("getEffects")) - # ============================================================================= # TwasWeights accessor generics # ============================================================================= @@ -468,16 +470,6 @@ setGeneric("getFits", function(x, ...) standardGeneric("getFits")) #' @export setGeneric("getMethodNames", function(x) standardGeneric("getMethodNames")) -#' @title Get Molecular ID (legacy) -#' @description Legacy accessor. The molecular identifier is now stored -#' as the \code{trait} column on \code{TwasWeights} and -#' \code{FineMappingResult} collections — use \code{getTraits(x)} -#' instead. -#' @param x The object. -#' @return Character vector. -#' @export -setGeneric("getMolecularId", function(x) standardGeneric("getMolecularId")) - #' @title Get Data Type #' @description Extract the data-type tag. #' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. @@ -504,11 +496,22 @@ setGeneric("getDataType", function(x, ...) standardGeneric("getDataType")) #' \code{chrom}, \code{pos}, \code{ref}, \code{alt}. #' @param outputPath File path for output. Extension determines format: #' \code{.vcf.gz} or \code{.vcf.bgz} for bgzipped VCF, -#' \code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. +#' \code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. When +#' \code{splitByContext} or \code{splitByTrait} is \code{TRUE} on a +#' \code{FineMappingResult} method call, the corresponding tag is +#' appended to the file stem (e.g. +#' \code{out.vcf} + context = \code{"brain"} → +#' \code{out.brain.vcf}). #' @param sampleName Name for the VCF sample column (default: trait name or #' method name from the S4 object). -#' @param ... Additional arguments passed to methods. -#' @return Invisible path to the written file. +#' @param ... Additional arguments passed to methods. For the +#' \code{FineMappingResult} method these include \code{splitByContext} +#' (logical, default \code{FALSE}) and \code{splitByTrait} (logical, +#' default \code{FALSE}): when either is \code{TRUE}, write one VCF +#' per row of the collection with the tuple value appended to the +#' output filename. +#' @return Invisible path (or character vector of paths when splitting) +#' to the written file(s). #' @export setGeneric("writeSumstatsVcf", function(x, outputPath, sampleName = NULL, ...) standardGeneric("writeSumstatsVcf")) diff --git a/R/colocPipeline.R b/R/colocPipeline.R index 795b7ee5..d0a8736e 100644 --- a/R/colocPipeline.R +++ b/R/colocPipeline.R @@ -340,7 +340,7 @@ colocPipeline <- function(qtlFineMappingResult, filterLbfCsConcentration, priorTol, label = "entry") { - fit <- getTrimmedFit(entry) + fit <- getSusieFit(entry) if (is.null(fit)) { warning(sprintf("colocPipeline: %s has no trimmedFit; skipping.", label)) diff --git a/R/ctwasPipeline.R b/R/ctwasPipeline.R index 673770c7..c536eafa 100644 --- a/R/ctwasPipeline.R +++ b/R/ctwasPipeline.R @@ -30,6 +30,13 @@ #' \code{\link{causalInferencePipeline}}). When supplied, the #' per-(trait, context) Z is used as the \code{z_gene} input to #' \code{ctwas_sumstats} so it is not recomputed. +#' @param fineMappingResult Optional \code{QtlFineMappingResult} or +#' \code{GwasFineMappingResult} carrying the per-variant PIP and +#' credible-set membership data used by the CS / PIP rescue filters +#' (\code{csMinCor} and \code{minPipCutoff}). When \code{NULL} +#' (default) the smart filters are no-ops; only the magnitude filter +#' (\code{twasWeightCutoff}) and the per-gene cap +#' (\code{maxNumVariants}, ordered by \code{|weight|}) apply. #' @param regionId Optional character (length 1) label for the LD #' block. Default \code{"block1"}. #' @param thin,niterPrefit,niter,L Pass-throughs to @@ -37,6 +44,24 @@ #' @param groupPriorVarStructure Pass-through (defaults #' \code{"shared_type"}). #' @param ncore Number of cores. Default \code{1}. +#' @param twasWeightCutoff Numeric (length 1). Drop variants with +#' \code{|weight| < twasWeightCutoff} from each gene's weight matrix +#' before ctwas sees it. Default \code{0} (no filter). +#' @param csMinCor Numeric (length 1). When \code{fineMappingResult} is +#' provided, variants belonging to any 95\% credible set with purity +#' (\code{min_abs_corr}) \code{>= csMinCor} are marked as must-keep +#' and survive the per-gene cap. Default \code{0.8}. Ignored without +#' a \code{fineMappingResult}. +#' @param minPipCutoff Numeric (length 1). When +#' \code{fineMappingResult} is provided, variants with PIP greater +#' than \code{minPipCutoff} are marked as must-keep and survive the +#' per-gene cap. Default \code{0} (no PIP rescue). Ignored without a +#' \code{fineMappingResult}. +#' @param maxNumVariants Numeric (length 1). Cap on per-gene variant +#' count. When the gene has more variants than this, keep all +#' must-keep variants and fill remaining slots by descending PIP +#' (when available) or descending \code{|weight|}. Default +#' \code{Inf} (no cap). #' @param ... Additional arguments forwarded to #' \code{ctwas::ctwas_sumstats}. #' @return Whatever \code{ctwas::ctwas_sumstats} returns (a list with @@ -45,6 +70,7 @@ ctwasPipeline <- function(gwasSumStats, twasWeights, twasZ = NULL, + fineMappingResult = NULL, regionId = "block1", thin = 0.1, niterPrefit = 3L, @@ -56,6 +82,10 @@ ctwasPipeline <- function(gwasSumStats, "shared_all", "independent"), ncore = 1L, + twasWeightCutoff = 0, + csMinCor = 0.8, + minPipCutoff = 0, + maxNumVariants = Inf, ...) { if (!requireNamespace("ctwas", quietly = TRUE)) { stop("Package 'ctwas' is required for ctwasPipeline. ", @@ -71,6 +101,10 @@ ctwasPipeline <- function(gwasSumStats, if (!is.null(twasZ) && !methods::is(twasZ, "GRanges")) stop("`twasZ` must be a GRanges (output of causalInferencePipeline) ", "or NULL.") + if (!is.null(fineMappingResult) && + !methods::is(fineMappingResult, "FineMappingResultBase")) + stop("`fineMappingResult` must be a FineMappingResultBase ", + "(QtlFineMappingResult or GwasFineMappingResult) or NULL.") if (length(regionId) != 1L || !nzchar(regionId)) stop("`regionId` must be a single non-empty character string.") groupPriorVarStructure <- match.arg(groupPriorVarStructure) @@ -103,7 +137,13 @@ ctwasPipeline <- function(gwasSumStats, stringsAsFactors = FALSE) snpMap <- list() snpMap[[regionId]] <- ldPanel$snpInfo - weightsList <- .ctwasBuildWeights(twasWeights, ldPanel) + weightsList <- .ctwasBuildWeights( + twasWeights, ldPanel, + fineMappingResult = fineMappingResult, + twasWeightCutoff = twasWeightCutoff, + csMinCor = csMinCor, + minPipCutoff = minPipCutoff, + maxNumVariants = maxNumVariants) zGene <- if (!is.null(twasZ)) .ctwasBuildZGene(twasZ) else NULL # --- Call the ctwas engine ------------------------------------------ @@ -226,7 +266,12 @@ ctwasPipeline <- function(gwasSumStats, # per-gene genotype re-extraction. Variants absent from the panel # are dropped from that gene's row set. # @noRd -.ctwasBuildWeights <- function(twasWeights, ldPanel) { +.ctwasBuildWeights <- function(twasWeights, ldPanel, + fineMappingResult = NULL, + twasWeightCutoff = 0, + csMinCor = 0.8, + minPipCutoff = 0, + maxNumVariants = Inf) { panelSnps <- rownames(ldPanel$R) panelInfo <- ldPanel$snpInfo out <- list() @@ -241,16 +286,34 @@ ctwasPipeline <- function(gwasSumStats, if (!any(keep)) next vids <- vids[keep]; w <- w[keep] - Rwgt <- ldPanel$R[vids, vids, drop = FALSE] - wgtMat <- matrix(w, ncol = 1L, dimnames = list(vids, "wgt")) - gStudy <- as.character(twasWeights$study)[[i]] gContext <- as.character(twasWeights$context)[[i]] gTrait <- as.character(twasWeights$trait)[[i]] gMethod <- as.character(twasWeights$method)[[i]] key <- sprintf("%s|%s|%s|%s", gStudy, gContext, gTrait, gMethod) - # Per-gene chromosome + BP span derived from the cached snpInfo. + # PIP / credible-set context for the smart filters (csMinCor + + # minPipCutoff). Only available when the caller passed the matching + # FineMappingResult. NULL means we fall back to weight-magnitude + # priority only. + finemapAux <- .ctwasGetFinemapAux(fineMappingResult, gStudy, gContext, + gTrait, gMethod) + + # Apply the four filters in order. + kept <- .ctwasFilterVariants( + vids = vids, w = w, finemapAux = finemapAux, + twasWeightCutoff = twasWeightCutoff, + csMinCor = csMinCor, + minPipCutoff = minPipCutoff, + maxNumVariants = maxNumVariants) + if (length(kept) < 1L) next + vids <- kept$vids; w <- kept$w + + Rwgt <- ldPanel$R[vids, vids, drop = FALSE] + wgtMat <- matrix(w, ncol = 1L, dimnames = list(vids, "wgt")) + + # Per-gene chromosome + BP span derived from the cached snpInfo + # AFTER filtering (so p0/p1 reflect the retained variants). rowIdx <- match(vids, panelInfo$id) gChrom <- as.integer(panelInfo$chrom[[rowIdx[1L]]]) gP0 <- min(as.integer(panelInfo$pos[rowIdx])) @@ -274,6 +337,110 @@ ctwasPipeline <- function(gwasSumStats, out } +# Look up the per-(study, context, trait, method) PIP vector and the +# 95% credible-set membership / purity for one gene from the supplied +# FineMappingResult. Returns NULL when no FineMappingResult was passed +# or no matching tuple exists. Output is a list with: +# pip : named numeric vector keyed by variant_id +# csMembers : list of character vectors (one per CS at 95% coverage) +# csPurity : numeric vector aligned with csMembers +# @noRd +.ctwasGetFinemapAux <- function(fineMappingResult, study, context, trait, + method) { + if (is.null(fineMappingResult)) return(NULL) + selectors <- list(study = study, method = method) + if ("context" %in% names(fineMappingResult)) selectors$context <- context + if ("trait" %in% names(fineMappingResult)) selectors$trait <- trait + entry <- tryCatch( + do.call(getFineMappingResult, + c(list(fineMappingResult), selectors)), + error = function(e) NULL) + if (is.null(entry)) return(NULL) + tl <- entry@topLoci + if (nrow(tl) == 0L) return(NULL) + pip <- if ("pip" %in% names(tl)) + setNames(as.numeric(tl$pip), as.character(tl$variant_id)) + else NULL + # Per-CS membership at 95% coverage. cs_95 stores `_` + # where idx == 0 means "not in any CS". + csMembers <- list(); csPurity <- numeric(0) + if ("cs_95" %in% names(tl)) { + csIdx <- suppressWarnings(as.integer(sub("^.*_", "", tl$cs_95))) + keepIdx <- !is.na(csIdx) & csIdx > 0L + for (k in sort(unique(csIdx[keepIdx]))) { + members <- as.character(tl$variant_id)[csIdx == k & keepIdx] + csMembers[[length(csMembers) + 1L]] <- members + # Pull the purity from cs_95_purity if present; same value + # broadcast to every row in the CS, so any row will do. + p <- if ("cs_95_purity" %in% names(tl)) + as.numeric(tl$cs_95_purity[which(csIdx == k & keepIdx)[1L]]) + else NA_real_ + csPurity <- c(csPurity, p) + } + } + list(pip = pip, csMembers = csMembers, csPurity = csPurity) +} + +# Apply the four trimCtwasVariants filters to one gene's (vids, w) +# pair. Returns a list(vids, w) with the retained subset, or NULL when +# no variants survive. Filter order: +# 1. Magnitude: drop variants with |w| < twasWeightCutoff +# 2. CS rescue: when fineMappingResult is provided, mark variants +# in any high-purity CS (purity >= csMinCor) as +# "must-keep" +# 3. PIP rescue: mark variants with PIP > minPipCutoff as must-keep +# 4. Cap: if surviving variants > maxNumVariants, keep all +# must-keep variants and fill remaining slots by +# descending PIP (or |w| when no PIP available) +# @noRd +.ctwasFilterVariants <- function(vids, w, finemapAux, + twasWeightCutoff, csMinCor, + minPipCutoff, maxNumVariants) { + if (length(vids) == 0L) return(NULL) + # Step 1: magnitude. + if (twasWeightCutoff > 0) { + magKeep <- !is.na(w) & abs(w) >= twasWeightCutoff + vids <- vids[magKeep]; w <- w[magKeep] + if (length(vids) == 0L) return(NULL) + } + # Steps 2-3: PIP / CS rescue (only when fineMappingResult was passed). + mustKeep <- character(0) + if (!is.null(finemapAux)) { + if (length(finemapAux$csMembers) > 0L && csMinCor > 0) { + for (k in seq_along(finemapAux$csMembers)) { + if (!is.na(finemapAux$csPurity[k]) && + finemapAux$csPurity[k] >= csMinCor) { + mustKeep <- union(mustKeep, + intersect(finemapAux$csMembers[[k]], vids)) + } + } + } + if (!is.null(finemapAux$pip) && minPipCutoff > 0) { + hits <- names(finemapAux$pip)[finemapAux$pip > minPipCutoff] + mustKeep <- union(mustKeep, intersect(hits, vids)) + } + } + # Step 4: cap. Always keep must-keep variants; fill the rest by + # descending PIP (when PIP available) or descending |w|. + if (length(vids) > maxNumVariants && is.finite(maxNumVariants)) { + priorities <- if (!is.null(finemapAux) && !is.null(finemapAux$pip)) { + unname(finemapAux$pip[vids]) # NAs for variants without PIP + } else NULL + if (is.null(priorities) || all(is.na(priorities))) { + priorities <- abs(w) + } else { + # Fall back to |w| for variants the PIP table doesn't know about. + priorities[is.na(priorities)] <- abs(w)[is.na(priorities)] + } + # Order: must-keep first, then the rest by descending priority. + isMust <- vids %in% mustKeep + ord <- order(!isMust, -priorities) + keepIdx <- ord[seq_len(min(maxNumVariants, length(vids)))] + vids <- vids[keepIdx]; w <- w[keepIdx] + } + list(vids = vids, w = w) +} + # Build z_gene data.frame from a TWAS-Z GRanges (output of # causalInferencePipeline). One row per (qtlStudy, context, trait, # method, gwasStudy) tuple. diff --git a/R/ctwasWrapper.R b/R/ctwasWrapper.R deleted file mode 100644 index 888c4319..00000000 --- a/R/ctwasWrapper.R +++ /dev/null @@ -1,78 +0,0 @@ -#' Function to select variants for ctwas weights input -#' @param regionData A list of list containing weights list and snp_info list data for multiple genes/events within a single LD block region. -#' @param exportTwasWeightDb A list of list of fine-mapping result data formatted by generate_twas_db function. -#' @param regionBlock A string for region information for region_weights, consisted of chromosome number, star and end position of LD block conneced with "_". -#' @export -trimCtwasVariants <- function(regionData, twasWeightCutoff = 1e-5, csMinCor = 0.8, - minPipCutoff = 0.1, maxNumVariants = 1000) { - # internal functions to select variants for a gene-context pair weight list - selectVariants <- function(groupName, studyName, regionData, csMinCor, minPipCutoff, maxNumVariants) { - weightList <- regionData$weights[[groupName]][[studyName]] - context <- weightList$context - selectedVariantsByContext <- c() - molecularId <- gsub("\\|.*", "", groupName) - - if ("csVariants" %in% names(regionData$susieWeightsIntermediate[[molecularId]][[context]]) & length(regionData$susieWeightsIntermediate[[molecularId]][[context]][["csVariants"]]) != 0) { - csMinAbsCor <- regionData$susieWeightsIntermediate[[molecularId]][[context]]$csPurity$minAbsCorr - for (L in seq_along(regionData$susieWeightsIntermediate[[molecularId]][[context]]$csVariants)) { - # we includ all variants in $cs_variant if min_abs_corr > csMinCor for the set - if (csMinAbsCor[L] >= csMinCor) { - csVariants <- regionData$susieWeightsIntermediate[[molecularId]][[context]]$csVariants[[L]] - selectedVariantsByContext <- csVariants[csVariants %in% rownames(weightList$wgt)] - } - } - } - contextPip <- regionData$susieWeightsIntermediate[[molecularId]][[context]]$pip - # variant IDs are in canonical chr-prefix format from allele_qc pipeline - highPipVariants <- names(contextPip[contextPip > minPipCutoff])[names(contextPip[contextPip > minPipCutoff]) %in% rownames(weightList$wgt)] - selectedVariantsByContext <- unique(c(selectedVariantsByContext, highPipVariants)) - - # prioritize SNPs based on PIP if maxNumVariants different from Inf - availableVariants <- intersect(rownames(weightList$wgt), names(contextPip)) - prioritized <- unique(c(selectedVariantsByContext, setdiff(availableVariants, selectedVariantsByContext))) - prioritized <- prioritized[order(-contextPip[prioritized])] - selectedVariantsByContext <- head(prioritized, maxNumVariants) - weightList$wgt <- weightList$wgt[selectedVariantsByContext, , drop = FALSE] - return(weightList) - } - mergeByStudy <- function(weights) { - weightList <- list() - for (group in names(weights)) { - for (study in names(weights[[group]])) { - weightList[[study]][[group]] <- weights[[group]][[study]] - } - } - return(weightList) - } - - weights <- setNames(lapply(names(regionData$weights), function(group) { - for (study in names(regionData$weights[[group]])) { - regionData$weights[[group]][[study]]$wgt <- regionData$weights[[group]][[study]]$wgt[abs(regionData$weights[[group]][[study]]$wgt[, 1]) >= twasWeightCutoff, , drop = FALSE] - if (nrow(regionData$weights[[group]][[study]]$wgt) < 1) { - regionData$weights[[group]][[study]] <- NULL - next - } - if (all(is.na(regionData$weights[[group]][[study]]$wgt[, 1])) || all(is.nan(regionData$weights[[group]][[study]]$wgt[, 1]))) { - regionData$weights[[group]][[study]] <- NULL - next - } - if (nrow(regionData$weights[[group]][[study]]$wgt) < maxNumVariants) { - regionData$weights[[group]][[study]]$nWgt <- nrow(regionData$weights[[group]][[study]]$wgt) - } else { - regionData$weights[[group]][[study]] <- selectVariants(group, study, regionData, csMinCor = csMinCor, minPipCutoff = minPipCutoff, maxNumVariants = maxNumVariants) - regionData$weights[[group]][[study]]$nWgt <- nrow(regionData$weights[[group]][[study]]$wgt) - } - regionData$weights[[group]] <- Filter(Negate(is.null), regionData$weights[[group]]) - contextRange <- as.integer(sapply(rownames(regionData$weights[[group]][[study]]$wgt), function(variant) strsplit(variant, "\\:")[[1]][2])) - if(twasWeightCutoff!=0 | csMinCor!=0 | minPipCutoff!=0 | maxNumVariants!=Inf){ - regionData$weights[[group]][[study]][["p0"]] = min(contextRange)# update min max position - regionData$weights[[group]][[study]][["p1"]] = max(contextRange) - } - } - return(regionData$weights[[group]]) - }), names(regionData$weights)) - weights <- Filter(Negate(is.null), weights) - weights <- mergeByStudy(weights) - return(weights) -} - diff --git a/R/fineMappingPipeline.R b/R/fineMappingPipeline.R index 8db29775..38d52a5d 100644 --- a/R/fineMappingPipeline.R +++ b/R/fineMappingPipeline.R @@ -144,6 +144,36 @@ #' pipeline performs SuSiE-RSS fine-mapping per (study, ldBlock). #' Required for the GwasSumStats method. #' @param verbose Verbosity (0 silent, 1 default). Default \code{1}. +#' @param phenotypeCovariatesToResidualize Character vector (or +#' \code{NULL}) of phenotype-covariate names to residualize against. +#' \code{NULL} (default) uses every available phenotype covariate. +#' Only meaningful when the input is a \code{QtlDataset} / +#' \code{MultiStudyQtlDataset} (ignored for sumstat inputs). +#' @param genotypeCovariatesToResidualize Character vector (or +#' \code{NULL}) of genotype-covariate column names to residualize +#' against. \code{NULL} uses every available genotype covariate. +#' @param residualizePhenotypeCovariates Logical (length 1). When +#' \code{TRUE} (default) residualize against the phenotype-side +#' covariates listed in \code{phenotypeCovariatesToResidualize}. Set +#' \code{FALSE} to disable phenotype-covariate residualization +#' entirely. The marginal univariate effects stored on each +#' \code{FineMappingEntry} obey the same residualization choice as +#' the SuSiE fit itself — they are computed against the same +#' residualized \code{X} / \code{Y}. +#' @param residualizeGenotypeCovariates Logical (length 1). When +#' \code{TRUE} (default) residualize against the genotype-side +#' covariates listed in \code{genotypeCovariatesToResidualize}. Set +#' \code{FALSE} to disable. +#' @param trim Logical (length 1). When \code{TRUE} (default) the +#' \code{susieFit} slot on each output \code{FineMappingEntry} carries +#' a trimmed view of the SuSiE fit (the minimal subset needed by +#' downstream pipelines). When \code{FALSE} the full untrimmed +#' \code{susie()} return is retained so accessors like +#' \code{getSusieFit()} and non-default-coverage queries through +#' \code{getCs()} can read the full posterior matrices +#' (\code{lbf_variable}, \code{mu}, \code{mu2}, \code{V}). The +#' per-variant \code{topLoci} table is always fully populated +#' regardless of \code{trim}. #' @param ... Reserved for future per-method arguments. #' #' @return A \code{\link{FineMappingResult}} collection keyed by @@ -449,10 +479,57 @@ setGeneric("fineMappingPipeline", # formatFinemappingOutput). Returns a bare FineMappingEntry payload, ready # to be inserted into a FineMappingResult. # @noRd +# Look up residualization flags from the enclosing setMethod frame +# and call `getResidualized{Phenotypes,Genotypes}` with them. Each +# fineMappingPipeline / twasWeightsPipeline method exposes the four +# convenience flags listed in `.resFlagNames`; the wrapper threads them +# through to the accessor so per-call-site changes aren't needed. +.resFlagNames <- c( + "phenotypeCovariatesToResidualize", + "genotypeCovariatesToResidualize", + "residualizePhenotypeCovariates", + "residualizeGenotypeCovariates") + +.resPickFlags <- function() { + out <- list() + # Walk up from the immediate caller; the public setMethod frame is + # where the user-facing args live. sys.frames()[[1]] is the global + # env so stop before that. + frames <- sys.frames() + for (i in seq_along(frames)) { + fr <- frames[[i]] + for (nm in .resFlagNames) { + if (!nm %in% names(out) && exists(nm, envir = fr, inherits = FALSE)) { + out[[nm]] <- get(nm, envir = fr, inherits = FALSE) + } + } + } + out +} + +.fmResidPheno <- function(x, ...) { + do.call(getResidualizedPhenotypes, + c(list(x = x, ...), .resPickFlags())) +} + +.fmResidGeno <- function(x, ...) { + do.call(getResidualizedGenotypes, + c(list(x = x, ...), .resPickFlags())) +} + .fmPostprocessOne <- function(fit, method, dataX, dataY, coverage, secondaryCoverage, signalCutoff, minAbsCorr, csInput = NULL, af = NULL, - region = NULL) { + region = NULL, trim = NULL) { + # Inherit `trim` from the calling method's frame if not passed in + # explicitly. The 10 internal call sites don't currently forward it + # (they predate the trim knob) so we look it up from the caller. This + # keeps the patch surface minimal: each public setMethod gains a + # `trim = TRUE` parameter and that value naturally reaches here. + if (is.null(trim)) { + trim <- tryCatch(get("trim", envir = parent.frame()), + error = function(e) TRUE) + } fits <- setNames(list(fit), method) post <- postprocessFinemappingFits( fits = fits, dataX = dataX, dataY = dataY, @@ -460,7 +537,7 @@ setGeneric("fineMappingPipeline", secondaryCoverage = secondaryCoverage, signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, region = region, - csInput = csInput) + csInput = csInput, trim = isTRUE(trim)) out <- formatFinemappingOutput(post, primaryMethod = method) # `formatFinemappingOutput` returns a list with $finemappingEntry as a # bare FineMappingEntry per the helper's contract. @@ -578,6 +655,11 @@ setMethod("fineMappingPipeline", "QtlDataset", fineMappingResult = NULL, naAction = c("drop", "impute"), verbose = 1, + trim = TRUE, + phenotypeCovariatesToResidualize = NULL, + genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, ...) { naAction <- match.arg(naAction) parsedJointSpec <- parseJointSpecification(jointSpecification, data) @@ -691,9 +773,9 @@ setMethod("fineMappingPipeline", "QtlDataset", } if (length(toRun) == 0L) next - Y <- getResidualizedPhenotypes( + Y <- .fmResidPheno( data, contexts = ctx, traitId = tid, naAction = naAction) - X <- getResidualizedGenotypes( + X <- .fmResidGeno( data, contexts = ctx, traitId = tid, cisWindow = cisWindow, samples = rownames(Y)) common <- intersect(rownames(X), rownames(Y)) @@ -783,12 +865,12 @@ setMethod("fineMappingPipeline", "QtlDataset", # context but getResidualizedPhenotypes already residualises. contextsHere <- job$contexts # Use the union of per-context cis-windows for variant extraction. - Yres <- getResidualizedPhenotypes( + Yres <- .fmResidPheno( data, contexts = contextsHere, traitId = tid, naAction = naAction) if (length(contextsHere) == 1L) Yres <- setNames(list(Yres), contextsHere) commonSamples <- Reduce(intersect, lapply(Yres, rownames)) - X <- getResidualizedGenotypes( + X <- .fmResidGeno( data, contexts = contextsHere, traitId = tid, cisWindow = cisWindow, samples = commonSamples) commonSamples <- intersect(commonSamples, rownames(X)) @@ -856,9 +938,9 @@ setMethod("fineMappingPipeline", "QtlDataset", next } - Y <- getResidualizedPhenotypes( + Y <- .fmResidPheno( data, contexts = ctx, traitId = traits, naAction = naAction) - X <- getResidualizedGenotypes( + X <- .fmResidGeno( data, contexts = ctx, traitId = traits, cisWindow = cisWindow, samples = rownames(Y)) common <- intersect(rownames(X), rownames(Y)) @@ -920,9 +1002,9 @@ setMethod("fineMappingPipeline", "QtlDataset", next } - Y <- getResidualizedPhenotypes( + Y <- .fmResidPheno( data, contexts = ctx, traitId = traits, naAction = naAction) - X <- getResidualizedGenotypes( + X <- .fmResidGeno( data, contexts = ctx, traitId = traits, cisWindow = cisWindow, samples = rownames(Y)) common <- intersect(rownames(X), rownames(Y)) @@ -1002,6 +1084,11 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset", fineMappingResult = NULL, naAction = c("drop", "impute"), verbose = 1, + trim = TRUE, + phenotypeCovariatesToResidualize = NULL, + genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, ...) { naAction <- match.arg(naAction) parsedJointSpec <- parseJointSpecification(jointSpecification, data) @@ -1121,6 +1208,7 @@ setMethod("fineMappingPipeline", "QtlSumStats", minAbsCorr = 0.8, fineMappingResult = NULL, verbose = 1, + trim = TRUE, ...) { .fmAssertQcd(data) parsedJointSpec <- parseJointSpecification(jointSpecification, data) @@ -1350,6 +1438,7 @@ setMethod("fineMappingPipeline", "GwasSumStats", minAbsCorr = 0.8, fineMappingResult = NULL, verbose = 1, + trim = TRUE, ...) { .fmAssertQcd(data) tokens <- .fmNormalizeMethods(methods) diff --git a/R/fineMappingWrappers.R b/R/fineMappingWrappers.R index d2913588..9fb8462b 100644 --- a/R/fineMappingWrappers.R +++ b/R/fineMappingWrappers.R @@ -258,7 +258,8 @@ postprocessFinemappingFits <- function(fits, dataX, dataY = NULL, priorEffTol = 1e-9, minAbsCorr = 0.8, medianAbsCorr = NULL, - csInput = NULL) { + csInput = NULL, + trim = TRUE) { fits <- fits[!vapply(fits, is.null, logical(1))] if (length(fits) == 0) stop("At least one fine-mapping fit must be supplied.") if (is.null(names(fits)) || any(names(fits) == "")) { @@ -278,7 +279,8 @@ postprocessFinemappingFits <- function(fits, dataX, dataY = NULL, region = region, priorEffTol = priorEffTol, minAbsCorr = minAbsCorr, medianAbsCorr = medianAbsCorr, - csInput = csInput + csInput = csInput, + trim = trim ) }) names(posts) <- names(fits) @@ -344,6 +346,7 @@ postprocessFinemappingFit.susiF <- function(fit, method = "fsusie", csInput = NU otherQuantities = NULL, region = NULL, priorEffTol = 1e-9, + trim = TRUE, minAbsCorr = 0.8, medianAbsCorr = NULL, csInput = c("X", "Xcorr", "fsusie")) { @@ -356,32 +359,39 @@ postprocessFinemappingFit.susiF <- function(fit, method = "fsusie", csInput = NU secondaryCoverage = secondaryCoverage, method = method, csInput = csInput, minAbsCorr = minAbsCorr, medianAbsCorr = medianAbsCorr ) - topLoci <- buildTopLoci( + # Always build the canonical unfiltered table; the FineMappingEntry + # slot stores it as-is so accessors can filter by PIP at query time. + # The wrapper-facing `top_loci` (in `res` below) preserves the legacy + # `signalCutoff` behaviour for non-S4 callers. + topLociFull <- buildTopLoci( fit, csTables, variantNames = variantNames, sumstats = sumstats, - af = af, method = method, signalCutoff = signalCutoff, + af = af, method = method, signalCutoff = 0, dataX = dataX, dataY = dataY, otherQuantities = otherQuantities, region = region ) - trimmed <- trimFinemappingFit(fit, effectIdx, method, csTables) - - # Project the rich `top_loci` table down to the slot shape required by - # the FineMappingEntry validity check / vcf_writer / getPip / getCs - # accessors. The wrapper-facing `top_loci` returned to callers is - # unchanged. - s4TopLoci <- .topLociForS4Slot(topLoci) - # Return a bare `FineMappingEntry` payload. The caller (a pipeline step - # or user code) is responsible for wrapping one or more entries into a - # `FineMappingResult` collection with the correct (study, context, - # trait, method) identity tags and the appropriate `ldSketch`. + # When `trim = TRUE` we store a minimal subset of the fit on the + # entry; when `trim = FALSE` we keep the full untrimmed susie return so + # downstream code can access `mu` / `mu2` / `lbf_variable` / `V` / etc. + storedFit <- if (isTRUE(trim)) { + trimFinemappingFit(fit, effectIdx, method, csTables) + } else { + fit + } + fmEntry <- FineMappingEntry( variantIds = variantNames, - trimmedFit = trimmed, - topLoci = s4TopLoci, - sumstats = sumstats) + susieFit = storedFit, + topLoci = topLociFull) + + topLociWrapper <- topLociFull + if (!is.null(signalCutoff) && signalCutoff > 0 && nrow(topLociWrapper) > 0L) { + keep <- !is.na(topLociWrapper$pip) & topLociWrapper$pip > signalCutoff + topLociWrapper <- topLociWrapper[keep, , drop = FALSE] + } res <- list( - top_loci = topLoci, + top_loci = topLociWrapper, finemappingEntry = fmEntry, method = method ) @@ -557,7 +567,7 @@ computeCsTable <- function(fit, dataX, coverage, csInput = c("X", "Xcorr", "fsus #' or an empty data frame if nothing is retained. #' @export buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL, - af = NULL, method, signalCutoff = 0.1, + af = NULL, method, signalCutoff = 0, dataX = NULL, dataY = NULL, otherQuantities = NULL, region = NULL) { @@ -565,7 +575,7 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL, length(method) != 1L || is.na(method) || !nzchar(method)) { stop("buildTopLoci: `method` is required (e.g. \"susie\", \"susieInf\").") } - if (length(csTables) == 0) return(.emptyTopLoci()) + if (length(variantNames) == 0L) return(.emptyTopLoci()) coverageValues <- attr(csTables, "coverage") if (is.null(coverageValues)) coverageValues <- rep(NA_real_, length(csTables)) @@ -588,60 +598,16 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL, postMean <- if (!is.null(mu) && all(dim(alpha) == dim(mu))) { colSums(alpha * mu) } else rep(NA_real_, length(variantNames)) - postSe <- if (!is.null(mu2) && all(dim(alpha) == dim(mu2))) { + postSd <- if (!is.null(mu2) && all(dim(alpha) == dim(mu2))) { sqrt(pmax(colSums(alpha * mu2) - postMean^2, 0)) } else rep(NA_real_, length(variantNames)) - # Collect CS-membership records (variant_idx, cs_idx, coverage) across all - # requested coverages. This is the only intermediate; the 22-column shape - # is projected from it below. - csRecords <- do.call(rbind, lapply(seq_along(csTables), function(i) { - ct <- csTables[[i]] - info <- getCsInfo(ct$sets$cs, getTopVariantsIdx(ct, signalCutoff)) - if (is.null(info) || nrow(info) == 0) return(NULL) - data.frame(variant_idx = as.integer(info$variant_idx), - cs_idx = as.integer(info$cs_idx), - coverage = as.numeric(coverageValues[[i]]), - stringsAsFactors = FALSE) - })) - if (is.null(csRecords) || nrow(csRecords) == 0) return(.emptyTopLoci()) - - # Key grid: one row per (variant_idx, cs_idx). Overlapping CS membership - # within this method is preserved as separate keys. - keyGrid <- unique(csRecords[, c("variant_idx", "cs_idx"), drop = FALSE]) - rownames(keyGrid) <- NULL - nKeys <- nrow(keyGrid) - keyStr <- paste(keyGrid$variant_idx, keyGrid$cs_idx, sep = ":") - - # For each requested coverage, which keys appear in csRecords at that - # coverage? Returns the key's cs_idx if present, else 0L. - idxAt <- function(cov) { - at <- csRecords[abs(csRecords$coverage - cov) < 1e-12, , drop = FALSE] - hits <- paste(at$variant_idx, at$cs_idx, sep = ":") - ifelse(keyStr %in% hits, keyGrid$cs_idx, 0L) - } - idx95 <- idxAt(0.95); idx70 <- idxAt(0.70); idx50 <- idxAt(0.50) - - # Per-coverage CS purity vectors (indexed by 1-based CS index). Only the - # 0.95-coverage purity is currently exported (as cs_95_purity); per-CS - # purities for the other coverages are kept here for downstream / future - # use even though they are not part of the 22-column output. - purityPerCov <- lapply(csTables, .csPurityVec) - cov95 <- which(abs(coverageValues - 0.95) < 1e-12) - purity95 <- if (length(cov95) > 0L) purityPerCov[[cov95[1]]] else numeric() - cs95Purity <- vapply(idx95, function(i) { - if (i <= 0L || i > length(purity95)) return(0) - v <- purity95[i]; if (is.na(v)) 0 else as.numeric(v) - }, numeric(1)) - - vIdx <- keyGrid$variant_idx - variantIdVec <- variantNames[vIdx] + # Parse variant IDs into chrom/pos/A1/A2 (one row per variant). parsed <- tryCatch( - suppressWarnings(parseVariantId(variantIdVec)), + suppressWarnings(parseVariantId(variantNames)), error = function(e) stop("buildTopLoci: parseVariantId failed: ", - conditionMessage(e)) - ) - if (is.null(parsed) || nrow(parsed) != length(variantIdVec)) { + conditionMessage(e))) + if (is.null(parsed) || nrow(parsed) != length(variantNames)) { stop("buildTopLoci: parseVariantId did not return one row per variant.") } invalid <- is.na(parsed$chrom) | is.na(parsed$pos) | @@ -649,39 +615,85 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL, is.na(parsed$A2) | !nzchar(parsed$A2) if (any(invalid)) { stop("buildTopLoci: parseVariantId produced invalid coordinates ", - "for variant_id: ", variantIdVec[which(invalid)[[1]]]) + "for variant_id: ", variantNames[which(invalid)[[1]]]) + } + + # Marginal univariate effects (β, SE, Z, p). Per-variant; populated + # uniformly across individual-level and RSS paths (the caller computes + # the underlying sumstats list). + nV <- length(variantNames) + marginalBeta <- if (!is.null(sumstats$betahat)) as.numeric(sumstats$betahat) + else rep(NA_real_, nV) + marginalSe <- if (!is.null(sumstats$sebetahat)) as.numeric(sumstats$sebetahat) + else rep(NA_real_, nV) + marginalZ <- if (!is.null(sumstats$z)) as.numeric(sumstats$z) + else if (any(!is.na(marginalBeta)) && any(!is.na(marginalSe))) + marginalBeta / marginalSe + else rep(NA_real_, nV) + marginalP <- if (!is.null(sumstats$p)) as.numeric(sumstats$p) + else if (any(!is.na(marginalZ))) 2 * stats::pnorm(-abs(marginalZ)) + else rep(NA_real_, nV) + + # Per-coverage CS membership: for each variant, which CS at each + # coverage level (cs_idx, or 0 if not in any). If a variant belongs + # to multiple CSs at a given coverage, the smallest cs_idx wins. + csIdxAtCoverage <- function(targetCov) { + out <- integer(nV) + hit <- which(abs(coverageValues - targetCov) < 1e-12) + if (length(hit) == 0L) return(out) + sets <- csTables[[hit[1L]]]$sets$cs + if (is.null(sets) || length(sets) == 0L) return(out) + for (csIdx in seq_along(sets)) { + vi <- as.integer(sets[[csIdx]]) + vi <- vi[vi >= 1L & vi <= nV & out[vi] == 0L] + out[vi] <- csIdx + } + out } - pick <- function(x) if (is.null(x)) rep(NA_real_, nKeys) else x[vIdx] + idx95 <- csIdxAtCoverage(0.95) + idx70 <- csIdxAtCoverage(0.70) + idx50 <- csIdxAtCoverage(0.50) + + # 0.95-coverage CS purity, per-variant (0 for non-CS variants). + purityPerCs <- { + h <- which(abs(coverageValues - 0.95) < 1e-12) + if (length(h) > 0L) .csPurityVec(csTables[[h[1L]]]) else numeric() + } + cs95Purity <- vapply(idx95, function(i) { + if (i <= 0L || i > length(purityPerCs)) return(0) + v <- purityPerCs[i]; if (is.na(v)) 0 else as.numeric(v) + }, numeric(1)) + methodTag <- .camelToSnakeMethod(method) out <- data.frame( - "#chr" = parsed$chrom, - start = as.integer(parsed$pos) - 1L, - end = as.integer(parsed$pos), - a1 = parsed$A1, - a2 = parsed$A2, - variant = variantIdVec, - gene = rep(fitGene, nKeys), - event = rep(fitEvent, nKeys), - n = rep(fitN, nKeys), - af = pick(af), - beta = pick(sumstats$betahat), - se = pick(sumstats$sebetahat), - pip = as.numeric(fit$pip[vIdx]), - posterior_effect_mean = postMean[vIdx], - posterior_effect_se = postSe[vIdx], - # cs_ values carry the snake_case method identifier prefix - # (`susie_rss_0`) for downstream-schema stability; the `method` column - # itself carries the camelCase pecotmr-internal identifier. - cs_95 = paste0(.camelToSnakeMethod(method), "_", idx95), - cs_70 = paste0(.camelToSnakeMethod(method), "_", idx70), - cs_50 = paste0(.camelToSnakeMethod(method), "_", idx50), - cs_95_purity = cs95Purity, - method = rep(method, nKeys), - grange_start = rep(grange[["start"]], nKeys), - grange_end = rep(grange[["end"]], nKeys), - stringsAsFactors = FALSE, - check.names = FALSE - ) + variant_id = as.character(variantNames), + chrom = parsed$chrom, + pos = as.integer(parsed$pos), + A1 = parsed$A1, + A2 = parsed$A2, + N = rep(fitN, nV), + MAF = if (is.null(af)) rep(NA_real_, nV) else as.numeric(af), + marginal_beta = marginalBeta, + marginal_se = marginalSe, + marginal_z = marginalZ, + marginal_p = marginalP, + pip = as.numeric(fit$pip), + posterior_mean = postMean, + posterior_sd = postSd, + cs_95 = paste0(methodTag, "_", idx95), + cs_70 = paste0(methodTag, "_", idx70), + cs_50 = paste0(methodTag, "_", idx50), + cs_95_purity = cs95Purity, + method = rep(method, nV), + gene = rep(fitGene, nV), + event = rep(fitEvent, nV), + grange_start = rep(grange[["start"]], nV), + grange_end = rep(grange[["end"]], nV), + stringsAsFactors = FALSE) + if (!is.null(signalCutoff) && signalCutoff > 0) { + keep <- !is.na(out$pip) & out$pip > signalCutoff + out <- out[keep, , drop = FALSE] + } rownames(out) <- NULL out } @@ -724,30 +736,30 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL, .emptyTopLoci <- function() { data.frame( - "#chr" = character(), - start = integer(), - end = integer(), - a1 = character(), - a2 = character(), - variant = character(), - gene = character(), - event = character(), - n = integer(), - af = numeric(), - beta = numeric(), - se = numeric(), - pip = numeric(), - posterior_effect_mean = numeric(), - posterior_effect_se = numeric(), - cs_95 = character(), - cs_70 = character(), - cs_50 = character(), - cs_95_purity = numeric(), - method = character(), - grange_start = integer(), - grange_end = integer(), - stringsAsFactors = FALSE, - check.names = FALSE + variant_id = character(), + chrom = character(), + pos = integer(), + A1 = character(), + A2 = character(), + N = numeric(), + MAF = numeric(), + marginal_beta = numeric(), + marginal_se = numeric(), + marginal_z = numeric(), + marginal_p = numeric(), + pip = numeric(), + posterior_mean = numeric(), + posterior_sd = numeric(), + cs_95 = character(), + cs_70 = character(), + cs_50 = character(), + cs_95_purity = numeric(), + method = character(), + gene = character(), + event = character(), + grange_start = integer(), + grange_end = integer(), + stringsAsFactors = FALSE ) } diff --git a/R/genotypeIo.R b/R/genotypeIo.R index b944610d..98f4ea9c 100644 --- a/R/genotypeIo.R +++ b/R/genotypeIo.R @@ -9,7 +9,7 @@ #' @importFrom S4Vectors DataFrame mcols mcols<- #' @importFrom tools file_ext #' @importFrom methods as -#' @include allGenerics.R +#' @include AllGenerics.R NULL # ============================================================================= diff --git a/R/gwasSumStats.R b/R/gwasSumStats.R index 4a3ed7c5..063a986a 100644 --- a/R/gwasSumStats.R +++ b/R/gwasSumStats.R @@ -8,7 +8,7 @@ # uniformly across rows. # ============================================================================= -#' @include SumStatsBase.R tupleSelectors.R +#' @include AllClasses.R tupleSelectors.R NULL setClass("GwasSumStats", @@ -59,7 +59,7 @@ setMethod("show", "GwasSumStats", function(object) { #' @importFrom GenomicRanges GRanges seqnames start #' @importFrom S4Vectors DataFrame mcols mcols<- SimpleList #' @importFrom IRanges IRanges -#' @include allGenerics.R +#' @include AllGenerics.R NULL # ============================================================================= diff --git a/R/h2Annotations.R b/R/h2Annotations.R index 8739c4a3..4d40456f 100644 --- a/R/h2Annotations.R +++ b/R/h2Annotations.R @@ -7,7 +7,7 @@ #' @importFrom GenomicRanges GRanges #' @importFrom IRanges findOverlaps #' @importFrom S4Vectors queryHits subjectHits -#' @include allGenerics.R +#' @include AllGenerics.R NULL # ============================================================================= diff --git a/R/h2EstimationWrappers.R b/R/h2EstimationWrappers.R index 39a40f62..46d96492 100644 --- a/R/h2EstimationWrappers.R +++ b/R/h2EstimationWrappers.R @@ -1556,7 +1556,7 @@ hdlUnivariate <- function(z, n, eigenRef, annotations = NULL, #' to bridge H2Estimate into the sldscWrapper.R postprocessing pipeline. #' @name pecotmr-h2-wrappers #' @keywords internal -#' @include allGenerics.R +#' @include AllGenerics.R #' @importFrom stats median NULL diff --git a/R/jointDispatchers.R b/R/jointDispatchers.R deleted file mode 100644 index 09cbb0d8..00000000 --- a/R/jointDispatchers.R +++ /dev/null @@ -1,1601 +0,0 @@ -# ============================================================================= -# Joint-specification dispatchers for fineMappingPipeline and -# twasWeightsPipeline. The two pipelines share the same axis taxonomy -# (cross-context, cross-trait, cross-study, composed multi-axis) and the -# same per-axis row-enumeration logic; they differ only in (a) which -# joint method tokens they accept, (b) which fitter they call, and (c) -# whether the result is a `QtlFineMappingResult` or a `TwasWeights` -# collection. -# -# The shared bits live at the top of this file as `.buildJoint*` / -# `.enumerate*` helpers. Each pipeline then keeps a per-axis worker that -# wires those helpers to its fit + result-row construction. -# ============================================================================= - - -# ============================================================================= -# Shared helpers -# ============================================================================= - -# Resolve which studies / contexts / traits participate in `spec` given -# `data`. Filters data scope through the spec's `scope` and any explicit -# pipeline-level `contexts` / `traitIds` arguments. Returns a list with -# `studies` (character), `contexts` (named list keyed by study), `traits` -# (named list keyed by study). -# @noRd -.fmResolveSpecScope <- function(spec, data, contexts = NULL, - traitIds = NULL) { - scope <- spec$scope - studies <- .spListStudies(data) - if (!is.null(scope$study)) - studies <- intersect(studies, scope$study) - - contextsOut <- list() - traitsOut <- list() - for (s in studies) { - ctxAvail <- .spListContexts(data, s) - if (!is.null(scope$context)) - ctxAvail <- intersect(ctxAvail, scope$context) - if (!is.null(contexts)) { - if (is.list(contexts) && s %in% names(contexts)) - ctxAvail <- intersect(ctxAvail, contexts[[s]]) - else if (is.character(contexts)) - ctxAvail <- intersect(ctxAvail, contexts) - } - contextsOut[[s]] <- ctxAvail - - trAvail <- .spListTraits(data, study = s) - if (!is.null(scope$trait)) - trAvail <- intersect(trAvail, scope$trait) - if (!is.null(traitIds)) { - if (is.character(traitIds)) - trAvail <- intersect(trAvail, traitIds) - else if (is.list(traitIds) && s %in% names(traitIds)) { - tv <- traitIds[[s]] - if (is.character(tv)) trAvail <- intersect(trAvail, tv) - } - } - traitsOut[[s]] <- trAvail - } - list(studies = studies, contexts = contextsOut, traits = traitsOut) -} - - -# Build a (variants × tupleRows) Z matrix from a QtlSumStats subset, -# requiring all rows to share an identical SNP order (the post- -# summaryStatsQc contract). Returns list(Z, nVec, variantIds). -# `errorLabel` is woven into the SNP-order error to identify the caller. -# @noRd -.buildJointSumstatZMatrix <- function(data, tupleRows, colLabels, errorLabel) { - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - firstDf <- getSumstatDf(data, - study = studyCol[[tupleRows[[1L]]]], - context = contextCol[[tupleRows[[1L]]]], - trait = traitCol[[tupleRows[[1L]]]], - require = c("SNP", "Z", "N")) - variantIds <- firstDf$variant_id - Z <- matrix(NA_real_, nrow = length(variantIds), ncol = length(tupleRows), - dimnames = list(variantIds, colLabels)) - nVec <- numeric(length(tupleRows)) - for (kk in seq_along(tupleRows)) { - i <- tupleRows[[kk]] - d <- getSumstatDf(data, - study = studyCol[[i]], - context = contextCol[[i]], - trait = traitCol[[i]], - require = c("SNP", "Z", "N")) - if (!identical(d$variant_id, variantIds)) - stop(sprintf("%s: every entry in a joint group must share an identical SNP order after summaryStatsQc().", - errorLabel)) - Z[, kk] <- d$z - nVec[kk] <- stats::median(d$N, na.rm = TRUE) - } - list(Z = Z, nVec = nVec, variantIds = variantIds) -} - - -# Build a multi-context Y matrix for a single (study, trait) from an -# individual-level QtlDataset. Returns list(X, Y, perTraitContexts) or -# NULL when fewer than 2 contexts carry `tid` or the sample / complete-Y -# subset is too small to fit. -# @noRd -.buildIndividualCrossContextXY <- function(data, tid, scopedContexts, - cisWindow, verbose, label) { - perTraitContexts <- character(0) - for (cx in scopedContexts) { - se <- getPhenotypes(data, contexts = cx) - if (tid %in% rownames(se)) - perTraitContexts <- c(perTraitContexts, cx) - } - if (length(perTraitContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "%s: trait '%s' present in %d scoped context(s); skipping.", - label, tid, length(perTraitContexts))) - return(NULL) - } - X <- getResidualizedGenotypes( - data, contexts = perTraitContexts, traitId = tid, - cisWindow = cisWindow) - Yres <- getResidualizedPhenotypes( - data, contexts = perTraitContexts, traitId = tid) - commonSamples <- Reduce(intersect, - c(list(rownames(X)), lapply(Yres, rownames))) - if (length(commonSamples) < 2L) { - if (verbose >= 1) - message(sprintf( - "%s: trait '%s' has too few shared samples across contexts; skipping.", - label, tid)) - return(NULL) - } - X <- X[commonSamples, , drop = FALSE] - Y <- do.call(cbind, lapply(perTraitContexts, function(cx) { - ym <- Yres[[cx]][commonSamples, , drop = FALSE] - colnames(ym) <- cx - ym - })) - keep <- stats::complete.cases(Y) - if (sum(keep) < 2L) { - if (verbose >= 1) - message(sprintf( - "%s: trait '%s' has too few complete-Y subjects; skipping.", - label, tid)) - return(NULL) - } - list(X = X[keep, , drop = FALSE], Y = Y[keep, , drop = FALSE], - perTraitContexts = perTraitContexts) -} - - -# Build a multi-trait Y matrix for a single (study, context) from an -# individual-level QtlDataset. Returns list(X, Y, traitsHere, se) or NULL -# when fewer than 2 traits live in the context or the sample / complete-Y -# subset is too small. -# @noRd -.buildIndividualCrossTraitXY <- function(data, cx, scopedTraits, - cisWindow, verbose, label, study) { - se <- getPhenotypes(data, contexts = cx) - traitsHere <- intersect(scopedTraits, rownames(se)) - if (length(traitsHere) < 2L) { - if (verbose >= 1) - message(sprintf( - "%s: context '%s' (study '%s') has %d scoped trait(s); skipping.", - label, cx, study, length(traitsHere))) - return(NULL) - } - X <- getResidualizedGenotypes( - data, contexts = cx, traitId = traitsHere, cisWindow = cisWindow) - Y <- getResidualizedPhenotypes( - data, contexts = cx, traitId = traitsHere) - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) return(NULL) - X <- X[common, , drop = FALSE]; Y <- Y[common, , drop = FALSE] - keep <- stats::complete.cases(Y) - if (sum(keep) < 2L) return(NULL) - list(X = X[keep, , drop = FALSE], Y = Y[keep, , drop = FALSE], - traitsHere = traitsHere, se = se) -} - - -# Build a composed-axes (context, trait) X/Y for individual-level -# QtlDataset. Returns list(X, Y, tuples) or NULL. -# @noRd -.buildComposedIndividualXY <- function(data, scope, study, cisWindow, - verbose, label) { - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - tuples <- list() - for (cx in scopedContexts) { - se <- getPhenotypes(data, contexts = cx) - for (tid in intersect(scopedTraits, rownames(se))) { - tuples[[length(tuples) + 1L]] <- list(context = cx, trait = tid) - } - } - if (length(tuples) < 2L) { - if (verbose >= 1) - message(sprintf( - "%s: study '%s' has %d (context, trait) tuple(s) in scope; skipping.", - label, study, length(tuples))) - return(NULL) - } - allContexts <- unique(vapply(tuples, function(t) t$context, character(1L))) - allTraits <- unique(vapply(tuples, function(t) t$trait, character(1L))) - X <- getResidualizedGenotypes( - data, contexts = allContexts, traitId = allTraits, cisWindow = cisWindow) - YresList <- getResidualizedPhenotypes( - data, contexts = allContexts, traitId = allTraits) - if (length(allContexts) == 1L) YresList <- setNames(list(YresList), allContexts) - commonSamples <- Reduce(intersect, - c(list(rownames(X)), lapply(YresList, rownames))) - if (length(commonSamples) < 2L) return(NULL) - X <- X[commonSamples, , drop = FALSE] - yCols <- list(); colLabels <- character(0) - for (t in tuples) { - ym <- YresList[[t$context]] - if (!(t$trait %in% colnames(ym))) next - col <- ym[commonSamples, t$trait, drop = FALSE] - colnames(col) <- paste(t$context, t$trait, sep = ":") - yCols[[length(yCols) + 1L]] <- col - colLabels <- c(colLabels, paste(t$context, t$trait, sep = ":")) - } - if (length(yCols) < 2L) return(NULL) - Y <- do.call(cbind, yCols) - keep <- stats::complete.cases(Y) - if (sum(keep) < 2L) return(NULL) - list(X = X[keep, , drop = FALSE], Y = Y[keep, , drop = FALSE], - tuples = tuples) -} - - -# Enumerate composed-axes row groups for a QtlSumStats input. Returns the -# list of (rowIdx) per group along with the per-axis identity columns -# needed to label the output row. Groups containing fewer than 2 rows -# are returned unfiltered; the caller decides whether to skip. -# @noRd -.enumerateComposedSumstatGroups <- function(spec, data, scope) { - axes <- spec$axes - complement <- setdiff(c("study", "context", "trait"), axes) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - inScope <- vapply(seq_len(nrow(data)), function(i) { - s <- studyCol[i]; cx <- contextCol[i]; tr <- traitCol[i] - (s %in% scope$studies) && - (cx %in% scope$contexts[[s]]) && - (tr %in% scope$traits[[s]]) - }, logical(1L)) - rowIdx <- which(inScope) - if (length(rowIdx) == 0L) return(NULL) - groupKey <- if (length(complement) == 0L) { - rep("__all__", length(rowIdx)) - } else { - do.call(paste, c(lapply(complement, function(a) - switch(a, study = studyCol[rowIdx], - context = contextCol[rowIdx], - trait = traitCol[rowIdx])), - sep = "||")) - } - groups <- split(rowIdx, groupKey) - list(groups = groups, axes = axes, - studyCol = studyCol, contextCol = contextCol, traitCol = traitCol) -} - - -# ============================================================================= -# Fine-mapping dispatchers -# ============================================================================= - -# Cross-context joint dispatcher for QtlDataset. For each trait in scope -# with >= 2 contexts in scope, fits mvsusieR::mvsusie on the multi-column -# Y matrix and emits ONE result row with context = "joint" and -# jointContexts = "ctx1;ctx2;...". -# @noRd -.fmDispatchCrossContextQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext: study '%s' has %d context(s) in scope; skipping cross-context fits.", - study, length(scopedContexts))) - return(NULL) - } - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (tid in scopedTraits) { - xy <- .buildIndividualCrossContextXY( - data, tid, scopedContexts, cisWindow, verbose, - label = "jointCrossContext") - if (is.null(xy)) next - - if (verbose >= 1) - message(sprintf( - "jointCrossContext: fitting mvsusie for (study='%s', trait='%s') across contexts (%s) ...", - study, tid, paste(xy$perTraitContexts, collapse = ", "))) - fit <- fitMvsusie( - X = xy$X, Y = xy$Y, - prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), - coverage = coverage) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(xy$perTraitContexts, collapse = ";")) - } - - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = NULL) -} - - -# Cross-trait joint dispatcher for QtlDataset. Per (study, context), fits -# mvsusieR::mvsusie or fsusieR::susiF (when in `methods`) jointly across -# the scoped traits within that context. Emits ONE result row per -# (study, context, method) with trait = "joint" and jointTraits populated. -# @noRd -.fmDispatchCrossTraitQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - jointMethods <- intersect(methods, c("mvsusie", "fsusie")) - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (cx in scopedContexts) { - xy <- .buildIndividualCrossTraitXY( - data, cx, scopedTraits, cisWindow, verbose, - label = "jointCrossTrait", study = study) - if (is.null(xy)) next - - for (mm in jointMethods) { - if (verbose >= 1) - message(sprintf( - "jointCrossTrait: fitting %s for (study='%s', context='%s') across traits (%s) ...", - mm, study, cx, paste(xy$traitsHere, collapse = ", "))) - if (mm == "mvsusie") { - fit <- fitMvsusie( - X = xy$X, Y = xy$Y, - prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), - coverage = coverage) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") - } else { - rr <- SummarizedExperiment::rowRanges(xy$se) - ord <- match(colnames(xy$Y), rownames(xy$se)) - rr <- rr[ord] - pos <- (GenomicRanges::start(rr) + GenomicRanges::end(rr)) / 2 - fit <- fitFsusie(X = xy$X, Y = xy$Y, pos = pos) - fit <- .setFinemappingFitClass(fit, "fsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "fsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "fsusie") - } - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, mm) - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(xy$traitsHere, collapse = ";")) - } - } - - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = NULL) -} - - -# Cross-context joint dispatcher for QtlSumStats input. Groups the -# selected sumstats rows by (study, trait); each group with >= 2 contexts -# in scope produces one mvsusie_rss fit and one result row with context = -# "joint" and jointContexts populated. -# @noRd -.fmDispatchCrossContextQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (QtlSumStats): study '%s' has %d context(s) in scope; skipping.", - s, length(scopedContexts))) - next - } - for (tid in scopedTraits) { - tupleRows <- which(studyCol == s & traitCol == tid & - contextCol %in% scopedContexts) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (QtlSumStats): (study='%s', trait='%s') has %d scoped context(s); skipping.", - s, tid, length(tupleRows))) - next - } - ctxNames <- contextCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, ctxNames, - errorLabel = "jointCrossContext (QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "jointCrossContext (QtlSumStats): fitting mvsusie_rss for (study='%s', trait='%s', %d contexts) ...", - s, tid, length(ctxNames))) - fit <- fitMvsusieRss( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), - coverage = coverage) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(ctxNames, collapse = ";")) - } - } - - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = ldSketch) -} - - -# Cross-trait joint dispatcher for QtlSumStats: groups by (study, context), -# requires >= 2 scoped traits per group. mvsusie_rss only -- no RSS fsusie. -# @noRd -.fmDispatchCrossTraitQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - if ("fsusie" %in% methods) - stop("jointCrossTrait (QtlSumStats): fsusie has no RSS variant; ", - "fsusie cannot participate in sumstats-based joint fits.") - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - for (cx in scopedContexts) { - tupleRows <- which(studyCol == s & contextCol == cx & - traitCol %in% scopedTraits) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (QtlSumStats): (study='%s', context='%s') has %d scoped trait(s); skipping.", - s, cx, length(tupleRows))) - next - } - trNames <- traitCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, trNames, - errorLabel = "jointCrossTrait (QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (QtlSumStats): fitting mvsusie_rss for (study='%s', context='%s', %d traits) ...", - s, cx, length(trNames))) - fit <- fitMvsusieRss( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), - coverage = coverage) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(trNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = ldSketch) -} - - -# Cross-study joint dispatcher for QtlSumStats: groups by (context, trait), -# requires >= 2 scoped studies per group. Sumstats-only by definition; -# individual-level studies are excluded with a message at the caller. -# mvsusie_rss only. -# @noRd -.fmDispatchCrossStudyQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - if ("fsusie" %in% methods) - stop("jointCrossStudy: fsusie cannot participate (no RSS variant).") - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - allCtxs <- unique(unlist(scope$contexts, use.names = FALSE)) - allTrs <- unique(unlist(scope$traits, use.names = FALSE)) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointStudies <- character(0) - - for (cx in allCtxs) { - for (tid in allTrs) { - tupleRows <- which(contextCol == cx & traitCol == tid & - studyCol %in% scope$studies) - keep <- logical(length(tupleRows)) - for (k in seq_along(tupleRows)) { - s <- studyCol[tupleRows[k]] - keep[k] <- (cx %in% scope$contexts[[s]]) && - (tid %in% scope$traits[[s]]) - } - tupleRows <- tupleRows[keep] - if (length(tupleRows) < 2L) { - if (length(tupleRows) > 0L && verbose >= 1) - message(sprintf( - "jointCrossStudy: (context='%s', trait='%s') has %d study(ies) in scope; skipping.", - cx, tid, length(tupleRows))) - next - } - stNames <- studyCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, stNames, - errorLabel = "jointCrossStudy") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "jointCrossStudy: fitting mvsusie_rss for (context='%s', trait='%s', %d studies) ...", - cx, tid, length(stNames))) - fit <- fitMvsusieRss( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), - coverage = coverage) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - rowStudy <- c(rowStudy, "joint") - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - paste(stNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = rowJointStudies, - ldSketch = ldSketch) -} - - -# Composed multi-axis joint dispatcher for QtlDataset. Only axes = -# c("context", "trait") is meaningful for a single-study individual- -# level input. Iterates per (study) (just one), enumerates the -# (context, trait) tuples in scope where the trait exists in the -# context, and fits one mvsusie joint over those tuples. -# @noRd -.fmDispatchComposedQtlDataset <- function(spec, data, methods, - contexts, traitIds, cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - axes <- spec$axes - if ("study" %in% axes) - stop("composed jointSpecification (QtlDataset): axes including 'study' require sumstats input.") - if (!setequal(axes, c("context", "trait"))) - stop(sprintf( - "composed jointSpecification (QtlDataset): unsupported axes (%s) for individual-level input.", - paste(axes, collapse = ", "))) - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, - verbose, - label = "composed joint (QtlDataset)") - if (is.null(xy)) return(NULL) - - if (verbose >= 1) - message(sprintf( - "composed joint (QtlDataset): fitting mvsusie for study='%s' over %d (context, trait) columns ...", - study, ncol(xy$Y))) - fit <- fitMvsusie( - X = xy$X, Y = xy$Y, - prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), - coverage = coverage) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") - QtlFineMappingResult( - study = study, - context = "joint", - trait = "joint", - method = "mvsusie", - entry = list(entry), - jointContexts = paste(vapply(xy$tuples, function(t) t$context, - character(1)), collapse = ";"), - jointTraits = paste(vapply(xy$tuples, function(t) t$trait, - character(1)), collapse = ";"), - ldSketch = NULL) -} - - -# Composed multi-axis joint dispatcher for QtlSumStats. Handles any -# `axes` subset of {study, context, trait} of size >= 2 by iterating the -# complement-axis Cartesian product and emitting one joint fit per -# iteration unit. -# @noRd -.fmDispatchComposedQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - if ("fsusie" %in% methods) - stop("composed jointSpecification (QtlSumStats): fsusie has no RSS variant.") - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - groupInfo <- .enumerateComposedSumstatGroups(spec, data, scope) - if (is.null(groupInfo)) return(NULL) - axes <- groupInfo$axes - studyCol <- groupInfo$studyCol - contextCol <- groupInfo$contextCol - traitCol <- groupInfo$traitCol - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list() - rowJointStudies <- character(0) - rowJointContexts <- character(0) - rowJointTraits <- character(0) - - for (gIdx in groupInfo$groups) { - if (length(gIdx) < 2L) { - if (verbose >= 1) - message(sprintf( - "composed joint (QtlSumStats): group has %d row(s); skipping.", - length(gIdx))) - next - } - colLabels <- vapply(gIdx, function(i) - paste(studyCol[i], contextCol[i], traitCol[i], sep = ":"), - character(1L)) - jz <- .buildJointSumstatZMatrix( - data, gIdx, colLabels, - errorLabel = "composed joint (QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "composed joint (QtlSumStats): fitting mvsusie_rss for axes=(%s), %d columns ...", - paste(axes, collapse = ", "), length(gIdx))) - fit <- fitMvsusieRss( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), - coverage = coverage) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - - repStudy <- if ("study" %in% axes) "joint" else studyCol[gIdx[[1L]]] - repContext <- if ("context" %in% axes) "joint" else contextCol[gIdx[[1L]]] - repTrait <- if ("trait" %in% axes) "joint" else traitCol[gIdx[[1L]]] - rowStudy <- c(rowStudy, repStudy) - rowContext <- c(rowContext, repContext) - rowTrait <- c(rowTrait, repTrait) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - if ("study" %in% axes) paste(studyCol[gIdx], collapse = ";") - else NA_character_) - rowJointContexts <- c(rowJointContexts, - if ("context" %in% axes) paste(contextCol[gIdx], collapse = ";") - else NA_character_) - rowJointTraits <- c(rowJointTraits, - if ("trait" %in% axes) paste(traitCol[gIdx], collapse = ";") - else NA_character_) - } - - if (length(rowStudy) == 0L) return(NULL) - jsArg <- if (all(is.na(rowJointStudies))) NULL else rowJointStudies - jcArg <- if (all(is.na(rowJointContexts))) NULL else rowJointContexts - jtArg <- if (all(is.na(rowJointTraits))) NULL else rowJointTraits - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = jsArg, - jointContexts = jcArg, - jointTraits = jtArg, - ldSketch = ldSketch) -} - - -# Top-level joint dispatcher for fineMappingPipeline(QtlDataset). -# @noRd -.fmDispatchJointSpecsQtlDataset <- function(parsedJointSpec, data, - methods, contexts, traitIds, - cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- .fmDispatchComposedQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, ldSketch = NULL) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = .fmDispatchCrossContextQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), - trait = .fmDispatchCrossTraitQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), - study = stop( - "fineMappingPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input. ", - "QtlDataset represents a single individual-level study; cross-study joints operate on the sumstats slot of MultiStudyQtlDataset or on QtlSumStats directly."), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, ldSketch = NULL) - } - out -} - - -# Top-level joint dispatcher for fineMappingPipeline(QtlSumStats). -# @noRd -.fmDispatchJointSpecsQtlSumStats <- function(parsedJointSpec, data, - methods, contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- .fmDispatchComposedQtlSumStats( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, ldSketch = getLdSketch(data)) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = .fmDispatchCrossContextQtlSumStats( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), - trait = .fmDispatchCrossTraitQtlSumStats( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), - study = .fmDispatchCrossStudyQtlSumStats( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, - ldSketch = getLdSketch(data)) - } - out -} - - -# Top-level joint dispatcher for fineMappingPipeline(MultiStudyQtlDataset). -# Routes per-component AND per-axis: a spec with `axes = "study"` only -# touches the sumStats slot; `axes = "context"` and `axes = "trait"` run -# on every component. -# @noRd -.fmDispatchJointSpecsMultiStudy <- function(parsedJointSpec, data, - methods, contexts, traitIds, - cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose) { - out <- NULL - embeddedLd <- NULL - qtlDatasets <- getQtlDatasets(data) - sumStats <- getSumStats(data) - - studyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, - function(s) "study" %in% s$axes, logical(1L))] - nonStudyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, - function(s) !("study" %in% s$axes), logical(1L))] - - if (length(studyAxisSpecs) > 0L && length(qtlDatasets) > 0L && verbose >= 1) { - message(sprintf( - "jointCrossStudy: excluding individual-level studies (%s) from cross-study fits (no LD sketch available); sumstats studies participate.", - paste(names(qtlDatasets), collapse = ", "))) - } - - if (length(nonStudyAxisSpecs) > 0L) { - for (qdName in names(qtlDatasets)) { - qd <- qtlDatasets[[qdName]] - qdRes <- .fmDispatchJointSpecsQtlDataset( - nonStudyAxisSpecs, qd, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) - if (!is.null(qdRes)) - out <- if (is.null(out)) qdRes - else .rbindFineMappingResult(out, qdRes, ldSketch = NULL) - } - } - - if (!is.null(sumStats)) { - ssRes <- .fmDispatchJointSpecsQtlSumStats( - parsedJointSpec, sumStats, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) - if (!is.null(ssRes)) { - embeddedLd <- getLdSketch(ssRes) - out <- if (is.null(out)) ssRes - else .rbindFineMappingResult(out, ssRes, - ldSketch = embeddedLd) - } - } else if (length(studyAxisSpecs) > 0L && verbose >= 1) { - message("jointCrossStudy: no sumStats slot present on this MultiStudyQtlDataset; cross-study specs produce no result.") - } - out -} - - -# ============================================================================= -# TWAS-weights dispatchers -# ============================================================================= - -# Cross-context joint dispatcher for QtlDataset (twas). Mr.mash across -# scoped contexts per (study, trait). -# @noRd -.twasDispatchCrossContextQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, dataType, - verbose) { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlDataset): study '%s' has %d context(s) in scope; skipping.", - study, length(scopedContexts))) - return(NULL) - } - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (tid in scopedTraits) { - xy <- .buildIndividualCrossContextXY( - data, tid, scopedContexts, cisWindow, verbose, - label = "jointCrossContext (twas QtlDataset)") - if (is.null(xy)) next - - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlDataset): fitting mr.mash for (study='%s', trait='%s') across contexts (%s) ...", - study, tid, paste(xy$perTraitContexts, collapse = ", "))) - weights <- mrmashWeights(X = xy$X, Y = xy$Y) - if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - standardized = FALSE, - dataType = dataType) - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(xy$perTraitContexts, collapse = ";")) - } - - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = NULL) -} - - -# Cross-trait joint dispatcher for QtlDataset (twas). Mr.mash per -# (study, context) across scoped traits. -# @noRd -.twasDispatchCrossTraitQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, dataType, - verbose) { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (cx in scopedContexts) { - xy <- .buildIndividualCrossTraitXY( - data, cx, scopedTraits, cisWindow, verbose, - label = "jointCrossTrait (twas)", study = study) - if (is.null(xy)) next - - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (twas): fitting mr.mash for (study='%s', context='%s') across traits (%s) ...", - study, cx, paste(xy$traitsHere, collapse = ", "))) - weights <- mrmashWeights(X = xy$X, Y = xy$Y) - if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - standardized = FALSE, - dataType = dataType) - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(xy$traitsHere, collapse = ";")) - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = NULL) -} - - -# Cross-context joint dispatcher for QtlSumStats (twas). Mr.mash.rss per -# (study, trait). -# @noRd -.twasDispatchCrossContextQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose) { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlSumStats): study '%s' has %d context(s) in scope; skipping.", - s, length(scopedContexts))) - next - } - for (tid in scopedTraits) { - tupleRows <- which(studyCol == s & traitCol == tid & - contextCol %in% scopedContexts) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlSumStats): (study='%s', trait='%s') has %d scoped context(s); skipping.", - s, tid, length(tupleRows))) - next - } - ctxNames <- contextCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, ctxNames, - errorLabel = "jointCrossContext (twas QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlSumStats): fitting mr.mash.rss for (study='%s', trait='%s', %d contexts) ...", - s, tid, length(ctxNames))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - standardized = TRUE, - dataType = dataType) - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(ctxNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = ldSketch) -} - - -# Cross-trait joint dispatcher for QtlSumStats (twas). Mr.mash.rss per -# (study, context). -# @noRd -.twasDispatchCrossTraitQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose) { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - for (cx in scopedContexts) { - tupleRows <- which(studyCol == s & contextCol == cx & - traitCol %in% scopedTraits) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (twas QtlSumStats): (study='%s', context='%s') has %d scoped trait(s); skipping.", - s, cx, length(tupleRows))) - next - } - trNames <- traitCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, trNames, - errorLabel = "jointCrossTrait (twas QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (twas QtlSumStats): fitting mr.mash.rss for (study='%s', context='%s', %d traits) ...", - s, cx, length(trNames))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - standardized = TRUE, - dataType = dataType) - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(trNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = ldSketch) -} - - -# Cross-study joint dispatcher for QtlSumStats (twas). Mr.mash.rss per -# (context, trait). -# @noRd -.twasDispatchCrossStudyQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose) { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - allCtxs <- unique(unlist(scope$contexts, use.names = FALSE)) - allTrs <- unique(unlist(scope$traits, use.names = FALSE)) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointStudies <- character(0) - - for (cx in allCtxs) { - for (tid in allTrs) { - tupleRows <- which(contextCol == cx & traitCol == tid & - studyCol %in% scope$studies) - keep <- logical(length(tupleRows)) - for (k in seq_along(tupleRows)) { - s <- studyCol[tupleRows[k]] - keep[k] <- (cx %in% scope$contexts[[s]]) && - (tid %in% scope$traits[[s]]) - } - tupleRows <- tupleRows[keep] - if (length(tupleRows) < 2L) { - if (length(tupleRows) > 0L && verbose >= 1) - message(sprintf( - "jointCrossStudy (twas): (context='%s', trait='%s') has %d study(ies) in scope; skipping.", - cx, tid, length(tupleRows))) - next - } - stNames <- studyCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, stNames, - errorLabel = "jointCrossStudy (twas)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "jointCrossStudy (twas): fitting mr.mash.rss for (context='%s', trait='%s', %d studies) ...", - cx, tid, length(stNames))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - standardized = TRUE, - dataType = dataType) - rowStudy <- c(rowStudy, "joint") - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - paste(stNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = rowJointStudies, - ldSketch = ldSketch) -} - - -# Composed multi-axis joint dispatcher for QtlSumStats (twas). -# @noRd -.twasDispatchComposedQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose) { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - groupInfo <- .enumerateComposedSumstatGroups(spec, data, scope) - if (is.null(groupInfo)) return(NULL) - axes <- groupInfo$axes - studyCol <- groupInfo$studyCol - contextCol <- groupInfo$contextCol - traitCol <- groupInfo$traitCol - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list() - rowJointStudies <- character(0) - rowJointContexts <- character(0) - rowJointTraits <- character(0) - - for (gIdx in groupInfo$groups) { - if (length(gIdx) < 2L) { - if (verbose >= 1) - message(sprintf( - "composed joint (twas QtlSumStats): group has %d row(s); skipping.", - length(gIdx))) - next - } - colLabels <- vapply(gIdx, function(i) - paste(studyCol[i], contextCol[i], traitCol[i], sep = ":"), - character(1L)) - jz <- .buildJointSumstatZMatrix( - data, gIdx, colLabels, - errorLabel = "composed joint (twas QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "composed joint (twas QtlSumStats): fitting mr.mash.rss for axes=(%s), %d columns ...", - paste(axes, collapse = ", "), length(gIdx))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - standardized = TRUE, - dataType = dataType) - - repStudy <- if ("study" %in% axes) "joint" else studyCol[gIdx[[1L]]] - repContext <- if ("context" %in% axes) "joint" else contextCol[gIdx[[1L]]] - repTrait <- if ("trait" %in% axes) "joint" else traitCol[gIdx[[1L]]] - rowStudy <- c(rowStudy, repStudy) - rowContext <- c(rowContext, repContext) - rowTrait <- c(rowTrait, repTrait) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - if ("study" %in% axes) paste(studyCol[gIdx], collapse = ";") - else NA_character_) - rowJointContexts <- c(rowJointContexts, - if ("context" %in% axes) paste(contextCol[gIdx], collapse = ";") - else NA_character_) - rowJointTraits <- c(rowJointTraits, - if ("trait" %in% axes) paste(traitCol[gIdx], collapse = ";") - else NA_character_) - } - if (length(rowStudy) == 0L) return(NULL) - jsArg <- if (all(is.na(rowJointStudies))) NULL else rowJointStudies - jcArg <- if (all(is.na(rowJointContexts))) NULL else rowJointContexts - jtArg <- if (all(is.na(rowJointTraits))) NULL else rowJointTraits - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = jsArg, - jointContexts = jcArg, - jointTraits = jtArg, - ldSketch = ldSketch) -} - - -# Composed multi-axis joint dispatcher for QtlDataset (twas). axes = -# c("context", "trait") only. -# @noRd -.twasDispatchComposedQtlDataset <- function(spec, data, methods, - contexts, traitIds, cisWindow, - dataType, verbose) { - axes <- spec$axes - if ("study" %in% axes) - stop("composed jointSpecification (twas QtlDataset): axes including 'study' require sumstats input.") - if (!setequal(axes, c("context", "trait"))) - stop(sprintf("composed jointSpecification (twas QtlDataset): unsupported axes (%s) for individual-level input.", - paste(axes, collapse = ", "))) - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, - verbose, - label = "composed joint (twas QtlDataset)") - if (is.null(xy)) return(NULL) - - if (verbose >= 1) - message(sprintf( - "composed joint (twas QtlDataset): fitting mr.mash for study='%s' over %d (context, trait) columns ...", - study, ncol(xy$Y))) - weights <- mrmashWeights(X = xy$X, Y = xy$Y) - if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - standardized = FALSE, - dataType = dataType) - TwasWeights( - study = study, - context = "joint", - trait = "joint", - method = "mrmash", - entry = list(entry), - jointContexts = paste(vapply(xy$tuples, function(t) t$context, - character(1)), collapse = ";"), - jointTraits = paste(vapply(xy$tuples, function(t) t$trait, - character(1)), collapse = ";"), - ldSketch = NULL) -} - - -# Top-level joint dispatcher for twasWeightsPipeline(QtlDataset). -# @noRd -.twasDispatchJointSpecsQtlDataset <- function(parsedJointSpec, data, - methods, contexts, traitIds, - cisWindow, dataType, - verbose) { - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- .twasDispatchComposedQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = NULL) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = .twasDispatchCrossContextQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose), - trait = .twasDispatchCrossTraitQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose), - study = stop( - "twasWeightsPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input."), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = NULL) - } - out -} - - -# Top-level joint dispatcher for twasWeightsPipeline(QtlSumStats). -# @noRd -.twasDispatchJointSpecsQtlSumStats <- function(parsedJointSpec, data, - methods, contexts, traitIds, - dataType, verbose) { - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- .twasDispatchComposedQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = getLdSketch(data)) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = .twasDispatchCrossContextQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose), - trait = .twasDispatchCrossTraitQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose), - study = .twasDispatchCrossStudyQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = getLdSketch(data)) - } - out -} - - -# Top-level joint dispatcher for twasWeightsPipeline(MultiStudyQtlDataset). -# @noRd -.twasDispatchJointSpecsMultiStudy <- function(parsedJointSpec, data, - methods, contexts, traitIds, - cisWindow, dataType, verbose) { - out <- NULL - embeddedLd <- NULL - qtlDatasets <- getQtlDatasets(data) - sumStats <- getSumStats(data) - - studyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, - function(s) "study" %in% s$axes, logical(1L))] - nonStudyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, - function(s) !("study" %in% s$axes), logical(1L))] - - if (length(studyAxisSpecs) > 0L && length(qtlDatasets) > 0L && verbose >= 1) { - message(sprintf( - "jointCrossStudy (twas): excluding individual-level studies (%s) from cross-study fits; sumstats studies participate.", - paste(names(qtlDatasets), collapse = ", "))) - } - - if (length(nonStudyAxisSpecs) > 0L) { - for (qdName in names(qtlDatasets)) { - qd <- qtlDatasets[[qdName]] - qdRes <- .twasDispatchJointSpecsQtlDataset( - nonStudyAxisSpecs, qd, methods, contexts, traitIds, cisWindow, - dataType, verbose) - if (!is.null(qdRes)) - out <- if (is.null(out)) qdRes - else .rbindTwasWeights(out, qdRes, ldSketch = NULL) - } - } - - if (!is.null(sumStats)) { - ssRes <- .twasDispatchJointSpecsQtlSumStats( - parsedJointSpec, sumStats, methods, contexts, traitIds, dataType, - verbose) - if (!is.null(ssRes)) { - embeddedLd <- getLdSketch(ssRes) - out <- if (is.null(out)) ssRes - else .rbindTwasWeights(out, ssRes, ldSketch = embeddedLd) - } - } else if (length(studyAxisSpecs) > 0L && verbose >= 1) { - message("jointCrossStudy (twas): no sumStats slot present on this MultiStudyQtlDataset; cross-study specs produce no result.") - } - out -} diff --git a/R/jointSpecification.R b/R/jointSpecification.R index 3436db43..3177c7f4 100644 --- a/R/jointSpecification.R +++ b/R/jointSpecification.R @@ -541,3 +541,1595 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { } invisible(NULL) } + +# ============================================================================= +# Joint-specification dispatchers (merged from former R/jointDispatchers.R) +# ============================================================================= + + +# ============================================================================= +# Shared helpers +# ============================================================================= + +# Resolve which studies / contexts / traits participate in `spec` given +# `data`. Filters data scope through the spec's `scope` and any explicit +# pipeline-level `contexts` / `traitIds` arguments. Returns a list with +# `studies` (character), `contexts` (named list keyed by study), `traits` +# (named list keyed by study). +# @noRd +.fmResolveSpecScope <- function(spec, data, contexts = NULL, + traitIds = NULL) { + scope <- spec$scope + studies <- .spListStudies(data) + if (!is.null(scope$study)) + studies <- intersect(studies, scope$study) + + contextsOut <- list() + traitsOut <- list() + for (s in studies) { + ctxAvail <- .spListContexts(data, s) + if (!is.null(scope$context)) + ctxAvail <- intersect(ctxAvail, scope$context) + if (!is.null(contexts)) { + if (is.list(contexts) && s %in% names(contexts)) + ctxAvail <- intersect(ctxAvail, contexts[[s]]) + else if (is.character(contexts)) + ctxAvail <- intersect(ctxAvail, contexts) + } + contextsOut[[s]] <- ctxAvail + + trAvail <- .spListTraits(data, study = s) + if (!is.null(scope$trait)) + trAvail <- intersect(trAvail, scope$trait) + if (!is.null(traitIds)) { + if (is.character(traitIds)) + trAvail <- intersect(trAvail, traitIds) + else if (is.list(traitIds) && s %in% names(traitIds)) { + tv <- traitIds[[s]] + if (is.character(tv)) trAvail <- intersect(trAvail, tv) + } + } + traitsOut[[s]] <- trAvail + } + list(studies = studies, contexts = contextsOut, traits = traitsOut) +} + + +# Build a (variants × tupleRows) Z matrix from a QtlSumStats subset, +# requiring all rows to share an identical SNP order (the post- +# summaryStatsQc contract). Returns list(Z, nVec, variantIds). +# `errorLabel` is woven into the SNP-order error to identify the caller. +# @noRd +.buildJointSumstatZMatrix <- function(data, tupleRows, colLabels, errorLabel) { + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + firstDf <- getSumstatDf(data, + study = studyCol[[tupleRows[[1L]]]], + context = contextCol[[tupleRows[[1L]]]], + trait = traitCol[[tupleRows[[1L]]]], + require = c("SNP", "Z", "N")) + variantIds <- firstDf$variant_id + Z <- matrix(NA_real_, nrow = length(variantIds), ncol = length(tupleRows), + dimnames = list(variantIds, colLabels)) + nVec <- numeric(length(tupleRows)) + for (kk in seq_along(tupleRows)) { + i <- tupleRows[[kk]] + d <- getSumstatDf(data, + study = studyCol[[i]], + context = contextCol[[i]], + trait = traitCol[[i]], + require = c("SNP", "Z", "N")) + if (!identical(d$variant_id, variantIds)) + stop(sprintf("%s: every entry in a joint group must share an identical SNP order after summaryStatsQc().", + errorLabel)) + Z[, kk] <- d$z + nVec[kk] <- stats::median(d$N, na.rm = TRUE) + } + list(Z = Z, nVec = nVec, variantIds = variantIds) +} + + +# Build a multi-context Y matrix for a single (study, trait) from an +# individual-level QtlDataset. Returns list(X, Y, perTraitContexts) or +# NULL when fewer than 2 contexts carry `tid` or the sample / complete-Y +# subset is too small to fit. +# @noRd +.buildIndividualCrossContextXY <- function(data, tid, scopedContexts, + cisWindow, verbose, label) { + perTraitContexts <- character(0) + for (cx in scopedContexts) { + se <- getPhenotypes(data, contexts = cx) + if (tid %in% rownames(se)) + perTraitContexts <- c(perTraitContexts, cx) + } + if (length(perTraitContexts) < 2L) { + if (verbose >= 1) + message(sprintf( + "%s: trait '%s' present in %d scoped context(s); skipping.", + label, tid, length(perTraitContexts))) + return(NULL) + } + X <- .fmResidGeno( + data, contexts = perTraitContexts, traitId = tid, + cisWindow = cisWindow) + Yres <- .fmResidPheno( + data, contexts = perTraitContexts, traitId = tid) + commonSamples <- Reduce(intersect, + c(list(rownames(X)), lapply(Yres, rownames))) + if (length(commonSamples) < 2L) { + if (verbose >= 1) + message(sprintf( + "%s: trait '%s' has too few shared samples across contexts; skipping.", + label, tid)) + return(NULL) + } + X <- X[commonSamples, , drop = FALSE] + Y <- do.call(cbind, lapply(perTraitContexts, function(cx) { + ym <- Yres[[cx]][commonSamples, , drop = FALSE] + colnames(ym) <- cx + ym + })) + keep <- stats::complete.cases(Y) + if (sum(keep) < 2L) { + if (verbose >= 1) + message(sprintf( + "%s: trait '%s' has too few complete-Y subjects; skipping.", + label, tid)) + return(NULL) + } + list(X = X[keep, , drop = FALSE], Y = Y[keep, , drop = FALSE], + perTraitContexts = perTraitContexts) +} + + +# Build a multi-trait Y matrix for a single (study, context) from an +# individual-level QtlDataset. Returns list(X, Y, traitsHere, se) or NULL +# when fewer than 2 traits live in the context or the sample / complete-Y +# subset is too small. +# @noRd +.buildIndividualCrossTraitXY <- function(data, cx, scopedTraits, + cisWindow, verbose, label, study) { + se <- getPhenotypes(data, contexts = cx) + traitsHere <- intersect(scopedTraits, rownames(se)) + if (length(traitsHere) < 2L) { + if (verbose >= 1) + message(sprintf( + "%s: context '%s' (study '%s') has %d scoped trait(s); skipping.", + label, cx, study, length(traitsHere))) + return(NULL) + } + X <- .fmResidGeno( + data, contexts = cx, traitId = traitsHere, cisWindow = cisWindow) + Y <- .fmResidPheno( + data, contexts = cx, traitId = traitsHere) + common <- intersect(rownames(X), rownames(Y)) + if (length(common) < 2L) return(NULL) + X <- X[common, , drop = FALSE]; Y <- Y[common, , drop = FALSE] + keep <- stats::complete.cases(Y) + if (sum(keep) < 2L) return(NULL) + list(X = X[keep, , drop = FALSE], Y = Y[keep, , drop = FALSE], + traitsHere = traitsHere, se = se) +} + + +# Build a composed-axes (context, trait) X/Y for individual-level +# QtlDataset. Returns list(X, Y, tuples) or NULL. +# @noRd +.buildComposedIndividualXY <- function(data, scope, study, cisWindow, + verbose, label) { + scopedContexts <- scope$contexts[[study]] + scopedTraits <- scope$traits[[study]] + tuples <- list() + for (cx in scopedContexts) { + se <- getPhenotypes(data, contexts = cx) + for (tid in intersect(scopedTraits, rownames(se))) { + tuples[[length(tuples) + 1L]] <- list(context = cx, trait = tid) + } + } + if (length(tuples) < 2L) { + if (verbose >= 1) + message(sprintf( + "%s: study '%s' has %d (context, trait) tuple(s) in scope; skipping.", + label, study, length(tuples))) + return(NULL) + } + allContexts <- unique(vapply(tuples, function(t) t$context, character(1L))) + allTraits <- unique(vapply(tuples, function(t) t$trait, character(1L))) + X <- .fmResidGeno( + data, contexts = allContexts, traitId = allTraits, cisWindow = cisWindow) + YresList <- .fmResidPheno( + data, contexts = allContexts, traitId = allTraits) + if (length(allContexts) == 1L) YresList <- setNames(list(YresList), allContexts) + commonSamples <- Reduce(intersect, + c(list(rownames(X)), lapply(YresList, rownames))) + if (length(commonSamples) < 2L) return(NULL) + X <- X[commonSamples, , drop = FALSE] + yCols <- list(); colLabels <- character(0) + for (t in tuples) { + ym <- YresList[[t$context]] + if (!(t$trait %in% colnames(ym))) next + col <- ym[commonSamples, t$trait, drop = FALSE] + colnames(col) <- paste(t$context, t$trait, sep = ":") + yCols[[length(yCols) + 1L]] <- col + colLabels <- c(colLabels, paste(t$context, t$trait, sep = ":")) + } + if (length(yCols) < 2L) return(NULL) + Y <- do.call(cbind, yCols) + keep <- stats::complete.cases(Y) + if (sum(keep) < 2L) return(NULL) + list(X = X[keep, , drop = FALSE], Y = Y[keep, , drop = FALSE], + tuples = tuples) +} + + +# Enumerate composed-axes row groups for a QtlSumStats input. Returns the +# list of (rowIdx) per group along with the per-axis identity columns +# needed to label the output row. Groups containing fewer than 2 rows +# are returned unfiltered; the caller decides whether to skip. +# @noRd +.enumerateComposedSumstatGroups <- function(spec, data, scope) { + axes <- spec$axes + complement <- setdiff(c("study", "context", "trait"), axes) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + inScope <- vapply(seq_len(nrow(data)), function(i) { + s <- studyCol[i]; cx <- contextCol[i]; tr <- traitCol[i] + (s %in% scope$studies) && + (cx %in% scope$contexts[[s]]) && + (tr %in% scope$traits[[s]]) + }, logical(1L)) + rowIdx <- which(inScope) + if (length(rowIdx) == 0L) return(NULL) + groupKey <- if (length(complement) == 0L) { + rep("__all__", length(rowIdx)) + } else { + do.call(paste, c(lapply(complement, function(a) + switch(a, study = studyCol[rowIdx], + context = contextCol[rowIdx], + trait = traitCol[rowIdx])), + sep = "||")) + } + groups <- split(rowIdx, groupKey) + list(groups = groups, axes = axes, + studyCol = studyCol, contextCol = contextCol, traitCol = traitCol) +} + + +# ============================================================================= +# Fine-mapping dispatchers +# ============================================================================= + +# Cross-context joint dispatcher for QtlDataset. For each trait in scope +# with >= 2 contexts in scope, fits mvsusieR::mvsusie on the multi-column +# Y matrix and emits ONE result row with context = "joint" and +# jointContexts = "ctx1;ctx2;...". +# @noRd +.fmDispatchCrossContextQtlDataset <- function(spec, data, methods, + contexts, traitIds, + cisWindow, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + jointMethods <- intersect(methods, "mvsusie") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + study <- getStudy(data) + if (!(study %in% scope$studies)) return(NULL) + scopedContexts <- scope$contexts[[study]] + scopedTraits <- scope$traits[[study]] + if (length(scopedContexts) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossContext: study '%s' has %d context(s) in scope; skipping cross-context fits.", + study, length(scopedContexts))) + return(NULL) + } + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointContexts <- character(0) + + for (tid in scopedTraits) { + xy <- .buildIndividualCrossContextXY( + data, tid, scopedContexts, cisWindow, verbose, + label = "jointCrossContext") + if (is.null(xy)) next + + if (verbose >= 1) + message(sprintf( + "jointCrossContext: fitting mvsusie for (study='%s', trait='%s') across contexts (%s) ...", + study, tid, paste(xy$perTraitContexts, collapse = ", "))) + fit <- fitMvsusie( + X = xy$X, Y = xy$Y, + prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), + coverage = coverage) + fit <- .setFinemappingFitClass(fit, "mvsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "mvsusie", + dataX = xy$X, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "X") + rowStudy <- c(rowStudy, study) + rowContext <- c(rowContext, "joint") + rowTrait <- c(rowTrait, tid) + rowMethod <- c(rowMethod, "mvsusie") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointContexts <- c(rowJointContexts, + paste(xy$perTraitContexts, collapse = ";")) + } + + if (length(rowStudy) == 0L) return(NULL) + QtlFineMappingResult( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointContexts = rowJointContexts, + ldSketch = NULL) +} + + +# Cross-trait joint dispatcher for QtlDataset. Per (study, context), fits +# mvsusieR::mvsusie or fsusieR::susiF (when in `methods`) jointly across +# the scoped traits within that context. Emits ONE result row per +# (study, context, method) with trait = "joint" and jointTraits populated. +# @noRd +.fmDispatchCrossTraitQtlDataset <- function(spec, data, methods, + contexts, traitIds, + cisWindow, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + jointMethods <- intersect(methods, c("mvsusie", "fsusie")) + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + study <- getStudy(data) + if (!(study %in% scope$studies)) return(NULL) + scopedContexts <- scope$contexts[[study]] + scopedTraits <- scope$traits[[study]] + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointTraits <- character(0) + + for (cx in scopedContexts) { + xy <- .buildIndividualCrossTraitXY( + data, cx, scopedTraits, cisWindow, verbose, + label = "jointCrossTrait", study = study) + if (is.null(xy)) next + + for (mm in jointMethods) { + if (verbose >= 1) + message(sprintf( + "jointCrossTrait: fitting %s for (study='%s', context='%s') across traits (%s) ...", + mm, study, cx, paste(xy$traitsHere, collapse = ", "))) + if (mm == "mvsusie") { + fit <- fitMvsusie( + X = xy$X, Y = xy$Y, + prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), + coverage = coverage) + fit <- .setFinemappingFitClass(fit, "mvsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "mvsusie", + dataX = xy$X, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "X") + } else { + rr <- SummarizedExperiment::rowRanges(xy$se) + ord <- match(colnames(xy$Y), rownames(xy$se)) + rr <- rr[ord] + pos <- (GenomicRanges::start(rr) + GenomicRanges::end(rr)) / 2 + fit <- fitFsusie(X = xy$X, Y = xy$Y, pos = pos) + fit <- .setFinemappingFitClass(fit, "fsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "fsusie", + dataX = xy$X, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "fsusie") + } + rowStudy <- c(rowStudy, study) + rowContext <- c(rowContext, cx) + rowTrait <- c(rowTrait, "joint") + rowMethod <- c(rowMethod, mm) + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointTraits <- c(rowJointTraits, + paste(xy$traitsHere, collapse = ";")) + } + } + + if (length(rowStudy) == 0L) return(NULL) + QtlFineMappingResult( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointTraits = rowJointTraits, + ldSketch = NULL) +} + + +# Cross-context joint dispatcher for QtlSumStats input. Groups the +# selected sumstats rows by (study, trait); each group with >= 2 contexts +# in scope produces one mvsusie_rss fit and one result row with context = +# "joint" and jointContexts populated. +# @noRd +.fmDispatchCrossContextQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + jointMethods <- intersect(methods, "mvsusie") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointContexts <- character(0) + + for (s in scope$studies) { + scopedContexts <- scope$contexts[[s]] + scopedTraits <- scope$traits[[s]] + if (length(scopedContexts) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossContext (QtlSumStats): study '%s' has %d context(s) in scope; skipping.", + s, length(scopedContexts))) + next + } + for (tid in scopedTraits) { + tupleRows <- which(studyCol == s & traitCol == tid & + contextCol %in% scopedContexts) + if (length(tupleRows) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossContext (QtlSumStats): (study='%s', trait='%s') has %d scoped context(s); skipping.", + s, tid, length(tupleRows))) + next + } + ctxNames <- contextCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, ctxNames, + errorLabel = "jointCrossContext (QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + if (verbose >= 1) + message(sprintf( + "jointCrossContext (QtlSumStats): fitting mvsusie_rss for (study='%s', trait='%s', %d contexts) ...", + s, tid, length(ctxNames))) + fit <- fitMvsusieRss( + Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), + prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), + coverage = coverage) + fit <- .setFinemappingFitClass(fit, "mvsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "mvsusie", + dataX = ldMat, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "Xcorr") + rowStudy <- c(rowStudy, s) + rowContext <- c(rowContext, "joint") + rowTrait <- c(rowTrait, tid) + rowMethod <- c(rowMethod, "mvsusie") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointContexts <- c(rowJointContexts, + paste(ctxNames, collapse = ";")) + } + } + + if (length(rowStudy) == 0L) return(NULL) + QtlFineMappingResult( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointContexts = rowJointContexts, + ldSketch = ldSketch) +} + + +# Cross-trait joint dispatcher for QtlSumStats: groups by (study, context), +# requires >= 2 scoped traits per group. mvsusie_rss only -- no RSS fsusie. +# @noRd +.fmDispatchCrossTraitQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + if ("fsusie" %in% methods) + stop("jointCrossTrait (QtlSumStats): fsusie has no RSS variant; ", + "fsusie cannot participate in sumstats-based joint fits.") + jointMethods <- intersect(methods, "mvsusie") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointTraits <- character(0) + + for (s in scope$studies) { + scopedContexts <- scope$contexts[[s]] + scopedTraits <- scope$traits[[s]] + for (cx in scopedContexts) { + tupleRows <- which(studyCol == s & contextCol == cx & + traitCol %in% scopedTraits) + if (length(tupleRows) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossTrait (QtlSumStats): (study='%s', context='%s') has %d scoped trait(s); skipping.", + s, cx, length(tupleRows))) + next + } + trNames <- traitCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, trNames, + errorLabel = "jointCrossTrait (QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + if (verbose >= 1) + message(sprintf( + "jointCrossTrait (QtlSumStats): fitting mvsusie_rss for (study='%s', context='%s', %d traits) ...", + s, cx, length(trNames))) + fit <- fitMvsusieRss( + Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), + prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), + coverage = coverage) + fit <- .setFinemappingFitClass(fit, "mvsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "mvsusie", + dataX = ldMat, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "Xcorr") + rowStudy <- c(rowStudy, s) + rowContext <- c(rowContext, cx) + rowTrait <- c(rowTrait, "joint") + rowMethod <- c(rowMethod, "mvsusie") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointTraits <- c(rowJointTraits, + paste(trNames, collapse = ";")) + } + } + if (length(rowStudy) == 0L) return(NULL) + QtlFineMappingResult( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointTraits = rowJointTraits, + ldSketch = ldSketch) +} + + +# Cross-study joint dispatcher for QtlSumStats: groups by (context, trait), +# requires >= 2 scoped studies per group. Sumstats-only by definition; +# individual-level studies are excluded with a message at the caller. +# mvsusie_rss only. +# @noRd +.fmDispatchCrossStudyQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + if ("fsusie" %in% methods) + stop("jointCrossStudy: fsusie cannot participate (no RSS variant).") + jointMethods <- intersect(methods, "mvsusie") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + + allCtxs <- unique(unlist(scope$contexts, use.names = FALSE)) + allTrs <- unique(unlist(scope$traits, use.names = FALSE)) + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointStudies <- character(0) + + for (cx in allCtxs) { + for (tid in allTrs) { + tupleRows <- which(contextCol == cx & traitCol == tid & + studyCol %in% scope$studies) + keep <- logical(length(tupleRows)) + for (k in seq_along(tupleRows)) { + s <- studyCol[tupleRows[k]] + keep[k] <- (cx %in% scope$contexts[[s]]) && + (tid %in% scope$traits[[s]]) + } + tupleRows <- tupleRows[keep] + if (length(tupleRows) < 2L) { + if (length(tupleRows) > 0L && verbose >= 1) + message(sprintf( + "jointCrossStudy: (context='%s', trait='%s') has %d study(ies) in scope; skipping.", + cx, tid, length(tupleRows))) + next + } + stNames <- studyCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, stNames, + errorLabel = "jointCrossStudy") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + if (verbose >= 1) + message(sprintf( + "jointCrossStudy: fitting mvsusie_rss for (context='%s', trait='%s', %d studies) ...", + cx, tid, length(stNames))) + fit <- fitMvsusieRss( + Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), + prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), + coverage = coverage) + fit <- .setFinemappingFitClass(fit, "mvsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "mvsusie", + dataX = ldMat, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "Xcorr") + rowStudy <- c(rowStudy, "joint") + rowContext <- c(rowContext, cx) + rowTrait <- c(rowTrait, tid) + rowMethod <- c(rowMethod, "mvsusie") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointStudies <- c(rowJointStudies, + paste(stNames, collapse = ";")) + } + } + if (length(rowStudy) == 0L) return(NULL) + QtlFineMappingResult( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointStudies = rowJointStudies, + ldSketch = ldSketch) +} + + +# Composed multi-axis joint dispatcher for QtlDataset. Only axes = +# c("context", "trait") is meaningful for a single-study individual- +# level input. Iterates per (study) (just one), enumerates the +# (context, trait) tuples in scope where the trait exists in the +# context, and fits one mvsusie joint over those tuples. +# @noRd +.fmDispatchComposedQtlDataset <- function(spec, data, methods, + contexts, traitIds, cisWindow, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + axes <- spec$axes + if ("study" %in% axes) + stop("composed jointSpecification (QtlDataset): axes including 'study' require sumstats input.") + if (!setequal(axes, c("context", "trait"))) + stop(sprintf( + "composed jointSpecification (QtlDataset): unsupported axes (%s) for individual-level input.", + paste(axes, collapse = ", "))) + jointMethods <- intersect(methods, "mvsusie") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + study <- getStudy(data) + if (!(study %in% scope$studies)) return(NULL) + xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, + verbose, + label = "composed joint (QtlDataset)") + if (is.null(xy)) return(NULL) + + if (verbose >= 1) + message(sprintf( + "composed joint (QtlDataset): fitting mvsusie for study='%s' over %d (context, trait) columns ...", + study, ncol(xy$Y))) + fit <- fitMvsusie( + X = xy$X, Y = xy$Y, + prior_variance = mvsusieR::create_mixture_prior(R = ncol(xy$Y)), + coverage = coverage) + fit <- .setFinemappingFitClass(fit, "mvsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "mvsusie", + dataX = xy$X, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "X") + QtlFineMappingResult( + study = study, + context = "joint", + trait = "joint", + method = "mvsusie", + entry = list(entry), + jointContexts = paste(vapply(xy$tuples, function(t) t$context, + character(1)), collapse = ";"), + jointTraits = paste(vapply(xy$tuples, function(t) t$trait, + character(1)), collapse = ";"), + ldSketch = NULL) +} + + +# Composed multi-axis joint dispatcher for QtlSumStats. Handles any +# `axes` subset of {study, context, trait} of size >= 2 by iterating the +# complement-axis Cartesian product and emitting one joint fit per +# iteration unit. +# @noRd +.fmDispatchComposedQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + if ("fsusie" %in% methods) + stop("composed jointSpecification (QtlSumStats): fsusie has no RSS variant.") + jointMethods <- intersect(methods, "mvsusie") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + groupInfo <- .enumerateComposedSumstatGroups(spec, data, scope) + if (is.null(groupInfo)) return(NULL) + axes <- groupInfo$axes + studyCol <- groupInfo$studyCol + contextCol <- groupInfo$contextCol + traitCol <- groupInfo$traitCol + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list() + rowJointStudies <- character(0) + rowJointContexts <- character(0) + rowJointTraits <- character(0) + + for (gIdx in groupInfo$groups) { + if (length(gIdx) < 2L) { + if (verbose >= 1) + message(sprintf( + "composed joint (QtlSumStats): group has %d row(s); skipping.", + length(gIdx))) + next + } + colLabels <- vapply(gIdx, function(i) + paste(studyCol[i], contextCol[i], traitCol[i], sep = ":"), + character(1L)) + jz <- .buildJointSumstatZMatrix( + data, gIdx, colLabels, + errorLabel = "composed joint (QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + if (verbose >= 1) + message(sprintf( + "composed joint (QtlSumStats): fitting mvsusie_rss for axes=(%s), %d columns ...", + paste(axes, collapse = ", "), length(gIdx))) + fit <- fitMvsusieRss( + Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), + prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), + coverage = coverage) + fit <- .setFinemappingFitClass(fit, "mvsusie") + entry <- .fmPostprocessOne( + fit = fit, method = "mvsusie", + dataX = ldMat, dataY = NULL, + coverage = coverage, + secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, + csInput = "Xcorr") + + repStudy <- if ("study" %in% axes) "joint" else studyCol[gIdx[[1L]]] + repContext <- if ("context" %in% axes) "joint" else contextCol[gIdx[[1L]]] + repTrait <- if ("trait" %in% axes) "joint" else traitCol[gIdx[[1L]]] + rowStudy <- c(rowStudy, repStudy) + rowContext <- c(rowContext, repContext) + rowTrait <- c(rowTrait, repTrait) + rowMethod <- c(rowMethod, "mvsusie") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointStudies <- c(rowJointStudies, + if ("study" %in% axes) paste(studyCol[gIdx], collapse = ";") + else NA_character_) + rowJointContexts <- c(rowJointContexts, + if ("context" %in% axes) paste(contextCol[gIdx], collapse = ";") + else NA_character_) + rowJointTraits <- c(rowJointTraits, + if ("trait" %in% axes) paste(traitCol[gIdx], collapse = ";") + else NA_character_) + } + + if (length(rowStudy) == 0L) return(NULL) + jsArg <- if (all(is.na(rowJointStudies))) NULL else rowJointStudies + jcArg <- if (all(is.na(rowJointContexts))) NULL else rowJointContexts + jtArg <- if (all(is.na(rowJointTraits))) NULL else rowJointTraits + QtlFineMappingResult( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointStudies = jsArg, + jointContexts = jcArg, + jointTraits = jtArg, + ldSketch = ldSketch) +} + + +# Top-level joint dispatcher for fineMappingPipeline(QtlDataset). +# @noRd +.fmDispatchJointSpecsQtlDataset <- function(parsedJointSpec, data, + methods, contexts, traitIds, + cisWindow, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + out <- NULL + for (i in seq_along(parsedJointSpec)) { + spec <- parsedJointSpec[[i]] + axes <- spec$axes + if (length(axes) > 1L) { + res <- .fmDispatchComposedQtlDataset( + spec, data, methods, contexts, traitIds, cisWindow, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindFineMappingResult(out, res, ldSketch = NULL) + next + } + axis <- axes[[1L]] + res <- switch(axis, + context = .fmDispatchCrossContextQtlDataset( + spec, data, methods, contexts, traitIds, cisWindow, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + trait = .fmDispatchCrossTraitQtlDataset( + spec, data, methods, contexts, traitIds, cisWindow, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + study = stop( + "fineMappingPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input. ", + "QtlDataset represents a single individual-level study; cross-study joints operate on the sumstats slot of MultiStudyQtlDataset or on QtlSumStats directly."), + stop(sprintf("Unsupported axis: %s", axis))) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindFineMappingResult(out, res, ldSketch = NULL) + } + out +} + + +# Top-level joint dispatcher for fineMappingPipeline(QtlSumStats). +# @noRd +.fmDispatchJointSpecsQtlSumStats <- function(parsedJointSpec, data, + methods, contexts, traitIds, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + out <- NULL + for (i in seq_along(parsedJointSpec)) { + spec <- parsedJointSpec[[i]] + axes <- spec$axes + if (length(axes) > 1L) { + res <- .fmDispatchComposedQtlSumStats( + spec, data, methods, contexts, traitIds, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindFineMappingResult(out, res, ldSketch = getLdSketch(data)) + next + } + axis <- axes[[1L]] + res <- switch(axis, + context = .fmDispatchCrossContextQtlSumStats( + spec, data, methods, contexts, traitIds, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + trait = .fmDispatchCrossTraitQtlSumStats( + spec, data, methods, contexts, traitIds, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + study = .fmDispatchCrossStudyQtlSumStats( + spec, data, methods, contexts, traitIds, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose), + stop(sprintf("Unsupported axis: %s", axis))) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindFineMappingResult(out, res, + ldSketch = getLdSketch(data)) + } + out +} + + +# Top-level joint dispatcher for fineMappingPipeline(MultiStudyQtlDataset). +# Routes per-component AND per-axis: a spec with `axes = "study"` only +# touches the sumStats slot; `axes = "context"` and `axes = "trait"` run +# on every component. +# @noRd +.fmDispatchJointSpecsMultiStudy <- function(parsedJointSpec, data, + methods, contexts, traitIds, + cisWindow, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose) { + out <- NULL + embeddedLd <- NULL + qtlDatasets <- getQtlDatasets(data) + sumStats <- getSumStats(data) + + studyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, + function(s) "study" %in% s$axes, logical(1L))] + nonStudyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, + function(s) !("study" %in% s$axes), logical(1L))] + + if (length(studyAxisSpecs) > 0L && length(qtlDatasets) > 0L && verbose >= 1) { + message(sprintf( + "jointCrossStudy: excluding individual-level studies (%s) from cross-study fits (no LD sketch available); sumstats studies participate.", + paste(names(qtlDatasets), collapse = ", "))) + } + + if (length(nonStudyAxisSpecs) > 0L) { + for (qdName in names(qtlDatasets)) { + qd <- qtlDatasets[[qdName]] + qdRes <- .fmDispatchJointSpecsQtlDataset( + nonStudyAxisSpecs, qd, methods, contexts, traitIds, cisWindow, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + if (!is.null(qdRes)) + out <- if (is.null(out)) qdRes + else .rbindFineMappingResult(out, qdRes, ldSketch = NULL) + } + } + + if (!is.null(sumStats)) { + ssRes <- .fmDispatchJointSpecsQtlSumStats( + parsedJointSpec, sumStats, methods, contexts, traitIds, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose) + if (!is.null(ssRes)) { + embeddedLd <- getLdSketch(ssRes) + out <- if (is.null(out)) ssRes + else .rbindFineMappingResult(out, ssRes, + ldSketch = embeddedLd) + } + } else if (length(studyAxisSpecs) > 0L && verbose >= 1) { + message("jointCrossStudy: no sumStats slot present on this MultiStudyQtlDataset; cross-study specs produce no result.") + } + out +} + + +# ============================================================================= +# TWAS-weights dispatchers +# ============================================================================= + +# Cross-context joint dispatcher for QtlDataset (twas). Mr.mash across +# scoped contexts per (study, trait). +# @noRd +.twasDispatchCrossContextQtlDataset <- function(spec, data, methods, + contexts, traitIds, + cisWindow, dataType, + verbose) { + jointMethods <- intersect(methods, "mrmash") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + study <- getStudy(data) + if (!(study %in% scope$studies)) return(NULL) + scopedContexts <- scope$contexts[[study]] + scopedTraits <- scope$traits[[study]] + if (length(scopedContexts) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossContext (twas QtlDataset): study '%s' has %d context(s) in scope; skipping.", + study, length(scopedContexts))) + return(NULL) + } + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointContexts <- character(0) + + for (tid in scopedTraits) { + xy <- .buildIndividualCrossContextXY( + data, tid, scopedContexts, cisWindow, verbose, + label = "jointCrossContext (twas QtlDataset)") + if (is.null(xy)) next + + if (verbose >= 1) + message(sprintf( + "jointCrossContext (twas QtlDataset): fitting mr.mash for (study='%s', trait='%s') across contexts (%s) ...", + study, tid, paste(xy$perTraitContexts, collapse = ", "))) + weights <- mrmashWeights(X = xy$X, Y = xy$Y) + if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) + entry <- TwasWeightsEntry( + variantIds = rownames(weights), + weights = weights, + standardized = FALSE, + dataType = dataType) + rowStudy <- c(rowStudy, study) + rowContext <- c(rowContext, "joint") + rowTrait <- c(rowTrait, tid) + rowMethod <- c(rowMethod, "mrmash") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointContexts <- c(rowJointContexts, + paste(xy$perTraitContexts, collapse = ";")) + } + + if (length(rowStudy) == 0L) return(NULL) + TwasWeights( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointContexts = rowJointContexts, + ldSketch = NULL) +} + + +# Cross-trait joint dispatcher for QtlDataset (twas). Mr.mash per +# (study, context) across scoped traits. +# @noRd +.twasDispatchCrossTraitQtlDataset <- function(spec, data, methods, + contexts, traitIds, + cisWindow, dataType, + verbose) { + jointMethods <- intersect(methods, "mrmash") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + study <- getStudy(data) + if (!(study %in% scope$studies)) return(NULL) + scopedContexts <- scope$contexts[[study]] + scopedTraits <- scope$traits[[study]] + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointTraits <- character(0) + + for (cx in scopedContexts) { + xy <- .buildIndividualCrossTraitXY( + data, cx, scopedTraits, cisWindow, verbose, + label = "jointCrossTrait (twas)", study = study) + if (is.null(xy)) next + + if (verbose >= 1) + message(sprintf( + "jointCrossTrait (twas): fitting mr.mash for (study='%s', context='%s') across traits (%s) ...", + study, cx, paste(xy$traitsHere, collapse = ", "))) + weights <- mrmashWeights(X = xy$X, Y = xy$Y) + if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) + entry <- TwasWeightsEntry( + variantIds = rownames(weights), + weights = weights, + standardized = FALSE, + dataType = dataType) + rowStudy <- c(rowStudy, study) + rowContext <- c(rowContext, cx) + rowTrait <- c(rowTrait, "joint") + rowMethod <- c(rowMethod, "mrmash") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointTraits <- c(rowJointTraits, + paste(xy$traitsHere, collapse = ";")) + } + if (length(rowStudy) == 0L) return(NULL) + TwasWeights( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointTraits = rowJointTraits, + ldSketch = NULL) +} + + +# Cross-context joint dispatcher for QtlSumStats (twas). Mr.mash.rss per +# (study, trait). +# @noRd +.twasDispatchCrossContextQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + dataType, verbose) { + jointMethods <- intersect(methods, "mrmash") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointContexts <- character(0) + + for (s in scope$studies) { + scopedContexts <- scope$contexts[[s]] + scopedTraits <- scope$traits[[s]] + if (length(scopedContexts) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossContext (twas QtlSumStats): study '%s' has %d context(s) in scope; skipping.", + s, length(scopedContexts))) + next + } + for (tid in scopedTraits) { + tupleRows <- which(studyCol == s & traitCol == tid & + contextCol %in% scopedContexts) + if (length(tupleRows) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossContext (twas QtlSumStats): (study='%s', trait='%s') has %d scoped context(s); skipping.", + s, tid, length(tupleRows))) + next + } + ctxNames <- contextCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, ctxNames, + errorLabel = "jointCrossContext (twas QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + stat <- list(z = jz$Z, N = jz$nVec) + if (verbose >= 1) + message(sprintf( + "jointCrossContext (twas QtlSumStats): fitting mr.mash.rss for (study='%s', trait='%s', %d contexts) ...", + s, tid, length(ctxNames))) + weights <- mrmashRssWeights(stat = stat, LD = ldMat) + if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds + entry <- TwasWeightsEntry( + variantIds = rownames(weights), + weights = weights, + standardized = TRUE, + dataType = dataType) + rowStudy <- c(rowStudy, s) + rowContext <- c(rowContext, "joint") + rowTrait <- c(rowTrait, tid) + rowMethod <- c(rowMethod, "mrmash") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointContexts <- c(rowJointContexts, + paste(ctxNames, collapse = ";")) + } + } + if (length(rowStudy) == 0L) return(NULL) + TwasWeights( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointContexts = rowJointContexts, + ldSketch = ldSketch) +} + + +# Cross-trait joint dispatcher for QtlSumStats (twas). Mr.mash.rss per +# (study, context). +# @noRd +.twasDispatchCrossTraitQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + dataType, verbose) { + jointMethods <- intersect(methods, "mrmash") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointTraits <- character(0) + + for (s in scope$studies) { + scopedContexts <- scope$contexts[[s]] + scopedTraits <- scope$traits[[s]] + for (cx in scopedContexts) { + tupleRows <- which(studyCol == s & contextCol == cx & + traitCol %in% scopedTraits) + if (length(tupleRows) < 2L) { + if (verbose >= 1) + message(sprintf( + "jointCrossTrait (twas QtlSumStats): (study='%s', context='%s') has %d scoped trait(s); skipping.", + s, cx, length(tupleRows))) + next + } + trNames <- traitCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, trNames, + errorLabel = "jointCrossTrait (twas QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + stat <- list(z = jz$Z, N = jz$nVec) + if (verbose >= 1) + message(sprintf( + "jointCrossTrait (twas QtlSumStats): fitting mr.mash.rss for (study='%s', context='%s', %d traits) ...", + s, cx, length(trNames))) + weights <- mrmashRssWeights(stat = stat, LD = ldMat) + if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds + entry <- TwasWeightsEntry( + variantIds = rownames(weights), + weights = weights, + standardized = TRUE, + dataType = dataType) + rowStudy <- c(rowStudy, s) + rowContext <- c(rowContext, cx) + rowTrait <- c(rowTrait, "joint") + rowMethod <- c(rowMethod, "mrmash") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointTraits <- c(rowJointTraits, + paste(trNames, collapse = ";")) + } + } + if (length(rowStudy) == 0L) return(NULL) + TwasWeights( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointTraits = rowJointTraits, + ldSketch = ldSketch) +} + + +# Cross-study joint dispatcher for QtlSumStats (twas). Mr.mash.rss per +# (context, trait). +# @noRd +.twasDispatchCrossStudyQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + dataType, verbose) { + jointMethods <- intersect(methods, "mrmash") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + + allCtxs <- unique(unlist(scope$contexts, use.names = FALSE)) + allTrs <- unique(unlist(scope$traits, use.names = FALSE)) + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list(); rowJointStudies <- character(0) + + for (cx in allCtxs) { + for (tid in allTrs) { + tupleRows <- which(contextCol == cx & traitCol == tid & + studyCol %in% scope$studies) + keep <- logical(length(tupleRows)) + for (k in seq_along(tupleRows)) { + s <- studyCol[tupleRows[k]] + keep[k] <- (cx %in% scope$contexts[[s]]) && + (tid %in% scope$traits[[s]]) + } + tupleRows <- tupleRows[keep] + if (length(tupleRows) < 2L) { + if (length(tupleRows) > 0L && verbose >= 1) + message(sprintf( + "jointCrossStudy (twas): (context='%s', trait='%s') has %d study(ies) in scope; skipping.", + cx, tid, length(tupleRows))) + next + } + stNames <- studyCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, stNames, + errorLabel = "jointCrossStudy (twas)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + stat <- list(z = jz$Z, N = jz$nVec) + if (verbose >= 1) + message(sprintf( + "jointCrossStudy (twas): fitting mr.mash.rss for (context='%s', trait='%s', %d studies) ...", + cx, tid, length(stNames))) + weights <- mrmashRssWeights(stat = stat, LD = ldMat) + if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds + entry <- TwasWeightsEntry( + variantIds = rownames(weights), + weights = weights, + standardized = TRUE, + dataType = dataType) + rowStudy <- c(rowStudy, "joint") + rowContext <- c(rowContext, cx) + rowTrait <- c(rowTrait, tid) + rowMethod <- c(rowMethod, "mrmash") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointStudies <- c(rowJointStudies, + paste(stNames, collapse = ";")) + } + } + if (length(rowStudy) == 0L) return(NULL) + TwasWeights( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointStudies = rowJointStudies, + ldSketch = ldSketch) +} + + +# Composed multi-axis joint dispatcher for QtlSumStats (twas). +# @noRd +.twasDispatchComposedQtlSumStats <- function(spec, data, methods, + contexts, traitIds, + dataType, verbose) { + jointMethods <- intersect(methods, "mrmash") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + ldSketch <- getLdSketch(data) + groupInfo <- .enumerateComposedSumstatGroups(spec, data, scope) + if (is.null(groupInfo)) return(NULL) + axes <- groupInfo$axes + studyCol <- groupInfo$studyCol + contextCol <- groupInfo$contextCol + traitCol <- groupInfo$traitCol + + rowStudy <- character(0); rowContext <- character(0) + rowTrait <- character(0); rowMethod <- character(0) + rowEntries <- list() + rowJointStudies <- character(0) + rowJointContexts <- character(0) + rowJointTraits <- character(0) + + for (gIdx in groupInfo$groups) { + if (length(gIdx) < 2L) { + if (verbose >= 1) + message(sprintf( + "composed joint (twas QtlSumStats): group has %d row(s); skipping.", + length(gIdx))) + next + } + colLabels <- vapply(gIdx, function(i) + paste(studyCol[i], contextCol[i], traitCol[i], sep = ":"), + character(1L)) + jz <- .buildJointSumstatZMatrix( + data, gIdx, colLabels, + errorLabel = "composed joint (twas QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + stat <- list(z = jz$Z, N = jz$nVec) + if (verbose >= 1) + message(sprintf( + "composed joint (twas QtlSumStats): fitting mr.mash.rss for axes=(%s), %d columns ...", + paste(axes, collapse = ", "), length(gIdx))) + weights <- mrmashRssWeights(stat = stat, LD = ldMat) + if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds + entry <- TwasWeightsEntry( + variantIds = rownames(weights), + weights = weights, + standardized = TRUE, + dataType = dataType) + + repStudy <- if ("study" %in% axes) "joint" else studyCol[gIdx[[1L]]] + repContext <- if ("context" %in% axes) "joint" else contextCol[gIdx[[1L]]] + repTrait <- if ("trait" %in% axes) "joint" else traitCol[gIdx[[1L]]] + rowStudy <- c(rowStudy, repStudy) + rowContext <- c(rowContext, repContext) + rowTrait <- c(rowTrait, repTrait) + rowMethod <- c(rowMethod, "mrmash") + rowEntries[[length(rowEntries) + 1L]] <- entry + rowJointStudies <- c(rowJointStudies, + if ("study" %in% axes) paste(studyCol[gIdx], collapse = ";") + else NA_character_) + rowJointContexts <- c(rowJointContexts, + if ("context" %in% axes) paste(contextCol[gIdx], collapse = ";") + else NA_character_) + rowJointTraits <- c(rowJointTraits, + if ("trait" %in% axes) paste(traitCol[gIdx], collapse = ";") + else NA_character_) + } + if (length(rowStudy) == 0L) return(NULL) + jsArg <- if (all(is.na(rowJointStudies))) NULL else rowJointStudies + jcArg <- if (all(is.na(rowJointContexts))) NULL else rowJointContexts + jtArg <- if (all(is.na(rowJointTraits))) NULL else rowJointTraits + TwasWeights( + study = rowStudy, + context = rowContext, + trait = rowTrait, + method = rowMethod, + entry = rowEntries, + jointStudies = jsArg, + jointContexts = jcArg, + jointTraits = jtArg, + ldSketch = ldSketch) +} + + +# Composed multi-axis joint dispatcher for QtlDataset (twas). axes = +# c("context", "trait") only. +# @noRd +.twasDispatchComposedQtlDataset <- function(spec, data, methods, + contexts, traitIds, cisWindow, + dataType, verbose) { + axes <- spec$axes + if ("study" %in% axes) + stop("composed jointSpecification (twas QtlDataset): axes including 'study' require sumstats input.") + if (!setequal(axes, c("context", "trait"))) + stop(sprintf("composed jointSpecification (twas QtlDataset): unsupported axes (%s) for individual-level input.", + paste(axes, collapse = ", "))) + jointMethods <- intersect(methods, "mrmash") + if (length(jointMethods) == 0L) return(NULL) + + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + study <- getStudy(data) + if (!(study %in% scope$studies)) return(NULL) + xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, + verbose, + label = "composed joint (twas QtlDataset)") + if (is.null(xy)) return(NULL) + + if (verbose >= 1) + message(sprintf( + "composed joint (twas QtlDataset): fitting mr.mash for study='%s' over %d (context, trait) columns ...", + study, ncol(xy$Y))) + weights <- mrmashWeights(X = xy$X, Y = xy$Y) + if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) + entry <- TwasWeightsEntry( + variantIds = rownames(weights), + weights = weights, + standardized = FALSE, + dataType = dataType) + TwasWeights( + study = study, + context = "joint", + trait = "joint", + method = "mrmash", + entry = list(entry), + jointContexts = paste(vapply(xy$tuples, function(t) t$context, + character(1)), collapse = ";"), + jointTraits = paste(vapply(xy$tuples, function(t) t$trait, + character(1)), collapse = ";"), + ldSketch = NULL) +} + + +# Top-level joint dispatcher for twasWeightsPipeline(QtlDataset). +# @noRd +.twasDispatchJointSpecsQtlDataset <- function(parsedJointSpec, data, + methods, contexts, traitIds, + cisWindow, dataType, + verbose) { + out <- NULL + for (i in seq_along(parsedJointSpec)) { + spec <- parsedJointSpec[[i]] + axes <- spec$axes + if (length(axes) > 1L) { + res <- .twasDispatchComposedQtlDataset( + spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindTwasWeights(out, res, ldSketch = NULL) + next + } + axis <- axes[[1L]] + res <- switch(axis, + context = .twasDispatchCrossContextQtlDataset( + spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose), + trait = .twasDispatchCrossTraitQtlDataset( + spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose), + study = stop( + "twasWeightsPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input."), + stop(sprintf("Unsupported axis: %s", axis))) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindTwasWeights(out, res, ldSketch = NULL) + } + out +} + + +# Top-level joint dispatcher for twasWeightsPipeline(QtlSumStats). +# @noRd +.twasDispatchJointSpecsQtlSumStats <- function(parsedJointSpec, data, + methods, contexts, traitIds, + dataType, verbose) { + out <- NULL + for (i in seq_along(parsedJointSpec)) { + spec <- parsedJointSpec[[i]] + axes <- spec$axes + if (length(axes) > 1L) { + res <- .twasDispatchComposedQtlSumStats( + spec, data, methods, contexts, traitIds, dataType, verbose) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindTwasWeights(out, res, ldSketch = getLdSketch(data)) + next + } + axis <- axes[[1L]] + res <- switch(axis, + context = .twasDispatchCrossContextQtlSumStats( + spec, data, methods, contexts, traitIds, dataType, verbose), + trait = .twasDispatchCrossTraitQtlSumStats( + spec, data, methods, contexts, traitIds, dataType, verbose), + study = .twasDispatchCrossStudyQtlSumStats( + spec, data, methods, contexts, traitIds, dataType, verbose), + stop(sprintf("Unsupported axis: %s", axis))) + if (!is.null(res)) + out <- if (is.null(out)) res + else .rbindTwasWeights(out, res, ldSketch = getLdSketch(data)) + } + out +} + + +# Top-level joint dispatcher for twasWeightsPipeline(MultiStudyQtlDataset). +# @noRd +.twasDispatchJointSpecsMultiStudy <- function(parsedJointSpec, data, + methods, contexts, traitIds, + cisWindow, dataType, verbose) { + out <- NULL + embeddedLd <- NULL + qtlDatasets <- getQtlDatasets(data) + sumStats <- getSumStats(data) + + studyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, + function(s) "study" %in% s$axes, logical(1L))] + nonStudyAxisSpecs <- parsedJointSpec[vapply(parsedJointSpec, + function(s) !("study" %in% s$axes), logical(1L))] + + if (length(studyAxisSpecs) > 0L && length(qtlDatasets) > 0L && verbose >= 1) { + message(sprintf( + "jointCrossStudy (twas): excluding individual-level studies (%s) from cross-study fits; sumstats studies participate.", + paste(names(qtlDatasets), collapse = ", "))) + } + + if (length(nonStudyAxisSpecs) > 0L) { + for (qdName in names(qtlDatasets)) { + qd <- qtlDatasets[[qdName]] + qdRes <- .twasDispatchJointSpecsQtlDataset( + nonStudyAxisSpecs, qd, methods, contexts, traitIds, cisWindow, + dataType, verbose) + if (!is.null(qdRes)) + out <- if (is.null(out)) qdRes + else .rbindTwasWeights(out, qdRes, ldSketch = NULL) + } + } + + if (!is.null(sumStats)) { + ssRes <- .twasDispatchJointSpecsQtlSumStats( + parsedJointSpec, sumStats, methods, contexts, traitIds, dataType, + verbose) + if (!is.null(ssRes)) { + embeddedLd <- getLdSketch(ssRes) + out <- if (is.null(out)) ssRes + else .rbindTwasWeights(out, ssRes, ldSketch = embeddedLd) + } + } else if (length(studyAxisSpecs) > 0L && verbose >= 1) { + message("jointCrossStudy (twas): no sumStats slot present on this MultiStudyQtlDataset; cross-study specs produce no result.") + } + out +} diff --git a/R/qtlEnrichmentPipeline.R b/R/qtlEnrichmentPipeline.R index f6465e01..cfbc4b43 100644 --- a/R/qtlEnrichmentPipeline.R +++ b/R/qtlEnrichmentPipeline.R @@ -151,7 +151,7 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult, pieces <- list() for (i in idx) { entry <- gwasFmr$entry[[i]] - fit <- getTrimmedFit(entry) + fit <- getSusieFit(entry) if (is.null(fit) || is.null(fit$pip)) next pip <- as.numeric(fit$pip) ids <- if (!is.null(names(fit$pip))) names(fit$pip) @@ -190,7 +190,7 @@ qtlEnrichmentPipeline <- function(gwasFineMappingResult, out <- list() for (i in idx) { entry <- qtlFmr$entry[[i]] - fit <- getTrimmedFit(entry) + fit <- getSusieFit(entry) if (is.null(fit) || is.null(fit$alpha) || is.null(fit$pip)) next pV <- if (!is.null(fit$V)) fit$V else if (!is.null(fit$prior_variance)) fit$prior_variance diff --git a/R/qtlSumStats.R b/R/qtlSumStats.R index 0f4012f9..290df53a 100644 --- a/R/qtlSumStats.R +++ b/R/qtlSumStats.R @@ -9,7 +9,7 @@ # summaryStatsQc() passes have been run. # ============================================================================= -#' @include SumStatsBase.R tupleSelectors.R +#' @include AllClasses.R tupleSelectors.R NULL setClass("QtlSumStats", @@ -52,7 +52,7 @@ setClass("QtlSumStats", #' @keywords internal #' @importFrom GenomicRanges GRanges seqnames start #' @importFrom S4Vectors DataFrame SimpleList mcols -#' @include allGenerics.R +#' @include AllGenerics.R NULL # ============================================================================= diff --git a/R/sumstatsQc.R b/R/sumstatsQc.R index 3dc61590..0996d597 100644 --- a/R/sumstatsQc.R +++ b/R/sumstatsQc.R @@ -1319,7 +1319,7 @@ getSusieResult <- function(conData) { if (length(conData) == 0) return(NULL) fm <- conData$finemappingEntry if (is.null(fm) || !is(fm, "FineMappingEntry")) return(NULL) - trimmed <- getTrimmedFit(fm) + trimmed <- getSusieFit(fm) if (length(trimmed) == 0) return(NULL) trimmed } @@ -1356,7 +1356,7 @@ getSusieResult <- function(conData) { #' @export extractCsInfo <- function(conData, csNames, topLociTable) { fm <- conData$finemappingEntry - trimmed <- getTrimmedFit(fm) + trimmed <- getSusieFit(fm) variantNames <- getVariantIds(fm) results <- map(seq_along(csNames), function(i) { csName <- csNames[i] @@ -1435,7 +1435,7 @@ extractCsInfo <- function(conData, csNames, topLociTable) { #' @export extractTopPipInfo <- function(conData) { fm <- conData$finemappingEntry - trimmed <- getTrimmedFit(fm) + trimmed <- getSusieFit(fm) variantNames <- getVariantIds(fm) # Find the variant with the highest PIP topPipIndex <- which.max(trimmed$pip) diff --git a/R/twasWeights.R b/R/twasWeights.R index 60c00016..a5043dbb 100644 --- a/R/twasWeights.R +++ b/R/twasWeights.R @@ -10,7 +10,7 @@ # (learnTwasWeights, CV, ensemble, etc.) follow at the bottom. # ============================================================================= -#' @include allGenerics.R tupleSelectors.R +#' @include AllGenerics.R tupleSelectors.R NULL setClass("TwasWeights", diff --git a/R/twasWeightsPipeline.R b/R/twasWeightsPipeline.R index 816a5762..d2088d1f 100644 --- a/R/twasWeightsPipeline.R +++ b/R/twasWeightsPipeline.R @@ -365,7 +365,7 @@ as.character(fineMappingResult$context) == context & as.character(fineMappingResult$trait) == trait) if (length(idx) > 0L) { - out[[canonical]] <- getTrimmedFit(fineMappingResult$entry[[idx[[1L]]]]) + out[[canonical]] <- getSusieFit(fineMappingResult$entry[[idx[[1L]]]]) } } out @@ -470,9 +470,20 @@ #' @param estimatePi If TRUE, estimate spike-and-slab sparsity from #' mr.ash before BGLR / qgg spike-and-slab methods that consume it. #' @param phenotypeCovariatesToResidualize,genotypeCovariatesToResidualize -#' Pass-through to \code{\link{getResidualizedPhenotypes}} and -#' \code{\link{getResidualizedGenotypes}} for QtlDataset input. -#' Default \code{NULL} (use all covariates). +#' Character vector (or \code{NULL}) of covariate column names to +#' residualize against. Forwarded to +#' \code{\link{getResidualizedPhenotypes}} / +#' \code{\link{getResidualizedGenotypes}} for \code{QtlDataset} / +#' \code{MultiStudyQtlDataset} input. Default \code{NULL} (use all +#' available covariates). Ignored for sumstat inputs. +#' @param residualizePhenotypeCovariates Logical (length 1). When +#' \code{TRUE} (default) residualize against the phenotype-side +#' covariates listed in \code{phenotypeCovariatesToResidualize}; set +#' \code{FALSE} to disable. +#' @param residualizeGenotypeCovariates Logical (length 1). When +#' \code{TRUE} (default) residualize against the genotype-side +#' covariates listed in \code{genotypeCovariatesToResidualize}; set +#' \code{FALSE} to disable. #' @param dataType Optional data-type tag stamped into every #' \code{TwasWeightsEntry$dataType} (e.g. \code{"expression"}). #' @param verbose Verbosity (0 silent, 1 default, 2 includes external @@ -511,6 +522,8 @@ setMethod("twasWeightsPipeline", "QtlDataset", estimatePi = TRUE, phenotypeCovariatesToResidualize = NULL, genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, dataType = NULL, naAction = c("drop", "impute"), verbose = 1, @@ -603,12 +616,12 @@ setMethod("twasWeightsPipeline", "QtlDataset", cachedTw <- .twasBuildFromCachedRows(cachedRows, study, ctx, tid) if (length(remaining) == 0L) return(cachedTw) - Y <- getResidualizedPhenotypes( + Y <- .fmResidPheno( data, contexts = ctx, traitId = tid, phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, naAction = naAction) - X <- getResidualizedGenotypes( + X <- .fmResidGeno( data, contexts = ctx, traitId = tid, cisWindow = cisWindow, phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, @@ -654,7 +667,7 @@ setMethod("twasWeightsPipeline", "QtlDataset", # Joint over selected (contexts, traits): residualize, intersect # samples across contexts, drop subjects with any-NA in Y. Xlist <- lapply(useCtx, function(ctx) { - getResidualizedGenotypes( + .fmResidGeno( data, contexts = ctx, traitId = traits, cisWindow = cisWindow, phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, @@ -667,7 +680,7 @@ setMethod("twasWeightsPipeline", "QtlDataset", } X <- Xlist[[1L]][commonSamples, , drop = FALSE] - Yres <- getResidualizedPhenotypes( + Yres <- .fmResidPheno( data, contexts = useCtx, traitId = traits, phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, @@ -1025,6 +1038,10 @@ setMethod("twasWeightsPipeline", "MultiStudyQtlDataset", twasWeights = NULL, naAction = c("drop", "impute"), verbose = 1, + phenotypeCovariatesToResidualize = NULL, + genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, ...) { naAction <- match.arg(naAction) parsedJointSpec <- parseJointSpecification(jointSpecification, data) diff --git a/R/vcfWriter.R b/R/vcfWriter.R index 6ee234ee..31f55c18 100644 --- a/R/vcfWriter.R +++ b/R/vcfWriter.R @@ -1,4 +1,4 @@ -#' @include allGenerics.R +#' @include AllGenerics.R #' @importFrom S4Vectors DataFrame SimpleList mcols #' @importFrom GenomicRanges GRanges seqnames #' @importFrom IRanges DataFrameList @@ -61,97 +61,162 @@ setMethod("writeSumstatsVcf", signature("GwasSumStats"), setMethod("writeSumstatsVcf", signature("FineMappingResultBase"), function(x, outputPath, sampleName = NULL, study = NULL, context = NULL, trait = NULL, method = NULL, + splitByContext = FALSE, splitByTrait = FALSE, ...) { if (!requireNamespace("VariantAnnotation", quietly = TRUE)) stop("Package 'VariantAnnotation' is required for writeSumstatsVcf") - # Resolve the single (study, context, trait, method) row to write. - if (is.null(study) || is.null(context) || is.null(trait) || is.null(method)) { - if (nrow(x) != 1L) { - stop("This FineMappingResult has ", nrow(x), " entries. ", - "Pass `study`, `context`, `trait`, and `method` to select one.") - } - study <- as.character(x$study)[1L] - context <- as.character(x$context)[1L] - trait <- as.character(x$trait)[1L] - method <- as.character(x$method)[1L] + # Resolve the set of rows to write. With both selectors NULL and no + # split flags, the collection must have exactly one row. Splitting + # iterates over the unique values of the requested axis. + rowSpecs <- .resolveFineMappingRows( + x, study = study, context = context, trait = trait, method = method, + splitByContext = splitByContext, splitByTrait = splitByTrait) + out <- character(length(rowSpecs)) + for (i in seq_along(rowSpecs)) { + spec <- rowSpecs[[i]] + out[[i]] <- .writeFineMappingVcf(x, spec, + outputPath = outputPath, + sampleName = sampleName, + splitByContext = splitByContext, + splitByTrait = splitByTrait) } - entry <- getFineMappingResult(x, study, context, trait, method) - sampleName <- sampleName %||% sprintf("%s|%s|%s|%s", - study, context, trait, method) - tl <- getTopLoci(entry) - if (nrow(tl) == 0) stop("FineMappingEntry has no topLoci to write") - - parsed <- parseVariantId(tl$variant_id) - nSnps <- nrow(parsed) + invisible(out) +}) - geno <- list() - genoHeaderRows <- character(0) - genoNumber <- character(0) - genoType <- character(0) - genoDesc <- character(0) - - # PIP - pipCol <- resolvePipColumn(tl) - if (!is.null(pipCol)) { - geno[["PIP"]] <- matrix(tl[[pipCol]], nSnps) - genoHeaderRows <- c(genoHeaderRows, "PIP") - genoNumber <- c(genoNumber, "A") - genoType <- c(genoType, "Float") - genoDesc <- c(genoDesc, "Posterior inclusion probability") +# Resolve which (study, context, trait, method) rows to write. Without +# the split flags this returns a single spec; with `splitByContext` or +# `splitByTrait` the collection's rows are walked and one spec is emitted +# per row (after applying any explicit selector filters). +# @noRd +.resolveFineMappingRows <- function(x, study, context, trait, method, + splitByContext, splitByTrait) { + hasContextSlot <- "context" %in% names(x) + hasTraitSlot <- "trait" %in% names(x) + rows <- seq_len(nrow(x)) + if (!is.null(study)) rows <- rows[as.character(x$study)[rows] == study] + if (hasContextSlot && !is.null(context)) + rows <- rows[as.character(x$context)[rows] == context] + if (hasTraitSlot && !is.null(trait)) + rows <- rows[as.character(x$trait)[rows] == trait] + if (!is.null(method)) rows <- rows[as.character(x$method)[rows] == method] + if (length(rows) == 0L) + stop("writeSumstatsVcf: no rows match the supplied selectors.") + if (!isTRUE(splitByContext) && !isTRUE(splitByTrait)) { + if (length(rows) != 1L) + stop("This FineMappingResult has ", length(rows), " matching rows. ", + "Pass `study`/`context`/`trait`/`method` to select one, or ", + "set `splitByContext = TRUE` / `splitByTrait = TRUE` to emit ", + "one file per row.") + return(list(.rowSpec(x, rows[[1L]]))) } + lapply(rows, function(r) .rowSpec(x, r)) +} - # CS - csCol <- grep("^cs_index", colnames(tl), value = TRUE) - if (length(csCol) > 0) { - geno[["CS"]] <- matrix(as.integer(tl[[csCol[1]]]), nSnps) - genoHeaderRows <- c(genoHeaderRows, "CS") - genoNumber <- c(genoNumber, "A") - genoType <- c(genoType, "Integer") - genoDesc <- c(genoDesc, "Credible set index (0 = not in any CS)") - } +# Build a (study, context, trait, method) spec list for one row index. +# @noRd +.rowSpec <- function(x, r) { + list( + study = as.character(x$study)[r], + context = if ("context" %in% names(x)) as.character(x$context)[r] + else NA_character_, + trait = if ("trait" %in% names(x)) as.character(x$trait)[r] + else NA_character_, + method = as.character(x$method)[r]) +} - # Effect size / SE if available - if ("beta" %in% colnames(tl)) { - geno[["ES"]] <- matrix(tl$beta, nSnps) - genoHeaderRows <- c(genoHeaderRows, "ES") - genoNumber <- c(genoNumber, "A") - genoType <- c(genoType, "Float") - genoDesc <- c(genoDesc, "Effect size estimate relative to the alternative allele") - } - if ("se" %in% colnames(tl)) { - geno[["SE"]] <- matrix(tl$se, nSnps) - genoHeaderRows <- c(genoHeaderRows, "SE") - genoNumber <- c(genoNumber, "A") - genoType <- c(genoType, "Float") - genoDesc <- c(genoDesc, "Standard error of effect size estimate") +# Internal worker: write one (study, context, trait, method) tuple to a +# single VCF. When `splitByContext` / `splitByTrait` is in play the +# output path is decorated with the corresponding tag(s) so multiple +# files don't collide. +# @noRd +.writeFineMappingVcf <- function(x, spec, outputPath, sampleName, + splitByContext, splitByTrait) { + entry <- getFineMappingResult(x, spec$study, spec$context, spec$trait, + spec$method) + finalPath <- .decorateOutputPath(outputPath, spec, splitByContext, + splitByTrait) + sn <- sampleName %||% sprintf("%s|%s|%s|%s", + spec$study, spec$context %||% "_", + spec$trait %||% "_", spec$method) + + # Body of the VCF is exclusively marginal univariate effects — no + # posterior output. By design the fine-mapping write-out emits the + # marginal sumstats so consumers can run their own downstream + # analysis (coloc, TWAS, etc.) on a uniform per-variant table. + marginal <- getMarginalEffects(entry) + if (nrow(marginal) == 0) + stop("writeSumstatsVcf: entry [", sn, "] has no variants to write") + + nSnps <- nrow(marginal) + geno <- list() + hdrRows <- character(0); hdrNum <- character(0) + hdrType <- character(0); hdrDesc <- character(0) + addGeno <- function(name, vec, type, desc) { + geno[[name]] <<- matrix(vec, nSnps) + hdrRows <<- c(hdrRows, name); hdrNum <<- c(hdrNum, "A") + hdrType <<- c(hdrType, type); hdrDesc <<- c(hdrDesc, desc) } - if ("z" %in% colnames(tl)) { - pval <- 2 * pnorm(-abs(tl$z)) - geno[["LP"]] <- matrix(-log10(pval), nSnps) - genoHeaderRows <- c(genoHeaderRows, "LP") - genoNumber <- c(genoNumber, "A") - genoType <- c(genoType, "Float") - genoDesc <- c(genoDesc, "-log10 p-value for effect estimate") + if (any(!is.na(marginal$beta))) + addGeno("ES", marginal$beta, "Float", + "Marginal univariate effect-size estimate (effect allele)") + if (any(!is.na(marginal$se))) + addGeno("SE", marginal$se, "Float", + "Standard error of the marginal effect-size estimate") + if (any(!is.na(marginal$p))) { + lp <- ifelse(is.na(marginal$p) | marginal$p <= 0, + NA_real_, -log10(marginal$p)) + addGeno("LP", lp, "Float", + "-log10 p-value of the marginal univariate effect") } + if (any(!is.na(marginal$N))) + addGeno("SS", as.integer(marginal$N), "Integer", "Sample size") + if (any(!is.na(marginal$MAF))) + addGeno("AF", marginal$MAF, "Float", "Minor allele frequency") genoHeader <- DataFrame( - Number = genoNumber, - Type = genoType, - Description = genoDesc, - row.names = genoHeaderRows) + Number = hdrNum, Type = hdrType, Description = hdrDesc, + row.names = hdrRows) .writeVcfImpl( - chrom = parsed$chrom, - pos = parsed$pos, - ref = parsed$A2, - alt = parsed$A1, - snpIds = tl$variant_id, + chrom = marginal$chrom, + pos = marginal$pos, + ref = marginal$A2, + alt = marginal$A1, + snpIds = marginal$variant_id, geno = geno, genoHeader = genoHeader, - sampleName = sampleName, - outputPath = outputPath) -}) + sampleName = sn, + outputPath = finalPath) + finalPath +} + +# Decorate `outputPath` with the spec's context / trait tags when split +# flags are set. Preserves the file extension. Examples: +# "out.vcf" + (context="brain") -> "out.brain.vcf" +# "out.vcf.bgz" + (context="brain", trait="ENSG1") -> "out.brain.ENSG1.vcf.bgz" +# @noRd +.decorateOutputPath <- function(outputPath, spec, splitByContext, + splitByTrait) { + if (!isTRUE(splitByContext) && !isTRUE(splitByTrait)) return(outputPath) + ext <- tolower(tools::file_ext(outputPath)) + composite <- ext == "bgz" || ext == "gz" + base <- if (composite) { + sub("\\.[^.]+\\.(bgz|gz)$", "", outputPath, ignore.case = TRUE) + } else { + tools::file_path_sans_ext(outputPath) + } + ext_keep <- substr(outputPath, nchar(base) + 1L, nchar(outputPath)) + tags <- character(0) + if (isTRUE(splitByContext) && + !is.null(spec$context) && !is.na(spec$context) && nzchar(spec$context)) + tags <- c(tags, spec$context) + if (isTRUE(splitByTrait) && + !is.null(spec$trait) && !is.na(spec$trait) && nzchar(spec$trait)) + tags <- c(tags, spec$trait) + if (length(tags) == 0L) return(outputPath) + paste0(base, ".", paste(tags, collapse = "."), ext_keep) +} # Internal implementation shared by all methods # @noRd diff --git a/_pkgdown.yml b/_pkgdown.yml index e7651428..f8d980c5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -80,6 +80,450 @@ articles: contents: - qtl-gwas-resources +reference: + - title: "Class definitions" + desc: > + S4 class definitions. The `-class` topic documents the slots and + validity constraints; the matching constructor topic (next + section) documents the user-facing factory function. + contents: + - AnnotationMatrix-class + - FineMappingResultBase-class + - GenotypeHandle-class + - H2Estimate-class + - LdData-class + - LdEigen-class + - LdScore-class + - LdStatistic-class + - MultiStudyQtlDataset-class + - SumStatsBase-class + + - title: "Class constructors" + desc: > + User-facing constructors. For classes that have both a `-class` + topic and a constructor topic, prefer the constructor for + day-to-day use; the `-class` topic is the authoritative slot + reference. + contents: + - AnnotationMatrix + - FineMappingEntry + - GenotypeHandle + - GwasFineMappingResult + - GwasSumStats + - LdData + - MultiStudyQtlDataset + - QtlDataset + - QtlFineMappingResult + - QtlSumStats + - TwasWeights + - TwasWeightsEntry + + - title: "Class methods" + desc: > + Accessor and behaviour methods grouped by the class they're + defined on. Generics with implementations on multiple classes + appear under each. S4 pipeline dispatch methods + (`fineMappingPipeline`, `colocboostPipeline`, + `twasWeightsPipeline`) are listed under "Pipelines" further + down rather than repeated per class. + + - subtitle: "AnnotationMatrix" + contents: + - getAnnotationMeta + - getAnnotations + - getBaseline + - getCandidates + - getGenome + - getSnpRanges + + - subtitle: "FineMappingEntry" + contents: + - adjustPips + - getCs + - getMarginalEffects + - getPip + - getSusieFit + - getTopLoci + - getVariantIds + + - subtitle: "FineMappingResultBase" + contents: + - getLdSketch + - getMethodNames + - getStudy + - writeSumstatsVcf + + - subtitle: "GenotypeHandle" + contents: + - getFormat + - getNSamples + - getPath + - getPgenPtr + - getSampleIds + - getSnpInfo + - readGenotypes + + - subtitle: "GwasFineMappingResult" + contents: + - getContexts + - getFineMappingResult + - getTraits + + - subtitle: "GwasSumStats" + contents: + - getMaf + - getN + - getSumStats + - getSumstatDf + - getVarY + - getZ + - nSnps + - subsetChr + + - subtitle: "H2Estimate" + contents: + - getEnrichment + - getH2 + - getLocal + - getScoreStats + - getTauBlocks + + - subtitle: "LdBlocks" + contents: + - getBlocks + + - subtitle: "LdData" + contents: + - getBlockMetadata + - getCorrelation + - getGenotypeHandle + - getGenotypes + - getMixtureWeights + - getNRef + - getRefPanel + - getSnpIdx + - getVariantInfo + - hasGenotypes + + - subtitle: "LdEigen" + contents: + - getEigenList + + - subtitle: "LdScore" + contents: + - getLdMatrixList + - getLdScoreWeights + - getLdScores + + - subtitle: "LdStatistic" + contents: + - getInSample + - getLdBlocks + + - subtitle: "MultiStudyQtlDataset" + contents: + - getQtlDatasets + + - subtitle: "QtlDataset" + contents: + - getGenotypeCovariates + - getGenotypes + - getPhenotypeCovariates + - getPhenotypes + - getResidualizedGenotypes + - getResidualizedPhenotypes + - getScaleResiduals + + - subtitle: "QtlFineMappingResult" + contents: + - getFineMappingResult + + - subtitle: "QtlSumStats" + contents: + - getContexts + - getTraits + + - subtitle: "SumStatsBase" + contents: + - getQcInfo + + - subtitle: "TwasWeights" + contents: + - getCvPerformance + - getDataType + - getFits + - getStandardized + - getTwasWeights + - getWeights + + - subtitle: "TwasWeightsEntry" + contents: + # Same generic surface as TwasWeights — dispatches on per-row entries. + - getCvPerformance + - getDataType + - getFits + - getStandardized + - getVariantIds + - getWeights + + - title: "Pipelines" + desc: > + End-to-end pipeline entry points. Each takes one or more S4 + input classes and returns a SumStats, FineMappingResult, + TwasWeights collection, or a tabular summary. + contents: + - causalInferencePipeline + - colocboostPipeline + - colocPipeline + - ctwasPipeline + - fineMappingPipeline + - mashPipeline + - qtlEnrichmentPipeline + - twasWeightsPipeline + + - title: "Quality control" + desc: > + Summary-statistics QC orchestrator plus the underlying allele + harmonization, LD-mismatch, kriging, SLALOM, DENTIST, RAISS, + and relatedness checks. + contents: + - summaryStatsQc + - alignVariantNames + - ldMismatchQc + - krigingOutlierQc + - slalom + - dentist + - dentistSingleWindow + - autoDecision + - raiss + - mergeVariantInfo + - parseCsCorr + - filterRelatedness + + - title: "LD infrastructure" + desc: > + Loading and manipulating LD matrices, plus design-matrix + conditioning utilities. + contents: + - loadLdMatrix + - loadLdSketch + - checkLd + - enforceDesignFullRank + - ldClumpByScore + - ldLoader + - ldPruneByCorrelation + - filterVariantsByLdReference + + - title: "Genotype I/O" + desc: > + Reading and manipulating per-region genotype data outside the + GenotypeHandle accessor surface. + contents: + - extractBlockGenotypes + - computeBlockLdCor + - loadGenotypeRegion + - readAfreq + - getRefVariantInfo + - invertMinmaxScaling + + - title: "Variant ID and region helpers" + desc: > + Parsing, formatting, and overlap checks for variant IDs + (`chr:pos:A2:A1`) and genomic regions (`chr:start-end`). + contents: + - parseVariantId + - normalizeVariantId + - classifyVariantType + - regionToDf + - regionsOverlap + - findOverlappingRegions + + - title: "Fine-mapping" + desc: > + SuSiE-family fit wrappers (individual-level and summary-stat + variants) plus the shared post-processing that produces the + unified `top_loci` table. The VCF writer for fine-mapping + results is documented under + \code{FineMappingResultBase} in the Class methods section above. + contents: + - susieWeights + - susieRssWeights + - susieInfWeights + - susieInfRssWeights + - susieAshWeights + - susieAshRssWeights + - mvsusieWeights + - mvsusieRssWeights + - fitMvsusie + - fitMvsusieRss + - fitFsusie + - fitSusieInfThenSusieRss + - fsusieGetCs + - fsusieWrapper + - getSusieResult + - buildTopLoci + - extractCsInfo + - extractTopPipInfo + - formatFinemappingOutput + - lbfToAlpha + - postprocessFinemappingFits + + - title: "TWAS" + desc: > + Per-method weight-training functions invoked by + `twasWeightsPipeline()`, the cross-validation / ensembling + machinery that combines them, and multi-method p-value + combination. Grouped by method family below. (SuSiE-family + weight functions live under "Fine-mapping" above since they + double as fine-mappers.) + + - subtitle: "Penalized regression" + contents: + - lassosumRss + - lassosumRssWeights + - mcpWeights + - mcpRssWeights + - scadWeights + - scadRssWeights + - l0learnWeights + - l0learnRssWeights + - penalizedRss + + - subtitle: "Bayesian alphabet" + contents: + - bayesAlphabetWeights + - bayesAWeights + - bayesBWeights + - bayesCWeights + - bayesLWeights + - bayesNWeights + - bayesRWeights + - bLassoWeights + - prsCs + - prsCsWeights + - sdpr + - sdprWeights + - dprWeights + + - subtitle: "mr.mash / mr.ash" + contents: + - mrashWeights + - mrAshRssWeights + - mrmashWeights + - mrmashRssWeights + - mrmashWrapper + - buildMrmashPriorMatrices + - computeCovDiag + - computeCovFlash + + - subtitle: "Ensembling / cross-validation / prediction" + contents: + - learnTwasWeights + - ensembleWeights + - twasWeightsCv + - twasPredict + - twasZ + - estimateSparsity + + - subtitle: "Multi-method p-value combination" + contents: + - combinePValues + + - title: "mash helpers" + desc: > + Utilities exposed by the mash track for posterior contrast + construction, mixture management, and meta-analysis. + contents: + - makePairwiseContrastCol + - fitMashContrast + - metaAnalysisPerCell + - sanitizeMashData + - sliceMashData + - updateMashModelCov + + - title: "Heritability and enrichment" + desc: > + Heritability estimation, LD-score computation, stratified + LD-score regression utilities, and per-variant QTL/GWAS + enrichment analysis (low-level kernel that powers + \code{qtlEnrichmentPipeline}). + contents: + - estimateH2 + - computeLdScores + - computeSldscAnnotSd + - computeSldscMRef + - readAnnotations + - readSldscTrait + - standardizeSldscTrait + - h2EstimateToSldscTrait + - isBinarySldscAnnot + - metaSldscRandom + - qtlEnrichment + + - title: "Bundled datasets and example helpers" + desc: > + Synthetic data shipped with the package for vignettes and + tests. `fixupExampleGenotypePaths()` re-points the bundled + GenotypeHandle paths at the install-time `inst/extdata/` + location. + contents: + - fixupExampleGenotypePaths + - qtl_dataset_example + - qtl_sumstats_example + - qtl_sumstats_multicontext_example + - gwas_sumstats_s4_example + - multi_study_qtl_dataset_example + - eqtl_region_example + - gwas_sumstats_example + - gwas_finemapping_example + - qtl_finemapping_example + - multitraite_data + + - title: internal + desc: > + Deprecated entry points retained for backward compatibility. + Each emits a `.Deprecated()` warning and forwards to the + canonical replacement. Hidden from the reference index but + still searchable and rendered. + contents: + - alleleQc + - matchRefPanel + - xqtlEnrichmentWrapper + - computeQtlEnrichment + - colocWrapper + - colocPostProcessor + - enlocPipeline + - ctwasBimfileLoader + - getCtwasMetaData + - loadRegionalAssociationData + - loadRegionalUnivariateData + - loadRegionalRegressionData + - loadRegionalMultivariateData + - loadRegionalFunctionalData + - loadTwasWeights + - loadRssData + - loadMultitaskRegionalData + - regionDataToIndInput + - regionDataToRssInput + - harmonizeTwas + - harmonizeGwas + - loadStudyLd + - univariateAnalysisPipeline + - rssAnalysisPipeline + - multivariateAnalysisPipeline + - susieRssPipeline + - twasPipeline + - twasMultivariateWeightsPipeline + - standardiseSumstatsColumns + - loadTsvRegion + - batchLoadTwasWeights + - loadMultitraitTensorqtlSumstat + - loadMultitraitRSumstat + # Implementation details with .Rd topics but no front-facing role. + - as.data.frame.GwasSumStats + - rescaleCovW0 + - "getSumStats,GwasSumStats-method" + footer: structure: left: developed_by diff --git a/man/FineMappingEntry.Rd b/man/FineMappingEntry.Rd index bc1489bf..acbf0e39 100644 --- a/man/FineMappingEntry.Rd +++ b/man/FineMappingEntry.Rd @@ -2,48 +2,29 @@ % Please edit documentation in R/FineMappingEntry.R \name{FineMappingEntry} \alias{FineMappingEntry} -\title{TWAS Weights Entry (per-tuple payload)} +\title{Create a FineMappingEntry Object} \usage{ -FineMappingEntry(variantIds, trimmedFit, topLoci, sumstats = NULL) +FineMappingEntry(variantIds, susieFit, topLoci) } \arguments{ -\item{variantIds}{Character vector of variant IDs.} - -\item{trimmedFit}{Method-specific fit object.} - -\item{topLoci}{Long-format \code{data.frame}.} - -\item{sumstats}{Optional list of summary statistics, or \code{NULL}.} +\item{variantIds}{Character vector of variant IDs in fit order.} + +\item{susieFit}{The SuSiE fit object (full or trimmed; controlled by +the pipeline's \code{trim} parameter).} + +\item{topLoci}{Per-variant \code{data.frame} in canonical schema: +identity columns (\code{variant_id, chrom, pos, A1, A2}), context +(\code{N, MAF}), marginal columns (\code{marginal_beta, +marginal_se, marginal_z, marginal_p}), posterior columns +(\code{pip, posterior_mean, posterior_sd, cs_*, cs_*_purity}), +pipeline stamps (\code{method, gene, event, grange_start, +grange_end}). Unfiltered: one row per variant in the fit.} } \value{ A \code{FineMappingEntry} object. } \description{ -S4 container for one method's TWAS weights, attached to - a \code{TwasWeights} row. One entry corresponds to one - \code{(study, context, trait, method)} tuple. - Construct a \code{FineMappingEntry} payload for one \code{(study, context, trait, method)} row of a \code{FineMappingResult} collection. } -\section{Slots}{ - -\describe{ -\item{\code{variantIds}}{Character vector of variant IDs that have weights.} - -\item{\code{weights}}{Numeric vector (single-method, single-outcome) or -matrix (multi-outcome).} - -\item{\code{fits}}{Optional method-specific fit object.} - -\item{\code{cvPerformance}}{Optional named list of CV metrics (\code{rsq}, -\code{pval}, etc.).} - -\item{\code{standardized}}{Logical (length 1). Whether the weights are on the -standardized scale.} - -\item{\code{dataType}}{Data-type tag for downstream usage (e.g., -\code{"expression"}, \code{"splicing"}); may be \code{NULL}.} -}} - diff --git a/man/FineMappingResultBase-class.Rd b/man/FineMappingResultBase-class.Rd index 969f311e..4e10a942 100644 --- a/man/FineMappingResultBase-class.Rd +++ b/man/FineMappingResultBase-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/FineMappingResultBase.R +% Please edit documentation in R/AllClasses.R \docType{class} \name{FineMappingResultBase-class} \alias{FineMappingResultBase-class} diff --git a/man/SumStatsBase-class.Rd b/man/SumStatsBase-class.Rd index e841ae4e..5c332e83 100644 --- a/man/SumStatsBase-class.Rd +++ b/man/SumStatsBase-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SumStatsBase.R +% Please edit documentation in R/AllClasses.R \docType{class} \name{SumStatsBase-class} \alias{SumStatsBase-class} diff --git a/man/adjustPips.Rd b/man/adjustPips.Rd index a8e8187d..1cb85d08 100644 --- a/man/adjustPips.Rd +++ b/man/adjustPips.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingEntry.R, -% R/FineMappingResultBase.R +% Please edit documentation in R/AllGenerics.R, R/AllClasses.R, +% R/FineMappingEntry.R \name{adjustPips} \alias{adjustPips} -\alias{adjustPips,FineMappingEntry-method} \alias{adjustPips,FineMappingResultBase-method} +\alias{adjustPips,FineMappingEntry-method} \title{Renormalize Fine-Mapping PIPs to a Variant Subset} \usage{ adjustPips(x, keepVariants, ...) -\S4method{adjustPips}{FineMappingEntry}(x, keepVariants, ...) - \S4method{adjustPips}{FineMappingResultBase}(x, keepVariants, ...) + +\S4method{adjustPips}{FineMappingEntry}(x, keepVariants, ...) } \arguments{ \item{x}{A \code{FineMappingEntry} or \code{FineMappingResultBase}.} diff --git a/man/buildTopLoci.Rd b/man/buildTopLoci.Rd index 19936448..91fcfdb4 100644 --- a/man/buildTopLoci.Rd +++ b/man/buildTopLoci.Rd @@ -11,7 +11,7 @@ buildTopLoci( sumstats = NULL, af = NULL, method, - signalCutoff = 0.1, + signalCutoff = 0, dataX = NULL, dataY = NULL, otherQuantities = NULL, diff --git a/man/computeLdScores.Rd b/man/computeLdScores.Rd index 21ea422c..ab4f0445 100644 --- a/man/computeLdScores.Rd +++ b/man/computeLdScores.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/h2EstimationWrappers.R +% Please edit documentation in R/AllGenerics.R, R/h2EstimationWrappers.R \name{computeLdScores} \alias{computeLdScores} \alias{computeLdScores,LdEigen-method} diff --git a/man/ctwasPipeline.Rd b/man/ctwasPipeline.Rd index e9a143a2..d568030e 100644 --- a/man/ctwasPipeline.Rd +++ b/man/ctwasPipeline.Rd @@ -8,6 +8,7 @@ ctwasPipeline( gwasSumStats, twasWeights, twasZ = NULL, + fineMappingResult = NULL, regionId = "block1", thin = 0.1, niterPrefit = 3L, @@ -16,6 +17,10 @@ ctwasPipeline( groupPriorVarStructure = c("shared_type", "shared_context", "shared_nonSNP", "shared_all", "independent"), ncore = 1L, + twasWeightCutoff = 0, + csMinCor = 0.8, + minPipCutoff = 0, + maxNumVariants = Inf, ... ) } @@ -31,6 +36,14 @@ context, trait, method) weights over the same LD block.} per-(trait, context) Z is used as the \code{z_gene} input to \code{ctwas_sumstats} so it is not recomputed.} +\item{fineMappingResult}{Optional \code{QtlFineMappingResult} or +\code{GwasFineMappingResult} carrying the per-variant PIP and +credible-set membership data used by the CS / PIP rescue filters +(\code{csMinCor} and \code{minPipCutoff}). When \code{NULL} +(default) the smart filters are no-ops; only the magnitude filter +(\code{twasWeightCutoff}) and the per-gene cap +(\code{maxNumVariants}, ordered by \code{|weight|}) apply.} + \item{regionId}{Optional character (length 1) label for the LD block. Default \code{"block1"}.} @@ -42,6 +55,28 @@ block. Default \code{"block1"}.} \item{ncore}{Number of cores. Default \code{1}.} +\item{twasWeightCutoff}{Numeric (length 1). Drop variants with +\code{|weight| < twasWeightCutoff} from each gene's weight matrix +before ctwas sees it. Default \code{0} (no filter).} + +\item{csMinCor}{Numeric (length 1). When \code{fineMappingResult} is +provided, variants belonging to any 95\% credible set with purity +(\code{min_abs_corr}) \code{>= csMinCor} are marked as must-keep +and survive the per-gene cap. Default \code{0.8}. Ignored without +a \code{fineMappingResult}.} + +\item{minPipCutoff}{Numeric (length 1). When +\code{fineMappingResult} is provided, variants with PIP greater +than \code{minPipCutoff} are marked as must-keep and survive the +per-gene cap. Default \code{0} (no PIP rescue). Ignored without a +\code{fineMappingResult}.} + +\item{maxNumVariants}{Numeric (length 1). Cap on per-gene variant +count. When the gene has more variants than this, keep all +must-keep variants and fill remaining slots by descending PIP +(when available) or descending \code{|weight|}. Default +\code{Inf} (no cap).} + \item{...}{Additional arguments forwarded to \code{ctwas::ctwas_sumstats}.} } diff --git a/man/estimateH2.Rd b/man/estimateH2.Rd index cc833b8d..3c9f677a 100644 --- a/man/estimateH2.Rd +++ b/man/estimateH2.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/h2EstimationWrappers.R +% Please edit documentation in R/AllGenerics.R, R/h2EstimationWrappers.R \name{estimateH2} \alias{estimateH2} \alias{estimateH2,GwasSumStats,LdStatistic-method} diff --git a/man/fineMappingPipeline.Rd b/man/fineMappingPipeline.Rd index f7c771c3..2ba95d7a 100644 --- a/man/fineMappingPipeline.Rd +++ b/man/fineMappingPipeline.Rd @@ -27,6 +27,11 @@ fineMappingPipeline(data, ...) fineMappingResult = NULL, naAction = c("drop", "impute"), verbose = 1, + trim = TRUE, + phenotypeCovariatesToResidualize = NULL, + genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, ... ) @@ -46,6 +51,11 @@ fineMappingPipeline(data, ...) fineMappingResult = NULL, naAction = c("drop", "impute"), verbose = 1, + trim = TRUE, + phenotypeCovariatesToResidualize = NULL, + genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, ... ) @@ -62,6 +72,7 @@ fineMappingPipeline(data, ...) minAbsCorr = 0.8, fineMappingResult = NULL, verbose = 1, + trim = TRUE, ... ) @@ -75,6 +86,7 @@ fineMappingPipeline(data, ...) minAbsCorr = 0.8, fineMappingResult = NULL, verbose = 1, + trim = TRUE, ... ) @@ -136,6 +148,41 @@ to use as a resume cache; tuples already present are not refit.} \item{verbose}{Verbosity (0 silent, 1 default). Default \code{1}.} +\item{trim}{Logical (length 1). When \code{TRUE} (default) the +\code{susieFit} slot on each output \code{FineMappingEntry} carries +a trimmed view of the SuSiE fit (the minimal subset needed by +downstream pipelines). When \code{FALSE} the full untrimmed +\code{susie()} return is retained so accessors like +\code{getSusieFit()} and non-default-coverage queries through +\code{getCs()} can read the full posterior matrices +(\code{lbf_variable}, \code{mu}, \code{mu2}, \code{V}). The +per-variant \code{topLoci} table is always fully populated +regardless of \code{trim}.} + +\item{phenotypeCovariatesToResidualize}{Character vector (or +\code{NULL}) of phenotype-covariate names to residualize against. +\code{NULL} (default) uses every available phenotype covariate. +Only meaningful when the input is a \code{QtlDataset} / +\code{MultiStudyQtlDataset} (ignored for sumstat inputs).} + +\item{genotypeCovariatesToResidualize}{Character vector (or +\code{NULL}) of genotype-covariate column names to residualize +against. \code{NULL} uses every available genotype covariate.} + +\item{residualizePhenotypeCovariates}{Logical (length 1). When +\code{TRUE} (default) residualize against the phenotype-side +covariates listed in \code{phenotypeCovariatesToResidualize}. Set +\code{FALSE} to disable phenotype-covariate residualization +entirely. The marginal univariate effects stored on each +\code{FineMappingEntry} obey the same residualization choice as +the SuSiE fit itself — they are computed against the same +residualized \code{X} / \code{Y}.} + +\item{residualizeGenotypeCovariates}{Logical (length 1). When +\code{TRUE} (default) residualize against the genotype-side +covariates listed in \code{genotypeCovariatesToResidualize}. Set +\code{FALSE} to disable.} + \item{ldBlocks}{For \code{GwasSumStats} input only: an \code{LdBlocks} object describing the LD-block partition. The pipeline performs SuSiE-RSS fine-mapping per (study, ldBlock). diff --git a/man/getAnnotationMeta.Rd b/man/getAnnotationMeta.Rd index 2285d964..908ecbc9 100644 --- a/man/getAnnotationMeta.Rd +++ b/man/getAnnotationMeta.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/AnnotationMatrix.R +% Please edit documentation in R/AllGenerics.R, R/AnnotationMatrix.R \name{getAnnotationMeta} \alias{getAnnotationMeta} \alias{getAnnotationMeta,AnnotationMatrix-method} diff --git a/man/getAnnotations.Rd b/man/getAnnotations.Rd index 8e943ea3..db101dde 100644 --- a/man/getAnnotations.Rd +++ b/man/getAnnotations.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/AnnotationMatrix.R +% Please edit documentation in R/AllGenerics.R, R/AnnotationMatrix.R \name{getAnnotations} \alias{getAnnotations} \alias{getAnnotations,AnnotationMatrix-method} diff --git a/man/getBlockMetadata.Rd b/man/getBlockMetadata.Rd index d02b67e8..407474e3 100644 --- a/man/getBlockMetadata.Rd +++ b/man/getBlockMetadata.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{getBlockMetadata} \alias{getBlockMetadata} \alias{getBlockMetadata,LdData-method} diff --git a/man/getBlocks.Rd b/man/getBlocks.Rd index 8b0c79ed..3ee36771 100644 --- a/man/getBlocks.Rd +++ b/man/getBlocks.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdBlocks.R +% Please edit documentation in R/AllGenerics.R, R/LdBlocks.R \name{getBlocks} \alias{getBlocks} \alias{getBlocks,LdBlocks-method} diff --git a/man/getContexts.Rd b/man/getContexts.Rd index 8dc1c89c..a497ca47 100644 --- a/man/getContexts.Rd +++ b/man/getContexts.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GwasFineMappingResult.R, +% Please edit documentation in R/AllGenerics.R, R/GwasFineMappingResult.R, % R/QtlDataset.R, R/QtlFineMappingResult.R, R/qtlSumStats.R, R/twasWeights.R \name{getContexts} \alias{getContexts} diff --git a/man/getCorrelation.Rd b/man/getCorrelation.Rd index 49c366cc..3503e2e4 100644 --- a/man/getCorrelation.Rd +++ b/man/getCorrelation.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{getCorrelation} \alias{getCorrelation} \alias{getCorrelation,LdData-method} diff --git a/man/getCs.Rd b/man/getCs.Rd index 05be06b2..4decf71d 100644 --- a/man/getCs.Rd +++ b/man/getCs.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingEntry.R, +% Please edit documentation in R/AllGenerics.R, R/FineMappingEntry.R, % R/GwasFineMappingResult.R, R/QtlFineMappingResult.R \name{getCs} \alias{getCs} diff --git a/man/getCvPerformance.Rd b/man/getCvPerformance.Rd index 6811148b..551aaf9b 100644 --- a/man/getCvPerformance.Rd +++ b/man/getCvPerformance.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/TwasWeightsEntry.R, +% Please edit documentation in R/AllGenerics.R, R/TwasWeightsEntry.R, % R/twasWeights.R \name{getCvPerformance} \alias{getCvPerformance} diff --git a/man/getDataType.Rd b/man/getDataType.Rd index 5a665213..79eca7d5 100644 --- a/man/getDataType.Rd +++ b/man/getDataType.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/TwasWeightsEntry.R, +% Please edit documentation in R/AllGenerics.R, R/TwasWeightsEntry.R, % R/twasWeights.R \name{getDataType} \alias{getDataType} diff --git a/man/getEffects.Rd b/man/getEffects.Rd deleted file mode 100644 index 007a4de5..00000000 --- a/man/getEffects.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R -\name{getEffects} -\alias{getEffects} -\title{Get Per-Effect Fine-Mapping Summary} -\usage{ -getEffects(x, ...) -} -\arguments{ -\item{x}{A \code{FineMappingEntry} or \code{FineMappingResult}.} - -\item{...}{Class-specific selection arguments.} -} -\value{ -A data.frame with one row per effect. -} -\description{ -Extract per-effect information from a fine-mapping result. -} diff --git a/man/getEigenList.Rd b/man/getEigenList.Rd index 83bef250..86f8737c 100644 --- a/man/getEigenList.Rd +++ b/man/getEigenList.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdEigen.R +% Please edit documentation in R/AllGenerics.R, R/LdEigen.R \name{getEigenList} \alias{getEigenList} \alias{getEigenList,LdEigen-method} diff --git a/man/getEnrichment.Rd b/man/getEnrichment.Rd index d8feb810..e7002976 100644 --- a/man/getEnrichment.Rd +++ b/man/getEnrichment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/H2Estimate.R +% Please edit documentation in R/AllGenerics.R, R/H2Estimate.R \name{getEnrichment} \alias{getEnrichment} \alias{getEnrichment,H2Estimate-method} diff --git a/man/getFineMappingResult.Rd b/man/getFineMappingResult.Rd index 46bd3b3c..f98f908a 100644 --- a/man/getFineMappingResult.Rd +++ b/man/getFineMappingResult.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GwasFineMappingResult.R +% Please edit documentation in R/AllGenerics.R, R/GwasFineMappingResult.R \name{getFineMappingResult} \alias{getFineMappingResult} \alias{getFineMappingResult,GwasFineMappingResult-method} diff --git a/man/getFits.Rd b/man/getFits.Rd index 0933d493..b584b0cd 100644 --- a/man/getFits.Rd +++ b/man/getFits.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/TwasWeightsEntry.R, +% Please edit documentation in R/AllGenerics.R, R/TwasWeightsEntry.R, % R/twasWeights.R \name{getFits} \alias{getFits} diff --git a/man/getFormat.Rd b/man/getFormat.Rd index 2290edf3..772faab9 100644 --- a/man/getFormat.Rd +++ b/man/getFormat.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GenotypeHandle.R +% Please edit documentation in R/AllGenerics.R, R/GenotypeHandle.R \name{getFormat} \alias{getFormat} \alias{getFormat,GenotypeHandle-method} diff --git a/man/getGenome.Rd b/man/getGenome.Rd index 6d8e0293..52ea4421 100644 --- a/man/getGenome.Rd +++ b/man/getGenome.Rd @@ -1,23 +1,23 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/AnnotationMatrix.R, -% R/LdBlocks.R, R/LdStatistic.R, R/SumStatsBase.R +% Please edit documentation in R/AllGenerics.R, R/AllClasses.R, +% R/AnnotationMatrix.R, R/LdBlocks.R, R/LdStatistic.R \name{getGenome} \alias{getGenome} +\alias{getGenome,SumStatsBase-method} \alias{getGenome,AnnotationMatrix-method} \alias{getGenome,LdBlocks-method} \alias{getGenome,LdStatistic-method} -\alias{getGenome,SumStatsBase-method} \title{Get the Genome Build} \usage{ getGenome(x, ...) +\S4method{getGenome}{SumStatsBase}(x, ...) + \S4method{getGenome}{AnnotationMatrix}(x, ...) \S4method{getGenome}{LdBlocks}(x, ...) \S4method{getGenome}{LdStatistic}(x, ...) - -\S4method{getGenome}{SumStatsBase}(x, ...) } \arguments{ \item{x}{A \code{GwasSumStats} or \code{QtlSumStats} object.} diff --git a/man/getGenotypeCovariates.Rd b/man/getGenotypeCovariates.Rd index a366da67..6b5aa30b 100644 --- a/man/getGenotypeCovariates.Rd +++ b/man/getGenotypeCovariates.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/QtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/QtlDataset.R \name{getGenotypeCovariates} \alias{getGenotypeCovariates} \alias{getGenotypeCovariates,QtlDataset-method} diff --git a/man/getGenotypeHandle.Rd b/man/getGenotypeHandle.Rd index 5eec7ea4..0db3f060 100644 --- a/man/getGenotypeHandle.Rd +++ b/man/getGenotypeHandle.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{getGenotypeHandle} \alias{getGenotypeHandle} \alias{getGenotypeHandle,LdData-method} diff --git a/man/getGenotypes.Rd b/man/getGenotypes.Rd index 40bacd8d..f0ffc322 100644 --- a/man/getGenotypes.Rd +++ b/man/getGenotypes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R, R/QtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R, R/QtlDataset.R \name{getGenotypes} \alias{getGenotypes} \alias{getGenotypes,LdData-method} diff --git a/man/getH2.Rd b/man/getH2.Rd index 82adc18f..d0af0e79 100644 --- a/man/getH2.Rd +++ b/man/getH2.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/H2Estimate.R +% Please edit documentation in R/AllGenerics.R, R/H2Estimate.R \name{getH2} \alias{getH2} \alias{getH2,H2Estimate-method} diff --git a/man/getInSample.Rd b/man/getInSample.Rd index 8df13d8c..504f3366 100644 --- a/man/getInSample.Rd +++ b/man/getInSample.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdStatistic.R +% Please edit documentation in R/AllGenerics.R, R/LdStatistic.R \name{getInSample} \alias{getInSample} \alias{getInSample,LdStatistic-method} diff --git a/man/getLbf.Rd b/man/getLbf.Rd deleted file mode 100644 index 919ce22b..00000000 --- a/man/getLbf.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R -\name{getLbf} -\alias{getLbf} -\title{Get Log Bayes Factors} -\usage{ -getLbf(x, ...) -} -\arguments{ -\item{x}{A \code{FineMappingEntry} or \code{FineMappingResult}.} - -\item{...}{Class-specific selection arguments.} -} -\value{ -A data.frame with columns \code{variant_id} and one numeric column - per effect. -} -\description{ -Extract per-variant log Bayes factors from a fine-mapping result. -} diff --git a/man/getLdBlocks.Rd b/man/getLdBlocks.Rd index a187ad68..bd845e0b 100644 --- a/man/getLdBlocks.Rd +++ b/man/getLdBlocks.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdStatistic.R +% Please edit documentation in R/AllGenerics.R, R/LdStatistic.R \name{getLdBlocks} \alias{getLdBlocks} \alias{getLdBlocks,LdStatistic-method} diff --git a/man/getLdMatrixList.Rd b/man/getLdMatrixList.Rd index 9170af99..77abc13b 100644 --- a/man/getLdMatrixList.Rd +++ b/man/getLdMatrixList.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdScore.R +% Please edit documentation in R/AllGenerics.R, R/LdScore.R \name{getLdMatrixList} \alias{getLdMatrixList} \alias{getLdMatrixList,LdScore-method} diff --git a/man/getLdScoreWeights.Rd b/man/getLdScoreWeights.Rd index 12f3d553..203fac7e 100644 --- a/man/getLdScoreWeights.Rd +++ b/man/getLdScoreWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdScore.R +% Please edit documentation in R/AllGenerics.R, R/LdScore.R \name{getLdScoreWeights} \alias{getLdScoreWeights} \alias{getLdScoreWeights,LdScore-method} diff --git a/man/getLdScores.Rd b/man/getLdScores.Rd index 459e0290..3a88e759 100644 --- a/man/getLdScores.Rd +++ b/man/getLdScores.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdScore.R +% Please edit documentation in R/AllGenerics.R, R/LdScore.R \name{getLdScores} \alias{getLdScores} \alias{getLdScores,LdScore-method} diff --git a/man/getLdSketch.Rd b/man/getLdSketch.Rd index ad582c72..0eddf6ab 100644 --- a/man/getLdSketch.Rd +++ b/man/getLdSketch.Rd @@ -1,19 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingResultBase.R, -% R/SumStatsBase.R, R/twasWeights.R +% Please edit documentation in R/AllGenerics.R, R/AllClasses.R, R/twasWeights.R \name{getLdSketch} \alias{getLdSketch} -\alias{getLdSketch,FineMappingResultBase-method} \alias{getLdSketch,SumStatsBase-method} +\alias{getLdSketch,FineMappingResultBase-method} \alias{getLdSketch,TwasWeights-method} \title{Get LD Sketch} \usage{ getLdSketch(x, ...) -\S4method{getLdSketch}{FineMappingResultBase}(x, ...) - \S4method{getLdSketch}{SumStatsBase}(x, ...) +\S4method{getLdSketch}{FineMappingResultBase}(x, ...) + \S4method{getLdSketch}{TwasWeights}(x, ...) } \arguments{ diff --git a/man/getLocal.Rd b/man/getLocal.Rd index c658d438..c83a8047 100644 --- a/man/getLocal.Rd +++ b/man/getLocal.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/H2Estimate.R +% Please edit documentation in R/AllGenerics.R, R/H2Estimate.R \name{getLocal} \alias{getLocal} \alias{getLocal,H2Estimate-method} diff --git a/man/getMaf.Rd b/man/getMaf.Rd index 6d839e22..63033d1b 100644 --- a/man/getMaf.Rd +++ b/man/getMaf.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/QtlDataset.R, +% Please edit documentation in R/AllGenerics.R, R/QtlDataset.R, % R/gwasSumStats.R, R/qtlSumStats.R \name{getMaf} \alias{getMaf} diff --git a/man/getMarginalEffects.Rd b/man/getMarginalEffects.Rd new file mode 100644 index 00000000..a3302048 --- /dev/null +++ b/man/getMarginalEffects.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/FineMappingEntry.R, +% R/GwasFineMappingResult.R, R/QtlFineMappingResult.R +\name{getMarginalEffects} +\alias{getMarginalEffects} +\alias{getMarginalEffects,FineMappingEntry-method} +\alias{getMarginalEffects,GwasFineMappingResult-method} +\alias{getMarginalEffects,QtlFineMappingResult-method} +\title{Get Marginal Effects} +\usage{ +getMarginalEffects(x, maxPval = NULL, ...) + +\S4method{getMarginalEffects}{FineMappingEntry}(x, maxPval = NULL, ...) + +\S4method{getMarginalEffects}{GwasFineMappingResult}( + x, + maxPval = NULL, + study = NULL, + context = NULL, + trait = NULL, + method = NULL, + ... +) + +\S4method{getMarginalEffects}{QtlFineMappingResult}( + x, + maxPval = NULL, + study = NULL, + context = NULL, + trait = NULL, + method = NULL, + ... +) +} +\arguments{ +\item{x}{A \code{FineMappingEntry} or \code{FineMappingResult}.} + +\item{maxPval}{Optional numeric (length 1). When non-\code{NULL}, +filter rows where \code{p > maxPval}. Default \code{NULL} +(no filter — return all variants).} + +\item{...}{Class-specific selection arguments.} +} +\value{ +A \code{data.frame}. +} +\description{ +Extract per-variant marginal univariate effects from a + fine-mapping entry or result. Returns a \code{data.frame} with + identity columns (\code{variant_id, chrom, pos, A1, A2}), context + (\code{N, MAF}), and the marginal effect columns + (\code{beta, se, z, p}). Populated uniformly across the + individual-level and RSS paths. +} diff --git a/man/getMethodNames.Rd b/man/getMethodNames.Rd index 2f9ac13c..fac009c4 100644 --- a/man/getMethodNames.Rd +++ b/man/getMethodNames.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingResultBase.R, -% R/twasWeights.R +% Please edit documentation in R/AllGenerics.R, R/AllClasses.R, R/twasWeights.R \name{getMethodNames} \alias{getMethodNames} \alias{getMethodNames,FineMappingResultBase-method} diff --git a/man/getMixtureWeights.Rd b/man/getMixtureWeights.Rd index 67055289..28ebe3af 100644 --- a/man/getMixtureWeights.Rd +++ b/man/getMixtureWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{getMixtureWeights} \alias{getMixtureWeights} \alias{getMixtureWeights,LdData-method} diff --git a/man/getMolecularId.Rd b/man/getMolecularId.Rd deleted file mode 100644 index 72d02c5e..00000000 --- a/man/getMolecularId.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R -\name{getMolecularId} -\alias{getMolecularId} -\title{Get Molecular ID (legacy)} -\usage{ -getMolecularId(x) -} -\arguments{ -\item{x}{The object.} -} -\value{ -Character vector. -} -\description{ -Legacy accessor. The molecular identifier is now stored - as the \code{trait} column on \code{TwasWeights} and - \code{FineMappingResult} collections — use \code{getTraits(x)} - instead. -} diff --git a/man/getN.Rd b/man/getN.Rd index bd73e8b1..bdc73e9b 100644 --- a/man/getN.Rd +++ b/man/getN.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/gwasSumStats.R, +% Please edit documentation in R/AllGenerics.R, R/gwasSumStats.R, % R/qtlSumStats.R \name{getN} \alias{getN} diff --git a/man/getNRef.Rd b/man/getNRef.Rd index cf0e4b54..54b1dad4 100644 --- a/man/getNRef.Rd +++ b/man/getNRef.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R, R/LdStatistic.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R, R/LdStatistic.R \name{getNRef} \alias{getNRef} \alias{getNRef,LdData-method} diff --git a/man/getNSamples.Rd b/man/getNSamples.Rd index d946daa9..c04892ee 100644 --- a/man/getNSamples.Rd +++ b/man/getNSamples.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GenotypeHandle.R +% Please edit documentation in R/AllGenerics.R, R/GenotypeHandle.R \name{getNSamples} \alias{getNSamples} \alias{getNSamples,GenotypeHandle-method} diff --git a/man/getPath.Rd b/man/getPath.Rd index aaa19c8f..aea08407 100644 --- a/man/getPath.Rd +++ b/man/getPath.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GenotypeHandle.R +% Please edit documentation in R/AllGenerics.R, R/GenotypeHandle.R \name{getPath} \alias{getPath} \alias{getPath,GenotypeHandle-method} diff --git a/man/getPgenPtr.Rd b/man/getPgenPtr.Rd index 3a777ed0..8d118086 100644 --- a/man/getPgenPtr.Rd +++ b/man/getPgenPtr.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GenotypeHandle.R +% Please edit documentation in R/AllGenerics.R, R/GenotypeHandle.R \name{getPgenPtr} \alias{getPgenPtr} \alias{getPgenPtr,GenotypeHandle-method} diff --git a/man/getPhenotypeCovariates.Rd b/man/getPhenotypeCovariates.Rd index c583fd47..30cd4ff4 100644 --- a/man/getPhenotypeCovariates.Rd +++ b/man/getPhenotypeCovariates.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/QtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/QtlDataset.R \name{getPhenotypeCovariates} \alias{getPhenotypeCovariates} \alias{getPhenotypeCovariates,QtlDataset-method} diff --git a/man/getPhenotypes.Rd b/man/getPhenotypes.Rd index 92a946b3..fdc41c22 100644 --- a/man/getPhenotypes.Rd +++ b/man/getPhenotypes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/QtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/QtlDataset.R \name{getPhenotypes} \alias{getPhenotypes} \alias{getPhenotypes,QtlDataset-method} diff --git a/man/getPip.Rd b/man/getPip.Rd index ff171bcf..0a4ed899 100644 --- a/man/getPip.Rd +++ b/man/getPip.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingEntry.R, +% Please edit documentation in R/AllGenerics.R, R/FineMappingEntry.R, % R/GwasFineMappingResult.R, R/QtlFineMappingResult.R \name{getPip} \alias{getPip} diff --git a/man/getQcInfo.Rd b/man/getQcInfo.Rd index cb2629b5..e5d9b357 100644 --- a/man/getQcInfo.Rd +++ b/man/getQcInfo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/SumStatsBase.R +% Please edit documentation in R/AllGenerics.R, R/AllClasses.R \name{getQcInfo} \alias{getQcInfo} \alias{getQcInfo,SumStatsBase-method} diff --git a/man/getQtlDatasets.Rd b/man/getQtlDatasets.Rd index a430c424..09fab6da 100644 --- a/man/getQtlDatasets.Rd +++ b/man/getQtlDatasets.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/MultiStudyQtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/MultiStudyQtlDataset.R \name{getQtlDatasets} \alias{getQtlDatasets} \alias{getQtlDatasets,MultiStudyQtlDataset-method} diff --git a/man/getRefPanel.Rd b/man/getRefPanel.Rd index 951031e5..a17be34a 100644 --- a/man/getRefPanel.Rd +++ b/man/getRefPanel.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{getRefPanel} \alias{getRefPanel} \alias{getRefPanel,LdData-method} diff --git a/man/getResidualizedGenotypes.Rd b/man/getResidualizedGenotypes.Rd index 1b731a52..a6e46b57 100644 --- a/man/getResidualizedGenotypes.Rd +++ b/man/getResidualizedGenotypes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/QtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/QtlDataset.R \name{getResidualizedGenotypes} \alias{getResidualizedGenotypes} \alias{getResidualizedGenotypes,QtlDataset-method} diff --git a/man/getResidualizedPhenotypes.Rd b/man/getResidualizedPhenotypes.Rd index ae00aec7..3cdf022d 100644 --- a/man/getResidualizedPhenotypes.Rd +++ b/man/getResidualizedPhenotypes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/QtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/QtlDataset.R \name{getResidualizedPhenotypes} \alias{getResidualizedPhenotypes} \alias{getResidualizedPhenotypes,QtlDataset-method} diff --git a/man/getSampleIds.Rd b/man/getSampleIds.Rd index 30942b7c..2b0ff755 100644 --- a/man/getSampleIds.Rd +++ b/man/getSampleIds.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GenotypeHandle.R +% Please edit documentation in R/AllGenerics.R, R/GenotypeHandle.R \name{getSampleIds} \alias{getSampleIds} \alias{getSampleIds,GenotypeHandle-method} diff --git a/man/getScaleResiduals.Rd b/man/getScaleResiduals.Rd index aca59dcd..08288bcb 100644 --- a/man/getScaleResiduals.Rd +++ b/man/getScaleResiduals.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/QtlDataset.R +% Please edit documentation in R/AllGenerics.R, R/QtlDataset.R \name{getScaleResiduals} \alias{getScaleResiduals} \alias{getScaleResiduals,QtlDataset-method} diff --git a/man/getScoreStats.Rd b/man/getScoreStats.Rd index 13d0da1d..324021e4 100644 --- a/man/getScoreStats.Rd +++ b/man/getScoreStats.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/H2Estimate.R +% Please edit documentation in R/AllGenerics.R, R/H2Estimate.R \name{getScoreStats} \alias{getScoreStats} \alias{getScoreStats,H2Estimate-method} diff --git a/man/getSnpIdx.Rd b/man/getSnpIdx.Rd index 93d63e72..db01461a 100644 --- a/man/getSnpIdx.Rd +++ b/man/getSnpIdx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{getSnpIdx} \alias{getSnpIdx} \alias{getSnpIdx,LdData-method} diff --git a/man/getSnpInfo.Rd b/man/getSnpInfo.Rd index 1f372237..f2057401 100644 --- a/man/getSnpInfo.Rd +++ b/man/getSnpInfo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GenotypeHandle.R, +% Please edit documentation in R/AllGenerics.R, R/GenotypeHandle.R, % R/LdStatistic.R \name{getSnpInfo} \alias{getSnpInfo} diff --git a/man/getSnpRanges.Rd b/man/getSnpRanges.Rd index 5caf7d4b..d7fefbd7 100644 --- a/man/getSnpRanges.Rd +++ b/man/getSnpRanges.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/AnnotationMatrix.R +% Please edit documentation in R/AllGenerics.R, R/AnnotationMatrix.R \name{getSnpRanges} \alias{getSnpRanges} \alias{getSnpRanges,AnnotationMatrix-method} diff --git a/man/getStandardized.Rd b/man/getStandardized.Rd index ff0b959a..63fe1d6d 100644 --- a/man/getStandardized.Rd +++ b/man/getStandardized.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/TwasWeightsEntry.R, +% Please edit documentation in R/AllGenerics.R, R/TwasWeightsEntry.R, % R/twasWeights.R \name{getStandardized} \alias{getStandardized} diff --git a/man/getStudy.Rd b/man/getStudy.Rd index 43abc7d2..6cac6089 100644 --- a/man/getStudy.Rd +++ b/man/getStudy.Rd @@ -1,25 +1,25 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingResultBase.R, -% R/QtlDataset.R, R/MultiStudyQtlDataset.R, R/SumStatsBase.R, R/twasWeights.R +% Please edit documentation in R/AllGenerics.R, R/AllClasses.R, R/QtlDataset.R, +% R/MultiStudyQtlDataset.R, R/twasWeights.R \name{getStudy} \alias{getStudy} +\alias{getStudy,SumStatsBase-method} \alias{getStudy,FineMappingResultBase-method} \alias{getStudy,QtlDataset-method} \alias{getStudy,MultiStudyQtlDataset-method} -\alias{getStudy,SumStatsBase-method} \alias{getStudy,TwasWeights-method} \title{Get Study Identifier} \usage{ getStudy(x) +\S4method{getStudy}{SumStatsBase}(x) + \S4method{getStudy}{FineMappingResultBase}(x) \S4method{getStudy}{QtlDataset}(x) \S4method{getStudy}{MultiStudyQtlDataset}(x) -\S4method{getStudy}{SumStatsBase}(x) - \S4method{getStudy}{TwasWeights}(x) } \arguments{ diff --git a/man/getSumStats.Rd b/man/getSumStats.Rd index d89b03ba..62d598fa 100644 --- a/man/getSumStats.Rd +++ b/man/getSumStats.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/MultiStudyQtlDataset.R, +% Please edit documentation in R/AllGenerics.R, R/MultiStudyQtlDataset.R, % R/qtlSumStats.R \name{getSumStats} \alias{getSumStats} diff --git a/man/getSumstatDf.Rd b/man/getSumstatDf.Rd index ad2b396e..7b3c55b3 100644 --- a/man/getSumstatDf.Rd +++ b/man/getSumstatDf.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/gwasSumStats.R, +% Please edit documentation in R/AllGenerics.R, R/gwasSumStats.R, % R/qtlSumStats.R \name{getSumstatDf} \alias{getSumstatDf} diff --git a/man/getSusieFit.Rd b/man/getSusieFit.Rd new file mode 100644 index 00000000..ddf192f5 --- /dev/null +++ b/man/getSusieFit.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/FineMappingEntry.R, +% R/GwasFineMappingResult.R, R/QtlFineMappingResult.R +\name{getSusieFit} +\alias{getSusieFit} +\alias{getSusieFit,FineMappingEntry-method} +\alias{getSusieFit,GwasFineMappingResult-method} +\alias{getSusieFit,QtlFineMappingResult-method} +\title{Get SuSiE Fit} +\usage{ +getSusieFit(x, ...) + +\S4method{getSusieFit}{FineMappingEntry}(x, ...) + +\S4method{getSusieFit}{GwasFineMappingResult}(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) + +\S4method{getSusieFit}{QtlFineMappingResult}(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) +} +\arguments{ +\item{x}{A \code{FineMappingEntry} or \code{FineMappingResult}.} + +\item{...}{Class-specific selection arguments.} +} +\value{ +A list (the SuSiE fit object). +} +\description{ +Extract the SuSiE fit object from a fine-mapping entry + or result. The fit may be the trimmed view (when the pipeline ran + with the default \code{trim = TRUE}) or the full untrimmed + \code{susie()} return (when \code{trim = FALSE}). +} diff --git a/man/getTauBlocks.Rd b/man/getTauBlocks.Rd index 79b76cff..ef0794bb 100644 --- a/man/getTauBlocks.Rd +++ b/man/getTauBlocks.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/H2Estimate.R +% Please edit documentation in R/AllGenerics.R, R/H2Estimate.R \name{getTauBlocks} \alias{getTauBlocks} \alias{getTauBlocks,H2Estimate-method} diff --git a/man/getTopLoci.Rd b/man/getTopLoci.Rd index 8ce5cbed..12045a4e 100644 --- a/man/getTopLoci.Rd +++ b/man/getTopLoci.Rd @@ -1,20 +1,21 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingEntry.R, +% Please edit documentation in R/AllGenerics.R, R/FineMappingEntry.R, % R/GwasFineMappingResult.R, R/QtlFineMappingResult.R \name{getTopLoci} \alias{getTopLoci} \alias{getTopLoci,FineMappingEntry-method} \alias{getTopLoci,GwasFineMappingResult-method} \alias{getTopLoci,QtlFineMappingResult-method} -\title{Get Top Loci} +\title{Get Top Loci (posterior view)} \usage{ -getTopLoci(x, type = c("data.frame", "GRanges"), ...) +getTopLoci(x, type = c("data.frame", "GRanges"), signalCutoff = 0.025, ...) -\S4method{getTopLoci}{FineMappingEntry}(x, type = c("data.frame", "GRanges"), ...) +\S4method{getTopLoci}{FineMappingEntry}(x, type = c("data.frame", "GRanges"), signalCutoff = 0.025, ...) \S4method{getTopLoci}{GwasFineMappingResult}( x, type = c("data.frame", "GRanges"), + signalCutoff = 0.025, study = NULL, context = NULL, trait = NULL, @@ -25,6 +26,7 @@ getTopLoci(x, type = c("data.frame", "GRanges"), ...) \S4method{getTopLoci}{QtlFineMappingResult}( x, type = c("data.frame", "GRanges"), + signalCutoff = 0.025, study = NULL, context = NULL, trait = NULL, @@ -37,14 +39,22 @@ getTopLoci(x, type = c("data.frame", "GRanges"), ...) \item{type}{One of \code{"data.frame"} (default) or \code{"GRanges"}.} +\item{signalCutoff}{Numeric (length 1). Drop rows where +\code{pip <= signalCutoff}. Default \code{0.025}. Use +\code{signalCutoff = 0} to keep every variant.} + \item{...}{Class-specific selection arguments.} } \value{ A \code{data.frame} or a \code{GRanges}. } \description{ -Extract the top-loci payload as either a - \code{data.frame} (default, the on-disk shape) or a \code{GRanges} - (parsed from the \code{variant_id} \code{chr:pos:A2:A1} encoding, - with the remaining columns carried into \code{mcols}). +Extract the per-variant posterior fine-mapping payload + as either a \code{data.frame} (default) or a \code{GRanges}. + Returns identity columns (\code{variant_id, chrom, pos, A1, A2}), + context (\code{N, MAF}), the posterior effect columns + (\code{beta = posterior_mean, se = posterior_sd}), \code{pip}, + and credible-set membership columns (\code{cs_95}, etc.). + Rows are filtered by PIP by default — set \code{signalCutoff = 0} + to return every variant. } diff --git a/man/getTraits.Rd b/man/getTraits.Rd index 044e66de..b67a7a9d 100644 --- a/man/getTraits.Rd +++ b/man/getTraits.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/GwasFineMappingResult.R, +% Please edit documentation in R/AllGenerics.R, R/GwasFineMappingResult.R, % R/QtlFineMappingResult.R, R/qtlSumStats.R, R/twasWeights.R \name{getTraits} \alias{getTraits} diff --git a/man/getTrimmedFit.Rd b/man/getTrimmedFit.Rd deleted file mode 100644 index dbd5cfa3..00000000 --- a/man/getTrimmedFit.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingEntry.R, -% R/GwasFineMappingResult.R, R/QtlFineMappingResult.R -\name{getTrimmedFit} -\alias{getTrimmedFit} -\alias{getTrimmedFit,FineMappingEntry-method} -\alias{getTrimmedFit,GwasFineMappingResult-method} -\alias{getTrimmedFit,QtlFineMappingResult-method} -\title{Get Trimmed Fit} -\usage{ -getTrimmedFit(x, ...) - -\S4method{getTrimmedFit}{FineMappingEntry}(x, ...) - -\S4method{getTrimmedFit}{GwasFineMappingResult}( - x, - study = NULL, - context = NULL, - trait = NULL, - method = NULL, - ... -) - -\S4method{getTrimmedFit}{QtlFineMappingResult}( - x, - study = NULL, - context = NULL, - trait = NULL, - method = NULL, - ... -) -} -\arguments{ -\item{x}{A \code{FineMappingEntry} or \code{FineMappingResult}.} - -\item{...}{Class-specific selection arguments.} -} -\value{ -A list (trimmed SuSiE fit). -} -\description{ -Extract the trimmed SuSiE fit. -} diff --git a/man/getVarY.Rd b/man/getVarY.Rd index 8fb28069..374ddef9 100644 --- a/man/getVarY.Rd +++ b/man/getVarY.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/gwasSumStats.R, +% Please edit documentation in R/AllGenerics.R, R/gwasSumStats.R, % R/qtlSumStats.R \name{getVarY} \alias{getVarY} diff --git a/man/getVariantIds.Rd b/man/getVariantIds.Rd index e7b63b4e..7b5b1a3b 100644 --- a/man/getVariantIds.Rd +++ b/man/getVariantIds.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/FineMappingEntry.R, +% Please edit documentation in R/AllGenerics.R, R/FineMappingEntry.R, % R/GwasFineMappingResult.R, R/LdData.R, R/QtlFineMappingResult.R, % R/TwasWeightsEntry.R, R/twasWeights.R \name{getVariantIds} diff --git a/man/getVariantInfo.Rd b/man/getVariantInfo.Rd index 8f28e276..10bbbe79 100644 --- a/man/getVariantInfo.Rd +++ b/man/getVariantInfo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{getVariantInfo} \alias{getVariantInfo} \alias{getVariantInfo,LdData-method} diff --git a/man/getVariantNames.Rd b/man/getVariantNames.Rd deleted file mode 100644 index 0df3327b..00000000 --- a/man/getVariantNames.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R -\name{getVariantNames} -\alias{getVariantNames} -\title{Get Variant Names} -\usage{ -getVariantNames(x, ...) -} -\arguments{ -\item{x}{A \code{FineMappingResult} object.} - -\item{...}{Class-specific selection arguments.} -} -\value{ -Character vector of variant names. -} -\description{ -Extract variant names. -} diff --git a/man/getWeights.Rd b/man/getWeights.Rd index 5a814b37..99de398c 100644 --- a/man/getWeights.Rd +++ b/man/getWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/TwasWeightsEntry.R, +% Please edit documentation in R/AllGenerics.R, R/TwasWeightsEntry.R, % R/twasWeights.R \name{getWeights} \alias{getWeights} diff --git a/man/getZ.Rd b/man/getZ.Rd index 87d98571..11fb59d0 100644 --- a/man/getZ.Rd +++ b/man/getZ.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/gwasSumStats.R, +% Please edit documentation in R/AllGenerics.R, R/gwasSumStats.R, % R/qtlSumStats.R \name{getZ} \alias{getZ} diff --git a/man/hasGenotypes.Rd b/man/hasGenotypes.Rd index 8a31c288..3488f6a0 100644 --- a/man/hasGenotypes.Rd +++ b/man/hasGenotypes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/LdData.R +% Please edit documentation in R/AllGenerics.R, R/LdData.R \name{hasGenotypes} \alias{hasGenotypes} \alias{hasGenotypes,LdData-method} diff --git a/man/nSnps.Rd b/man/nSnps.Rd index b9c63daa..01ba3c50 100644 --- a/man/nSnps.Rd +++ b/man/nSnps.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/gwasSumStats.R, +% Please edit documentation in R/AllGenerics.R, R/gwasSumStats.R, % R/qtlSumStats.R \name{nSnps} \alias{nSnps} diff --git a/man/pecotmr-generics.Rd b/man/pecotmr-generics.Rd index d21c156d..99c9da7d 100644 --- a/man/pecotmr-generics.Rd +++ b/man/pecotmr-generics.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R +% Please edit documentation in R/AllGenerics.R \name{pecotmr-generics} \alias{pecotmr-generics} \title{S4 Generic Function Definitions} diff --git a/man/postprocessFinemappingFits.Rd b/man/postprocessFinemappingFits.Rd index cb2d95e6..78ae6c85 100644 --- a/man/postprocessFinemappingFits.Rd +++ b/man/postprocessFinemappingFits.Rd @@ -19,7 +19,8 @@ postprocessFinemappingFits( priorEffTol = 1e-09, minAbsCorr = 0.8, medianAbsCorr = NULL, - csInput = NULL + csInput = NULL, + trim = TRUE ) } \arguments{ diff --git a/man/readAnnotations.Rd b/man/readAnnotations.Rd index 7d9f63fa..76e6116c 100644 --- a/man/readAnnotations.Rd +++ b/man/readAnnotations.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/h2Annotations.R +% Please edit documentation in R/AllGenerics.R, R/h2Annotations.R \name{readAnnotations} \alias{readAnnotations} \alias{readAnnotations,character-method} diff --git a/man/readGenotypes.Rd b/man/readGenotypes.Rd index e29910a2..03480e0b 100644 --- a/man/readGenotypes.Rd +++ b/man/readGenotypes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/genotypeIo.R +% Please edit documentation in R/AllGenerics.R, R/genotypeIo.R \name{readGenotypes} \alias{readGenotypes} \alias{readGenotypes,character-method} diff --git a/man/subsetChr.Rd b/man/subsetChr.Rd index 99af6bdb..11a500e0 100644 --- a/man/subsetChr.Rd +++ b/man/subsetChr.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/gwasSumStats.R, +% Please edit documentation in R/AllGenerics.R, R/gwasSumStats.R, % R/qtlSumStats.R \name{subsetChr} \alias{subsetChr} diff --git a/man/trimCtwasVariants.Rd b/man/trimCtwasVariants.Rd deleted file mode 100644 index 9be03e8e..00000000 --- a/man/trimCtwasVariants.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ctwasWrapper.R -\name{trimCtwasVariants} -\alias{trimCtwasVariants} -\title{Function to select variants for ctwas weights input} -\usage{ -trimCtwasVariants( - regionData, - twasWeightCutoff = 1e-05, - csMinCor = 0.8, - minPipCutoff = 0.1, - maxNumVariants = 1000 -) -} -\arguments{ -\item{regionData}{A list of list containing weights list and snp_info list data for multiple genes/events within a single LD block region.} - -\item{exportTwasWeightDb}{A list of list of fine-mapping result data formatted by generate_twas_db function.} - -\item{regionBlock}{A string for region information for region_weights, consisted of chromosome number, star and end position of LD block conneced with "_".} -} -\description{ -Function to select variants for ctwas weights input -} diff --git a/man/twasWeightsPipeline.Rd b/man/twasWeightsPipeline.Rd index c1694775..898954bf 100644 --- a/man/twasWeightsPipeline.Rd +++ b/man/twasWeightsPipeline.Rd @@ -32,6 +32,8 @@ twasWeightsPipeline(data, ...) estimatePi = TRUE, phenotypeCovariatesToResidualize = NULL, genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, dataType = NULL, naAction = c("drop", "impute"), verbose = 1, @@ -62,6 +64,10 @@ twasWeightsPipeline(data, ...) twasWeights = NULL, naAction = c("drop", "impute"), verbose = 1, + phenotypeCovariatesToResidualize = NULL, + genotypeCovariatesToResidualize = NULL, + residualizePhenotypeCovariates = TRUE, + residualizeGenotypeCovariates = TRUE, ... ) @@ -149,9 +155,22 @@ inclusion. Default 0.01.} \item{estimatePi}{If TRUE, estimate spike-and-slab sparsity from mr.ash before BGLR / qgg spike-and-slab methods that consume it.} -\item{phenotypeCovariatesToResidualize, genotypeCovariatesToResidualize}{Pass-through to \code{\link{getResidualizedPhenotypes}} and -\code{\link{getResidualizedGenotypes}} for QtlDataset input. -Default \code{NULL} (use all covariates).} +\item{phenotypeCovariatesToResidualize, genotypeCovariatesToResidualize}{Character vector (or \code{NULL}) of covariate column names to +residualize against. Forwarded to +\code{\link{getResidualizedPhenotypes}} / +\code{\link{getResidualizedGenotypes}} for \code{QtlDataset} / +\code{MultiStudyQtlDataset} input. Default \code{NULL} (use all +available covariates). Ignored for sumstat inputs.} + +\item{residualizePhenotypeCovariates}{Logical (length 1). When +\code{TRUE} (default) residualize against the phenotype-side +covariates listed in \code{phenotypeCovariatesToResidualize}; set +\code{FALSE} to disable.} + +\item{residualizeGenotypeCovariates}{Logical (length 1). When +\code{TRUE} (default) residualize against the genotype-side +covariates listed in \code{genotypeCovariatesToResidualize}; set +\code{FALSE} to disable.} \item{dataType}{Optional data-type tag stamped into every \code{TwasWeightsEntry$dataType} (e.g. \code{"expression"}).} diff --git a/man/writeSumstatsVcf.Rd b/man/writeSumstatsVcf.Rd index 8819ba51..c8f4c6b8 100644 --- a/man/writeSumstatsVcf.Rd +++ b/man/writeSumstatsVcf.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/vcfWriter.R +% Please edit documentation in R/AllGenerics.R, R/vcfWriter.R \name{writeSumstatsVcf} \alias{writeSumstatsVcf} \alias{writeSumstatsVcf,GwasSumStats-method} @@ -18,6 +18,8 @@ writeSumstatsVcf(x, outputPath, sampleName = NULL, ...) context = NULL, trait = NULL, method = NULL, + splitByContext = FALSE, + splitByTrait = FALSE, ... ) } @@ -28,15 +30,26 @@ writeSumstatsVcf(x, outputPath, sampleName = NULL, ...) \item{outputPath}{File path for output. Extension determines format: \code{.vcf.gz} or \code{.vcf.bgz} for bgzipped VCF, -\code{.bcf} for BCF, \code{.vcf} for uncompressed VCF.} +\code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. When +\code{splitByContext} or \code{splitByTrait} is \code{TRUE} on a +\code{FineMappingResult} method call, the corresponding tag is +appended to the file stem (e.g. +\code{out.vcf} + context = \code{"brain"} → +\code{out.brain.vcf}).} \item{sampleName}{Name for the VCF sample column (default: trait name or method name from the S4 object).} -\item{...}{Additional arguments passed to methods.} +\item{...}{Additional arguments passed to methods. For the +\code{FineMappingResult} method these include \code{splitByContext} +(logical, default \code{FALSE}) and \code{splitByTrait} (logical, +default \code{FALSE}): when either is \code{TRUE}, write one VCF +per row of the collection with the tuple value appended to the +output filename.} } \value{ -Invisible path to the written file. +Invisible path (or character vector of paths when splitting) + to the written file(s). } \description{ Creates a VCF object from GWAS summary statistics or fine-mapping results diff --git a/tests/testthat/helper-collectionAccessors.R b/tests/testthat/helper-collectionAccessors.R index 318256fe..27ec001c 100644 --- a/tests/testthat/helper-collectionAccessors.R +++ b/tests/testthat/helper-collectionAccessors.R @@ -6,17 +6,29 @@ context("Collection-level accessors") .ca_makeTopLoci <- function(n = 3, withCs = TRUE) { tl <- data.frame( - variant_id = paste0("chr1:", 100 * seq_len(n), ":A:G"), - pip = seq(0.9, by = -0.1, length.out = n), + variant_id = paste0("chr1:", 100 * seq_len(n), ":A:G"), + chrom = rep("1", n), + pos = as.integer(100 * seq_len(n)), + A1 = rep("G", n), + A2 = rep("A", n), + N = rep(1000, n), + MAF = rep(0.1, n), + marginal_beta = rep(0.1, n), + marginal_se = rep(0.05, n), + marginal_z = rep(2.0, n), + marginal_p = rep(0.05, n), + pip = seq(0.9, by = -0.1, length.out = n), + posterior_mean = rep(0.05, n), + posterior_sd = rep(0.02, n), stringsAsFactors = FALSE) - if (withCs) tl$cs <- c(1L, 1L, 0L)[seq_len(n)] + if (withCs) tl$cs_95 <- paste0("susie_", c(1L, 1L, 0L)[seq_len(n)]) tl } .ca_makeFmEntry <- function(n = 3) { FineMappingEntry( variantIds = paste0("chr1:", 100 * seq_len(n), ":A:G"), - trimmedFit = list(payload = sprintf("fit_n=%d", n)), + susieFit = list(payload = sprintf("fit_n=%d", n)), topLoci = .ca_makeTopLoci(n)) } diff --git a/tests/testthat/helper-s4Constructors.R b/tests/testthat/helper-s4Constructors.R index 7bc0491c..19a104bf 100644 --- a/tests/testthat/helper-s4Constructors.R +++ b/tests/testthat/helper-s4Constructors.R @@ -22,16 +22,28 @@ context("s4Constructors") .sc_makeTopLoci <- function(n = 3) { data.frame( - variant_id = paste0("chr1:", 100 * seq_len(n), ":A:G"), - pip = seq(0.9, by = -0.1, length.out = n), - cs = c(1L, 1L, 0L)[seq_len(n)], + variant_id = paste0("chr1:", 100 * seq_len(n), ":A:G"), + chrom = rep("1", n), + pos = as.integer(100 * seq_len(n)), + A1 = rep("G", n), + A2 = rep("A", n), + N = rep(1000, n), + MAF = rep(0.1, n), + marginal_beta = rep(0.1, n), + marginal_se = rep(0.05, n), + marginal_z = rep(2.0, n), + marginal_p = rep(0.05, n), + pip = seq(0.9, by = -0.1, length.out = n), + posterior_mean = rep(0.05, n), + posterior_sd = rep(0.02, n), + cs_95 = paste0("susie_", c(1L, 1L, 0L)[seq_len(n)]), stringsAsFactors = FALSE) } .sc_makeFineMappingEntry <- function(n = 3) { FineMappingEntry( variantIds = paste0("chr1:", 100 * seq_len(n), ":A:G"), - trimmedFit = list(fake = TRUE), + susieFit = list(fake = TRUE), topLoci = .sc_makeTopLoci(n)) } diff --git a/tests/testthat/helper-showMethods.R b/tests/testthat/helper-showMethods.R index d4d4e3ac..a5c3e8b7 100644 --- a/tests/testthat/helper-showMethods.R +++ b/tests/testthat/helper-showMethods.R @@ -22,13 +22,25 @@ context("show methods") .sh_makeFmEntry <- function(n = 3, with_cs = TRUE) { tl <- data.frame( - variant_id = paste0("chr1:", 100 * seq_len(n), ":A:G"), - pip = seq(0.9, by = -0.1, length.out = n), + variant_id = paste0("chr1:", 100 * seq_len(n), ":A:G"), + chrom = rep("1", n), + pos = as.integer(100 * seq_len(n)), + A1 = rep("G", n), + A2 = rep("A", n), + N = rep(1000, n), + MAF = rep(0.1, n), + marginal_beta = rep(0.1, n), + marginal_se = rep(0.05, n), + marginal_z = rep(2.0, n), + marginal_p = rep(0.05, n), + pip = seq(0.9, by = -0.1, length.out = n), + posterior_mean = rep(0.05, n), + posterior_sd = rep(0.02, n), stringsAsFactors = FALSE) - if (with_cs) tl$cs <- c(1L, 1L, 0L)[seq_len(n)] + if (with_cs) tl$cs_95 <- paste0("susie_", c(1L, 1L, 0L)[seq_len(n)]) FineMappingEntry( variantIds = tl$variant_id, - trimmedFit = list(), + susieFit = list(), topLoci = tl) } diff --git a/tests/testthat/test_FineMappingEntry.R b/tests/testthat/test_FineMappingEntry.R index 3c025193..d70d5cbe 100644 --- a/tests/testthat/test_FineMappingEntry.R +++ b/tests/testthat/test_FineMappingEntry.R @@ -11,7 +11,7 @@ pip <- as.numeric(1 - apply(1 - alpha, 2, prod)) FineMappingEntry( variantIds = vids, - trimmedFit = list( + susieFit = list( pip = pip, alpha = alpha, lbf_variable = lbf, @@ -44,7 +44,7 @@ test_that("getTopLoci(type='GRanges') converts topLoci data.frame to GRanges", { stringsAsFactors = FALSE ) ent <- FineMappingEntry(variantIds = tl$variant_id, - trimmedFit = list(), topLoci = tl) + susieFit = list(), topLoci = tl) gr <- getTopLoci(ent, type = "GRanges") expect_s4_class(gr, "GRanges") expect_equal(length(gr), 2) @@ -54,7 +54,7 @@ test_that("getTopLoci(type='GRanges') converts topLoci data.frame to GRanges", { test_that("getTopLoci(type='GRanges') handles empty input", { ent <- FineMappingEntry(variantIds = character(0), - trimmedFit = list(), + susieFit = list(), topLoci = data.frame()) gr <- getTopLoci(ent, type = "GRanges") expect_s4_class(gr, "GRanges") @@ -69,7 +69,7 @@ test_that("getTopLoci defaults to data.frame", { stringsAsFactors = FALSE ) ent <- FineMappingEntry(variantIds = tl$variant_id, - trimmedFit = list(), topLoci = tl) + susieFit = list(), topLoci = tl) expect_s3_class(getTopLoci(ent), "data.frame") }) @@ -85,14 +85,14 @@ test_that("adjustPips renormalizes PIPs on a kept FineMappingEntry subset", { adj <- adjustPips(entry, keep) expect_s4_class(adj, "FineMappingEntry") expect_equal(adj@variantIds, keep) - expect_equal(ncol(adj@trimmedFit$lbf_variable), 4) + expect_equal(ncol(adj@susieFit$lbf_variable), 4) # Renormalized: each effect's alpha row sums to 1 (when row has any signal) - expect_true(all(abs(rowSums(adj@trimmedFit$alpha) - 1) < 1e-10)) + expect_true(all(abs(rowSums(adj@susieFit$alpha) - 1) < 1e-10)) # PIPs match topLoci - expect_equal(adj@topLoci$pip, adj@trimmedFit$pip) + expect_equal(adj@topLoci$pip, adj@susieFit$pip) # PIPs change under renormalization origPips <- getPip(entry) - expect_false(identical(unname(origPips[keep]), adj@trimmedFit$pip)) + expect_false(identical(unname(origPips[keep]), adj@susieFit$pip)) }) @@ -131,15 +131,17 @@ test_that("adjustPips on a FineMappingResultBase collection renormalizes each en test_that("FineMappingEntry: constructor stores slots and accessors return them", { tl <- .sc_makeTopLoci(3) + tl$variant_id <- c("a", "b", "c") entry <- FineMappingEntry( variantIds = c("a", "b", "c"), - trimmedFit = list(payload = 1L), - topLoci = tl, - sumstats = list(z = c(1, 2, 3))) + susieFit = list(payload = 1L), + topLoci = tl) expect_s4_class(entry, "FineMappingEntry") expect_equal(getVariantIds(entry), c("a", "b", "c")) - expect_equal(getTrimmedFit(entry), list(payload = 1L)) - expect_equal(getTopLoci(entry), tl) + expect_equal(getSusieFit(entry), list(payload = 1L)) + # getTopLoci returns the projected posterior view, not the raw slot + out <- getTopLoci(entry, signalCutoff = 0) + expect_equal(out$variant_id, c("a", "b", "c")) }) @@ -155,18 +157,17 @@ test_that("FineMappingEntry: getPip returns named pip vector keyed by variant_id test_that("FineMappingEntry: getPip returns numeric(0) when topLoci is empty", { entry <- FineMappingEntry( variantIds = character(0), - trimmedFit = list(), + susieFit = list(), topLoci = data.frame(variant_id = character(0), pip = numeric(0), stringsAsFactors = FALSE)) expect_equal(getPip(entry), numeric(0)) }) -test_that("FineMappingEntry: getCs filters to rows with cs > 0", { - entry <- .sc_makeFineMappingEntry(3) # last row has cs = 0 +test_that("FineMappingEntry: getCs filters to rows in any credible set", { + entry <- .sc_makeFineMappingEntry(3) # last row has cs_95 = "susie_0" res <- getCs(entry) expect_equal(nrow(res), 2L) - expect_true(all(res$cs > 0)) }) @@ -174,9 +175,9 @@ test_that("FineMappingEntry: validity errors when topLoci is missing required co expect_error( FineMappingEntry( variantIds = "v1", - trimmedFit = list(), + susieFit = list(), topLoci = data.frame(other = 1, stringsAsFactors = FALSE)), - "topLoci missing columns" + "topLoci missing required columns" ) }) @@ -207,7 +208,7 @@ test_that("show.FineMappingEntry reports variant count and CS count", { tl <- data.frame(variant_id = c("a", "b"), pip = c(0.1, 0.2), stringsAsFactors = FALSE) e_no_cs <- FineMappingEntry(variantIds = c("a", "b"), - trimmedFit = list(), topLoci = tl) + susieFit = list(), topLoci = tl) out_no <- capture.output(show(e_no_cs)) expect_true(any(grepl("0 credible sets", out_no))) }) diff --git a/tests/testthat/test_GwasFineMappingResult.R b/tests/testthat/test_GwasFineMappingResult.R index 8d861f82..9137cac6 100644 --- a/tests/testthat/test_GwasFineMappingResult.R +++ b/tests/testthat/test_GwasFineMappingResult.R @@ -100,13 +100,18 @@ test_that("GwasFineMappingResult: getContexts/getTraits return NULL", { }) -test_that("GwasFineMappingResult: getCs/getTopLoci/getTrimmedFit/getVariantIds dispatch", { +test_that("GwasFineMappingResult: getCs/getTopLoci/getSusieFit/getVariantIds dispatch", { e <- .ca_makeFmEntry(3) res <- GwasFineMappingResult(study = "g1", method = "susie", entry = list(e)) expect_equal(nrow(getCs(res)), 2L) - expect_equal(getTopLoci(res), .ca_makeTopLoci(3)) - expect_equal(getTrimmedFit(res), list(payload = "fit_n=3")) + # getTopLoci returns the projected posterior view (filtered by default + # signalCutoff = 0.025; .ca_makeTopLoci sets all pip > 0.025 so all rows + # survive). Compare on the projected shape, not the slot's raw shape. + tl <- getTopLoci(res, signalCutoff = 0) + expect_equal(nrow(tl), 3L) + expect_equal(tl$variant_id, .ca_makeTopLoci(3)$variant_id) + expect_equal(getSusieFit(res), list(payload = "fit_n=3")) expect_equal(length(getVariantIds(res)), 3L) }) diff --git a/tests/testthat/test_QtlFineMappingResult.R b/tests/testthat/test_QtlFineMappingResult.R index a8a7b7dd..5af72035 100644 --- a/tests/testthat/test_QtlFineMappingResult.R +++ b/tests/testthat/test_QtlFineMappingResult.R @@ -244,31 +244,32 @@ test_that("QtlFineMappingResult: getPip(returnList = TRUE) wraps in pipe-keyed l test_that("QtlFineMappingResult: getCs filters to credible sets", { - e <- .ca_makeFmEntry(3) # cs = c(1, 1, 0) + e <- .ca_makeFmEntry(3) # cs_95 = c("susie_1", "susie_1", "susie_0") res <- QtlFineMappingResult( study = "s1", context = "c1", trait = "t1", method = "susie", entry = list(e)) cs <- getCs(res) expect_equal(nrow(cs), 2L) - expect_true(all(cs$cs > 0)) }) -test_that("QtlFineMappingResult: getTopLoci returns the entry's topLoci", { +test_that("QtlFineMappingResult: getTopLoci returns the entry's topLoci (projected)", { e <- .ca_makeFmEntry(3) res <- QtlFineMappingResult( study = "s1", context = "c1", trait = "t1", method = "susie", entry = list(e)) - expect_equal(getTopLoci(res), .ca_makeTopLoci(3)) + tl <- getTopLoci(res, signalCutoff = 0) + expect_equal(nrow(tl), 3L) + expect_equal(tl$variant_id, .ca_makeTopLoci(3)$variant_id) }) -test_that("QtlFineMappingResult: getTrimmedFit reads the entry's trimmedFit", { +test_that("QtlFineMappingResult: getSusieFit reads the entry's trimmedFit", { e <- .ca_makeFmEntry(3) res <- QtlFineMappingResult( study = "s1", context = "c1", trait = "t1", method = "susie", entry = list(e)) - expect_equal(getTrimmedFit(res), list(payload = "fit_n=3")) + expect_equal(getSusieFit(res), list(payload = "fit_n=3")) }) diff --git a/tests/testthat/test_causalInferencePipeline.R b/tests/testthat/test_causalInferencePipeline.R index ffca4c9d..6c7b2b85 100644 --- a/tests/testthat/test_causalInferencePipeline.R +++ b/tests/testthat/test_causalInferencePipeline.R @@ -80,14 +80,18 @@ context("causalInferencePipeline") } .cip_makeQtlFmr <- function(variant_ids = paste0("v", 1:5)) { + n <- length(variant_ids) tl <- data.frame( - variant_id = variant_ids, - pip = c(0.9, 0.05, 0.5, 0.8, 0.01), - betahat = c(0.2, 0.05, -0.1, 0.3, 0.0), - sebetahat = rep(0.05, length(variant_ids)), + variant_id = variant_ids, + pip = c(0.9, 0.05, 0.5, 0.8, 0.01), + # posterior_mean / posterior_sd carry the "fine-mapped causal effect" + # estimates that getTopLoci surfaces as beta / se in its projected + # output (the column names downstream MR / TWAS code reads). + posterior_mean = c(0.2, 0.05, -0.1, 0.3, 0.0), + posterior_sd = rep(0.05, n), stringsAsFactors = FALSE) e <- FineMappingEntry(variantIds = variant_ids, - trimmedFit = list(), + susieFit = list(), topLoci = tl) QtlFineMappingResult( study = "Q1", context = "c1", trait = "t1", method = "susie", @@ -128,7 +132,7 @@ test_that("causalInferencePipeline: rejects non-TwasWeights twasWeights arg", { }) test_that("causalInferencePipeline: rejects GwasFineMappingResult for the QTL slot", { - e <- FineMappingEntry(variantIds = "v1", trimmedFit = list(), + e <- FineMappingEntry(variantIds = "v1", susieFit = list(), topLoci = data.frame(variant_id = "v1", pip = 0.1, stringsAsFactors = FALSE)) gfmr <- GwasFineMappingResult( diff --git a/tests/testthat/test_colocPipeline.R b/tests/testthat/test_colocPipeline.R index 7fdfca00..63fd943e 100644 --- a/tests/testthat/test_colocPipeline.R +++ b/tests/testthat/test_colocPipeline.R @@ -27,8 +27,23 @@ context("colocPipeline") .cp_makeFmEntry <- function(variant_ids = paste0("chr1:", 100 * (1:5), ":A:G"), withLbf = TRUE, n_eff = 2L) { pip <- seq(0.9, by = -0.15, length.out = length(variant_ids)) - tl <- data.frame(variant_id = variant_ids, pip = pip, - stringsAsFactors = FALSE) + n <- length(variant_ids) + tl <- data.frame( + variant_id = variant_ids, + chrom = rep("1", n), + pos = as.integer(100 * (1:n)), + A1 = rep("G", n), + A2 = rep("A", n), + N = rep(1000, n), + MAF = rep(0.1, n), + marginal_beta = rep(0.1, n), + marginal_se = rep(0.05, n), + marginal_z = rep(2.0, n), + marginal_p = rep(0.05, n), + pip = pip, + posterior_mean = rep(0.05, n), + posterior_sd = rep(0.02, n), + stringsAsFactors = FALSE) fit <- list( alpha = matrix(1/length(variant_ids), nrow = n_eff, ncol = length(variant_ids), @@ -40,7 +55,7 @@ context("colocPipeline") nrow = n_eff, ncol = length(variant_ids), dimnames = list(NULL, variant_ids)) FineMappingEntry(variantIds = variant_ids, - trimmedFit = fit, + susieFit = fit, topLoci = tl) } @@ -228,7 +243,7 @@ test_that("colocPipeline: empty result has the documented schema", { emptyFit <- list(alpha = matrix(0, 1, 1), pip = c(v1 = 0), V = 0, lbf_variable = matrix(NA_real_, 1, 1)) e <- FineMappingEntry(variantIds = "v1", - trimmedFit = emptyFit, + susieFit = emptyFit, topLoci = data.frame(variant_id = "v1", pip = 0, stringsAsFactors = FALSE)) gfmr <- GwasFineMappingResult( @@ -253,7 +268,7 @@ test_that("colocPipeline: empty result has the documented schema", { test_that(".colocExtractLbfFromEntry: entry without trimmedFit returns NULL with warning", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = NULL, + susieFit = NULL, topLoci = data.frame(variant_id = "v1", pip = 0.1, stringsAsFactors = FALSE)) expect_warning( @@ -271,7 +286,7 @@ test_that(".colocExtractLbfFromEntry: filterLbfCs subsets by cs_index", { lbf_variable = matrix(1:12, 3, 4, dimnames = list(NULL, paste0("v", 1:4))), sets = list(cs_index = c(1L, 3L))) # keep effects 1 and 3 e <- FineMappingEntry(variantIds = paste0("v", 1:4), - trimmedFit = fit, + susieFit = fit, topLoci = data.frame(variant_id = paste0("v", 1:4), pip = c(0.9, 0.1, 0.5, 0.2), stringsAsFactors = FALSE)) diff --git a/tests/testthat/test_ctwasPipeline.R b/tests/testthat/test_ctwasPipeline.R index f9215724..f0ca21d9 100644 --- a/tests/testthat/test_ctwasPipeline.R +++ b/tests/testthat/test_ctwasPipeline.R @@ -391,3 +391,114 @@ test_that("ctwasPipeline: real-engine end-to-end on the bundled example panel", expect_true(all(c("susie_pip", "susie_alpha", "region_id") %in% colnames(res$susie_alpha_res))) }) + +# =========================================================================== +# .ctwasFilterVariants — ported from R/ctwasWrapper.R::trimCtwasVariants +# =========================================================================== +# The filter has four knobs: +# 1. twasWeightCutoff — drop |w| < cutoff +# 2. csMinCor — high-purity CS rescue (must-keep) +# 3. minPipCutoff — high-PIP rescue (must-keep) +# 4. maxNumVariants — per-gene cap, prioritized by PIP then |w| + +test_that(".ctwasFilterVariants: twasWeightCutoff drops low-magnitude variants", { + vids <- paste0("v", 1:6) + w <- c(0.5, 0.001, 0.3, 0.0005, -0.4, 0) + out <- pecotmr:::.ctwasFilterVariants( + vids = vids, w = w, finemapAux = NULL, + twasWeightCutoff = 0.01, csMinCor = 0.8, + minPipCutoff = 0, maxNumVariants = Inf) + # Survivors: v1 (0.5), v3 (0.3), v5 (-0.4) — three with |w| >= 0.01 + expect_setequal(out$vids, c("v1", "v3", "v5")) +}) + +test_that(".ctwasFilterVariants: maxNumVariants caps by |w| when no PIP", { + vids <- paste0("v", 1:5) + w <- c(0.1, 0.5, 0.2, 0.4, 0.05) + out <- pecotmr:::.ctwasFilterVariants( + vids = vids, w = w, finemapAux = NULL, + twasWeightCutoff = 0, csMinCor = 0.8, + minPipCutoff = 0, maxNumVariants = 3) + # Top 3 by |w|: v2 (0.5), v4 (0.4), v3 (0.2) + expect_setequal(out$vids, c("v2", "v4", "v3")) +}) + +test_that(".ctwasFilterVariants: minPipCutoff rescues high-PIP variants from cap", { + vids <- paste0("v", 1:5) + w <- c(0.5, 0.4, 0.3, 0.2, 0.1) + finemapAux <- list( + pip = setNames(c(0.01, 0.02, 0.8, 0.01, 0.95), vids), + csMembers = list(), + csPurity = numeric(0)) + out <- pecotmr:::.ctwasFilterVariants( + vids = vids, w = w, finemapAux = finemapAux, + twasWeightCutoff = 0, csMinCor = 0.8, + minPipCutoff = 0.5, maxNumVariants = 2) + # Must-keep (PIP > 0.5): v3, v5. Cap is 2 → both kept. + expect_setequal(out$vids, c("v3", "v5")) +}) + +test_that(".ctwasFilterVariants: csMinCor rescues high-purity CS members from cap", { + vids <- paste0("v", 1:6) + w <- c(0.5, 0.4, 0.3, 0.2, 0.1, 0.05) + finemapAux <- list( + pip = setNames(rep(0, length(vids)), vids), + csMembers = list(c("v3", "v6"), c("v2", "v4")), + csPurity = c(0.9, 0.5)) # CS 1 (v3, v6) is high-purity + out <- pecotmr:::.ctwasFilterVariants( + vids = vids, w = w, finemapAux = finemapAux, + twasWeightCutoff = 0, csMinCor = 0.8, + minPipCutoff = 0, maxNumVariants = 3) + # Must-keep from high-purity CS: v3, v6. Remaining slot filled by + # next-highest |w| that isn't must-keep: v1 (0.5). + expect_setequal(out$vids, c("v3", "v6", "v1")) +}) + +test_that(".ctwasFilterVariants: returns NULL when no variants survive", { + vids <- paste0("v", 1:3) + w <- c(0.001, 0.0005, 0.002) + out <- pecotmr:::.ctwasFilterVariants( + vids = vids, w = w, finemapAux = NULL, + twasWeightCutoff = 0.5, csMinCor = 0.8, + minPipCutoff = 0, maxNumVariants = Inf) + expect_null(out) +}) + +test_that(".ctwasBuildWeights: maxNumVariants caps the per-gene weight matrix", { + data(qtl_dataset_example) + qd <- fixupExampleGenotypePaths(qtl_dataset_example) + gh <- qd@genotypes + vids <- gh@snpInfo$SNP[1:5] + ent <- TwasWeightsEntry( + variantIds = vids, + weights = c(0.1, -0.2, 0.05, 0.3, 0.15)) + tw <- TwasWeights( + study = "study1", context = "brain", + trait = "ENSG_example", method = "susie", + entry = list(ent), ldSketch = gh) + ldPanel <- pecotmr:::.ctwasComputeFullPanelLd(gh) + wl <- pecotmr:::.ctwasBuildWeights(tw, ldPanel, maxNumVariants = 3L) + expect_equal(wl[[1L]]$n_wgt, 3L) + expect_equal(nrow(wl[[1L]]$wgt), 3L) + # Top 3 by |w| from c(0.1, -0.2, 0.05, 0.3, 0.15): 0.3, -0.2, 0.15 + expect_setequal(rownames(wl[[1L]]$wgt), vids[c(4L, 2L, 5L)]) +}) + +test_that(".ctwasBuildWeights: twasWeightCutoff drops low-magnitude variants", { + data(qtl_dataset_example) + qd <- fixupExampleGenotypePaths(qtl_dataset_example) + gh <- qd@genotypes + vids <- gh@snpInfo$SNP[1:5] + ent <- TwasWeightsEntry( + variantIds = vids, + # v1 (0.005) and v3 (0.001) will be dropped at cutoff 0.01 + weights = c(0.005, 0.2, 0.001, 0.3, 0.1)) + tw <- TwasWeights( + study = "study1", context = "brain", + trait = "ENSG_example", method = "susie", + entry = list(ent), ldSketch = gh) + ldPanel <- pecotmr:::.ctwasComputeFullPanelLd(gh) + wl <- pecotmr:::.ctwasBuildWeights(tw, ldPanel, twasWeightCutoff = 0.01) + expect_equal(wl[[1L]]$n_wgt, 3L) + expect_setequal(rownames(wl[[1L]]$wgt), vids[c(2L, 4L, 5L)]) +}) diff --git a/tests/testthat/test_ctwasWrapper.R b/tests/testthat/test_ctwasWrapper.R deleted file mode 100644 index c98a368e..00000000 --- a/tests/testthat/test_ctwasWrapper.R +++ /dev/null @@ -1,296 +0,0 @@ -context("ctwas") - -# =========================================================================== -# ctwas wrapper tests -# =========================================================================== - - -# ---------- trimCtwasVariants -------------------------------------------- - -# Helper: build a minimal region_data structure that trimCtwasVariants expects -make_mock_region_data <- function() { - # Variant IDs in canonical format (chr:pos:A2:A1) - variant_ids <- c("chr1:1000:A:G", "chr1:2000:C:T", "chr1:3000:G:A", "chr1:4000:T:C") - - # Weight matrix (4 variants x 1 weight column) - wgt <- matrix(c(0.5, 0.0001, 0.3, -0.2), nrow = 4, ncol = 1) - rownames(wgt) <- variant_ids - - gene_id <- "GENE1|ctx1" - context <- "ctx1" - study <- "study1" - - weights <- list() - weights[[gene_id]] <- list() - weights[[gene_id]][[study]] <- list( - wgt = wgt, - context = context, - p0 = 1000, - p1 = 4000 - ) - - # SuSiE intermediate info - pip_vals <- c(0.8, 0.05, 0.6, 0.02) - names(pip_vals) <- variant_ids - - susieWeightsIntermediate <- list() - susieWeightsIntermediate[["GENE1"]] <- list() - susieWeightsIntermediate[["GENE1"]][[context]] <- list( - pip = pip_vals, - csVariants = list(variant_ids[c(1, 3)]), - csPurity = list(minAbsCorr = 0.9) - ) - - list( - weights = weights, - susieWeightsIntermediate = susieWeightsIntermediate - ) -} - -test_that("trimCtwasVariants removes variants below weight cutoff", { - rd <- make_mock_region_data() - # Default cutoff 1e-5, variant 2 has weight 0.0001 (above), so all 4 should pass default cutoff - result <- trimCtwasVariants(rd, twasWeightCutoff = 1e-5) - expect_true(is.list(result)) - # With a higher cutoff, the near-zero variant should be removed - result_strict <- trimCtwasVariants(rd, twasWeightCutoff = 0.001) - # study1 should exist in result - expect_true("study1" %in% names(result_strict)) - # Get the gene-level result - gene_weights <- result_strict[["study1"]][["GENE1|ctx1"]] - # Variant 2 has abs(weight) = 0.0001 < 0.001, so should be removed - expect_false("chr1:2000:C:T" %in% rownames(gene_weights$wgt)) -}) - -test_that("trimCtwasVariants removes gene when all weights below cutoff", { - rd <- make_mock_region_data() - # Set cutoff so high that all variants are dropped - result <- trimCtwasVariants(rd, twasWeightCutoff = 10) - # Gene should be removed entirely since no weights pass the cutoff - # Result should be an empty list - expect_equal(length(result), 0) -}) - -test_that("trimCtwasVariants returns result keyed by study", { - rd <- make_mock_region_data() - result <- trimCtwasVariants(rd, twasWeightCutoff = 1e-5) - # merge_by_study reorganizes: weights[[study]][[group]] - expect_true("study1" %in% names(result)) - expect_true("GENE1|ctx1" %in% names(result[["study1"]])) -}) - -test_that("trimCtwasVariants updates p0 and p1 positions", { - rd <- make_mock_region_data() - # Use a weight cutoff that removes the variant at position 2000 - result <- trimCtwasVariants(rd, twasWeightCutoff = 0.001) - gene_weights <- result[["study1"]][["GENE1|ctx1"]] - # p0 and p1 should reflect the range of remaining variant positions - remaining_positions <- as.integer(sapply( - rownames(gene_weights$wgt), - function(v) strsplit(v, ":")[[1]][2] - )) - expect_equal(gene_weights$p0, min(remaining_positions)) - expect_equal(gene_weights$p1, max(remaining_positions)) -}) - -test_that("trimCtwasVariants respects max_num_variants", { - rd <- make_mock_region_data() - # Request max 2 variants; since nrow(wgt) == 4 >= max_num_variants == 2, - # it triggers select_variants which picks by PIP priority - result <- trimCtwasVariants(rd, twasWeightCutoff = 1e-5, maxNumVariants = 2) - gene_weights <- result[["study1"]][["GENE1|ctx1"]] - expect_true(nrow(gene_weights$wgt) <= 2) -}) - -test_that("trimCtwasVariants handles NA weights by removing group", { - rd <- make_mock_region_data() - # Replace all weights with NA - rd$weights[["GENE1|ctx1"]][["study1"]]$wgt[, 1] <- NA - result <- trimCtwasVariants(rd, twasWeightCutoff = 0) - # The group should be removed because all weights are NA - expect_equal(length(result), 0) -}) - -test_that("trimCtwasVariants handles multiple genes", { - rd <- make_mock_region_data() - - # Add a second gene - variant_ids2 <- c("chr1:5000:A:G", "chr1:6000:C:T") - wgt2 <- matrix(c(0.4, -0.3), nrow = 2, ncol = 1) - rownames(wgt2) <- variant_ids2 - - rd$weights[["GENE2|ctx1"]] <- list() - rd$weights[["GENE2|ctx1"]][["study1"]] <- list( - wgt = wgt2, - context = "ctx1", - p0 = 5000, - p1 = 6000 - ) - - pip_vals2 <- c(0.7, 0.4) - names(pip_vals2) <- variant_ids2 - rd$susieWeightsIntermediate[["GENE2"]] <- list() - rd$susieWeightsIntermediate[["GENE2"]][["ctx1"]] <- list( - pip = pip_vals2, - csVariants = list(variant_ids2[1]), - csPurity = list(minAbsCorr = 0.95) - ) - - result <- trimCtwasVariants(rd, twasWeightCutoff = 1e-5) - expect_true("GENE1|ctx1" %in% names(result[["study1"]])) - expect_true("GENE2|ctx1" %in% names(result[["study1"]])) -}) - -test_that("trimCtwasVariants select_variants uses csMinCor to include CS variants", { - rd <- make_mock_region_data() - # csPurity minAbsCorr = 0.9, so with csMinCor = 0.8 the CS variants - # (variant 1 and 3) should be included. Max 2 variants. - result <- trimCtwasVariants(rd, - twasWeightCutoff = 1e-5, - csMinCor = 0.8, - minPipCutoff = 0.0, - maxNumVariants = 2 - ) - gene_weights <- result[["study1"]][["GENE1|ctx1"]] - included <- rownames(gene_weights$wgt) - # CS variants chr1:1000:A:G and chr1:3000:G:A have highest PIPs (0.8 and 0.6) - # and are in the CS, so they should be prioritized - expect_true("chr1:1000:A:G" %in% included) - expect_true("chr1:3000:G:A" %in% included) -}) - -# =========================================================================== -# Deprecated wrapper: ctwasBimfileLoader -# =========================================================================== - -test_that("ctwasBimfileLoader reads .bim and returns legacy column names", { - bim_path <- tempfile(fileext = ".bim") - on.exit(unlink(bim_path), add = TRUE) - cat("1\tchr1:1000:A:G\t0\t1000\tA\tG\n", file = bim_path) - cat("1\tchr1:2000:C:T\t0\t2000\tC\tT\n", file = bim_path, append = TRUE) - - expect_warning( - res <- ctwasBimfileLoader(bim_path), - "deprecated" - ) - expect_equal(colnames(res), c("chrom", "id", "GD", "pos", "A1", "A2")) - expect_equal(nrow(res), 2) - expect_equal(res$pos, c(1000, 2000)) -}) - -test_that("ctwasBimfileLoader accepts .bed path and resolves .bim", { - bim_path <- tempfile(fileext = ".bim") - bed_path <- sub("\\.bim$", ".bed", bim_path) - on.exit(unlink(c(bim_path, bed_path)), add = TRUE) - cat("22\trs100\t0\t50000\tA\tG\n", file = bim_path) - - expect_warning( - res <- ctwasBimfileLoader(bed_path), - "deprecated" - ) - expect_equal(nrow(res), 1) - expect_equal(res$pos, 50000) -}) - -test_that("ctwasBimfileLoader normalizes variant IDs", { - bim_path <- tempfile(fileext = ".bim") - on.exit(unlink(bim_path), add = TRUE) - cat("1\tchr1:1000:A:G\t0\t1000\tA\tG\n", file = bim_path) - - expect_warning( - res <- ctwasBimfileLoader(bim_path), - "deprecated" - ) - # normalizeVariantId should have been applied - expect_equal(res$id, normalizeVariantId("chr1:1000:A:G")) -}) - -test_that("ctwasBimfileLoader works with real test fixture", { - bim_path <- test_path("test_data", "protocol_example.genotype.bim") - skip_if_not(file.exists(bim_path), "Test fixture not available") - - expect_warning( - res <- ctwasBimfileLoader(bim_path), - "deprecated" - ) - expect_equal(colnames(res), c("chrom", "id", "GD", "pos", "A1", "A2")) - expect_equal(nrow(res), 100) -}) - -# =========================================================================== -# Deprecated wrapper: getCtwasMetaData -# =========================================================================== - -test_that("getCtwasMetaData reads LD metadata and returns ldInfo + regionInfo", { - meta_file <- tempfile(fileext = ".tsv") - on.exit(unlink(meta_file), add = TRUE) - writeLines( - paste("chrom", "start", "end", "path", sep = "\t"), - meta_file - ) - cat(paste("chr1", "1000", "2000", "block1.cor.xz", sep = "\t"), "\n", - file = meta_file, append = TRUE) - cat(paste("chr1", "2000", "3000", "block2.cor.xz", sep = "\t"), "\n", - file = meta_file, append = TRUE) - - expect_warning( - res <- getCtwasMetaData(meta_file), - "deprecated" - ) - expect_true(is.list(res)) - expect_true("ldInfo" %in% names(res)) - expect_true("regionInfo" %in% names(res)) - - expect_equal(nrow(res$ldInfo), 2) - expect_equal(colnames(res$ldInfo), c("region_id", "LD_file", "SNP_file")) - expect_equal(res$ldInfo$region_id, c("1_1000_2000", "1_2000_3000")) - - expect_equal(nrow(res$regionInfo), 2) - expect_equal(colnames(res$regionInfo), c("chrom", "start", "stop", "region_id")) - expect_equal(res$regionInfo$chrom, c(1L, 1L)) - expect_equal(res$regionInfo$start, c(1000L, 2000L)) - expect_equal(res$regionInfo$stop, c(2000L, 3000L)) -}) - -test_that("getCtwasMetaData subset_region_ids filters correctly", { - meta_file <- tempfile(fileext = ".tsv") - on.exit(unlink(meta_file), add = TRUE) - writeLines( - paste("chrom", "start", "end", "path", sep = "\t"), - meta_file - ) - cat(paste("chr1", "1000", "2000", "block1.cor.xz", sep = "\t"), "\n", - file = meta_file, append = TRUE) - cat(paste("chr1", "2000", "3000", "block2.cor.xz", sep = "\t"), "\n", - file = meta_file, append = TRUE) - cat(paste("chr2", "5000", "6000", "block3.cor.xz", sep = "\t"), "\n", - file = meta_file, append = TRUE) - - expect_warning( - res <- getCtwasMetaData(meta_file, subsetRegionIds = "1_1000_2000"), - "deprecated" - ) - expect_equal(nrow(res$regionInfo), 1) - expect_equal(res$regionInfo$region_id, "1_1000_2000") - # ldInfo is not subsetted (matches original behavior) - expect_equal(nrow(res$ldInfo), 3) -}) - -test_that("getCtwasMetaData LD_file paths are relative to metadata directory", { - tmpdir <- tempdir() - meta_file <- file.path(tmpdir, "ld_meta.tsv") - on.exit(unlink(meta_file), add = TRUE) - writeLines( - paste("chrom", "start", "end", "path", sep = "\t"), - meta_file - ) - cat(paste("chr1", "100", "200", "subdir/block.cor.xz", sep = "\t"), "\n", - file = meta_file, append = TRUE) - - expect_warning( - res <- getCtwasMetaData(meta_file), - "deprecated" - ) - expect_equal(res$ldInfo$LD_file, file.path(tmpdir, "subdir/block.cor.xz")) - expect_equal(res$ldInfo$SNP_file, paste0(file.path(tmpdir, "subdir/block.cor.xz"), ".bim")) -}) diff --git a/tests/testthat/test_deprecated.R b/tests/testthat/test_deprecated.R index 37ae0c63..d435a12f 100644 --- a/tests/testthat/test_deprecated.R +++ b/tests/testthat/test_deprecated.R @@ -149,8 +149,23 @@ create_allele_data <- function(seed, n=100, match_min_prop=0.8, ambiguous=FALSE, .ep_makeFmEntry <- function(variant_ids = paste0("chr1:", 100*(1:5), ":A:G"), n_eff = 2L) { pip <- seq(0.9, by = -0.15, length.out = length(variant_ids)) - tl <- data.frame(variant_id = variant_ids, pip = pip, - stringsAsFactors = FALSE) + n <- length(variant_ids) + tl <- data.frame( + variant_id = variant_ids, + chrom = rep("1", n), + pos = as.integer(100 * (1:n)), + A1 = rep("G", n), + A2 = rep("A", n), + N = rep(1000, n), + MAF = rep(0.1, n), + marginal_beta = rep(0.1, n), + marginal_se = rep(0.05, n), + marginal_z = rep(2.0, n), + marginal_p = rep(0.05, n), + pip = pip, + posterior_mean = rep(0.05, n), + posterior_sd = rep(0.02, n), + stringsAsFactors = FALSE) set.seed(1) fit <- list( alpha = matrix(1/length(variant_ids), @@ -162,7 +177,7 @@ create_allele_data <- function(seed, n=100, match_min_prop=0.8, ambiguous=FALSE, nrow = n_eff, ncol = length(variant_ids), dimnames = list(NULL, variant_ids))) FineMappingEntry(variantIds = variant_ids, - trimmedFit = fit, + susieFit = fit, topLoci = tl) } @@ -640,9 +655,14 @@ test_that("enlocPipeline: qLbf NULL (QTL entry's LBF rows drop after priorTol) s lbf_variable = matrix(0, nrow = 1, ncol = 1, dimnames = list(NULL, "v1")), V = 0.0) e <- FineMappingEntry(variantIds = "v1", - trimmedFit = emptyFit, - topLoci = data.frame(variant_id = "v1", pip = 0, - stringsAsFactors = FALSE)) + susieFit = emptyFit, + topLoci = data.frame( + variant_id = "v1", chrom = "1", pos = 100L, + A1 = "G", A2 = "A", N = 1000, MAF = 0.1, + marginal_beta = 0.1, marginal_se = 0.05, + marginal_z = 2, marginal_p = 0.05, + pip = 0, posterior_mean = 0, posterior_sd = 0, + stringsAsFactors = FALSE)) qfmr <- QtlFineMappingResult( study = "Q1", context = "c1", trait = "t1", method = "susie", entry = list(e), @@ -686,9 +706,14 @@ test_that("enlocPipeline: empty result schema includes enrichment + p12Used", { emptyFit <- list(alpha = matrix(0, 1, 1), pip = c(v1 = 0), V = 0, lbf_variable = matrix(NA_real_, 1, 1)) e <- FineMappingEntry(variantIds = "v1", - trimmedFit = emptyFit, - topLoci = data.frame(variant_id = "v1", pip = 0, - stringsAsFactors = FALSE)) + susieFit = emptyFit, + topLoci = data.frame( + variant_id = "v1", chrom = "1", pos = 100L, + A1 = "G", A2 = "A", N = 1000, MAF = 0.1, + marginal_beta = 0.1, marginal_se = 0.05, + marginal_z = 2, marginal_p = 0.05, + pip = 0, posterior_mean = 0, posterior_sd = 0, + stringsAsFactors = FALSE)) gfmr <- GwasFineMappingResult( study = "G1", method = "susie", entry = list(e), diff --git a/tests/testthat/test_fineMappingPipeline.R b/tests/testthat/test_fineMappingPipeline.R index 057885c0..d85b1ade 100644 --- a/tests/testthat/test_fineMappingPipeline.R +++ b/tests/testthat/test_fineMappingPipeline.R @@ -147,7 +147,7 @@ context("fineMappingPipeline") if (is.null(vids)) vids <- "v_unknown" FineMappingEntry( variantIds = vids, - trimmedFit = list(method = method, payload = fit), + susieFit = list(method = method, payload = fit), topLoci = data.frame(variant_id = vids, pip = seq(0.9, by = -0.1, length.out = length(vids)), @@ -240,7 +240,7 @@ test_that(".fmCacheLookup: NULL fineMappingResult returns NULL", { test_that(".fmCacheLookup: returns matching entry by 4-tuple", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(token = "susie"), + susieFit = list(token = "susie"), topLoci = data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE)) fmr <- QtlFineMappingResult( @@ -254,7 +254,7 @@ test_that(".fmCacheLookup: returns matching entry by 4-tuple", { test_that(".fmCacheLookupGwas: returns matching entry by (study, method)", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(token = "susie"), + susieFit = list(token = "susie"), topLoci = data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE)) fmr <- GwasFineMappingResult(study = "g1", method = "susie", @@ -268,7 +268,7 @@ test_that(".fmCacheLookupGwas: returns matching entry by (study, method)", { test_that(".fmCacheLookup: non-QtlFineMappingResult input returns NULL", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(token = "susie"), + susieFit = list(token = "susie"), topLoci = data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE)) gwasFmr <- GwasFineMappingResult(study = "g1", method = "susie", @@ -279,7 +279,7 @@ test_that(".fmCacheLookup: non-QtlFineMappingResult input returns NULL", { test_that(".fmCacheLookupGwas: non-GwasFineMappingResult input returns NULL", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(token = "susie"), + susieFit = list(token = "susie"), topLoci = data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE)) qtlFmr <- QtlFineMappingResult( @@ -314,7 +314,7 @@ test_that(".fmBuildGwasResult: empty entries errors", { test_that(".rbindFineMappingResult: rejects non-FineMappingResultBase input", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(token = "susie"), + susieFit = list(token = "susie"), topLoci = data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE)) fmr <- QtlFineMappingResult( @@ -333,7 +333,7 @@ test_that(".rbindFineMappingResult: rejects non-FineMappingResultBase input", { test_that(".rbindFineMappingResult: rejects mixed Qtl/Gwas inputs", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(token = "susie"), + susieFit = list(token = "susie"), topLoci = data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE)) qtlFmr <- QtlFineMappingResult( @@ -350,7 +350,7 @@ test_that(".rbindFineMappingResult: rejects mixed Qtl/Gwas inputs", { test_that(".rbindFineMappingResult: concatenates two GwasFineMappingResult collections", { e <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(token = "susie"), + susieFit = list(token = "susie"), topLoci = data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE)) a <- GwasFineMappingResult(study = "g1", method = "susie", entry = list(e)) @@ -976,7 +976,7 @@ test_that("fineMappingPipeline(QtlSumStats): cache hit short-circuits the RSS fi ss <- .fmp_makeQtlSumStats() cachedEntry <- FineMappingEntry( variantIds = paste0("v", 1:5), - trimmedFit = list(token = "susie_cached"), + susieFit = list(token = "susie_cached"), topLoci = data.frame(variant_id = paste0("v", 1:5), pip = seq(0.9, 0.1, length.out = 5), stringsAsFactors = FALSE)) @@ -1009,7 +1009,7 @@ test_that("fineMappingPipeline(GwasSumStats): cache hit short-circuits the RSS f gss <- .fmp_makeGwasSumStats() cachedEntry <- FineMappingEntry( variantIds = paste0("v", 1:5), - trimmedFit = list(token = "susie_cached"), + susieFit = list(token = "susie_cached"), topLoci = data.frame(variant_id = paste0("v", 1:5), pip = seq(0.9, 0.1, length.out = 5), stringsAsFactors = FALSE)) @@ -1041,7 +1041,7 @@ test_that("fineMappingPipeline(GwasSumStats): wrong-shape cache (QtlFineMappingR gss <- .fmp_makeGwasSumStats() cachedEntry <- FineMappingEntry( variantIds = paste0("v", 1:5), - trimmedFit = list(token = "susie_cached"), + susieFit = list(token = "susie_cached"), topLoci = data.frame(variant_id = paste0("v", 1:5), pip = rep(0.5, 5), stringsAsFactors = FALSE)) @@ -1070,7 +1070,7 @@ test_that("fineMappingPipeline(QtlDataset): cache hit avoids the fitter", { # Build a cache that already has the (study1, brain, ENSG_A, susie) row. cachedEntry <- FineMappingEntry( variantIds = paste0("v", 1:3), - trimmedFit = list(token = "susie_cached"), + susieFit = list(token = "susie_cached"), topLoci = data.frame(variant_id = paste0("v", 1:3), pip = c(0.9, 0.5, 0.1), stringsAsFactors = FALSE)) @@ -1192,6 +1192,75 @@ test_that("loadRssData is a deprecated no-op", { expect_null(res) }) +# =========================================================================== +# Residualization flag propagation +# =========================================================================== +# .resPickFlags() walks up the call stack and harvests the four +# residualization flags from whichever frame defines them. The +# fineMappingPipeline / twasWeightsPipeline setMethod signatures +# define these flags so they reach `getResidualized{Phenotypes, +# Genotypes}` via the .fmResid* wrappers without per-call-site +# threading. + +test_that(".resPickFlags picks up flags from the enclosing frame", { + outerFn <- function() { + # Mirror the QtlDataset setMethod's residualization signature. + residualizePhenotypeCovariates <- FALSE + residualizeGenotypeCovariates <- TRUE + phenotypeCovariatesToResidualize <- c("age", "sex") + genotypeCovariatesToResidualize <- NULL + innerFn <- function() { + pecotmr:::.resPickFlags() + } + innerFn() + } + flags <- outerFn() + expect_false(flags$residualizePhenotypeCovariates) + expect_true(flags$residualizeGenotypeCovariates) + expect_equal(flags$phenotypeCovariatesToResidualize, c("age", "sex")) + expect_null(flags$genotypeCovariatesToResidualize) +}) + +test_that(".resPickFlags returns an empty list when nothing is in scope", { + flags <- pecotmr:::.resPickFlags() + # Top-level call should not pick up any of the flags (the names are + # not defined here). + expect_false(any(c("residualizePhenotypeCovariates", + "residualizeGenotypeCovariates") %in% names(flags))) +}) + +test_that(".fmResidGeno / .fmResidPheno forward picked-up flags to the real accessors", { + capturedGeno <- NULL + capturedPheno <- NULL + fakeGeno <- function(x, ...) { + capturedGeno <<- list(...); matrix(0, 0, 0) + } + fakePheno <- function(x, ...) { + capturedPheno <<- list(...); matrix(0, 0, 0) + } + local_mocked_bindings( + getResidualizedGenotypes = fakeGeno, + getResidualizedPhenotypes = fakePheno, + .package = "pecotmr") + + # Emulate the setMethod frame: define the four flags then call the + # wrappers. + outerFn <- function() { + residualizePhenotypeCovariates <- FALSE + residualizeGenotypeCovariates <- TRUE + phenotypeCovariatesToResidualize <- "age" + genotypeCovariatesToResidualize <- NULL + pecotmr:::.fmResidGeno(NULL, contexts = "c1") + pecotmr:::.fmResidPheno(NULL, contexts = "c1") + } + outerFn() + expect_false(capturedGeno$residualizePhenotypeCovariates) + expect_true(capturedGeno$residualizeGenotypeCovariates) + expect_equal(capturedGeno$phenotypeCovariatesToResidualize, "age") + expect_false(capturedPheno$residualizePhenotypeCovariates) + expect_true(capturedPheno$residualizeGenotypeCovariates) +}) + # =========================================================================== # Removed during the post-S4-refactor cleanup (for traceability) # --------------------------------------------------------------------------- diff --git a/tests/testthat/test_fineMappingWrappers.R b/tests/testthat/test_fineMappingWrappers.R index c77a5cf3..936d050f 100644 --- a/tests/testthat/test_fineMappingWrappers.R +++ b/tests/testthat/test_fineMappingWrappers.R @@ -352,7 +352,7 @@ test_that("postprocessFinemappingFits keeps all effects when V is NULL", { coverage = 0.95 ) result <- formatFinemappingOutput(post, primaryMethod = "susieRss") - trimmed <- getTrimmedFit(result$finemappingEntry) + trimmed <- getSusieFit(result$finemappingEntry) # With V=NULL, eff_idx = 1:L, so trimmed alpha should keep all L rows expect_equal(nrow(trimmed$alpha), L) # V should be NULL in trimmed output @@ -404,7 +404,7 @@ test_that("postprocessFinemappingFits stores outcome_names, coef, and clfsr for # outcome_names should be stored as contextNames expect_equal(result$contextNames, cnames) - trimmed <- getTrimmedFit(result$finemappingEntry) + trimmed <- getSusieFit(result$finemappingEntry) # coef should come from mvsusieR::coef.mvsusie expect_equal(trimmed$coef, fake_coef[-1, , drop = FALSE]) # conditional_lfsr should be trimmed to eff_idx @@ -420,7 +420,7 @@ test_that("formatFinemappingOutput does not duplicate top loci variants", { ) fm <- FineMappingEntry( variantIds = paste0("v", 1:4), - trimmedFit = list(pip = 1:4), + susieFit = list(pip = 1:4), topLoci = data.frame(variant_id = character(0), pip = numeric(0)) ) post <- list( @@ -508,12 +508,12 @@ if (!exists(".make_univariate_data", inherits = FALSE)) { } .UNIFIED_TOP_LOCI_COLS <- c( - "#chr", "start", "end", "a1", "a2", - "variant", "gene", "event", - "n", "af", "beta", "se", - "pip", "posterior_effect_mean", "posterior_effect_se", + "variant_id", "chrom", "pos", "A1", "A2", + "N", "MAF", + "marginal_beta", "marginal_se", "marginal_z", "marginal_p", + "pip", "posterior_mean", "posterior_sd", "cs_95", "cs_70", "cs_50", "cs_95_purity", - "method", "grange_start", "grange_end" + "method", "gene", "event", "grange_start", "grange_end" ) # Synthesize a SuSiE-like fit + cs_tables with explicit per-coverage CS @@ -577,7 +577,8 @@ if (!exists(".make_univariate_data", inherits = FALSE)) { } .runBuildTopLoci <- function(inp, method = "susie", signalCutoff = 0.05, - sumstats = NULL, af = NULL, + af = NULL, + sumstats = NULL, otherQuantities = NULL, region = NULL) { buildTopLoci( @@ -603,23 +604,27 @@ test_that("buildTopLoci returns the exact 22-column schema in order with stable out <- empty_fn() expect_equal(names(out), .UNIFIED_TOP_LOCI_COLS) expect_equal(nrow(out), 0L) - expect_true(is.character(out$"#chr")) - expect_true(is.integer(out$start)) - expect_true(is.integer(out$end)) - expect_true(is.character(out$variant)) - expect_true(is.character(out$gene)) - expect_true(is.character(out$event)) - expect_true(is.integer(out$n)) - expect_true(is.numeric(out$af)) - expect_false("maf" %in% names(out)) + expect_true(is.character(out$variant_id)) + expect_true(is.character(out$chrom)) + expect_true(is.integer(out$pos)) + expect_true(is.character(out$A1)) + expect_true(is.character(out$A2)) + expect_true(is.numeric(out$N)) + expect_true(is.numeric(out$MAF)) + expect_true(is.numeric(out$marginal_beta)) + expect_true(is.numeric(out$marginal_se)) + expect_true(is.numeric(out$marginal_z)) + expect_true(is.numeric(out$marginal_p)) expect_true(is.numeric(out$pip)) - expect_true(is.numeric(out$posterior_effect_mean)) - expect_true(is.numeric(out$posterior_effect_se)) + expect_true(is.numeric(out$posterior_mean)) + expect_true(is.numeric(out$posterior_sd)) expect_true(is.character(out$cs_95)) expect_true(is.character(out$cs_70)) expect_true(is.character(out$cs_50)) expect_true(is.numeric(out$cs_95_purity)) expect_true(is.character(out$method)) + expect_true(is.character(out$gene)) + expect_true(is.character(out$event)) expect_true(is.integer(out$grange_start)) expect_true(is.integer(out$grange_end)) }) @@ -643,7 +648,7 @@ test_that("buildTopLoci emits 22 columns in the fixed order on a non-empty fit", expect_equal(unique(out$event), "Ast_DeJager_eQTL_ENSG00000179403") expect_equal(unique(out$grange_start), 10823338L) expect_equal(unique(out$grange_end), 14348298L) - expect_equal(unique(out$n), 419L) + expect_equal(unique(out$N), 419L) expect_equal(unique(out$method), "susie") }) @@ -727,7 +732,7 @@ test_that("cs_95_purity = 0 when cs_95 is '_0', and in (0, 1] otherwise" expect_true(all(in_cs$cs_95_purity > 0 & in_cs$cs_95_purity <= 1)) }) -test_that("overlapping CS within one method produces one row per CS membership", { +test_that("overlapping CS within one method: one row per variant; smallest cs_idx wins", { variant_ids <- c("chr1:100:A:G") # One variant belongs to CS 1 AND CS 2 at 95-cov (overlap). cs_at_cov <- list("0.95" = list(1L, 1L), @@ -735,12 +740,12 @@ test_that("overlapping CS within one method produces one row per CS membership", "0.5" = list(1L, 1L)) inp <- .fake_fit_and_cs(variant_ids, cs_at_cov, pip = 0.9) out <- .runBuildTopLoci(inp, method = "susie") - # Two rows: one for CS 1 membership, one for CS 2 membership; same - # (variant, gene, method). - expect_equal(nrow(out), 2L) - expect_equal(unique(out$variant), "chr1:100:A:G") - expect_equal(unique(out$method), "susie") - expect_setequal(out$cs_95, c("susie_1", "susie_2")) + # Canonical schema is one row per variant. When a variant is in + # multiple CSs at a coverage, the smallest cs_idx is reported. + expect_equal(nrow(out), 1L) + expect_equal(out$variant_id, "chr1:100:A:G") + expect_equal(out$method, "susie") + expect_equal(out$cs_95, "susie_1") }) test_that("overlapping CS across methods produces one row per method", { @@ -851,7 +856,7 @@ test_that("formatFinemappingOutput exposes finemappingEntry with S4 accessors", expect_true("finemappingEntry" %in% names(out)) fm <- out$finemappingEntry expect_true(is.character(getVariantIds(fm)) && length(getVariantIds(fm)) == ncol(d$X)) - expect_true(is.list(getTrimmedFit(fm)) && !is.null(getTrimmedFit(fm)$pip)) + expect_true(is.list(getSusieFit(fm)) && !is.null(getSusieFit(fm)$pip)) }) test_that("missing region produces NA grange columns rather than silent omission", { @@ -870,7 +875,7 @@ test_that("missing region produces NA grange columns rather than silent omission expect_equal(unique(out$event), "ctx_ENSG00000179403") }) -test_that("posterior_effect_mean equals colSums(alpha*mu); posterior_effect_se equals sqrt(pmax(colSums(alpha*mu2) - mean^2, 0))", { +test_that("posterior_mean equals colSums(alpha*mu); posterior_sd equals sqrt(pmax(colSums(alpha*mu2) - mean^2, 0))", { variant_ids <- c("chr1:100:A:G", "chr1:200:C:T") cs_at_cov <- list("0.95" = list(c(1L, 2L)), "0.7" = list(c(1L, 2L)), @@ -881,11 +886,11 @@ test_that("posterior_effect_mean equals colSums(alpha*mu); posterior_effect_se e expected_se <- sqrt(pmax(colSums(inp$fit$alpha * inp$fit$mu2) - expected_mean^2, 0)) # Match per variant index by looking up via variant string. for (i in seq_along(variant_ids)) { - row <- out[out$variant == variant_ids[i], , drop = FALSE] + row <- out[out$variant_id == variant_ids[i], , drop = FALSE] expect_true(nrow(row) >= 1L) - expect_equal(unique(row$posterior_effect_mean), expected_mean[i], + expect_equal(unique(row$posterior_mean), expected_mean[i], tolerance = 1e-10) - expect_equal(unique(row$posterior_effect_se), expected_se[i], + expect_equal(unique(row$posterior_sd), expected_se[i], tolerance = 1e-10) } }) diff --git a/tests/testthat/test_genotypeIo.R b/tests/testthat/test_genotypeIo.R index 8714b572..0212f721 100644 --- a/tests/testthat/test_genotypeIo.R +++ b/tests/testthat/test_genotypeIo.R @@ -1283,7 +1283,7 @@ test_that("extractBlockGenotypes returns SummarizedExperiment", { pip <- as.numeric(1 - apply(1 - alpha, 2, prod)) FineMappingEntry( variantIds = vids, - trimmedFit = list( + susieFit = list( pip = pip, alpha = alpha, lbf_variable = lbf, diff --git a/tests/testthat/test_jointDispatchers.R b/tests/testthat/test_jointDispatchers.R deleted file mode 100644 index f76936f3..00000000 --- a/tests/testthat/test_jointDispatchers.R +++ /dev/null @@ -1,462 +0,0 @@ -context("joint dispatchers (fineMappingDispatcher / twasDispatcher)") - -# ============================================================================ -# Strategy: each joint-dispatcher function is exercised by driving -# fineMappingPipeline / twasWeightsPipeline through the user-facing -# `jointSpecification` argument and mocking the underlying fitters -# (mvsusieRss, mrmashWeights, mrmashRssWeights, ...). The mocks return tiny -# stub objects so postprocessing builds plausible result rows. -# ============================================================================ - -# ----------------------------------------------------------------------------- -# Fixture builders -# ----------------------------------------------------------------------------- - -.jd_makeHandle <- function(snp_n = 5L, n_samples = 30L) { - new("GenotypeHandle", - path = "/tmp/jd.gds", - format = "gds", - snpInfo = data.frame( - SNP = paste0("v", seq_len(snp_n)), - CHR = rep("1", snp_n), - BP = seq(100L, by = 100L, length.out = snp_n), - A1 = rep("A", snp_n), - A2 = rep("G", snp_n), - stringsAsFactors = FALSE), - nSamples = n_samples, - sampleIds = paste0("s", seq_len(n_samples)), - pgenPtr = NULL) -} - -.jd_mockExtractor <- function(seed = 11, n_samples = 30L) { - function(handle, snpIdx, meanImpute = TRUE) { - set.seed(seed) - panel <- matrix(rbinom(n_samples * nrow(handle@snpInfo), 2, 0.3), - nrow = n_samples, ncol = nrow(handle@snpInfo), - dimnames = list(handle@sampleIds, handle@snpInfo$SNP)) - sub <- panel[, snpIdx, drop = FALSE] - rr <- GenomicRanges::GRanges( - seqnames = paste0("chr", handle@snpInfo$CHR[snpIdx]), - ranges = IRanges::IRanges(start = handle@snpInfo$BP[snpIdx], - width = 1L)) - S4Vectors::mcols(rr) <- S4Vectors::DataFrame( - SNP = handle@snpInfo$SNP[snpIdx], - A1 = handle@snpInfo$A1[snpIdx], - A2 = handle@snpInfo$A2[snpIdx]) - cd <- S4Vectors::DataFrame(sampleId = handle@sampleIds, - row.names = handle@sampleIds) - dosage <- t(sub) - rownames(dosage) <- handle@snpInfo$SNP[snpIdx] - colnames(dosage) <- handle@sampleIds - SummarizedExperiment::SummarizedExperiment( - assays = list(dosage = dosage), - rowRanges = rr, - colData = cd) - } -} - -# Multi-(study, context, trait) QtlSumStats. Every row carries the same SNP -# order (5 variants) so jointCrossContext / jointCrossTrait / jointCrossStudy -# can stack Z columns without alignment problems. -.jd_makeQtlSumStats <- function(studies = "Q1", - contexts = c("c1", "c2"), - traits = "t1") { - rows <- expand.grid(study = studies, context = contexts, trait = traits, - stringsAsFactors = FALSE) - makeGr <- function() { - gr <- GenomicRanges::GRanges( - seqnames = "chr1", - ranges = IRanges::IRanges(start = seq(100L, by = 100L, - length.out = 5L), - width = 1L)) - S4Vectors::mcols(gr) <- S4Vectors::DataFrame( - SNP = paste0("v", 1:5), - A1 = rep("A", 5), A2 = rep("G", 5), - Z = rnorm(5), N = rep(1000L, 5)) - gr - } - QtlSumStats( - study = rows$study, - context = rows$context, - trait = rows$trait, - entry = lapply(seq_len(nrow(rows)), function(.) makeGr()), - genome = "hg19", - ldSketch = .jd_makeHandle(), - qcInfo = list(step1 = "ok")) -} - -# ----------------------------------------------------------------------------- -# Mocks for SuSiE / mvsusie / mr.mash families -# ----------------------------------------------------------------------------- - -.jd_mockMvsusie <- function() { - function(X, Y, prior_variance, coverage) { - list(token = "mvsusie", n_X_cols = ncol(X), n_Y_cols = ncol(Y)) - } -} - -.jd_mockMvsusieRss <- function() { - function(Z, R, N, prior_variance, coverage) { - list(token = "mvsusieRss", nVariants = nrow(Z), nOutcomes = ncol(Z)) - } -} - -.jd_mockMixturePrior <- function() { - function(R, ...) list(R = R) -} - -# A stub postprocessor that returns a tiny FineMappingEntry. Mirrors the -# `.fmp_mockPostprocess` shape from test_fineMappingPipeline.R. -.jd_mockPostprocess <- function() { - function(fit, method, dataX, dataY, coverage, secondaryCoverage, - signalCutoff, minAbsCorr, csInput = NULL, af = NULL, - region = NULL) { - if (is.matrix(dataX)) { - vids <- colnames(dataX) - } else if (is.list(dataY) && !is.null(dataY$z)) { - vids <- names(dataY$z) - } else { - vids <- "v_unknown" - } - if (is.null(vids)) vids <- "v_unknown" - FineMappingEntry( - variantIds = vids, - trimmedFit = list(method = method, payload = fit), - topLoci = data.frame(variant_id = vids, - pip = seq(0.9, by = -0.1, - length.out = length(vids)), - stringsAsFactors = FALSE)) - } -} - -.jd_mockMrmashWeights <- function() { - function(X, Y, ...) { - w <- matrix(0, nrow = ncol(X), ncol = ncol(Y), - dimnames = list(colnames(X), colnames(Y))) - w - } -} - -.jd_mockMrmashRssWeights <- function() { - function(stat, LD, ...) { - nCols <- if (is.matrix(stat$z)) ncol(stat$z) else 1L - nVars <- if (is.matrix(stat$z)) nrow(stat$z) else length(stat$z) - w <- matrix(0, nrow = nVars, ncol = nCols) - rownames(w) <- if (is.matrix(stat$z)) rownames(stat$z) - else stat$variantNames - if (is.matrix(stat$z) && !is.null(colnames(stat$z))) - colnames(w) <- colnames(stat$z) - w - } -} - -# ============================================================================= -# fineMappingDispatcher: QtlSumStats -# ============================================================================= - -test_that("fineMappingPipeline(QtlSumStats): jointSpec='context' fits one joint per (study, trait)", { - ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = c("c1", "c2"), - traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - .fmPostprocessOne = .jd_mockPostprocess(), - .package = "pecotmr") - local_mocked_bindings( - mvsusie_rss = .jd_mockMvsusieRss(), - create_mixture_prior = .jd_mockMixturePrior(), - .package = "mvsusieR") - res <- suppressMessages( - fineMappingPipeline(ss, methods = "mvsusie", - jointSpecification = "context")) - expect_s4_class(res, "QtlFineMappingResult") - expect_equal(nrow(res), 1L) - expect_equal(as.character(res$context), "joint") - expect_true(grepl("c1;c2|c2;c1", as.character(res$jointContexts))) -}) - -test_that("fineMappingPipeline(QtlSumStats): jointSpec='context' with only one context skips", { - ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - .fmPostprocessOne = .jd_mockPostprocess(), - .package = "pecotmr") - local_mocked_bindings( - mvsusie_rss = .jd_mockMvsusieRss(), - create_mixture_prior = .jd_mockMixturePrior(), - .package = "mvsusieR") - expect_error( - suppressMessages( - fineMappingPipeline(ss, methods = "mvsusie", - jointSpecification = "context")), - "no joint fits produced" - ) -}) - -test_that("fineMappingPipeline(QtlSumStats): jointSpec='trait' fits one joint per (study, context)", { - ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", - traits = c("t1", "t2")) - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - .fmPostprocessOne = .jd_mockPostprocess(), - .package = "pecotmr") - local_mocked_bindings( - mvsusie_rss = .jd_mockMvsusieRss(), - create_mixture_prior = .jd_mockMixturePrior(), - .package = "mvsusieR") - res <- suppressMessages( - fineMappingPipeline(ss, methods = "mvsusie", - jointSpecification = "trait")) - expect_s4_class(res, "QtlFineMappingResult") - expect_equal(nrow(res), 1L) - expect_equal(as.character(res$trait), "joint") -}) - -test_that("fineMappingPipeline(QtlSumStats): jointSpec='trait' with fsusie errors (no RSS variant)", { - ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", - traits = c("t1", "t2")) - expect_error( - fineMappingPipeline(ss, methods = "fsusie", - jointSpecification = "trait"), - "fsusie" - ) -}) - -test_that("fineMappingPipeline(QtlSumStats): jointSpec='study' fits one joint per (context, trait)", { - ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), contexts = "c1", - traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - .fmPostprocessOne = .jd_mockPostprocess(), - .package = "pecotmr") - local_mocked_bindings( - mvsusie_rss = .jd_mockMvsusieRss(), - create_mixture_prior = .jd_mockMixturePrior(), - .package = "mvsusieR") - res <- suppressMessages( - fineMappingPipeline(ss, methods = "mvsusie", - jointSpecification = "study")) - expect_s4_class(res, "QtlFineMappingResult") - expect_equal(nrow(res), 1L) - expect_equal(as.character(res$study), "joint") -}) - -test_that("fineMappingPipeline(QtlSumStats): composed jointSpec axes={'study','context'} fits", { - ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), - contexts = c("c1", "c2"), - traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - .fmPostprocessOne = .jd_mockPostprocess(), - .package = "pecotmr") - local_mocked_bindings( - mvsusie_rss = .jd_mockMvsusieRss(), - create_mixture_prior = .jd_mockMixturePrior(), - .package = "mvsusieR") - res <- suppressMessages( - fineMappingPipeline(ss, methods = "mvsusie", - jointSpecification = list(c("study", "context")))) - expect_s4_class(res, "QtlFineMappingResult") - expect_true("jointStudies" %in% names(res)) - expect_true("jointContexts" %in% names(res)) - expect_true(any(as.character(res$study) == "joint")) - expect_true(any(as.character(res$context) == "joint")) -}) - -test_that("fineMappingPipeline(QtlSumStats): composed jointSpec rejects fsusie", { - ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), - contexts = c("c1", "c2"), - traits = "t1") - expect_error( - fineMappingPipeline(ss, methods = "fsusie", - jointSpecification = list(c("study", "context"))), - "fsusie" - ) -}) - -# ============================================================================= -# twasDispatcher: QtlDataset -# ============================================================================= - -.jd_makeSe <- function(traits = c("t1", "t2"), n_samples = 30L, - starts = NULL) { - if (is.null(starts)) - starts <- seq(1000L, by = 1000L, length.out = length(traits)) - rng <- GenomicRanges::GRanges( - seqnames = rep("chr1", length(traits)), - ranges = IRanges::IRanges(start = starts, width = 500L)) - names(rng) <- traits - set.seed(0) - expr <- matrix(rnorm(length(traits) * n_samples), - nrow = length(traits), ncol = n_samples, - dimnames = list(traits, paste0("s", seq_len(n_samples)))) - cd <- S4Vectors::DataFrame( - sex = rep(c(0, 1), length.out = n_samples), - age = seq_len(n_samples), - row.names = paste0("s", seq_len(n_samples))) - SummarizedExperiment::SummarizedExperiment( - assays = list(expression = expr), - rowRanges = rng, - colData = cd) -} - -.jd_makeQtlDataset <- function(study = "Q1", - contexts = c("c1", "c2"), - traits = c("t1", "t2")) { - phen <- setNames(lapply(contexts, - function(.) .jd_makeSe(traits = traits)), - contexts) - QtlDataset( - study = study, - genotypes = .jd_makeHandle(), - phenotypes = phen, - genotypeCovariates = matrix(numeric(0), nrow = 0, ncol = 0)) -} - -test_that("twasWeightsPipeline(QtlDataset): jointSpec='context' fits mr.mash per trait", { - qd <- .jd_makeQtlDataset(study = "Q1", - contexts = c("c1", "c2"), - traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashWeights = .jd_mockMrmashWeights(), - .package = "pecotmr") - res <- suppressMessages( - twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, - jointSpecification = "context")) - expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$context), "joint") - expect_true("jointContexts" %in% names(res)) -}) - -test_that("twasWeightsPipeline(QtlDataset): jointSpec='context' with only one context skips", { - qd <- .jd_makeQtlDataset(study = "Q1", - contexts = "c1", - traits = c("t1", "t2")) - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashWeights = .jd_mockMrmashWeights(), - .package = "pecotmr") - expect_error( - suppressMessages( - twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, - jointSpecification = "context")), - "no joint fits produced|context" - ) -}) - -test_that("twasWeightsPipeline(QtlDataset): jointSpec='trait' fits mr.mash per context", { - qd <- .jd_makeQtlDataset(study = "Q1", - contexts = "c1", - traits = c("t1", "t2")) - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashWeights = .jd_mockMrmashWeights(), - .package = "pecotmr") - res <- suppressMessages( - twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, - jointSpecification = "trait")) - expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$trait), "joint") - expect_true("jointTraits" %in% names(res)) -}) - -test_that("twasWeightsPipeline(QtlDataset): study-axis fails on individual data", { - qd <- .jd_makeQtlDataset(study = "Q1", - contexts = c("c1", "c2"), - traits = "t1") - expect_error( - twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, - jointSpecification = "study"), - "requires sumstats input" - ) -}) - -test_that("twasWeightsPipeline(QtlDataset): composed jointSpec axes=c('context','trait') fits", { - qd <- .jd_makeQtlDataset(study = "Q1", - contexts = c("c1", "c2"), - traits = c("t1", "t2")) - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashWeights = .jd_mockMrmashWeights(), - .package = "pecotmr") - res <- suppressMessages( - twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, - jointSpecification = list(c("context", "trait")))) - expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$context), "joint") - expect_equal(as.character(res$trait), "joint") -}) - -test_that("twasWeightsPipeline(QtlDataset): composed jointSpec including 'study' errors", { - qd <- .jd_makeQtlDataset(study = "Q1", - contexts = c("c1", "c2"), - traits = "t1") - expect_error( - twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, - jointSpecification = list(c("study", "context"))), - "require sumstats|requires sumstats" - ) -}) - -# ============================================================================= -# twasDispatcher: QtlSumStats -# ============================================================================= - -test_that("twasWeightsPipeline(QtlSumStats): jointSpec='context' fits mr.mash.rss per (study, trait)", { - ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = c("c1", "c2"), - traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashRssWeights = .jd_mockMrmashRssWeights(), - .package = "pecotmr") - res <- suppressMessages( - twasWeightsPipeline(ss, methods = "mrmash", - jointSpecification = "context")) - expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$context), "joint") - expect_true("jointContexts" %in% names(res)) -}) - -test_that("twasWeightsPipeline(QtlSumStats): jointSpec='trait' fits mr.mash.rss per (study, context)", { - ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", - traits = c("t1", "t2")) - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashRssWeights = .jd_mockMrmashRssWeights(), - .package = "pecotmr") - res <- suppressMessages( - twasWeightsPipeline(ss, methods = "mrmash", - jointSpecification = "trait")) - expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$trait), "joint") -}) - -test_that("twasWeightsPipeline(QtlSumStats): jointSpec='study' fits mr.mash.rss per (context, trait)", { - ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), contexts = "c1", - traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashRssWeights = .jd_mockMrmashRssWeights(), - .package = "pecotmr") - res <- suppressMessages( - twasWeightsPipeline(ss, methods = "mrmash", - jointSpecification = "study")) - expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$study), "joint") -}) - -test_that("twasWeightsPipeline(QtlSumStats): composed jointSpec axes=c('study','context') fits", { - ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), - contexts = c("c1", "c2"), - traits = "t1") - local_mocked_bindings( - extractBlockGenotypes = .jd_mockExtractor(), - mrmashRssWeights = .jd_mockMrmashRssWeights(), - .package = "pecotmr") - res <- suppressMessages( - twasWeightsPipeline(ss, methods = "mrmash", - jointSpecification = list(c("study", "context")))) - expect_s4_class(res, "TwasWeights") - expect_true("jointStudies" %in% names(res)) - expect_true("jointContexts" %in% names(res)) -}) diff --git a/tests/testthat/test_jointSpecification.R b/tests/testthat/test_jointSpecification.R index 29727eaa..4379f1e3 100644 --- a/tests/testthat/test_jointSpecification.R +++ b/tests/testthat/test_jointSpecification.R @@ -532,3 +532,470 @@ test_that("validateMethodsVsJointSpec: split-form methods skipped", { joints <- pecotmr:::parseJointSpecification("context", qd) expect_silent(pecotmr:::validateMethodsVsJointSpec(parsed, joints)) }) + +# ============================================================================ +# Joint dispatchers (merged from former tests/testthat/test_jointDispatchers.R) +# ============================================================================ + +context("joint dispatchers (fineMappingDispatcher / twasDispatcher)") + +# ============================================================================ +# Strategy: each joint-dispatcher function is exercised by driving +# fineMappingPipeline / twasWeightsPipeline through the user-facing +# `jointSpecification` argument and mocking the underlying fitters +# (mvsusieRss, mrmashWeights, mrmashRssWeights, ...). The mocks return tiny +# stub objects so postprocessing builds plausible result rows. +# ============================================================================ + +# ----------------------------------------------------------------------------- +# Fixture builders +# ----------------------------------------------------------------------------- + +.jd_makeHandle <- function(snp_n = 5L, n_samples = 30L) { + new("GenotypeHandle", + path = "/tmp/jd.gds", + format = "gds", + snpInfo = data.frame( + SNP = paste0("v", seq_len(snp_n)), + CHR = rep("1", snp_n), + BP = seq(100L, by = 100L, length.out = snp_n), + A1 = rep("A", snp_n), + A2 = rep("G", snp_n), + stringsAsFactors = FALSE), + nSamples = n_samples, + sampleIds = paste0("s", seq_len(n_samples)), + pgenPtr = NULL) +} + +.jd_mockExtractor <- function(seed = 11, n_samples = 30L) { + function(handle, snpIdx, meanImpute = TRUE) { + set.seed(seed) + panel <- matrix(rbinom(n_samples * nrow(handle@snpInfo), 2, 0.3), + nrow = n_samples, ncol = nrow(handle@snpInfo), + dimnames = list(handle@sampleIds, handle@snpInfo$SNP)) + sub <- panel[, snpIdx, drop = FALSE] + rr <- GenomicRanges::GRanges( + seqnames = paste0("chr", handle@snpInfo$CHR[snpIdx]), + ranges = IRanges::IRanges(start = handle@snpInfo$BP[snpIdx], + width = 1L)) + S4Vectors::mcols(rr) <- S4Vectors::DataFrame( + SNP = handle@snpInfo$SNP[snpIdx], + A1 = handle@snpInfo$A1[snpIdx], + A2 = handle@snpInfo$A2[snpIdx]) + cd <- S4Vectors::DataFrame(sampleId = handle@sampleIds, + row.names = handle@sampleIds) + dosage <- t(sub) + rownames(dosage) <- handle@snpInfo$SNP[snpIdx] + colnames(dosage) <- handle@sampleIds + SummarizedExperiment::SummarizedExperiment( + assays = list(dosage = dosage), + rowRanges = rr, + colData = cd) + } +} + +# Multi-(study, context, trait) QtlSumStats. Every row carries the same SNP +# order (5 variants) so jointCrossContext / jointCrossTrait / jointCrossStudy +# can stack Z columns without alignment problems. +.jd_makeQtlSumStats <- function(studies = "Q1", + contexts = c("c1", "c2"), + traits = "t1") { + rows <- expand.grid(study = studies, context = contexts, trait = traits, + stringsAsFactors = FALSE) + makeGr <- function() { + gr <- GenomicRanges::GRanges( + seqnames = "chr1", + ranges = IRanges::IRanges(start = seq(100L, by = 100L, + length.out = 5L), + width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = paste0("v", 1:5), + A1 = rep("A", 5), A2 = rep("G", 5), + Z = rnorm(5), N = rep(1000L, 5)) + gr + } + QtlSumStats( + study = rows$study, + context = rows$context, + trait = rows$trait, + entry = lapply(seq_len(nrow(rows)), function(.) makeGr()), + genome = "hg19", + ldSketch = .jd_makeHandle(), + qcInfo = list(step1 = "ok")) +} + +# ----------------------------------------------------------------------------- +# Mocks for SuSiE / mvsusie / mr.mash families +# ----------------------------------------------------------------------------- + +.jd_mockMvsusie <- function() { + function(X, Y, prior_variance, coverage) { + list(token = "mvsusie", n_X_cols = ncol(X), n_Y_cols = ncol(Y)) + } +} + +.jd_mockMvsusieRss <- function() { + function(Z, R, N, prior_variance, coverage) { + list(token = "mvsusieRss", nVariants = nrow(Z), nOutcomes = ncol(Z)) + } +} + +.jd_mockMixturePrior <- function() { + function(R, ...) list(R = R) +} + +# A stub postprocessor that returns a tiny FineMappingEntry. Mirrors the +# `.fmp_mockPostprocess` shape from test_fineMappingPipeline.R. +.jd_mockPostprocess <- function() { + function(fit, method, dataX, dataY, coverage, secondaryCoverage, + signalCutoff, minAbsCorr, csInput = NULL, af = NULL, + region = NULL) { + if (is.matrix(dataX)) { + vids <- colnames(dataX) + } else if (is.list(dataY) && !is.null(dataY$z)) { + vids <- names(dataY$z) + } else { + vids <- "v_unknown" + } + if (is.null(vids)) vids <- "v_unknown" + FineMappingEntry( + variantIds = vids, + susieFit = list(method = method, payload = fit), + topLoci = data.frame(variant_id = vids, + pip = seq(0.9, by = -0.1, + length.out = length(vids)), + stringsAsFactors = FALSE)) + } +} + +.jd_mockMrmashWeights <- function() { + function(X, Y, ...) { + w <- matrix(0, nrow = ncol(X), ncol = ncol(Y), + dimnames = list(colnames(X), colnames(Y))) + w + } +} + +.jd_mockMrmashRssWeights <- function() { + function(stat, LD, ...) { + nCols <- if (is.matrix(stat$z)) ncol(stat$z) else 1L + nVars <- if (is.matrix(stat$z)) nrow(stat$z) else length(stat$z) + w <- matrix(0, nrow = nVars, ncol = nCols) + rownames(w) <- if (is.matrix(stat$z)) rownames(stat$z) + else stat$variantNames + if (is.matrix(stat$z) && !is.null(colnames(stat$z))) + colnames(w) <- colnames(stat$z) + w + } +} + +# ============================================================================= +# fineMappingDispatcher: QtlSumStats +# ============================================================================= + +test_that("fineMappingPipeline(QtlSumStats): jointSpec='context' fits one joint per (study, trait)", { + ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = c("c1", "c2"), + traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + .fmPostprocessOne = .jd_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie_rss = .jd_mockMvsusieRss(), + create_mixture_prior = .jd_mockMixturePrior(), + .package = "mvsusieR") + res <- suppressMessages( + fineMappingPipeline(ss, methods = "mvsusie", + jointSpecification = "context")) + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 1L) + expect_equal(as.character(res$context), "joint") + expect_true(grepl("c1;c2|c2;c1", as.character(res$jointContexts))) +}) + +test_that("fineMappingPipeline(QtlSumStats): jointSpec='context' with only one context skips", { + ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + .fmPostprocessOne = .jd_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie_rss = .jd_mockMvsusieRss(), + create_mixture_prior = .jd_mockMixturePrior(), + .package = "mvsusieR") + expect_error( + suppressMessages( + fineMappingPipeline(ss, methods = "mvsusie", + jointSpecification = "context")), + "no joint fits produced" + ) +}) + +test_that("fineMappingPipeline(QtlSumStats): jointSpec='trait' fits one joint per (study, context)", { + ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", + traits = c("t1", "t2")) + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + .fmPostprocessOne = .jd_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie_rss = .jd_mockMvsusieRss(), + create_mixture_prior = .jd_mockMixturePrior(), + .package = "mvsusieR") + res <- suppressMessages( + fineMappingPipeline(ss, methods = "mvsusie", + jointSpecification = "trait")) + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 1L) + expect_equal(as.character(res$trait), "joint") +}) + +test_that("fineMappingPipeline(QtlSumStats): jointSpec='trait' with fsusie errors (no RSS variant)", { + ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", + traits = c("t1", "t2")) + expect_error( + fineMappingPipeline(ss, methods = "fsusie", + jointSpecification = "trait"), + "fsusie" + ) +}) + +test_that("fineMappingPipeline(QtlSumStats): jointSpec='study' fits one joint per (context, trait)", { + ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), contexts = "c1", + traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + .fmPostprocessOne = .jd_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie_rss = .jd_mockMvsusieRss(), + create_mixture_prior = .jd_mockMixturePrior(), + .package = "mvsusieR") + res <- suppressMessages( + fineMappingPipeline(ss, methods = "mvsusie", + jointSpecification = "study")) + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 1L) + expect_equal(as.character(res$study), "joint") +}) + +test_that("fineMappingPipeline(QtlSumStats): composed jointSpec axes={'study','context'} fits", { + ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), + contexts = c("c1", "c2"), + traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + .fmPostprocessOne = .jd_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie_rss = .jd_mockMvsusieRss(), + create_mixture_prior = .jd_mockMixturePrior(), + .package = "mvsusieR") + res <- suppressMessages( + fineMappingPipeline(ss, methods = "mvsusie", + jointSpecification = list(c("study", "context")))) + expect_s4_class(res, "QtlFineMappingResult") + expect_true("jointStudies" %in% names(res)) + expect_true("jointContexts" %in% names(res)) + expect_true(any(as.character(res$study) == "joint")) + expect_true(any(as.character(res$context) == "joint")) +}) + +test_that("fineMappingPipeline(QtlSumStats): composed jointSpec rejects fsusie", { + ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), + contexts = c("c1", "c2"), + traits = "t1") + expect_error( + fineMappingPipeline(ss, methods = "fsusie", + jointSpecification = list(c("study", "context"))), + "fsusie" + ) +}) + +# ============================================================================= +# twasDispatcher: QtlDataset +# ============================================================================= + +.jd_makeSe <- function(traits = c("t1", "t2"), n_samples = 30L, + starts = NULL) { + if (is.null(starts)) + starts <- seq(1000L, by = 1000L, length.out = length(traits)) + rng <- GenomicRanges::GRanges( + seqnames = rep("chr1", length(traits)), + ranges = IRanges::IRanges(start = starts, width = 500L)) + names(rng) <- traits + set.seed(0) + expr <- matrix(rnorm(length(traits) * n_samples), + nrow = length(traits), ncol = n_samples, + dimnames = list(traits, paste0("s", seq_len(n_samples)))) + cd <- S4Vectors::DataFrame( + sex = rep(c(0, 1), length.out = n_samples), + age = seq_len(n_samples), + row.names = paste0("s", seq_len(n_samples))) + SummarizedExperiment::SummarizedExperiment( + assays = list(expression = expr), + rowRanges = rng, + colData = cd) +} + +.jd_makeQtlDataset <- function(study = "Q1", + contexts = c("c1", "c2"), + traits = c("t1", "t2")) { + phen <- setNames(lapply(contexts, + function(.) .jd_makeSe(traits = traits)), + contexts) + QtlDataset( + study = study, + genotypes = .jd_makeHandle(), + phenotypes = phen, + genotypeCovariates = matrix(numeric(0), nrow = 0, ncol = 0)) +} + +test_that("twasWeightsPipeline(QtlDataset): jointSpec='context' fits mr.mash per trait", { + qd <- .jd_makeQtlDataset(study = "Q1", + contexts = c("c1", "c2"), + traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashWeights = .jd_mockMrmashWeights(), + .package = "pecotmr") + res <- suppressMessages( + twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, + jointSpecification = "context")) + expect_s4_class(res, "TwasWeights") + expect_equal(as.character(res$context), "joint") + expect_true("jointContexts" %in% names(res)) +}) + +test_that("twasWeightsPipeline(QtlDataset): jointSpec='context' with only one context skips", { + qd <- .jd_makeQtlDataset(study = "Q1", + contexts = "c1", + traits = c("t1", "t2")) + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashWeights = .jd_mockMrmashWeights(), + .package = "pecotmr") + expect_error( + suppressMessages( + twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, + jointSpecification = "context")), + "no joint fits produced|context" + ) +}) + +test_that("twasWeightsPipeline(QtlDataset): jointSpec='trait' fits mr.mash per context", { + qd <- .jd_makeQtlDataset(study = "Q1", + contexts = "c1", + traits = c("t1", "t2")) + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashWeights = .jd_mockMrmashWeights(), + .package = "pecotmr") + res <- suppressMessages( + twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, + jointSpecification = "trait")) + expect_s4_class(res, "TwasWeights") + expect_equal(as.character(res$trait), "joint") + expect_true("jointTraits" %in% names(res)) +}) + +test_that("twasWeightsPipeline(QtlDataset): study-axis fails on individual data", { + qd <- .jd_makeQtlDataset(study = "Q1", + contexts = c("c1", "c2"), + traits = "t1") + expect_error( + twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, + jointSpecification = "study"), + "requires sumstats input" + ) +}) + +test_that("twasWeightsPipeline(QtlDataset): composed jointSpec axes=c('context','trait') fits", { + qd <- .jd_makeQtlDataset(study = "Q1", + contexts = c("c1", "c2"), + traits = c("t1", "t2")) + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashWeights = .jd_mockMrmashWeights(), + .package = "pecotmr") + res <- suppressMessages( + twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, + jointSpecification = list(c("context", "trait")))) + expect_s4_class(res, "TwasWeights") + expect_equal(as.character(res$context), "joint") + expect_equal(as.character(res$trait), "joint") +}) + +test_that("twasWeightsPipeline(QtlDataset): composed jointSpec including 'study' errors", { + qd <- .jd_makeQtlDataset(study = "Q1", + contexts = c("c1", "c2"), + traits = "t1") + expect_error( + twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, + jointSpecification = list(c("study", "context"))), + "require sumstats|requires sumstats" + ) +}) + +# ============================================================================= +# twasDispatcher: QtlSumStats +# ============================================================================= + +test_that("twasWeightsPipeline(QtlSumStats): jointSpec='context' fits mr.mash.rss per (study, trait)", { + ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = c("c1", "c2"), + traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashRssWeights = .jd_mockMrmashRssWeights(), + .package = "pecotmr") + res <- suppressMessages( + twasWeightsPipeline(ss, methods = "mrmash", + jointSpecification = "context")) + expect_s4_class(res, "TwasWeights") + expect_equal(as.character(res$context), "joint") + expect_true("jointContexts" %in% names(res)) +}) + +test_that("twasWeightsPipeline(QtlSumStats): jointSpec='trait' fits mr.mash.rss per (study, context)", { + ss <- .jd_makeQtlSumStats(studies = "Q1", contexts = "c1", + traits = c("t1", "t2")) + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashRssWeights = .jd_mockMrmashRssWeights(), + .package = "pecotmr") + res <- suppressMessages( + twasWeightsPipeline(ss, methods = "mrmash", + jointSpecification = "trait")) + expect_s4_class(res, "TwasWeights") + expect_equal(as.character(res$trait), "joint") +}) + +test_that("twasWeightsPipeline(QtlSumStats): jointSpec='study' fits mr.mash.rss per (context, trait)", { + ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), contexts = "c1", + traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashRssWeights = .jd_mockMrmashRssWeights(), + .package = "pecotmr") + res <- suppressMessages( + twasWeightsPipeline(ss, methods = "mrmash", + jointSpecification = "study")) + expect_s4_class(res, "TwasWeights") + expect_equal(as.character(res$study), "joint") +}) + +test_that("twasWeightsPipeline(QtlSumStats): composed jointSpec axes=c('study','context') fits", { + ss <- .jd_makeQtlSumStats(studies = c("Q1", "Q2"), + contexts = c("c1", "c2"), + traits = "t1") + local_mocked_bindings( + extractBlockGenotypes = .jd_mockExtractor(), + mrmashRssWeights = .jd_mockMrmashRssWeights(), + .package = "pecotmr") + res <- suppressMessages( + twasWeightsPipeline(ss, methods = "mrmash", + jointSpecification = list(c("study", "context")))) + expect_s4_class(res, "TwasWeights") + expect_true("jointStudies" %in% names(res)) + expect_true("jointContexts" %in% names(res)) +}) diff --git a/tests/testthat/test_mashWrapper.R b/tests/testthat/test_mashWrapper.R index 0fcfd8a4..8cba2573 100644 --- a/tests/testthat/test_mashWrapper.R +++ b/tests/testthat/test_mashWrapper.R @@ -8,7 +8,7 @@ context("mash_wrapper") .testFineMappingEntry <- function(variantNames) { FineMappingEntry( variantIds = variantNames, - trimmedFit = list(pip = rep(0.5, length(variantNames))), + susieFit = list(pip = rep(0.5, length(variantNames))), topLoci = data.frame(variant_id = character(0), pip = numeric(0), stringsAsFactors = FALSE) diff --git a/tests/testthat/test_qtlEnrichmentPipeline.R b/tests/testthat/test_qtlEnrichmentPipeline.R index adf254e0..0b629e0d 100644 --- a/tests/testthat/test_qtlEnrichmentPipeline.R +++ b/tests/testthat/test_qtlEnrichmentPipeline.R @@ -32,7 +32,7 @@ context("qtlEnrichmentPipeline") fit <- list(alpha = alpha, pip = setNames(pip, variant_ids), V = 0.1) FineMappingEntry(variantIds = variant_ids, - trimmedFit = fit, + susieFit = fit, topLoci = tl) } @@ -169,7 +169,7 @@ test_that("qtlEnrichmentPipeline: empty input collections yield the empty schema # vector is empty. emptyEntry <- FineMappingEntry( variantIds = "v1", - trimmedFit = list(), # no pip -> .enrBuildGwasPipVector returns numeric(0) + susieFit = list(), # no pip -> .enrBuildGwasPipVector returns numeric(0) topLoci = data.frame(variant_id = "v1", pip = 0.1, stringsAsFactors = FALSE)) gfmr <- GwasFineMappingResult( diff --git a/tests/testthat/test_sumstatsQc.R b/tests/testthat/test_sumstatsQc.R index 70c4903e..7dd04d74 100644 --- a/tests/testthat/test_sumstatsQc.R +++ b/tests/testthat/test_sumstatsQc.R @@ -3897,14 +3897,14 @@ test_that("sliding_window_loop errors on infinite loop", { context("univariate_rss_diagnostics") -.testFineMappingEntry <- function(variantIds, trimmedFit = list(), +.testFineMappingEntry <- function(variantIds, susieFit = list(), topLoci = data.frame( variant_id = character(0), pip = numeric(0), stringsAsFactors = FALSE)) { FineMappingEntry( variantIds = variantIds, - trimmedFit = trimmedFit, + susieFit = susieFit, topLoci = topLoci ) } @@ -3927,7 +3927,7 @@ test_that("getSusieResult returns trimmed result when present", { mock_result <- list(pip = c(0.1, 0.5, 0.3), sets = list(cs = list())) con_data <- list(finemappingEntry = .testFineMappingEntry( variantIds = c("1:100:A:G", "1:200:C:T", "1:300:G:A"), - trimmedFit = mock_result + susieFit = mock_result )) result <- getSusieResult(con_data) expect_equal(result, mock_result) @@ -3941,7 +3941,7 @@ test_that("extractTopPipInfo finds top PIP variant", { con_data <- list( finemappingEntry = .testFineMappingEntry( variantIds = c("1:100:A:G", "1:200:C:T", "1:300:G:A"), - trimmedFit = list(pip = c(0.1, 0.7, 0.2)) + susieFit = list(pip = c(0.1, 0.7, 0.2)) ), sumstats = list(z = c(1.0, 3.5, -0.5)) ) @@ -3958,7 +3958,7 @@ test_that("extractTopPipInfo computes p_value from z", { con_data <- list( finemappingEntry = .testFineMappingEntry( variantIds = c("1:100:A:G", "1:200:C:T", "1:300:G:A"), - trimmedFit = list(pip = c(0.9, 0.05, 0.05)) + susieFit = list(pip = c(0.9, 0.05, 0.05)) ), sumstats = list(z = c(5.0, 0.5, -0.3)) ) @@ -3971,7 +3971,7 @@ test_that("extractTopPipInfo handles ties by taking first max", { con_data <- list( finemappingEntry = .testFineMappingEntry( variantIds = c("1:100:A:G", "1:200:C:T", "1:300:G:A"), - trimmedFit = list(pip = c(0.5, 0.5, 0.5)) + susieFit = list(pip = c(0.5, 0.5, 0.5)) ), sumstats = list(z = c(1.0, 2.0, 3.0)) ) @@ -3988,7 +3988,7 @@ test_that("extractCsInfo extracts single CS correctly", { con_data <- list( finemappingEntry = .testFineMappingEntry( variantIds = c("1:100:A:G", "1:200:C:T", "1:300:G:A"), - trimmedFit = list( + susieFit = list( sets = list(cs = list(L_1 = c(1, 2))), cs_corr = NULL ) @@ -4013,7 +4013,7 @@ test_that("extractCsInfo extracts multiple CSs with cs_corr", { con_data <- list( finemappingEntry = .testFineMappingEntry( variantIds = c("1:100:A:G", "1:200:C:T", "1:300:G:A", "1:400:T:C"), - trimmedFit = list( + susieFit = list( sets = list( cs = list(L_1 = c(1, 2), L_2 = c(3, 4)) ), @@ -4040,7 +4040,7 @@ test_that("extractCsInfo computes p_value from z-score", { con_data <- list( finemappingEntry = .testFineMappingEntry( variantIds = c("1:100:A:G", "1:200:C:T"), - trimmedFit = list( + susieFit = list( sets = list(cs = list(L_1 = c(1, 2))), cs_corr = NULL ) diff --git a/tests/testthat/test_twasWeightsPipeline.R b/tests/testthat/test_twasWeightsPipeline.R index 47a8f33e..379e6fa4 100644 --- a/tests/testthat/test_twasWeightsPipeline.R +++ b/tests/testthat/test_twasWeightsPipeline.R @@ -1425,7 +1425,7 @@ context("twasWeights internal helpers (extra)") .tw_makeFmEntry <- function(method_tag = "susie", n = 3) { FineMappingEntry( variantIds = paste0("v", seq_len(n)), - trimmedFit = list(payload = method_tag), + susieFit = list(payload = method_tag), topLoci = data.frame(variant_id = paste0("v", seq_len(n)), pip = seq(0.9, by = -0.1, length.out = n), stringsAsFactors = FALSE)) diff --git a/tests/testthat/test_vcfWriter.R b/tests/testthat/test_vcfWriter.R index f069dac1..ad531230 100644 --- a/tests/testthat/test_vcfWriter.R +++ b/tests/testthat/test_vcfWriter.R @@ -34,21 +34,32 @@ make_test_gwas_sumstats <- function(n = 5) { } make_test_finemapping_result <- function(n = 5) { + beta <- seq(0.5, by = -0.1, length.out = n) + se <- rep(0.1, n) + zv <- seq(5.0, by = -1.0, length.out = n) tl <- data.frame( - variant_id = paste0("chr1:", seq(100, by = 100, length.out = n), ":T:A"), - method = rep("susie", n), - pip = seq(0.9, by = -0.1, length.out = n), - cs_index_95 = c(1L, 1L, 0L, 2L, 0L)[seq_len(n)], - beta = seq(0.5, by = -0.1, length.out = n), - se = rep(0.1, n), - z = seq(5.0, by = -1.0, length.out = n), + variant_id = paste0("chr1:", seq(100, by = 100, length.out = n), ":T:A"), + chrom = rep("1", n), + pos = as.integer(seq(100, by = 100, length.out = n)), + A1 = rep("A", n), + A2 = rep("T", n), + N = rep(1000, n), + MAF = rep(0.1, n), + marginal_beta = beta, + marginal_se = se, + marginal_z = zv, + marginal_p = 2 * pnorm(-abs(zv)), + pip = seq(0.9, by = -0.1, length.out = n), + posterior_mean = beta * 0.5, + posterior_sd = se * 0.5, + cs_95 = paste0("susie_", c(1L, 1L, 0L, 2L, 0L)[seq_len(n)]), + method = rep("susie", n), stringsAsFactors = FALSE ) entry <- FineMappingEntry( variantIds = tl$variant_id, - trimmedFit = list(), - topLoci = tl, - sumstats = NULL) + susieFit = list(), + topLoci = tl) GwasFineMappingResult( study = "test_study", method = "susie", @@ -161,15 +172,26 @@ test_that("writeSumstatsVcf errors on empty FineMappingResult", { skip_if_not_installed("Biostrings") empty_tl <- data.frame( - variant_id = character(0), - pip = numeric(0), + variant_id = character(0), + chrom = character(0), + pos = integer(0), + A1 = character(0), + A2 = character(0), + N = numeric(0), + MAF = numeric(0), + marginal_beta = numeric(0), + marginal_se = numeric(0), + marginal_z = numeric(0), + marginal_p = numeric(0), + pip = numeric(0), + posterior_mean = numeric(0), + posterior_sd = numeric(0), stringsAsFactors = FALSE ) entry <- FineMappingEntry( variantIds = character(0), - trimmedFit = list(), - topLoci = empty_tl, - sumstats = NULL) + susieFit = list(), + topLoci = empty_tl) fm_empty <- GwasFineMappingResult( study = "test_study", method = "susie", @@ -178,5 +200,95 @@ test_that("writeSumstatsVcf errors on empty FineMappingResult", { out <- tempfile(fileext = ".vcf") on.exit(unlink(out), add = TRUE) - expect_error(writeSumstatsVcf(fm_empty, out), "no topLoci") + expect_error(writeSumstatsVcf(fm_empty, out), "no variants to write") +}) + +# ============================================================================= +# splitByContext / splitByTrait — one file per tuple +# ============================================================================= + +.make_multi_tuple_qtl_fmr <- function() { + contexts <- c("brain", "blood") + traits <- c("ENSG_A", "ENSG_B") + entries <- lapply(seq_along(contexts), function(i) { + ids <- paste0("chr1:", 100 * (1:3), ":T:A") + tl <- data.frame( + variant_id = ids, + chrom = rep("1", 3), + pos = c(100L, 200L, 300L), + A1 = rep("A", 3), + A2 = rep("T", 3), + N = rep(1000, 3), + MAF = rep(0.1, 3), + marginal_beta = c(0.3, 0.1, -0.2) + i / 100, + marginal_se = rep(0.05, 3), + marginal_z = c(6.0, 2.0, -4.0), + marginal_p = c(1e-9, 0.045, 6e-5), + pip = c(0.9, 0.5, 0.7), + posterior_mean = rep(0.05, 3), + posterior_sd = rep(0.02, 3), + cs_95 = paste0("susie_", c(1L, 1L, 0L)), + stringsAsFactors = FALSE) + FineMappingEntry(variantIds = ids, susieFit = list(), topLoci = tl) + }) + QtlFineMappingResult( + study = rep("study1", 2), + context = contexts, + trait = traits, + method = rep("susie", 2), + entry = entries) +} + +test_that("writeSumstatsVcf(FineMappingResult): splitByContext emits one VCF per context", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + fmr <- .make_multi_tuple_qtl_fmr() + baseOut <- tempfile(fileext = ".vcf") + on.exit(unlink(list.files(dirname(baseOut), + pattern = basename(tools::file_path_sans_ext(baseOut)), + full.names = TRUE)), add = TRUE) + paths <- writeSumstatsVcf(fmr, baseOut, splitByContext = TRUE) + expect_length(paths, 2L) + # Each path is decorated with the context tag. + expect_true(any(grepl("\\.brain\\.vcf$", paths))) + expect_true(any(grepl("\\.blood\\.vcf$", paths))) + for (p in paths) expect_true(file.exists(p)) +}) + +test_that("writeSumstatsVcf(FineMappingResult): splitByTrait emits one VCF per trait", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + fmr <- .make_multi_tuple_qtl_fmr() + baseOut <- tempfile(fileext = ".vcf") + on.exit(unlink(list.files(dirname(baseOut), + pattern = basename(tools::file_path_sans_ext(baseOut)), + full.names = TRUE)), add = TRUE) + paths <- writeSumstatsVcf(fmr, baseOut, splitByTrait = TRUE) + expect_length(paths, 2L) + expect_true(any(grepl("\\.ENSG_A\\.vcf$", paths))) + expect_true(any(grepl("\\.ENSG_B\\.vcf$", paths))) +}) + +test_that("writeSumstatsVcf(FineMappingResult): splitByContext + splitByTrait combines tags", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + fmr <- .make_multi_tuple_qtl_fmr() + baseOut <- tempfile(fileext = ".vcf") + on.exit(unlink(list.files(dirname(baseOut), + pattern = basename(tools::file_path_sans_ext(baseOut)), + full.names = TRUE)), add = TRUE) + paths <- writeSumstatsVcf(fmr, baseOut, + splitByContext = TRUE, splitByTrait = TRUE) + expect_length(paths, 2L) + expect_true(any(grepl("\\.brain\\.ENSG_A\\.vcf$", paths))) + expect_true(any(grepl("\\.blood\\.ENSG_B\\.vcf$", paths))) +}) + +test_that("writeSumstatsVcf(FineMappingResult): multi-row without split flags requires selectors", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + fmr <- .make_multi_tuple_qtl_fmr() + out <- tempfile(fileext = ".vcf") + expect_error(writeSumstatsVcf(fmr, out), + "2 matching rows") }) From 65bfcc34a97b909a097270889bccd5b9cf4252df Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Sun, 21 Jun 2026 23:57:28 -0700 Subject: [PATCH 2/4] improve finemappingresult and twasweightspipeline --- R/AllGenerics.R | 799 ++++++++++++++++++++ R/allGenerics.R | 19 +- R/twasWeightsPipeline.R | 281 ++++++- man/getMarginalEffects.Rd | 2 +- man/writeSumstatsVcf.Rd | 17 +- tests/testthat/test_deprecated.R | 47 +- tests/testthat/test_qtlEnrichmentPipeline.R | 131 ++++ tests/testthat/test_twasWeightsPipeline.R | 401 +++++++++- 8 files changed, 1581 insertions(+), 116 deletions(-) create mode 100644 R/AllGenerics.R diff --git a/R/AllGenerics.R b/R/AllGenerics.R new file mode 100644 index 00000000..6ef0af4d --- /dev/null +++ b/R/AllGenerics.R @@ -0,0 +1,799 @@ +#' @title S4 Generic Function Definitions +#' @description All S4 generic function definitions for pecotmr. +#' @name pecotmr-generics +#' @keywords internal +#' @importFrom methods setGeneric +NULL + +# ============================================================================= +# High-level estimation generic +# ============================================================================= + +#' @title Estimate SNP Heritability +#' @description Estimate SNP heritability from GWAS summary statistics using +#' one of three methods: LDER, g-LDSC, or HDL/sHDL. +#' @param sumstats A \code{GwasSumStats} object. +#' @param ldRef An \code{LdStatistic} object (method-appropriate subclass). +#' @param method Character, one of "lder", "gldsc", "hdl". +#' @param annotations An \code{AnnotationMatrix} object, or NULL for +#' unstratified estimation. +#' @param local Logical, whether to compute per-block local estimates. +#' @param ... Additional method-specific arguments. +#' @return An \code{H2Estimate} object. +#' @export +setGeneric("estimateH2", + function(sumstats, ldRef, method = "lder", annotations = NULL, + local = FALSE, ...) + standardGeneric("estimateH2") +) + +# ============================================================================= +# LD score computation +# ============================================================================= + +#' @title Compute LD Scores +#' @description Compute LD scores from an LD reference, optionally +#' stratified by annotations. +#' @param ldRef An \code{LdStatistic} object. +#' @param annotations An \code{AnnotationMatrix} object, or NULL. +#' @param ... Additional arguments. +#' @return A numeric matrix of LD scores (SNPs x annotations+1). +#' @export +setGeneric("computeLdScores", + function(ldRef, annotations = NULL, ...) + standardGeneric("computeLdScores") +) + +# ============================================================================= +# I/O generics +# ============================================================================= + +#' @title Read Genotype Data +#' @description Read genotype data from various formats (VCF, plink1, +#' plink2, GDS) and return a \code{GenotypeHandle} for deferred +#' genotype loading. +#' @param path Character, path to the genotype file. +#' @param format Character, one of "vcf", "plink1", "plink2", "gds". +#' If NULL, inferred from file extension. +#' @param ... Additional arguments. +#' @return A \code{GenotypeHandle} object. +#' @export +setGeneric("readGenotypes", + function(path, format = NULL, ...) + standardGeneric("readGenotypes") +) + +#' @title Read Annotations +#' @description Read genomic annotations from files (BED, BigWig, +#' S-LDSC .annot format, or GRanges objects) and create an +#' AnnotationMatrix. +#' @param paths Named character vector of file paths, or a named list +#' of GRanges objects. Names become annotation names. +#' @param snpRanges A \code{GRanges} object defining SNP positions. +#' @param annotationMeta A \code{data.frame} with annotation metadata +#' (name, tier, type). If NULL, auto-detected from file format. +#' @param genome Character, genome build. +#' @param ... Additional arguments. +#' @return An \code{AnnotationMatrix} object. +#' @export +setGeneric("readAnnotations", + function(paths, snpRanges, annotationMeta = NULL, + genome = "hg19", ...) + standardGeneric("readAnnotations") +) + +# ============================================================================= +# Accessor generics +# ============================================================================= + +#' @title Get Local Estimates +#' @description Extract per-block local estimates from a result object. +#' @param object An \code{H2Estimate} object. +#' @return A \code{data.frame} of local estimates, or NULL. +#' @export +setGeneric("getLocal", function(object) standardGeneric("getLocal")) + +#' @title Get Enrichment Estimates +#' @description Extract annotation enrichment estimates from a result object. +#' @param object An \code{H2Estimate} object. +#' @return A \code{data.frame} of enrichment estimates, or NULL. +#' @export +setGeneric("getEnrichment", + function(object) standardGeneric("getEnrichment")) + +#' @title Get Score Statistics +#' @description Extract score statistics for candidate annotations. +#' @param object An \code{H2Estimate} object. +#' @return A list with \code{z} and \code{R}, or NULL. +#' @export +setGeneric("getScoreStats", + function(object) standardGeneric("getScoreStats")) + +# ============================================================================= +# GwasSumStats accessor generics +# ============================================================================= + +#' @title Get Z-scores +#' @description Extract z-score vector from a \code{GwasSumStats} or +#' \code{QtlSumStats} entry, selected by its identity tuple. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param ... Class-specific selection arguments (e.g., \code{study} for +#' \code{GwasSumStats}; \code{study}, \code{context}, \code{trait} for +#' \code{QtlSumStats}). +#' @return Numeric vector of z-scores. +#' @export +setGeneric("getZ", function(x, ...) standardGeneric("getZ")) + +#' @title Get Sample Sizes +#' @description Extract sample size vector from a \code{GwasSumStats} or +#' \code{QtlSumStats} entry, selected by its identity tuple. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param ... Class-specific selection arguments. +#' @return Numeric vector of sample sizes. +#' @export +setGeneric("getN", function(x, ...) standardGeneric("getN")) + +#' @title Get Minor Allele Frequencies +#' @description Extract MAF vector from a GwasSumStats object. +#' @param x A \code{GwasSumStats} or \code{QtlDataset} object. +#' @param ... Class-specific selection arguments (e.g., \code{region}, +#' \code{cisWindow} for \code{QtlDataset}). +#' @return Numeric vector of MAFs, or NULL if not available. +#' @export +setGeneric("getMaf", function(x, ...) standardGeneric("getMaf")) + +#' @title Get Number of SNPs +#' @description Number of SNPs in a \code{GwasSumStats} or +#' \code{QtlSumStats} entry, selected by its identity tuple. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param ... Class-specific selection arguments. +#' @return Integer. +#' @export +setGeneric("nSnps", function(x, ...) standardGeneric("nSnps")) + +#' @title Subset by Chromosome +#' @description Extract a chromosome-specific subset of a GwasSumStats object. +#' @param x A \code{GwasSumStats} object. +#' @param chr Character, chromosome name (e.g., "1", "chr1"). +#' @return A \code{GwasSumStats} object. +#' @export +setGeneric("subsetChr", function(x, chr) standardGeneric("subsetChr")) + +#' @title Get Phenotype Variance +#' @description Extract phenotype variance from a \code{GwasSumStats} or +#' \code{QtlSumStats} entry, selected by its identity tuple. Returns +#' \code{NULL} when the entry has no \code{varY} recorded. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param ... Class-specific selection arguments. +#' @return Numeric phenotype variance, or NULL. +#' @export +setGeneric("getVarY", function(x, ...) standardGeneric("getVarY")) + +#' @title Get a Single Summary-Statistic Entry or Embedded Collection +#' @description Behavior depends on the class of \code{x}: +#' \describe{ +#' \item{For \code{GwasSumStats} / \code{QtlSumStats}}{Returns the +#' per-variant \code{GRanges} of summary statistics for one entry, +#' selected by its identity tuple (\code{study} for GWAS; +#' \code{study}, \code{context}, \code{trait} for QTL).} +#' \item{For \code{MultiStudyQtlDataset}}{Returns the embedded +#' \code{QtlSumStats} collection (the summary-statistic-only +#' studies), or \code{NULL} when absent. No selection arguments +#' are accepted in this case.} +#' } +#' @param x A \code{GwasSumStats}, \code{QtlSumStats}, or +#' \code{MultiStudyQtlDataset} object. +#' @param ... Class-specific selection arguments (see above). +#' @return A \code{GRanges}, a \code{QtlSumStats}, or \code{NULL}. +#' @export +setGeneric("getSumStats", function(x, ...) standardGeneric("getSumStats")) + +#' @title Get Standardized Sumstat Data Frame for One Tuple +#' @description Return a per-tuple summary-statistics \code{data.frame} +#' in the standardized layout \code{variant_id, chrom, pos, A1, A2, +#' z, beta, se, N, maf} (optional columns omitted when absent on the +#' entry). Combines tuple-keyed row selection (\code{getSumStats}) +#' with mcols unpacking; replaces the pre-S4 idiom of pulling +#' \code{S4Vectors::mcols(entry)$} directly inside pipelines. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param ... Class-specific selectors (\code{study} for +#' \code{GwasSumStats}; \code{study}, \code{context}, \code{trait} +#' for \code{QtlSumStats}) plus pass-throughs \code{require}, +#' \code{derive}, \code{keepChrPrefix} forwarded to the underlying +#' unpacker. +#' @return A \code{data.frame}. +#' @export +setGeneric("getSumstatDf", function(x, ...) standardGeneric("getSumstatDf")) + +#' @title Get the Embedded QtlDataset List +#' @description Return the named list of \code{QtlDataset} objects +#' carried by a \code{MultiStudyQtlDataset}. +#' @param x A \code{MultiStudyQtlDataset} object. +#' @return A named list of \code{QtlDataset} objects. +#' @export +setGeneric("getQtlDatasets", + function(x) standardGeneric("getQtlDatasets")) + +#' @title Get the Genome Build +#' @description Return the genome build that the collection's LD sketch +#' and every entry are aligned to. Because all entries in a +#' \code{GwasSumStats} or \code{QtlSumStats} share the LD sketch, the +#' genome build is a single value at the collection level. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param ... Unused (present for method-signature compatibility). +#' @return Character (length 1). +#' @export +setGeneric("getGenome", function(x, ...) standardGeneric("getGenome")) + +#' @title Get QC Audit Record +#' @description Return the audit record of QC steps applied to this +#' collection. An empty \code{list()} (default on construction) means +#' \code{\link{summaryStatsQc}} has not yet been run. Pipelines that +#' require harmonized sumstats (\code{fineMappingPipeline}, +#' \code{twasWeightsPipeline}, and downstream consumers) reject inputs +#' where \code{length(getQcInfo(x)) == 0L}. +#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. +#' @param ... Unused. +#' @return A \code{list} (possibly empty). +#' @export +setGeneric("getQcInfo", function(x, ...) standardGeneric("getQcInfo")) + +#' @title Get LD Sketch +#' @description Return the \code{GenotypeHandle} carrying the LD +#' reference for this collection. Defined on classes that embed an +#' \code{ldSketch} slot: \code{GwasSumStats}, \code{QtlSumStats}, +#' \code{FineMappingResult}, \code{TwasWeights}. Returns \code{NULL} +#' when the slot is unset (e.g. a \code{TwasWeights} fit from +#' individual-level data via \code{QtlDataset}). +#' @param x An S4 object that carries an \code{ldSketch} slot. +#' @param ... Unused. +#' @return A \code{GenotypeHandle} or \code{NULL}. +#' @export +setGeneric("getLdSketch", function(x, ...) standardGeneric("getLdSketch")) + +# ============================================================================= +# LdData accessor generics +# ============================================================================= + +#' @title Get LD Correlation Matrix +#' @description Extract the LD correlation matrix from an \code{LdData} +#' object. If only a genotype handle is available, recomputes R from +#' genotypes on the fly. +#' @param x An \code{LdData} object. +#' @return A correlation matrix, or a list of per-block matrices. +#' @export +setGeneric("getCorrelation", function(x) standardGeneric("getCorrelation")) + +#' @title Get Genotype Matrix +#' @description Extract a genotype matrix from an object that carries +#' genotype data. For an \code{LdData}, returns the underlying genotype +#' matrix via its handle (or \code{NULL} if no handle is available). +#' For a \code{QtlDataset}, returns the genotype matrix for a selected +#' set of traits or region (see method documentation for the +#' per-class selection arguments). +#' @param x The object to extract from. +#' @param ... Class-specific selection arguments (e.g., \code{traitId}, +#' \code{region}, \code{cisWindow} for \code{QtlDataset}). +#' @return A numeric matrix, a list of matrices, or \code{NULL}. +#' @export +setGeneric("getGenotypes", function(x, ...) standardGeneric("getGenotypes")) + +#' @title Check Genotype Availability +#' @description Check whether an \code{LdData} object has a genotype +#' handle for extracting raw genotypes. +#' @param x An \code{LdData} object. +#' @return Logical. +#' @export +setGeneric("hasGenotypes", function(x) standardGeneric("hasGenotypes")) + +#' @title Get Variant IDs +#' @description Extract variant ID vector from an object that carries one +#' (e.g., \code{LdData}, \code{FineMappingEntry}, \code{TwasWeightsEntry}) +#' or from one entry of a collection class selected by its identity +#' tuple. +#' @param x The object. +#' @param ... Class-specific selection arguments. +#' @return Character vector of variant IDs. +#' @export +setGeneric("getVariantIds", function(x, ...) standardGeneric("getVariantIds")) + +#' @title Get Phenotype List +#' @description Extract phenotype data from an object that carries it. +#' For a \code{QtlDataset}, the user can optionally select specific +#' contexts, traits, or a region (see method documentation for the +#' per-class selection arguments). +#' @param x The object to extract from. +#' @param ... Class-specific selection arguments (e.g., \code{contexts}, +#' \code{traitId}, \code{region}). +#' @return A named list of phenotype matrices or +#' \code{SummarizedExperiment} objects. +#' @export +setGeneric("getPhenotypes", function(x, ...) standardGeneric("getPhenotypes")) +# ============================================================================= +# FineMappingResult accessor generics +# ============================================================================= + +#' @title Get a Single Fine-Mapping Entry +#' @description Return the \code{FineMappingEntry} for one +#' \code{(study, context, trait, method)} row of a +#' \code{FineMappingResult} collection. +#' @param x A \code{FineMappingResult} object. +#' @param study,context,trait,method Single character identifiers. All +#' required when the collection has more than one row; optional when +#' the collection has a single row. +#' @return A \code{FineMappingEntry} object. +#' @export +setGeneric("getFineMappingResult", + function(x, study = NULL, context = NULL, trait = NULL, method = NULL) + standardGeneric("getFineMappingResult")) + +#' @title Renormalize Fine-Mapping PIPs to a Variant Subset +#' @description Re-derive a \code{FineMappingEntry}'s PIPs (and the +#' \code{topLoci} table) after restricting to a kept variant subset. +#' For each effect the \code{lbf_variable} row is subset to the kept +#' variants, renormalized via \code{lbfToAlpha()}, and the per-variant +#' PIPs are recomputed as \code{1 - prod_l(1 - alpha[l, p])}. +#' +#' The two scenarios this supports: +#' \itemize{ +#' \item The user declined to impute missing variants in a GWAS +#' \code{SumStats}, so a downstream fine-mapping result needs +#' PIPs restricted to the GWAS-covered intersection. +#' \item Colocalization between a GWAS \code{FineMappingResult} and a +#' QTL \code{FineMappingResult} computed on different variant +#' sets — the GWAS PIPs (or QTL PIPs) get renormalized to the +#' common variant set. +#' } +#' +#' @param x A \code{FineMappingEntry} or \code{FineMappingResultBase}. +#' @param keepVariants Character vector of variant IDs to keep. Intersected +#' with the entry's own \code{variantIds}; an empty intersection raises +#' an error. +#' @param ... Future expansion. +#' @return The same flavour of object with PIPs renormalized on the kept +#' subset. +#' @export +setGeneric("adjustPips", + function(x, keepVariants, ...) standardGeneric("adjustPips")) + +#' @title Get PIP Values +#' @description Extract posterior inclusion probabilities from a single +#' \code{FineMappingEntry} or from one entry of a +#' \code{FineMappingResult} (selected by its identity tuple). +#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. +#' @param ... Class-specific selection arguments. +#' @return A named numeric vector of PIPs. +#' @export +setGeneric("getPip", function(x, ...) standardGeneric("getPip")) + +#' @title Get SuSiE Fit +#' @description Extract the SuSiE fit object from a fine-mapping entry +#' or result. The fit may be the trimmed view (when the pipeline ran +#' with the default \code{trim = TRUE}) or the full untrimmed +#' \code{susie()} return (when \code{trim = FALSE}). +#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. +#' @param ... Class-specific selection arguments. +#' @return A list (the SuSiE fit object). +#' @export +setGeneric("getSusieFit", function(x, ...) standardGeneric("getSusieFit")) + +#' @title Get Marginal Effects +#' @description Extract per-variant marginal univariate effects from a +#' fine-mapping entry or result. Returns a \code{data.frame} with +#' identity columns (\code{variant_id, chrom, pos, A1, A2}), context +#' (\code{N, MAF}), and the marginal effect columns +#' (\code{beta, se, z, p}). Populated uniformly across the +#' individual-level and RSS paths. +#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. +#' @param maxPval Optional numeric (length 1). When non-\code{NULL}, +#' filter rows where \code{p > maxPval}. Default \code{NULL} +#' (no filter). +#' @param ... Class-specific selection arguments. +#' @return A \code{data.frame}. +#' @export +setGeneric("getMarginalEffects", + function(x, maxPval = NULL, ...) standardGeneric("getMarginalEffects")) + +#' @title Get Top Loci (posterior view) +#' @description Extract the per-variant posterior fine-mapping payload +#' as either a \code{data.frame} (default) or a \code{GRanges}. +#' Returns identity columns (\code{variant_id, chrom, pos, A1, A2}), +#' context (\code{N, MAF}), the posterior effect columns +#' (\code{beta = posterior_mean, se = posterior_sd}), \code{pip}, +#' and credible-set membership columns (\code{cs_95}, etc.). +#' Rows are filtered by PIP by default — set \code{signalCutoff = 0} +#' to return every variant. +#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. +#' @param type One of \code{"data.frame"} (default) or \code{"GRanges"}. +#' @param signalCutoff Numeric (length 1). Drop rows where +#' \code{pip <= signalCutoff}. Default \code{0.025}. Use +#' \code{signalCutoff = 0} to keep every variant. +#' @param ... Class-specific selection arguments. +#' @return A \code{data.frame} or a \code{GRanges}. +#' @export +setGeneric("getTopLoci", + function(x, type = c("data.frame", "GRanges"), + signalCutoff = 0.025, ...) + standardGeneric("getTopLoci")) + +#' @title Get Credible Sets +#' @description Extract credible set assignments at the requested coverage. +#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. +#' @param ... Class-specific selection arguments plus \code{coverage}. +#' @return A data.frame of credible set information. +#' @export +setGeneric("getCs", function(x, ...) standardGeneric("getCs")) + +# ============================================================================= +# TwasWeights accessor generics +# ============================================================================= + +#' @title Get TWAS Weights +#' @description Extract weights from a \code{TwasWeightsEntry} or from +#' one entry of a \code{TwasWeights} collection. +#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. +#' @param ... Class-specific selection arguments. +#' @return A numeric vector or matrix of weights. +#' @export +setGeneric("getWeights", function(x, ...) standardGeneric("getWeights")) + +#' @title Get Standardized Flag +#' @description Check whether weights are on the standardized scale. +#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. +#' @param ... Class-specific selection arguments. +#' @return Logical. +#' @export +setGeneric("getStandardized", + function(x, ...) standardGeneric("getStandardized")) + +#' @title Get CV Performance +#' @description Extract cross-validation performance metrics. +#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. +#' @param ... Class-specific selection arguments. +#' @return Method-specific (typically a list). +#' @export +setGeneric("getCvPerformance", + function(x, ...) standardGeneric("getCvPerformance")) + +#' @title Get Model Fits +#' @description Extract fitted model objects. +#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. +#' @param ... Class-specific selection arguments. +#' @return Method-specific (typically a list). +#' @export +setGeneric("getFits", function(x, ...) standardGeneric("getFits")) + +#' @title Get Method Names +#' @description Extract method names from a collection class. +#' @param x A \code{FineMappingResult} or \code{TwasWeights} object. +#' @return Character vector. +#' @export +setGeneric("getMethodNames", function(x) standardGeneric("getMethodNames")) + +#' @title Get Data Type +#' @description Extract the data-type tag. +#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. +#' @param ... Class-specific selection arguments. +#' @return A character vector or NULL. +#' @export +setGeneric("getDataType", function(x, ...) standardGeneric("getDataType")) + +# ============================================================================= +# AlleleQcResult accessor generics +# QcResult accessor generics +# ============================================================================= +# VCF/BCF writer generic +# ============================================================================= + +#' Write summary statistics or fine-mapping results to VCF/BCF +#' +#' Creates a VCF object from GWAS summary statistics or fine-mapping results +#' and writes it to disk. Supports bgzipped VCF (.vcf.gz/.vcf.bgz) and +#' BCF (.bcf) output formats via VariantAnnotation and Rsamtools. +#' +#' @param x Input data: a \code{GwasSumStats} object, a +#' \code{FineMappingResult} object, or a data.frame with columns +#' \code{chrom}, \code{pos}, \code{ref}, \code{alt}. +#' @param outputPath File path for output. Extension determines format: +#' \code{.vcf.gz} or \code{.vcf.bgz} for bgzipped VCF, +#' \code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. +#' @param sampleName Name for the VCF sample column (default: trait name or +#' method name from the S4 object). +#' @param ... Additional arguments passed to methods. +#' @return Invisible path to the written file. +#' @export +setGeneric("writeSumstatsVcf", + function(x, outputPath, sampleName = NULL, ...) standardGeneric("writeSumstatsVcf")) + +# ============================================================================= +# QtlDataset accessor generics +# ============================================================================= + +#' @title Get Study Identifier +#' @description Return the study identifier carried by a \code{QtlDataset}. +#' @param x A \code{QtlDataset} object. +#' @return Character (length 1). +#' @export +setGeneric("getStudy", function(x) standardGeneric("getStudy")) + +#' @title Get Context Names +#' @description Return the names of all contexts carried by an object +#' (e.g., the keys of the \code{phenotypes} list on a \code{QtlDataset}, +#' or the unique \code{context} values of a \code{QtlSumStats}). +#' @param x The object. +#' @return Character vector of context names. +#' @export +setGeneric("getContexts", function(x) standardGeneric("getContexts")) + +#' @title Get Unique Trait Names +#' @description Return the unique trait identifiers carried by a +#' collection class (e.g., \code{QtlSumStats}). +#' @param x The object. +#' @return Character vector of unique trait names. +#' @export +setGeneric("getTraits", function(x) standardGeneric("getTraits")) + +#' @title Get Residualized Genotypes +#' @description Residualize the genotype matrix against the per-context +#' phenotype covariates and the genotype covariates, optionally +#' subsetting variants to those falling within a trait's cis-window or +#' an explicit region. +#' @param x A \code{QtlDataset} object. +#' @param ... Selection arguments: \code{traitId}, \code{region}, +#' \code{cisWindow}, \code{phenotypeCovariatesToRemove}, +#' \code{genotypeCovariatesToRemove}. +#' @return A numeric matrix (samples x variants). +#' @export +setGeneric("getResidualizedGenotypes", + function(x, ...) standardGeneric("getResidualizedGenotypes")) + +#' @title Get Residualized Phenotypes +#' @description Residualize the per-context phenotype matrices against +#' the per-context phenotype covariates and the genotype covariates, +#' for one or more requested contexts. +#' @param x A \code{QtlDataset} object. +#' @param ... Selection arguments: \code{contexts} (required), +#' \code{traitId}, \code{region}, +#' \code{phenotypeCovariatesToRemove}, +#' \code{genotypeCovariatesToRemove}. +#' @return A named list of numeric matrices keyed by context. +#' @export +setGeneric("getResidualizedPhenotypes", + function(x, ...) standardGeneric("getResidualizedPhenotypes")) + +#' @title Get Per-Context Phenotype Covariates +#' @description Return per-context phenotype covariate matrices, taken +#' from the \code{colData} of each context's \code{SummarizedExperiment}. +#' @param x A \code{QtlDataset} object. +#' @param contexts Character vector of context names (subset of +#' \code{names(getPhenotypes(x))}). +#' @return A named list of matrices keyed by context. +#' @export +setGeneric("getPhenotypeCovariates", + function(x, contexts) standardGeneric("getPhenotypeCovariates")) + +#' @title Get Genotype Covariates +#' @description Return the single genotype-derived covariate matrix +#' carried by a \code{QtlDataset} (e.g., ancestry PCs). +#' @param x A \code{QtlDataset} object. +#' @return Numeric matrix (samples x covariates). +#' @export +setGeneric("getGenotypeCovariates", + function(x) standardGeneric("getGenotypeCovariates")) + +#' @title Get scaleResiduals Flag +#' @description Whether residualization accessors scale residuals to unit +#' variance. +#' @param x A \code{QtlDataset} object. +#' @return Logical (length 1). +#' @export +setGeneric("getScaleResiduals", + function(x) standardGeneric("getScaleResiduals")) + +# ============================================================================= +# GenotypeHandle / LD-statistic / Annotation / LdData / H2Estimate accessors +# ============================================================================= + +#' @title Get SNP Info +#' @description Return the cached SNP metadata data.frame +#' (columns: SNP, CHR, BP, A1, A2, optionally MAF). +#' @param x A \code{GenotypeHandle} or \code{LdStatistic}. +#' @return A data.frame. +#' @export +setGeneric("getSnpInfo", function(x) standardGeneric("getSnpInfo")) + +#' @title Get Genotype Storage Format +#' @description Return the detected genotype storage format. +#' @param x A \code{GenotypeHandle}. +#' @return Character (length 1): one of "gds", "vcf", "plink1", "plink2". +#' @export +setGeneric("getFormat", function(x) standardGeneric("getFormat")) + +#' @title Get File Path +#' @description Return the underlying genotype file path or stem. +#' @param x A \code{GenotypeHandle}. +#' @return Character (length 1). +#' @export +setGeneric("getPath", function(x) standardGeneric("getPath")) + +#' @title Get Sample Identifiers +#' @description Return the sample-id vector. +#' @param x A \code{GenotypeHandle}. +#' @return Character vector. +#' @export +setGeneric("getSampleIds", function(x) standardGeneric("getSampleIds")) + +#' @title Get plink2 pgen Pointer +#' @description Return the cached external pointer to the plink2 pgen +#' handle (NULL when the handle is not pgen-backed). +#' @param x A \code{GenotypeHandle}. +#' @return An external pointer or NULL. +#' @export +setGeneric("getPgenPtr", function(x) standardGeneric("getPgenPtr")) + +#' @title Get Sample Count +#' @description Return the number of samples carried by a +#' \code{GenotypeHandle}. +#' @param x A \code{GenotypeHandle}. +#' @return Integer (length 1). +#' @export +setGeneric("getNSamples", function(x) standardGeneric("getNSamples")) + +#' @title Get Per-Block Eigendecompositions +#' @description Return the per-block eigendecomposition list carried by +#' an \code{LdEigen} object. +#' @param x An \code{LdEigen}. +#' @return List of per-block eigen decompositions. +#' @export +setGeneric("getEigenList", function(x) standardGeneric("getEigenList")) + +#' @title Get LD Reference Panel Size +#' @description Return the reference-panel sample size used to compute +#' an \code{LdStatistic} or carried by an \code{LdData}. +#' @param x An \code{LdStatistic} or \code{LdData}. +#' @return Integer (length 1). +#' @export +setGeneric("getNRef", function(x) standardGeneric("getNRef")) + +#' @title Get In-Sample Flag +#' @description Whether the LD reference panel is from the same cohort +#' as the GWAS (affects bias correction). +#' @param x An \code{LdStatistic}. +#' @return Logical (length 1). +#' @export +setGeneric("getInSample", function(x) standardGeneric("getInSample")) + +#' @title Get LD Scores +#' @description Return the per-SNP LD score matrix carried by an +#' \code{LdScore} object. +#' @param x An \code{LdScore}. +#' @return Numeric matrix (SNPs x annotations+1). +#' @export +setGeneric("getLdScores", function(x) standardGeneric("getLdScores")) + +#' @title Get LD-Score Regression Weights +#' @description Return the per-SNP regression weights vector carried by +#' an \code{LdScore} object. +#' @param x An \code{LdScore}. +#' @return Numeric vector. +#' @export +setGeneric("getLdScoreWeights", + function(x) standardGeneric("getLdScoreWeights")) + +#' @title Get Per-Block LD Matrix List +#' @description Return the list of per-block LD (R^2) matrices used for +#' the FGLS residual covariance in g-LDSC. +#' @param x An \code{LdScore}. +#' @return List of matrices (empty list for S-LDSC). +#' @export +setGeneric("getLdMatrixList", + function(x) standardGeneric("getLdMatrixList")) + +#' @title Get LD Block Container +#' @description Return the \code{LdBlocks} object carried by an +#' \code{LdStatistic}. +#' @param x An \code{LdStatistic}. +#' @return An \code{LdBlocks} object. +#' @export +setGeneric("getLdBlocks", function(x) standardGeneric("getLdBlocks")) + +#' @title Get Annotation Matrix +#' @description Return the (SNPs x annotations) annotation matrix. +#' @param x An \code{AnnotationMatrix}. +#' @return Numeric matrix or dgCMatrix. +#' @export +setGeneric("getAnnotations", + function(x) standardGeneric("getAnnotations")) + +#' @title Get Annotation Metadata +#' @description Return the per-annotation metadata data.frame (columns +#' \code{name}, \code{tier}, \code{type}). +#' @param x An \code{AnnotationMatrix}. +#' @return A data.frame. +#' @export +setGeneric("getAnnotationMeta", + function(x) standardGeneric("getAnnotationMeta")) + +#' @title Get SNP Ranges +#' @description Return the per-SNP \code{GRanges} carried by an +#' \code{AnnotationMatrix}. +#' @param x An \code{AnnotationMatrix}. +#' @return A \code{GRanges} object. +#' @export +setGeneric("getSnpRanges", function(x) standardGeneric("getSnpRanges")) + +#' @title Get LD Block Ranges +#' @description Return the per-block \code{GRanges} carried by an +#' \code{LdBlocks} object. +#' @param x An \code{LdBlocks}. +#' @return A \code{GRanges} object. +#' @export +setGeneric("getBlocks", function(x) standardGeneric("getBlocks")) + +#' @title Get GenotypeHandle from LdData +#' @description Return the \code{GenotypeHandle} (or list of handles for +#' mixture panels) carried by an \code{LdData}. +#' @param x An \code{LdData}. +#' @return A \code{GenotypeHandle}, a list of them, or NULL. +#' @export +setGeneric("getGenotypeHandle", + function(x) standardGeneric("getGenotypeHandle")) + +#' @title Get Mixture Weights +#' @description Return the per-panel mixing proportions carried by an +#' \code{LdData} when its \code{genotypeHandle} slot is a list of +#' panels. NULL for single-panel objects. +#' @param x An \code{LdData}. +#' @return Numeric vector or NULL. +#' @export +setGeneric("getMixtureWeights", + function(x) standardGeneric("getMixtureWeights")) + +#' @title Get SNP Indices +#' @description Return the integer indices into the handle's snpInfo +#' carried by an \code{LdData}. +#' @param x An \code{LdData}. +#' @return Integer vector or NULL. +#' @export +setGeneric("getSnpIdx", function(x) standardGeneric("getSnpIdx")) + +#' @title Get Variant GRanges +#' @description Return the variant metadata \code{GRanges} of an +#' \code{LdData}. +#' @param x An \code{LdData}. +#' @return A \code{GRanges}. +#' @export +setGeneric("getVariantInfo", function(x) standardGeneric("getVariantInfo")) + +#' @title Get Block Metadata +#' @description Return the block metadata (\code{LdBlocks} or +#' \code{data.frame}) carried by an \code{LdData}. +#' @param x An \code{LdData}. +#' @return An \code{LdBlocks} or \code{data.frame}. +#' @export +setGeneric("getBlockMetadata", + function(x) standardGeneric("getBlockMetadata")) + +#' @title Get Reference Panel (data.frame) +#' @description Flatten the variant \code{GRanges} of an \code{LdData} +#' into a reference-panel data.frame. +#' @param x An \code{LdData}. +#' @return A data.frame. +#' @export +setGeneric("getRefPanel", function(x) standardGeneric("getRefPanel")) + +#' @title Get Per-Block tau Matrix +#' @description Return the per-block jackknife tau matrix carried by an +#' \code{H2Estimate}. +#' @param x An \code{H2Estimate}. +#' @return A numeric matrix or NULL. +#' @export +setGeneric("getTauBlocks", function(x) standardGeneric("getTauBlocks")) + +#' @title Get Global SNP Heritability +#' @description Return the global SNP heritability estimate carried by +#' an \code{H2Estimate}. +#' @param x An \code{H2Estimate}. +#' @return Numeric (length 1). +#' @export +setGeneric("getH2", function(x) standardGeneric("getH2")) diff --git a/R/allGenerics.R b/R/allGenerics.R index 0a946dd4..6ef0af4d 100644 --- a/R/allGenerics.R +++ b/R/allGenerics.R @@ -387,7 +387,7 @@ setGeneric("getSusieFit", function(x, ...) standardGeneric("getSusieFit")) #' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. #' @param maxPval Optional numeric (length 1). When non-\code{NULL}, #' filter rows where \code{p > maxPval}. Default \code{NULL} -#' (no filter — return all variants). +#' (no filter). #' @param ... Class-specific selection arguments. #' @return A \code{data.frame}. #' @export @@ -496,22 +496,11 @@ setGeneric("getDataType", function(x, ...) standardGeneric("getDataType")) #' \code{chrom}, \code{pos}, \code{ref}, \code{alt}. #' @param outputPath File path for output. Extension determines format: #' \code{.vcf.gz} or \code{.vcf.bgz} for bgzipped VCF, -#' \code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. When -#' \code{splitByContext} or \code{splitByTrait} is \code{TRUE} on a -#' \code{FineMappingResult} method call, the corresponding tag is -#' appended to the file stem (e.g. -#' \code{out.vcf} + context = \code{"brain"} → -#' \code{out.brain.vcf}). +#' \code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. #' @param sampleName Name for the VCF sample column (default: trait name or #' method name from the S4 object). -#' @param ... Additional arguments passed to methods. For the -#' \code{FineMappingResult} method these include \code{splitByContext} -#' (logical, default \code{FALSE}) and \code{splitByTrait} (logical, -#' default \code{FALSE}): when either is \code{TRUE}, write one VCF -#' per row of the collection with the tuple value appended to the -#' output filename. -#' @return Invisible path (or character vector of paths when splitting) -#' to the written file(s). +#' @param ... Additional arguments passed to methods. +#' @return Invisible path to the written file. #' @export setGeneric("writeSumstatsVcf", function(x, outputPath, sampleName = NULL, ...) standardGeneric("writeSumstatsVcf")) diff --git a/R/twasWeightsPipeline.R b/R/twasWeightsPipeline.R index d2088d1f..e1cbbd24 100644 --- a/R/twasWeightsPipeline.R +++ b/R/twasWeightsPipeline.R @@ -137,15 +137,11 @@ # documented as individual-only for now. prsCs has no individual-level # counterpart (it is a sumstat-only Bayesian shrinkage method). .twasMethodCapabilities <- list( - susie = list(individualImpl = "susieWeights", - sumstatImpl = "susieRssWeights", - multivariate = FALSE), - susieInf = list(individualImpl = "susieInfWeights", - sumstatImpl = "susieInfRssWeights", - multivariate = FALSE), - susieAsh = list(individualImpl = "susieAshWeights", - sumstatImpl = "susieAshRssWeights", - multivariate = FALSE), + # NOTE: fine-mapping methods (susie / susieInf / susieAsh / mvsusie / + # fsusie) are NOT listed here. Their availability is governed by + # .fineMappingMethodCapabilities (the same registry fineMappingPipeline + # uses) and gated by .twasCheckFineMappingMethods, which delegates + # input-class compatibility to .fmCheckMethodCapabilities. mrash = list(individualImpl = "mrashWeights", sumstatImpl = "mrAshRssWeights", multivariate = FALSE), @@ -161,9 +157,6 @@ l0learn = list(individualImpl = "l0learnWeights", sumstatImpl = "l0learnRssWeights", multivariate = FALSE), - mvsusie = list(individualImpl = "mvsusieWeights", - sumstatImpl = "mvsusieRssWeights", - multivariate = TRUE), mrmash = list(individualImpl = "mrmashWeights", sumstatImpl = "mrmashRssWeights", multivariate = TRUE), @@ -218,17 +211,69 @@ .twasNormalizeMethods <- function(methods) { if (is.null(methods)) { methodList <- .twasMethodLookup("default") + tokens <- .twasTokensFromMethodList(methodList) } else if (is.character(methods)) { - methodList <- .twasMethodLookup(methods) + # Fine-mapping tokens without a .twasMethodLookup entry (e.g. fsusie) + # are recognised here so the downstream gate can produce a method- + # specific error rather than "Unknown TWAS method" from the lookup. + fmExtra <- setdiff(intersect(methods, .twasFineMappingTokens()), + .twasKnownMethodLookupNames()) + regular <- setdiff(methods, fmExtra) + methodList <- if (length(regular) > 0L) .twasMethodLookup(regular) + else list() + if (length(fmExtra) > 0L) { + # Append stub entries (empty args) for fine-mapping tokens with no + # learner counterpart (e.g. fsusie). The gate will reject these. + for (tk in fmExtra) { + snake <- paste0(tk, "_weights") + methodList[[snake]] <- list() + } + } + # Tokens come from the user input (canonical camelCase) — the snake + # keys in methodList are an internal detail of learnTwasWeights. + tokens <- unique(methods) } else if (is.list(methods)) { methodList <- methods + tokens <- .twasTokensFromMethodList(methodList) } else { stop("`methods` must be a character vector, preset string, or named list.") } - tokens <- sub("(_weights|Weights)$", "", names(methodList)) list(tokens = tokens, methodList = methodList) } +# Canonical (camelCase) tokens known to .twasMethodLookup, for use by +# .twasNormalizeMethods. Source of truth: the methodMap inside +# .twasMethodLookup. +# @noRd +.twasKnownMethodLookupNames <- function() { + c("susie", "susieAsh", "susieInf", "mrash", "enet", "lasso", + "bayes_r", "bayes_l", "bayes_a", "bayes_b", "bayes_c", "bayes_n", + "b_lasso", "dpr_vb", "dpr_gibbs", "dpr_adaptive_gibbs", + "scad", "mcp", "l0learn", "mvsusie", "mrmash") +} + +# Convert a methodList (snake_case keys like `susie_inf_weights`) back to +# canonical camelCase tokens (susieInf). Falls back to the snake form for +# unknown keys. +# @noRd +.twasTokensFromMethodList <- function(methodList) { + snake <- sub("(_weights|Weights)$", "", names(methodList)) + snakeToCanonical <- c( + susie = "susie", susie_ash = "susieAsh", susie_inf = "susieInf", + susie_ash_inf = "susieAsh", + mrash = "mrash", enet = "enet", lasso = "lasso", + bayes_r = "bayes_r", bayes_l = "bayes_l", bayes_a = "bayes_a", + bayes_b = "bayes_b", bayes_c = "bayes_c", bayes_n = "bayes_n", + b_lasso = "b_lasso", dpr_vb = "dpr_vb", dpr_gibbs = "dpr_gibbs", + dpr_adaptive_gibbs = "dpr_adaptive_gibbs", + scad = "scad", mcp = "mcp", l0learn = "l0learn", + mvsusie = "mvsusie", mrmash = "mrmash", prsCs = "prsCs", + fsusie = "fsusie") + vapply(snake, function(s) { + if (!is.na(snakeToCanonical[s])) snakeToCanonical[[s]] else s + }, character(1), USE.NAMES = FALSE) +} + # Enforce input-class / method compatibility against the TWAS # capability table. Routes the input class through individual / # sumstat branches; the twasWeightsPipeline has no GwasSumStats input @@ -238,12 +283,18 @@ .twasCheckMethodCapabilities <- function(tokens, inputKind) { if (length(tokens) == 0L) return(invisible(NULL)) caps <- .twasMethodCapabilities + # Fine-mapping tokens are governed by .twasCheckFineMappingMethods (and + # delegate input-class compat to .fmCheckMethodCapabilities); skip them + # here so they aren't reported as "unknown". + fmTokens <- intersect(tokens, .twasFineMappingTokens()) + tokens <- setdiff(tokens, fmTokens) + if (length(tokens) == 0L) return(invisible(NULL)) unknown <- setdiff(tokens, names(caps)) if (length(unknown) > 0L) { stop(sprintf( "twasWeightsPipeline: unknown method token(s): %s. Known tokens: %s.", paste(unknown, collapse = ", "), - paste(names(caps), collapse = ", "))) + paste(c(names(caps), .twasFineMappingTokens()), collapse = ", "))) } individualKinds <- c("QtlDataset", "MultiStudyQtlDataset") bad <- character(0); reason <- character(0) @@ -271,17 +322,101 @@ } } +# Adapter registry mapping each fine-mapping method (whose existence is +# governed by .fineMappingMethodCapabilities) to its TWAS-weight extractor +# wrapper. The wrapper names follow the *Weights / *RssWeights convention, +# and the *Fit argument receives the pre-fitted fine-mapping object. +# fSuSiE is intentionally absent: no TWAS-weight extractor exists for it. +# @noRd +.twasFineMappingMethodAdapters <- list( + susie = list(weightFn = "susieWeights", + rssWeightFn = "susieRssWeights", + fitArg = "susieFit", + rssFitArg = "susieRssFit", + methodKey = "susie_weights"), + susieInf = list(weightFn = "susieInfWeights", + rssWeightFn = "susieInfRssWeights", + fitArg = "susieInfFit", + rssFitArg = "susieInfRssFit", + methodKey = "susie_inf_weights"), + susieAsh = list(weightFn = "susieAshWeights", + rssWeightFn = "susieAshRssWeights", + fitArg = "susieAshFit", + rssFitArg = "susieAshRssFit", + methodKey = "susie_ash_weights"), + mvsusie = list(weightFn = "mvsusieWeights", + rssWeightFn = "mvsusieRssWeights", + fitArg = "mvsusieFit", + rssFitArg = "mvsusieRssFit", + methodKey = "mvsusie_weights")) + +# Canonical list of fine-mapping tokens recognised by twasWeightsPipeline. +# Sourced from fineMappingPipeline's registry minus mrmash (which +# fineMappingPipeline hard-rejects as a TWAS-only method). +# @noRd +.twasFineMappingTokens <- function() { + setdiff(names(.fineMappingMethodCapabilities), "mrmash") +} + +# Reject fine-mapping methods (susie / susieInf / susieAsh / mvsusie / +# fsusie) when no FineMappingResult is supplied. twasWeightsPipeline is +# not allowed to re-fit fine-mapping models from scratch; users must run +# fineMappingPipeline() first and pass the result via `fineMappingResult`. +# Input-class compatibility (e.g. fsusie has no QtlSumStats path) is +# delegated to .fmCheckMethodCapabilities so the rule set stays in lock- +# step with fineMappingPipeline. Methods with no TWAS-weight extractor +# (fsusie) are rejected with a method-specific message. +# @noRd +.twasCheckFineMappingMethods <- function(tokens, fineMappingResult, inputKind) { + if (length(tokens) == 0L) return(invisible(NULL)) + fmTokens <- intersect(tokens, .twasFineMappingTokens()) + if (length(fmTokens) == 0L) return(invisible(NULL)) + + # Defer input-class compatibility to fineMappingPipeline. e.g. this + # rejects fsusie on QtlSumStats (fsusie has no RSS impl). + .fmCheckMethodCapabilities(fmTokens, inputKind) + + # Reject fine-mapping methods that have no TWAS-weight extractor + # (currently only fsusie). + noAdapter <- setdiff(fmTokens, names(.twasFineMappingMethodAdapters)) + if (length(noAdapter) > 0L) { + stop(sprintf( + "twasWeightsPipeline: method(s) %s have no TWAS-weight extractor. For multi-trait fine-mapping use mvsusie via fineMappingResult.", + paste(noAdapter, collapse = ", "))) + } + + if (is.null(fineMappingResult)) { + stop(sprintf( + "twasWeightsPipeline: method(s) %s are fine-mapping methods and may not be re-fit by twasWeightsPipeline. Run fineMappingPipeline() first and pass the result via `fineMappingResult = `.", + paste(unique(fmTokens), collapse = ", "))) + } + if (!is(fineMappingResult, "FineMappingResultBase")) { + stop("`fineMappingResult` must be a FineMappingResult or NULL.") + } + invisible(NULL) +} + +# Look up the multivariate flag for a token. Checks the TWAS-regression +# capability table first; if absent, falls back to the fine-mapping +# capability table (the source of truth for susie / mvsusie / fsusie / +# etc.). Returns FALSE for unknown tokens. +# @noRd +.twasIsMultivariateToken <- function(token) { + info <- .twasMethodCapabilities[[token]] + if (!is.null(info)) return(isTRUE(info$multivariate)) + fmInfo <- .fineMappingMethodCapabilities[[token]] + if (!is.null(fmInfo)) return(isTRUE(fmInfo$multivariate)) + FALSE +} + # Enforce the multi-trait / multi-context rule for mvsusie / mr.mash # methods (same family as the fine-mapping mvSuSiE rule in the design # doc). Multivariate methods need at least 2 traits *or* at least 2 # contexts in the Y matrix passed to learnTwasWeights. # @noRd .twasCheckMultivariateY <- function(tokens, nTraits, nContexts) { - caps <- .twasMethodCapabilities - multivariateTokens <- tokens[vapply(tokens, function(tk) { - info <- caps[[tk]] - !is.null(info) && isTRUE(info$multivariate) - }, logical(1))] + multivariateTokens <- tokens[vapply(tokens, .twasIsMultivariateToken, + logical(1))] if (length(multivariateTokens) == 0L) return(invisible(NULL)) if (nTraits < 2L && nContexts < 2L) { stop(sprintf( @@ -354,7 +489,7 @@ } out <- list() methods <- as.character(fineMappingResult$method) - for (canonical in c("susie", "susieInf", "susieAsh")) { + for (canonical in c("susie", "susieInf", "susieAsh", "mvsusie")) { candidates <- c(canonical, paste0(tolower(substring(canonical, 1L, 1L)), substring(canonical, 2L)), @@ -371,6 +506,19 @@ out } +# Locate a fine-mapping fit for one (study, context, trait, token) tuple. +# Used by the QtlSumStats sumstat dispatcher to pass the precomputed fit +# into susieRssWeights / susieInfRssWeights / susieAshRssWeights / +# mvsusieRssWeights via their respective *Fit arguments. +# @noRd +.twasFineMappingFitFor <- function(fineMappingResult, study, context, trait, + token) { + if (is.null(fineMappingResult)) return(NULL) + fits <- .twasFineMappingFits(fineMappingResult, + study = study, context = context, trait = trait) + fits[[token]] +} + #' TWAS Weights Pipeline #' #' S4-dispatched per-region pipeline for learning TWAS prediction weights. @@ -532,6 +680,7 @@ setMethod("twasWeightsPipeline", "QtlDataset", parsedJointSpec <- parseJointSpecification(jointSpecification, data) norm <- .twasNormalizeMethods(methods) .twasCheckMethodCapabilities(norm$tokens, "QtlDataset") + .twasCheckFineMappingMethods(norm$tokens, fineMappingResult, "QtlDataset") # Explicit jointSpecification path: run the per-spec axis dispatcher for # mr.mash. Other (univariate) methods continue through the existing @@ -720,6 +869,15 @@ setMethod("twasWeightsPipeline", "QtlDataset", Y <- Y[keep, , drop = FALSE] X <- X[rownames(Y), , drop = FALSE] + # mvsusie joint fits are stored once per (context, trait) row in + # the FineMappingResult; all rows of a single joint fit point at + # the same fit object. Pull the fit using the first (context, + # trait) of the joint group and thread it through. + jointFits <- .twasFineMappingFits(fineMappingResult, + study = study, + context = meta$context[[1L]], + trait = meta$trait[[1L]]) + # Build per-column identity tuples for learnTwasWeights so multi- # outcome methods emit one row per (context, trait). .twasWeightsPipelineMatrix( @@ -727,6 +885,7 @@ setMethod("twasWeightsPipeline", "QtlDataset", study = study, context = meta$context, trait = meta$trait, + fittedModels = jointFits, cvFolds = cvFolds, samplePartition = samplePartition, weightMethods = norm$methodList, @@ -776,6 +935,7 @@ setMethod("twasWeightsPipeline", "QtlSumStats", contexts = NULL, traitId = NULL, jointSpecification = NULL, + fineMappingResult = NULL, twasWeights = NULL, dataType = NULL, verbose = 1L, @@ -788,9 +948,12 @@ setMethod("twasWeightsPipeline", "QtlSumStats", parsedJointSpec <- parseJointSpecification(jointSpecification, data) - # Normalize the methods argument into (tokens, methodArgs). + # Normalize the methods argument into (tokens, methodArgs). The default + # set excludes fine-mapping methods (susie / susieInf / susieAsh / + # mvsusie); those must be requested explicitly together with a + # FineMappingResult passed via `fineMappingResult`. if (is.null(methods)) { - tokens <- c("susie", "susieInf", "lasso", "prsCs", "dpr_gibbs") + tokens <- c("lasso", "prsCs", "dpr_gibbs") methodArgs <- setNames(rep(list(list()), length(tokens)), tokens) } else if (is.character(methods)) { tokens <- methods @@ -803,6 +966,7 @@ setMethod("twasWeightsPipeline", "QtlSumStats", "of = entries.") } .twasCheckMethodCapabilities(tokens, "QtlSumStats") + .twasCheckFineMappingMethods(tokens, fineMappingResult, "QtlSumStats") jointResult <- NULL if (length(parsedJointSpec) > 0L) { @@ -832,9 +996,7 @@ setMethod("twasWeightsPipeline", "QtlSumStats", } # Partition method tokens by univariate vs multivariate dispatch. - isMv <- vapply(tokens, function(tk) { - isTRUE(.twasMethodCapabilities[[tk]]$multivariate) - }, logical(1)) + isMv <- vapply(tokens, .twasIsMultivariateToken, logical(1)) multivariateTokens <- tokens[isMv] univariateTokens <- tokens[!isMv] @@ -891,9 +1053,27 @@ setMethod("twasWeightsPipeline", "QtlSumStats", ldMat <- .twasLdFromSketch(ldSketch, variantIds) for (tk in toFitTokens) { - fn <- .twasMethodCapabilities[[tk]]$sumstatImpl + adapter <- .twasFineMappingMethodAdapters[[tk]] + fn <- if (!is.null(adapter)) adapter$rssWeightFn + else .twasMethodCapabilities[[tk]]$sumstatImpl userArgs <- methodArgs[[tk]] if (is.null(userArgs)) userArgs <- list() + # When the token is a fine-mapping method, pass the precomputed + # fit into the *Rss weight function via its dedicated *Fit arg + # (e.g. susieRssFit, susieInfRssFit, susieAshRssFit). The gate + # above ensures fineMappingResult is non-NULL here. + if (!is.null(adapter)) { + fit <- .twasFineMappingFitFor(fineMappingResult, + study = st, context = ctx, trait = tr, + token = tk) + if (is.null(fit)) { + warning(sprintf( + "twasWeightsPipeline: no '%s' fit found in fineMappingResult for (study=%s, context=%s, trait=%s); skipping.", + tk, st, ctx, tr)) + next + } + userArgs[[adapter$rssFitArg]] <- fit + } weights <- tryCatch( do.call(get(fn, mode = "function"), c(list(stat = stat, LD = ldMat), userArgs)), @@ -961,9 +1141,27 @@ setMethod("twasWeightsPipeline", "QtlSumStats", ldMat <- .twasLdFromSketch(ldSketch, variantIds) for (tk in multivariateTokens) { - fn <- .twasMethodCapabilities[[tk]]$sumstatImpl + adapter <- .twasFineMappingMethodAdapters[[tk]] + fn <- if (!is.null(adapter)) adapter$rssWeightFn + else .twasMethodCapabilities[[tk]]$sumstatImpl userArgs <- methodArgs[[tk]] if (is.null(userArgs)) userArgs <- list() + # mvsusie is fine-mapping; thread the pre-fit through. mr.mash is + # not, so this branch only fires for mvsusie. + if (!is.null(adapter)) { + fit <- .twasFineMappingFitFor(fineMappingResult, + study = st, + context = ctxNames[[1L]], + trait = tr, + token = tk) + if (is.null(fit)) { + warning(sprintf( + "twasWeightsPipeline: no '%s' fit found in fineMappingResult for (study=%s, trait=%s); skipping.", + tk, st, tr)) + next + } + userArgs[[adapter$rssFitArg]] <- fit + } weights <- tryCatch( do.call(get(fn, mode = "function"), c(list(stat = stat, LD = ldMat), userArgs)), @@ -1046,6 +1244,17 @@ setMethod("twasWeightsPipeline", "MultiStudyQtlDataset", naAction <- match.arg(naAction) parsedJointSpec <- parseJointSpecification(jointSpecification, data) + # Gate fine-mapping methods early so the recursion into the embedded + # QtlDataset / QtlSumStats components doesn't re-run fine-mapping. + { + gateTokens <- if (is.character(methods)) methods + else if (is.list(methods)) + sub("(_weights|Weights)$", "", names(methods)) + else character(0) + .twasCheckFineMappingMethods(gateTokens, fineMappingResult, + "MultiStudyQtlDataset") + } + jointResult <- NULL if (length(parsedJointSpec) > 0L) { jointMethods <- character(0) @@ -1101,6 +1310,7 @@ setMethod("twasWeightsPipeline", "MultiStudyQtlDataset", contexts = contexts, traitId = traitId, jointSpecification = NULL, + fineMappingResult = fineMappingResult, twasWeights = twasWeights, verbose = verbose, ...) @@ -1196,6 +1406,21 @@ setMethod("twasWeightsPipeline", "ANY", if (is.null(fittedModels)) fittedModels <- list() if (!is.null(susieFit)) fittedModels[["susie"]] <- susieFit + # Inject precomputed fine-mapping fits into the per-method args so the + # corresponding *Weights wrapper extracts coefficients from the fit + # rather than refitting. The adapter table (.twasFineMappingMethodAdapters) + # gives the snake_case methodList key and the *Fit argument name for + # each fine-mapping method. + for (canonical in names(.twasFineMappingMethodAdapters)) { + adapter <- .twasFineMappingMethodAdapters[[canonical]] + if (!is.null(fittedModels[[canonical]]) && + !is.null(weightMethods[[adapter$methodKey]]) && + is.null(weightMethods[[adapter$methodKey]][[adapter$fitArg]])) { + weightMethods[[adapter$methodKey]][[adapter$fitArg]] <- + fittedModels[[canonical]] + } + } + res <- list() st <- proc.time() if (verbose >= 1) { diff --git a/man/getMarginalEffects.Rd b/man/getMarginalEffects.Rd index a3302048..438ac089 100644 --- a/man/getMarginalEffects.Rd +++ b/man/getMarginalEffects.Rd @@ -37,7 +37,7 @@ getMarginalEffects(x, maxPval = NULL, ...) \item{maxPval}{Optional numeric (length 1). When non-\code{NULL}, filter rows where \code{p > maxPval}. Default \code{NULL} -(no filter — return all variants).} +(no filter).} \item{...}{Class-specific selection arguments.} } diff --git a/man/writeSumstatsVcf.Rd b/man/writeSumstatsVcf.Rd index c8f4c6b8..d081e223 100644 --- a/man/writeSumstatsVcf.Rd +++ b/man/writeSumstatsVcf.Rd @@ -30,26 +30,15 @@ writeSumstatsVcf(x, outputPath, sampleName = NULL, ...) \item{outputPath}{File path for output. Extension determines format: \code{.vcf.gz} or \code{.vcf.bgz} for bgzipped VCF, -\code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. When -\code{splitByContext} or \code{splitByTrait} is \code{TRUE} on a -\code{FineMappingResult} method call, the corresponding tag is -appended to the file stem (e.g. -\code{out.vcf} + context = \code{"brain"} → -\code{out.brain.vcf}).} +\code{.bcf} for BCF, \code{.vcf} for uncompressed VCF.} \item{sampleName}{Name for the VCF sample column (default: trait name or method name from the S4 object).} -\item{...}{Additional arguments passed to methods. For the -\code{FineMappingResult} method these include \code{splitByContext} -(logical, default \code{FALSE}) and \code{splitByTrait} (logical, -default \code{FALSE}): when either is \code{TRUE}, write one VCF -per row of the collection with the tuple value appended to the -output filename.} +\item{...}{Additional arguments passed to methods.} } \value{ -Invisible path (or character vector of paths when splitting) - to the written file(s). +Invisible path to the written file. } \description{ Creates a VCF object from GWAS summary statistics or fine-mapping results diff --git a/tests/testthat/test_deprecated.R b/tests/testthat/test_deprecated.R index d435a12f..b0c6f0dd 100644 --- a/tests/testthat/test_deprecated.R +++ b/tests/testthat/test_deprecated.R @@ -796,50 +796,9 @@ test_that("computeQtlEnrichment errors when gwas_pip has no names", { ) }) -# ---- real C++ qtlEnrichmentRcpp integration test ---- -test_that("computeQtlEnrichment calls real C++ enrichment code and returns expected keys", { - skip_on_covr() - set.seed(42) - n_snps <- 50 - variantNames <- paste0("1:", 1:n_snps, ":A:G") - - # GWAS PIPs: sparse signal - gwas_pip <- rep(0.01, n_snps) - gwas_pip[c(5, 20, 35)] <- c(0.8, 0.6, 0.9) - names(gwas_pip) <- variantNames - - # SuSiE fit with 2 single effects over same variants - L <- 2 - alpha <- matrix(1 / n_snps, nrow = L, ncol = n_snps) - # Concentrate probability on causal variants - alpha[1, ] <- 0.001; alpha[1, 5] <- 0.95; alpha[1, ] <- alpha[1, ] / sum(alpha[1, ]) - alpha[2, ] <- 0.001; alpha[2, 20] <- 0.95; alpha[2, ] <- alpha[2, ] / sum(alpha[2, ]) - pip <- colSums(alpha) - names(pip) <- variantNames - - susie_fits <- list( - fit1 = list(pip = pip, alpha = alpha, prior_variance = c(0.5, 0.3)) - ) - - # Call without mocking - exercises the real C++ code - res <- suppressWarnings( - computeQtlEnrichment(gwas_pip, susie_fits, - numGwas = 5000, piQtl = 0.5, - lambda = 1, impN = 5, numThreads = 1) - ) - expect_type(res, "list") - # The enrichment results are in res[[1]] (the C++ output list) - en <- res[[1]] - expected_keys <- c("Intercept", "Enrichment (no shrinkage)", "Enrichment (w/ shrinkage)", - "sd (no shrinkage)", "sd (w/ shrinkage)", - "Alternative (coloc) p1", "Alternative (coloc) p2", "Alternative (coloc) p12") - for (key in expected_keys) { - expect_true(key %in% names(en), info = paste("Missing key:", key)) - } - # All numeric and finite - numeric_vals <- unlist(en[expected_keys]) - expect_true(all(is.finite(numeric_vals))) -}) +# Real C++ kernel integration is now covered in test_qtlEnrichmentPipeline.R +# via qtlEnrichment() (no skip_on_covr there). Don't duplicate here — this +# file would just skip those tests anyway. # ---- unmatched variants tracking (computeQtlEnrichment.R line 102) ---- test_that("computeQtlEnrichment tracks unmatched QTL variants", { diff --git a/tests/testthat/test_qtlEnrichmentPipeline.R b/tests/testthat/test_qtlEnrichmentPipeline.R index 0b629e0d..c8610f55 100644 --- a/tests/testthat/test_qtlEnrichmentPipeline.R +++ b/tests/testthat/test_qtlEnrichmentPipeline.R @@ -239,4 +239,135 @@ test_that(".enrBuildQtlRegionsList: returns per-entry fit shapes", { expect_true(!is.null(out[[1L]]$pip)) }) +# =========================================================================== +# qtlEnrichment() — kernel wrapper + real C++ integration +# These tests deliberately do NOT mock qtlEnrichmentRcpp so the C++ +# kernel in src/qtl_enrichment.cpp gets coverage. The wrapper itself +# (R/qtlEnrichmentPipeline.R::qtlEnrichment) is exercised here directly +# rather than via the deprecated `computeQtlEnrichment` shim (which has +# skip_on_covr()). +# =========================================================================== + +# Build a small (gwasPip, susieQtlRegions) fixture with a sparse causal +# signal at known indices so the C++ enrichment routine has something +# meaningful to compute. +.qep_makeRealKernelInputs <- function(seed = 42, nSnps = 50, + causalIdx = c(5, 20, 35), + causalPips = c(0.8, 0.6, 0.9), + L = 2L) { + set.seed(seed) + variantNames <- paste0("1:", seq_len(nSnps), ":A:G") + gwasPip <- rep(0.01, nSnps) + gwasPip[causalIdx] <- causalPips + names(gwasPip) <- variantNames + + alpha <- matrix(1 / nSnps, nrow = L, ncol = nSnps) + alpha[1, ] <- 0.001; alpha[1, causalIdx[1]] <- 0.95 + alpha[1, ] <- alpha[1, ] / sum(alpha[1, ]) + alpha[2, ] <- 0.001; alpha[2, causalIdx[2]] <- 0.95 + alpha[2, ] <- alpha[2, ] / sum(alpha[2, ]) + pip <- colSums(alpha) + names(pip) <- variantNames + susieFits <- list( + fit1 = list(pip = pip, alpha = alpha, + prior_variance = c(0.5, 0.3))) + list(gwasPip = gwasPip, susieQtlRegions = susieFits, + variantNames = variantNames) +} + +test_that("qtlEnrichment: real C++ kernel returns the expected keys (numGwas + piQtl supplied)", { + fx <- .qep_makeRealKernelInputs() + res <- qtlEnrichment( + gwasPip = fx$gwasPip, susieQtlRegions = fx$susieQtlRegions, + numGwas = 5000, piQtl = 0.5, + lambda = 1, impN = 5, numThreads = 1, verbose = FALSE) + expect_type(res, "list") + en <- res[[1L]] + expectedKeys <- c("Intercept", "Enrichment (no shrinkage)", + "Enrichment (w/ shrinkage)", + "sd (no shrinkage)", "sd (w/ shrinkage)", + "Alternative (coloc) p1", "Alternative (coloc) p2", + "Alternative (coloc) p12") + expect_setequal(intersect(expectedKeys, names(en)), expectedKeys) + expect_true(all(is.finite(unlist(en[expectedKeys])))) +}) + +test_that("qtlEnrichment: numGwas omitted -> estimates piGwas from data + warns", { + fx <- .qep_makeRealKernelInputs(nSnps = 30, causalIdx = c(5, 15)) + expect_warning( + res <- qtlEnrichment( + gwasPip = fx$gwasPip, susieQtlRegions = fx$susieQtlRegions, + piQtl = 0.5, impN = 5, numThreads = 1, verbose = FALSE), + "numGwas is not provided") + expect_type(res, "list") +}) + +test_that("qtlEnrichment: piQtl omitted -> estimates from susieQtlRegions + warns", { + fx <- .qep_makeRealKernelInputs(nSnps = 30, causalIdx = c(5, 15)) + expect_warning( + res <- qtlEnrichment( + gwasPip = fx$gwasPip, susieQtlRegions = fx$susieQtlRegions, + numGwas = 3000, impN = 5, numThreads = 1, verbose = FALSE), + "piQtl is not provided") + expect_type(res, "list") +}) + +test_that("qtlEnrichment: errors when piGwas resolves to zero", { + fx <- .qep_makeRealKernelInputs() + zeroGwas <- rep(0, length(fx$gwasPip)) + names(zeroGwas) <- names(fx$gwasPip) + expect_error( + qtlEnrichment(gwasPip = zeroGwas, + susieQtlRegions = fx$susieQtlRegions, + piQtl = 0.5, numThreads = 1, verbose = FALSE), + "No association signal found") +}) + +test_that("qtlEnrichment: errors when piQtl resolves to zero", { + fx <- .qep_makeRealKernelInputs() + expect_error( + qtlEnrichment(gwasPip = fx$gwasPip, + susieQtlRegions = fx$susieQtlRegions, + numGwas = 5000, piQtl = 0, + numThreads = 1, verbose = FALSE), + "No QTL associated") +}) + +test_that("qtlEnrichment: errors when gwasPip has no names", { + fx <- .qep_makeRealKernelInputs() + unnamed <- unname(fx$gwasPip) + expect_error( + qtlEnrichment(gwasPip = unnamed, + susieQtlRegions = fx$susieQtlRegions, + numGwas = 5000, piQtl = 0.5, + numThreads = 1, verbose = FALSE), + "Variant names are missing in gwasPip") +}) + +test_that("qtlEnrichment: errors when susieQtlRegions$pip lacks names", { + fx <- .qep_makeRealKernelInputs() + fx$susieQtlRegions$fit1$pip <- unname(fx$susieQtlRegions$fit1$pip) + expect_error( + qtlEnrichment(gwasPip = fx$gwasPip, + susieQtlRegions = fx$susieQtlRegions, + numGwas = 5000, piQtl = 0.5, + numThreads = 1, verbose = FALSE), + "Variant names are missing in susieQtlRegions") +}) + +test_that("qtlEnrichment: tracks unmatched QTL variants in the output", { + fx <- .qep_makeRealKernelInputs(nSnps = 30, causalIdx = c(5, 15)) + # Inject a couple of variant IDs into the QTL fit that don't exist + # in the GWAS PIP vector. + newNames <- names(fx$susieQtlRegions$fit1$pip) + newNames[1:2] <- c("1:9999:A:G", "1:9998:A:G") + names(fx$susieQtlRegions$fit1$pip) <- newNames + colnames(fx$susieQtlRegions$fit1$alpha) <- newNames + res <- qtlEnrichment( + gwasPip = fx$gwasPip, susieQtlRegions = fx$susieQtlRegions, + numGwas = 3000, piQtl = 0.5, + impN = 5, numThreads = 1, verbose = FALSE) + expect_true("unused_xqtl_variants" %in% names(res)) +}) + diff --git a/tests/testthat/test_twasWeightsPipeline.R b/tests/testthat/test_twasWeightsPipeline.R index 379e6fa4..c33c87b3 100644 --- a/tests/testthat/test_twasWeightsPipeline.R +++ b/tests/testthat/test_twasWeightsPipeline.R @@ -96,6 +96,46 @@ context("twasWeightsPipeline (S4 dispatch) with mocked weight methods") } } +# Build a minimal FineMappingResult that satisfies .twasCheckFineMappingMethods. +# Each entry stores a stub fit list under the susieFit slot; the per-tuple +# loop pulls these via .twasFineMappingFits() and threads them into the +# corresponding *Weights wrapper as susieFit / susieInfFit / mvsusieFit / etc. +.tp_makeStubFineMappingResult <- function(study = "study1", + contexts = "brain", + traits = "ENSG_A", + method = "susie", + fitPayload = NULL) { + rows <- expand.grid(study = study, context = contexts, trait = traits, + method = method, stringsAsFactors = FALSE) + entries <- lapply(seq_len(nrow(rows)), function(i) { + if (is.null(fitPayload)) + fitPayload <- list(method = rows$method[[i]], + context = rows$context[[i]], + trait = rows$trait[[i]]) + tl <- data.frame( + variant_id = paste0("v", seq_len(3L)), + pip = c(0.9, 0.5, 0.1), + stringsAsFactors = FALSE) + FineMappingEntry( + variantIds = tl$variant_id, + susieFit = fitPayload, + topLoci = tl) + }) + if ("brain" %in% rows$context || any(rows$context != "")) { + QtlFineMappingResult( + study = rows$study, + context = rows$context, + trait = rows$trait, + method = rows$method, + entry = entries) + } else { + GwasFineMappingResult( + study = rows$study, + method = rows$method, + entry = entries) + } +} + # Mock individual-level weight methods to return zero vectors quickly. .tp_mockIndividualWeights <- function() { list( @@ -234,21 +274,22 @@ test_that("twasWeightsPipeline(QtlSumStats): runs end-to-end with mocked solvers .tp_mockSumstatWeights()) do.call(local_mocked_bindings, c(mocks, list(.package = "pecotmr"))) - # Method tokens are the bare short names ("susie", "lasso"); the - # QtlSumStats dispatch resolves them to the *Rss / lassosumRss impl via - # the .twasMethodCapabilities table. + # Method tokens are the bare short names; the QtlSumStats dispatch + # resolves them to the *Rss impl via the .twasMethodCapabilities table. + # Fine-mapping methods (susie / susieInf / etc.) require a + # FineMappingResult and are covered by separate tests. res <- suppressMessages(suppressWarnings( - twasWeightsPipeline(ss, methods = c("susie", "lasso"), + twasWeightsPipeline(ss, methods = c("mrash", "lasso"), verbose = 0))) expect_s4_class(res, "TwasWeights") expect_equal(nrow(res), 2L) - expect_setequal(getMethodNames(res), c("susie", "lasso")) + expect_setequal(getMethodNames(res), c("mrash", "lasso")) }) test_that("twasWeightsPipeline(QtlSumStats): un-QCd input is rejected", { ss <- .tp_makeQtlSumStats(qc = FALSE) expect_error( - twasWeightsPipeline(ss, methods = "susie"), + twasWeightsPipeline(ss, methods = "lasso"), "has no QC record" ) }) @@ -264,7 +305,7 @@ test_that("twasWeightsPipeline(QtlSumStats): individual-only method rejected", { test_that("twasWeightsPipeline(QtlSumStats): empty contexts/trait filter errors", { ss <- .tp_makeQtlSumStats() expect_error( - twasWeightsPipeline(ss, methods = "susie", + twasWeightsPipeline(ss, methods = "lasso", contexts = "ghost"), "no entries matched" ) @@ -275,27 +316,359 @@ test_that("twasWeightsPipeline(QtlSumStats): per-method failure surfaces as warn mocks <- c( list(extractBlockGenotypes = .tp_mockExtractor()), .tp_mockSumstatWeights()) - # Override susieRssWeights with the failure-producing version. - mocks$susieRssWeights <- function(stat, LD, ...) stop("synthetic test failure") + # Override lassosumRssWeights with the failure-producing version. + mocks$lassosumRssWeights <- function(stat, LD, ...) stop("synthetic test failure") do.call(local_mocked_bindings, c(mocks, list(.package = "pecotmr"))) # All entries fail -> the per-method-warning fires *and* the pipeline # then errors out (no rows produced). Capture both. expect_error( suppressWarnings(suppressMessages( - twasWeightsPipeline(ss, methods = "susie", verbose = 0))), + twasWeightsPipeline(ss, methods = "lasso", verbose = 0))), "no entries produced weights" ) }) test_that("twasWeightsPipeline(QtlSumStats): multivariate requires >=2 contexts per (study, trait)", { ss <- .tp_makeQtlSumStats(n_entries = 1L) # 1 context per (study, trait) + # Provide a stub FineMappingResult so the gate passes; the multivariate + # guard is what we want to exercise here. + fmr <- .tp_makeStubFineMappingResult(study = "s1", contexts = "c1", + traits = "t1", method = "mvsusie") expect_error( - twasWeightsPipeline(ss, methods = "mvsusie"), + twasWeightsPipeline(ss, methods = "mvsusie", fineMappingResult = fmr), "multivariate method.*require at least two contexts" ) }) +# =========================================================================== +# Fine-mapping method gate: every fine-mapping method (susie / susieInf / +# susieAsh / mvsusie / fsusie) must be paired with a FineMappingResult. +# twasWeightsPipeline is not allowed to re-fit them from scratch. Input- +# class compatibility is delegated to .fmCheckMethodCapabilities so the +# rule set stays in sync with fineMappingPipeline. +# =========================================================================== + +test_that("gate: QtlDataset + susie without fineMappingResult errors", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + expect_error( + twasWeightsPipeline(qd, methods = "susie"), + "are fine-mapping methods and may not be re-fit") +}) + +test_that("gate: QtlSumStats + susieInf without fineMappingResult errors", { + ss <- .tp_makeQtlSumStats() + expect_error( + twasWeightsPipeline(ss, methods = "susieInf"), + "are fine-mapping methods and may not be re-fit") +}) + +test_that("gate: QtlDataset + susieAsh without fineMappingResult errors", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + expect_error( + twasWeightsPipeline(qd, methods = "susieAsh"), + "are fine-mapping methods and may not be re-fit") +}) + +test_that("gate: composite (susie + susieInf) without fineMappingResult errors", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + # Mixed: lasso is allowed, but susie/susieInf must still be gated. + expect_error( + twasWeightsPipeline(qd, methods = c("susie", "susieInf", "lasso")), + "are fine-mapping methods and may not be re-fit") +}) + +test_that("gate: mvsusie without fineMappingResult errors (QtlDataset)", { + qd <- .tp_makeQtlDataset(contexts = c("brain", "liver"), traits = "ENSG_A") + expect_error( + twasWeightsPipeline(qd, methods = "mvsusie"), + "are fine-mapping methods and may not be re-fit") +}) + +test_that("gate: fsusie has no TWAS-weight extractor (rejected by name)", { + qd <- .tp_makeQtlDataset(contexts = c("brain", "liver"), + traits = c("ENSG_A", "ENSG_B")) + fmr <- .tp_makeStubFineMappingResult( + study = "study1", contexts = c("brain", "liver"), + traits = "ENSG_A", method = "fsusie") + # Even with a fineMappingResult, fsusie can't produce TWAS weights. + expect_error( + twasWeightsPipeline(qd, methods = "fsusie", fineMappingResult = fmr), + "have no TWAS-weight extractor") +}) + +test_that("gate: fsusie on QtlSumStats delegates to .fmCheckMethodCapabilities", { + ss <- .tp_makeQtlSumStats() + fmr <- .tp_makeStubFineMappingResult(study = "s1", contexts = "c1", + traits = "t1", method = "fsusie") + # fineMappingPipeline rejects fsusie on QtlSumStats ("sumstat-only on this + # pipeline" / no sumstat impl); twasWeightsPipeline reuses that check. + expect_error( + twasWeightsPipeline(ss, methods = "fsusie", fineMappingResult = fmr), + "individual-only|sumstat-only|not supported") +}) + +test_that("gate: non-FineMappingResult object passed in errors", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + expect_error( + twasWeightsPipeline(qd, methods = "susie", + fineMappingResult = list()), + "must be a FineMappingResult") +}) + +test_that("gate: unknown method tokens still error with full menu", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + expect_error( + twasWeightsPipeline(qd, methods = "totallyMadeUpMethod"), + "Unknown TWAS method|unknown method") +}) + +# --------------------------------------------------------------------------- +# Success: when a FineMappingResult is supplied, the fit is threaded into +# the corresponding *Weights wrapper via its *Fit argument. The wrappers +# are mocked to verify the fit arrives and the underlying fitter is NOT +# re-invoked. +# --------------------------------------------------------------------------- + +test_that("gate: QtlDataset + susie + fineMappingResult threads the susieFit", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + fmr <- .tp_makeStubFineMappingResult( + study = "study1", contexts = "brain", traits = "ENSG_A", + method = "susie") + sawFit <- FALSE + mocks <- list( + extractBlockGenotypes = .tp_mockExtractor(), + susieWeights = function(X = NULL, y = NULL, susieFit = NULL, ...) { + sawFit <<- !is.null(susieFit) + rep(0, ncol(X)) + }) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(qd, + methods = "susie", + fineMappingResult = fmr, + cisWindow = 1000L, + cvFolds = 0, + ensemble = FALSE, + estimatePi = FALSE, + verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 1L) + expect_setequal(getMethodNames(res), "susie") + expect_true(sawFit) +}) + +test_that("gate: QtlSumStats + susie + fineMappingResult threads the susieRssFit", { + ss <- .tp_makeQtlSumStats() + fmr <- .tp_makeStubFineMappingResult(study = "s1", contexts = "c1", + traits = "t1", method = "susie") + fitsSeen <- 0L + mocks <- list( + extractBlockGenotypes = .tp_mockExtractor(), + susieRssWeights = function(stat, LD, susieRssFit = NULL, ...) { + if (!is.null(susieRssFit)) fitsSeen <<- fitsSeen + 1L + rep(0, nrow(LD)) + }) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = "susie", + fineMappingResult = fmr, verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 1L) + expect_setequal(getMethodNames(res), "susie") + expect_equal(fitsSeen, 1L) +}) + +test_that("gate: QtlSumStats + susieAsh + fineMappingResult threads the susieAshRssFit", { + ss <- .tp_makeQtlSumStats() + fmr <- .tp_makeStubFineMappingResult(study = "s1", contexts = "c1", + traits = "t1", method = "susieAsh") + sawFit <- FALSE + mocks <- list( + extractBlockGenotypes = .tp_mockExtractor(), + susieAshRssWeights = function(stat, LD, susieAshRssFit = NULL, ...) { + sawFit <<- !is.null(susieAshRssFit) + rep(0, nrow(LD)) + }) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = "susieAsh", + fineMappingResult = fmr, verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 1L) + expect_setequal(getMethodNames(res), "susieAsh") + expect_true(sawFit) +}) + +test_that("gate: missing matching tuple in fineMappingResult warns and skips", { + ss <- .tp_makeQtlSumStats() + # FineMappingResult has a row for a DIFFERENT (study, context, trait): + # the gate passes (any fine-mapping fit present satisfies the object + # check), but the per-tuple lookup finds nothing and warns. + fmr <- .tp_makeStubFineMappingResult(study = "other_study", + contexts = "other_ctx", + traits = "other_trait", + method = "susie") + mocks <- c( + list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockSumstatWeights()) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + expect_error( + suppressWarnings(suppressMessages( + twasWeightsPipeline(ss, methods = "susie", + fineMappingResult = fmr, verbose = 0))), + "no entries produced weights") +}) + +# =========================================================================== +# Multivariate dispatch paths (mvsusie / mr.mash) on individual-level data +# (QtlDataset.runMultivariate) and on summary-statistics input (QtlSumStats +# multivariate-Z dispatch). Both paths build a multi-column Y / Z and invoke +# a single multivariate solver that returns a (variants x conditions) weight +# matrix; we mock that solver to return a zero matrix of the expected shape. +# =========================================================================== + +test_that("twasWeightsPipeline(QtlDataset): mvsusie multivariate path returns one row per (context, trait)", { + qd <- .tp_makeQtlDataset(contexts = c("brain", "liver"), + traits = "ENSG_A") + fmr <- .tp_makeStubFineMappingResult( + study = "study1", contexts = c("brain", "liver"), + traits = "ENSG_A", method = "mvsusie") + mvCalls <- 0L + mocks <- list( + extractBlockGenotypes = .tp_mockExtractor(), + mvsusieWeights = function(X, Y, mvsusieFit = NULL, ...) { + mvCalls <<- mvCalls + 1L + # Verify the gate threaded the precomputed fit through. + stopifnot(!is.null(mvsusieFit)) + matrix(0, nrow = ncol(X), ncol = ncol(Y), + dimnames = list(colnames(X), colnames(Y))) + }) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(qd, + methods = "mvsusie", + fineMappingResult = fmr, + cisWindow = 1000L, + cvFolds = 0, + ensemble = FALSE, + estimatePi = FALSE, + verbose = 0))) + expect_s4_class(res, "TwasWeights") + # Joint fit on 2 contexts x 1 trait -> 2 rows back, one per (context, trait). + expect_equal(nrow(res), 2L) + expect_setequal(getContexts(res), c("brain", "liver")) + expect_setequal(getTraits(res), "ENSG_A") + expect_setequal(getMethodNames(res), "mvsusie") + expect_gte(mvCalls, 1L) +}) + +test_that("twasWeightsPipeline(QtlDataset): mr.mash multivariate path with 2 traits x 2 contexts", { + qd <- .tp_makeQtlDataset(contexts = c("brain", "liver"), + traits = c("ENSG_A", "ENSG_B")) + mocks <- list( + extractBlockGenotypes = .tp_mockExtractor(), + mrmashWeights = function(X, Y, ...) { + matrix(0, nrow = ncol(X), ncol = ncol(Y), + dimnames = list(colnames(X), colnames(Y))) + }) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(qd, + methods = "mrmash", + cisWindow = 1000L, + cvFolds = 0, + ensemble = FALSE, + estimatePi = FALSE, + verbose = 0))) + expect_s4_class(res, "TwasWeights") + # 2 contexts x 2 traits = 4 output rows. + expect_equal(nrow(res), 4L) + expect_setequal(getContexts(res), c("brain", "liver")) + expect_setequal(getTraits(res), c("ENSG_A", "ENSG_B")) + expect_setequal(getMethodNames(res), "mrmash") +}) + +# --------------------------------------------------------------------------- +# QtlSumStats: multivariate dispatch builds a (variants x contexts) Z matrix +# and invokes the *Rss solver once per (study, trait) group. +# --------------------------------------------------------------------------- + +.tp_makeMultiCtxQtlSumStats <- function(contexts = c("c1", "c2"), + snp_ids = paste0("v", 1:8), + positions = seq(100L, by = 100L, length.out = 8L)) { + n <- length(contexts) + entries <- lapply(seq_len(n), function(i) { + # Same SNP order across contexts -- required by the multivariate path + # (it errors on any divergence after summaryStatsQc). + .tp_makeSumstatsEntry(snp_ids = snp_ids, positions = positions) + }) + QtlSumStats(study = rep("s1", n), + context = contexts, + trait = rep("t1", n), + entry = entries, + genome = "hg19", + ldSketch = .tp_makeHandle(snp_n = 20L), + qcInfo = list(step1 = "ok")) +} + +test_that("twasWeightsPipeline(QtlSumStats): mvsusie multivariate path returns one row per context", { + ss <- .tp_makeMultiCtxQtlSumStats(contexts = c("c1", "c2")) + fmr <- .tp_makeStubFineMappingResult( + study = "s1", contexts = c("c1", "c2"), traits = "t1", + method = "mvsusie") + shapesSeen <- list() + mocks <- list( + extractBlockGenotypes = .tp_mockExtractor(), + mvsusieRssWeights = function(stat, LD, mvsusieRssFit = NULL, ...) { + shapesSeen[[length(shapesSeen) + 1L]] <<- list( + Zdim = dim(stat$z), LDdim = dim(LD), + ctxNames = colnames(stat$z), + sawFit = !is.null(mvsusieRssFit)) + matrix(0, nrow = nrow(LD), ncol = ncol(stat$z), + dimnames = list(stat$variantNames, colnames(stat$z))) + }) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = "mvsusie", + fineMappingResult = fmr, verbose = 0))) + expect_s4_class(res, "TwasWeights") + # 1 (study, trait) group with 2 contexts -> 2 output rows. + expect_equal(nrow(res), 2L) + expect_setequal(getContexts(res), c("c1", "c2")) + expect_setequal(getTraits(res), "t1") + expect_setequal(getMethodNames(res), "mvsusie") + # The solver was called once with a (variants x 2) Z matrix and was + # handed the precomputed fit from fineMappingResult. + expect_length(shapesSeen, 1L) + expect_equal(shapesSeen[[1L]]$Zdim[[2L]], 2L) + expect_setequal(shapesSeen[[1L]]$ctxNames, c("c1", "c2")) + expect_true(shapesSeen[[1L]]$sawFit) +}) + +test_that("twasWeightsPipeline(QtlSumStats): mr.mash multivariate solver failure surfaces as warning + empty result", { + ss <- .tp_makeMultiCtxQtlSumStats(contexts = c("c1", "c2")) + mocks <- list( + extractBlockGenotypes = .tp_mockExtractor(), + mrmashRssWeights = function(stat, LD, ...) stop("synthetic multivariate failure")) + do.call(local_mocked_bindings, + c(mocks, list(.package = "pecotmr"))) + # All multivariate fits fail -> no rows -> the pipeline errors out at the + # end, surfacing the per-group warning along the way. + expect_error( + suppressWarnings(suppressMessages( + twasWeightsPipeline(ss, methods = "mrmash", verbose = 0))), + "no entries produced weights" + ) +}) + # =========================================================================== # Resume cache (twasWeights = ) # =========================================================================== @@ -371,20 +744,20 @@ test_that("twasWeightsPipeline(QtlSumStats): cache hit on a per-tuple basis", { ss <- .tp_makeQtlSumStats() cached <- TwasWeights( study = "s1", context = "c1", trait = "t1", - method = "susie", + method = "lasso", entry = list(.tp_makeCachedEntry())) rssCalls <- 0L mocks <- c( list(extractBlockGenotypes = .tp_mockExtractor()), .tp_mockSumstatWeights()) - mocks$susieRssWeights <- function(stat, LD, ...) { + mocks$lassosumRssWeights <- function(stat, LD, ...) { rssCalls <<- rssCalls + 1L rep(0, nrow(LD)) } do.call(local_mocked_bindings, c(mocks, list(.package = "pecotmr"))) res <- suppressMessages(suppressWarnings( - twasWeightsPipeline(ss, methods = "susie", + twasWeightsPipeline(ss, methods = "lasso", twasWeights = cached, verbose = 0))) expect_s4_class(res, "TwasWeights") From ed3f980a4936e4ff1e55463b0159f4beb5526015 Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Mon, 22 Jun 2026 00:15:50 -0700 Subject: [PATCH 3/4] improve finemappingresult and twasweightspipeline --- man/twasWeightsPipeline.Rd | 1 + 1 file changed, 1 insertion(+) diff --git a/man/twasWeightsPipeline.Rd b/man/twasWeightsPipeline.Rd index 898954bf..cbabba63 100644 --- a/man/twasWeightsPipeline.Rd +++ b/man/twasWeightsPipeline.Rd @@ -46,6 +46,7 @@ twasWeightsPipeline(data, ...) contexts = NULL, traitId = NULL, jointSpecification = NULL, + fineMappingResult = NULL, twasWeights = NULL, dataType = NULL, verbose = 1L, From 5b5b0ea2888255c4c3eedc4eb4aed90fe23c3acd Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Mon, 22 Jun 2026 00:24:45 -0700 Subject: [PATCH 4/4] fix file name --- R/allGenerics.R | 799 ------------------------------------------------ 1 file changed, 799 deletions(-) delete mode 100644 R/allGenerics.R diff --git a/R/allGenerics.R b/R/allGenerics.R deleted file mode 100644 index 6ef0af4d..00000000 --- a/R/allGenerics.R +++ /dev/null @@ -1,799 +0,0 @@ -#' @title S4 Generic Function Definitions -#' @description All S4 generic function definitions for pecotmr. -#' @name pecotmr-generics -#' @keywords internal -#' @importFrom methods setGeneric -NULL - -# ============================================================================= -# High-level estimation generic -# ============================================================================= - -#' @title Estimate SNP Heritability -#' @description Estimate SNP heritability from GWAS summary statistics using -#' one of three methods: LDER, g-LDSC, or HDL/sHDL. -#' @param sumstats A \code{GwasSumStats} object. -#' @param ldRef An \code{LdStatistic} object (method-appropriate subclass). -#' @param method Character, one of "lder", "gldsc", "hdl". -#' @param annotations An \code{AnnotationMatrix} object, or NULL for -#' unstratified estimation. -#' @param local Logical, whether to compute per-block local estimates. -#' @param ... Additional method-specific arguments. -#' @return An \code{H2Estimate} object. -#' @export -setGeneric("estimateH2", - function(sumstats, ldRef, method = "lder", annotations = NULL, - local = FALSE, ...) - standardGeneric("estimateH2") -) - -# ============================================================================= -# LD score computation -# ============================================================================= - -#' @title Compute LD Scores -#' @description Compute LD scores from an LD reference, optionally -#' stratified by annotations. -#' @param ldRef An \code{LdStatistic} object. -#' @param annotations An \code{AnnotationMatrix} object, or NULL. -#' @param ... Additional arguments. -#' @return A numeric matrix of LD scores (SNPs x annotations+1). -#' @export -setGeneric("computeLdScores", - function(ldRef, annotations = NULL, ...) - standardGeneric("computeLdScores") -) - -# ============================================================================= -# I/O generics -# ============================================================================= - -#' @title Read Genotype Data -#' @description Read genotype data from various formats (VCF, plink1, -#' plink2, GDS) and return a \code{GenotypeHandle} for deferred -#' genotype loading. -#' @param path Character, path to the genotype file. -#' @param format Character, one of "vcf", "plink1", "plink2", "gds". -#' If NULL, inferred from file extension. -#' @param ... Additional arguments. -#' @return A \code{GenotypeHandle} object. -#' @export -setGeneric("readGenotypes", - function(path, format = NULL, ...) - standardGeneric("readGenotypes") -) - -#' @title Read Annotations -#' @description Read genomic annotations from files (BED, BigWig, -#' S-LDSC .annot format, or GRanges objects) and create an -#' AnnotationMatrix. -#' @param paths Named character vector of file paths, or a named list -#' of GRanges objects. Names become annotation names. -#' @param snpRanges A \code{GRanges} object defining SNP positions. -#' @param annotationMeta A \code{data.frame} with annotation metadata -#' (name, tier, type). If NULL, auto-detected from file format. -#' @param genome Character, genome build. -#' @param ... Additional arguments. -#' @return An \code{AnnotationMatrix} object. -#' @export -setGeneric("readAnnotations", - function(paths, snpRanges, annotationMeta = NULL, - genome = "hg19", ...) - standardGeneric("readAnnotations") -) - -# ============================================================================= -# Accessor generics -# ============================================================================= - -#' @title Get Local Estimates -#' @description Extract per-block local estimates from a result object. -#' @param object An \code{H2Estimate} object. -#' @return A \code{data.frame} of local estimates, or NULL. -#' @export -setGeneric("getLocal", function(object) standardGeneric("getLocal")) - -#' @title Get Enrichment Estimates -#' @description Extract annotation enrichment estimates from a result object. -#' @param object An \code{H2Estimate} object. -#' @return A \code{data.frame} of enrichment estimates, or NULL. -#' @export -setGeneric("getEnrichment", - function(object) standardGeneric("getEnrichment")) - -#' @title Get Score Statistics -#' @description Extract score statistics for candidate annotations. -#' @param object An \code{H2Estimate} object. -#' @return A list with \code{z} and \code{R}, or NULL. -#' @export -setGeneric("getScoreStats", - function(object) standardGeneric("getScoreStats")) - -# ============================================================================= -# GwasSumStats accessor generics -# ============================================================================= - -#' @title Get Z-scores -#' @description Extract z-score vector from a \code{GwasSumStats} or -#' \code{QtlSumStats} entry, selected by its identity tuple. -#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. -#' @param ... Class-specific selection arguments (e.g., \code{study} for -#' \code{GwasSumStats}; \code{study}, \code{context}, \code{trait} for -#' \code{QtlSumStats}). -#' @return Numeric vector of z-scores. -#' @export -setGeneric("getZ", function(x, ...) standardGeneric("getZ")) - -#' @title Get Sample Sizes -#' @description Extract sample size vector from a \code{GwasSumStats} or -#' \code{QtlSumStats} entry, selected by its identity tuple. -#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. -#' @param ... Class-specific selection arguments. -#' @return Numeric vector of sample sizes. -#' @export -setGeneric("getN", function(x, ...) standardGeneric("getN")) - -#' @title Get Minor Allele Frequencies -#' @description Extract MAF vector from a GwasSumStats object. -#' @param x A \code{GwasSumStats} or \code{QtlDataset} object. -#' @param ... Class-specific selection arguments (e.g., \code{region}, -#' \code{cisWindow} for \code{QtlDataset}). -#' @return Numeric vector of MAFs, or NULL if not available. -#' @export -setGeneric("getMaf", function(x, ...) standardGeneric("getMaf")) - -#' @title Get Number of SNPs -#' @description Number of SNPs in a \code{GwasSumStats} or -#' \code{QtlSumStats} entry, selected by its identity tuple. -#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. -#' @param ... Class-specific selection arguments. -#' @return Integer. -#' @export -setGeneric("nSnps", function(x, ...) standardGeneric("nSnps")) - -#' @title Subset by Chromosome -#' @description Extract a chromosome-specific subset of a GwasSumStats object. -#' @param x A \code{GwasSumStats} object. -#' @param chr Character, chromosome name (e.g., "1", "chr1"). -#' @return A \code{GwasSumStats} object. -#' @export -setGeneric("subsetChr", function(x, chr) standardGeneric("subsetChr")) - -#' @title Get Phenotype Variance -#' @description Extract phenotype variance from a \code{GwasSumStats} or -#' \code{QtlSumStats} entry, selected by its identity tuple. Returns -#' \code{NULL} when the entry has no \code{varY} recorded. -#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. -#' @param ... Class-specific selection arguments. -#' @return Numeric phenotype variance, or NULL. -#' @export -setGeneric("getVarY", function(x, ...) standardGeneric("getVarY")) - -#' @title Get a Single Summary-Statistic Entry or Embedded Collection -#' @description Behavior depends on the class of \code{x}: -#' \describe{ -#' \item{For \code{GwasSumStats} / \code{QtlSumStats}}{Returns the -#' per-variant \code{GRanges} of summary statistics for one entry, -#' selected by its identity tuple (\code{study} for GWAS; -#' \code{study}, \code{context}, \code{trait} for QTL).} -#' \item{For \code{MultiStudyQtlDataset}}{Returns the embedded -#' \code{QtlSumStats} collection (the summary-statistic-only -#' studies), or \code{NULL} when absent. No selection arguments -#' are accepted in this case.} -#' } -#' @param x A \code{GwasSumStats}, \code{QtlSumStats}, or -#' \code{MultiStudyQtlDataset} object. -#' @param ... Class-specific selection arguments (see above). -#' @return A \code{GRanges}, a \code{QtlSumStats}, or \code{NULL}. -#' @export -setGeneric("getSumStats", function(x, ...) standardGeneric("getSumStats")) - -#' @title Get Standardized Sumstat Data Frame for One Tuple -#' @description Return a per-tuple summary-statistics \code{data.frame} -#' in the standardized layout \code{variant_id, chrom, pos, A1, A2, -#' z, beta, se, N, maf} (optional columns omitted when absent on the -#' entry). Combines tuple-keyed row selection (\code{getSumStats}) -#' with mcols unpacking; replaces the pre-S4 idiom of pulling -#' \code{S4Vectors::mcols(entry)$} directly inside pipelines. -#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. -#' @param ... Class-specific selectors (\code{study} for -#' \code{GwasSumStats}; \code{study}, \code{context}, \code{trait} -#' for \code{QtlSumStats}) plus pass-throughs \code{require}, -#' \code{derive}, \code{keepChrPrefix} forwarded to the underlying -#' unpacker. -#' @return A \code{data.frame}. -#' @export -setGeneric("getSumstatDf", function(x, ...) standardGeneric("getSumstatDf")) - -#' @title Get the Embedded QtlDataset List -#' @description Return the named list of \code{QtlDataset} objects -#' carried by a \code{MultiStudyQtlDataset}. -#' @param x A \code{MultiStudyQtlDataset} object. -#' @return A named list of \code{QtlDataset} objects. -#' @export -setGeneric("getQtlDatasets", - function(x) standardGeneric("getQtlDatasets")) - -#' @title Get the Genome Build -#' @description Return the genome build that the collection's LD sketch -#' and every entry are aligned to. Because all entries in a -#' \code{GwasSumStats} or \code{QtlSumStats} share the LD sketch, the -#' genome build is a single value at the collection level. -#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. -#' @param ... Unused (present for method-signature compatibility). -#' @return Character (length 1). -#' @export -setGeneric("getGenome", function(x, ...) standardGeneric("getGenome")) - -#' @title Get QC Audit Record -#' @description Return the audit record of QC steps applied to this -#' collection. An empty \code{list()} (default on construction) means -#' \code{\link{summaryStatsQc}} has not yet been run. Pipelines that -#' require harmonized sumstats (\code{fineMappingPipeline}, -#' \code{twasWeightsPipeline}, and downstream consumers) reject inputs -#' where \code{length(getQcInfo(x)) == 0L}. -#' @param x A \code{GwasSumStats} or \code{QtlSumStats} object. -#' @param ... Unused. -#' @return A \code{list} (possibly empty). -#' @export -setGeneric("getQcInfo", function(x, ...) standardGeneric("getQcInfo")) - -#' @title Get LD Sketch -#' @description Return the \code{GenotypeHandle} carrying the LD -#' reference for this collection. Defined on classes that embed an -#' \code{ldSketch} slot: \code{GwasSumStats}, \code{QtlSumStats}, -#' \code{FineMappingResult}, \code{TwasWeights}. Returns \code{NULL} -#' when the slot is unset (e.g. a \code{TwasWeights} fit from -#' individual-level data via \code{QtlDataset}). -#' @param x An S4 object that carries an \code{ldSketch} slot. -#' @param ... Unused. -#' @return A \code{GenotypeHandle} or \code{NULL}. -#' @export -setGeneric("getLdSketch", function(x, ...) standardGeneric("getLdSketch")) - -# ============================================================================= -# LdData accessor generics -# ============================================================================= - -#' @title Get LD Correlation Matrix -#' @description Extract the LD correlation matrix from an \code{LdData} -#' object. If only a genotype handle is available, recomputes R from -#' genotypes on the fly. -#' @param x An \code{LdData} object. -#' @return A correlation matrix, or a list of per-block matrices. -#' @export -setGeneric("getCorrelation", function(x) standardGeneric("getCorrelation")) - -#' @title Get Genotype Matrix -#' @description Extract a genotype matrix from an object that carries -#' genotype data. For an \code{LdData}, returns the underlying genotype -#' matrix via its handle (or \code{NULL} if no handle is available). -#' For a \code{QtlDataset}, returns the genotype matrix for a selected -#' set of traits or region (see method documentation for the -#' per-class selection arguments). -#' @param x The object to extract from. -#' @param ... Class-specific selection arguments (e.g., \code{traitId}, -#' \code{region}, \code{cisWindow} for \code{QtlDataset}). -#' @return A numeric matrix, a list of matrices, or \code{NULL}. -#' @export -setGeneric("getGenotypes", function(x, ...) standardGeneric("getGenotypes")) - -#' @title Check Genotype Availability -#' @description Check whether an \code{LdData} object has a genotype -#' handle for extracting raw genotypes. -#' @param x An \code{LdData} object. -#' @return Logical. -#' @export -setGeneric("hasGenotypes", function(x) standardGeneric("hasGenotypes")) - -#' @title Get Variant IDs -#' @description Extract variant ID vector from an object that carries one -#' (e.g., \code{LdData}, \code{FineMappingEntry}, \code{TwasWeightsEntry}) -#' or from one entry of a collection class selected by its identity -#' tuple. -#' @param x The object. -#' @param ... Class-specific selection arguments. -#' @return Character vector of variant IDs. -#' @export -setGeneric("getVariantIds", function(x, ...) standardGeneric("getVariantIds")) - -#' @title Get Phenotype List -#' @description Extract phenotype data from an object that carries it. -#' For a \code{QtlDataset}, the user can optionally select specific -#' contexts, traits, or a region (see method documentation for the -#' per-class selection arguments). -#' @param x The object to extract from. -#' @param ... Class-specific selection arguments (e.g., \code{contexts}, -#' \code{traitId}, \code{region}). -#' @return A named list of phenotype matrices or -#' \code{SummarizedExperiment} objects. -#' @export -setGeneric("getPhenotypes", function(x, ...) standardGeneric("getPhenotypes")) -# ============================================================================= -# FineMappingResult accessor generics -# ============================================================================= - -#' @title Get a Single Fine-Mapping Entry -#' @description Return the \code{FineMappingEntry} for one -#' \code{(study, context, trait, method)} row of a -#' \code{FineMappingResult} collection. -#' @param x A \code{FineMappingResult} object. -#' @param study,context,trait,method Single character identifiers. All -#' required when the collection has more than one row; optional when -#' the collection has a single row. -#' @return A \code{FineMappingEntry} object. -#' @export -setGeneric("getFineMappingResult", - function(x, study = NULL, context = NULL, trait = NULL, method = NULL) - standardGeneric("getFineMappingResult")) - -#' @title Renormalize Fine-Mapping PIPs to a Variant Subset -#' @description Re-derive a \code{FineMappingEntry}'s PIPs (and the -#' \code{topLoci} table) after restricting to a kept variant subset. -#' For each effect the \code{lbf_variable} row is subset to the kept -#' variants, renormalized via \code{lbfToAlpha()}, and the per-variant -#' PIPs are recomputed as \code{1 - prod_l(1 - alpha[l, p])}. -#' -#' The two scenarios this supports: -#' \itemize{ -#' \item The user declined to impute missing variants in a GWAS -#' \code{SumStats}, so a downstream fine-mapping result needs -#' PIPs restricted to the GWAS-covered intersection. -#' \item Colocalization between a GWAS \code{FineMappingResult} and a -#' QTL \code{FineMappingResult} computed on different variant -#' sets — the GWAS PIPs (or QTL PIPs) get renormalized to the -#' common variant set. -#' } -#' -#' @param x A \code{FineMappingEntry} or \code{FineMappingResultBase}. -#' @param keepVariants Character vector of variant IDs to keep. Intersected -#' with the entry's own \code{variantIds}; an empty intersection raises -#' an error. -#' @param ... Future expansion. -#' @return The same flavour of object with PIPs renormalized on the kept -#' subset. -#' @export -setGeneric("adjustPips", - function(x, keepVariants, ...) standardGeneric("adjustPips")) - -#' @title Get PIP Values -#' @description Extract posterior inclusion probabilities from a single -#' \code{FineMappingEntry} or from one entry of a -#' \code{FineMappingResult} (selected by its identity tuple). -#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. -#' @param ... Class-specific selection arguments. -#' @return A named numeric vector of PIPs. -#' @export -setGeneric("getPip", function(x, ...) standardGeneric("getPip")) - -#' @title Get SuSiE Fit -#' @description Extract the SuSiE fit object from a fine-mapping entry -#' or result. The fit may be the trimmed view (when the pipeline ran -#' with the default \code{trim = TRUE}) or the full untrimmed -#' \code{susie()} return (when \code{trim = FALSE}). -#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. -#' @param ... Class-specific selection arguments. -#' @return A list (the SuSiE fit object). -#' @export -setGeneric("getSusieFit", function(x, ...) standardGeneric("getSusieFit")) - -#' @title Get Marginal Effects -#' @description Extract per-variant marginal univariate effects from a -#' fine-mapping entry or result. Returns a \code{data.frame} with -#' identity columns (\code{variant_id, chrom, pos, A1, A2}), context -#' (\code{N, MAF}), and the marginal effect columns -#' (\code{beta, se, z, p}). Populated uniformly across the -#' individual-level and RSS paths. -#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. -#' @param maxPval Optional numeric (length 1). When non-\code{NULL}, -#' filter rows where \code{p > maxPval}. Default \code{NULL} -#' (no filter). -#' @param ... Class-specific selection arguments. -#' @return A \code{data.frame}. -#' @export -setGeneric("getMarginalEffects", - function(x, maxPval = NULL, ...) standardGeneric("getMarginalEffects")) - -#' @title Get Top Loci (posterior view) -#' @description Extract the per-variant posterior fine-mapping payload -#' as either a \code{data.frame} (default) or a \code{GRanges}. -#' Returns identity columns (\code{variant_id, chrom, pos, A1, A2}), -#' context (\code{N, MAF}), the posterior effect columns -#' (\code{beta = posterior_mean, se = posterior_sd}), \code{pip}, -#' and credible-set membership columns (\code{cs_95}, etc.). -#' Rows are filtered by PIP by default — set \code{signalCutoff = 0} -#' to return every variant. -#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. -#' @param type One of \code{"data.frame"} (default) or \code{"GRanges"}. -#' @param signalCutoff Numeric (length 1). Drop rows where -#' \code{pip <= signalCutoff}. Default \code{0.025}. Use -#' \code{signalCutoff = 0} to keep every variant. -#' @param ... Class-specific selection arguments. -#' @return A \code{data.frame} or a \code{GRanges}. -#' @export -setGeneric("getTopLoci", - function(x, type = c("data.frame", "GRanges"), - signalCutoff = 0.025, ...) - standardGeneric("getTopLoci")) - -#' @title Get Credible Sets -#' @description Extract credible set assignments at the requested coverage. -#' @param x A \code{FineMappingEntry} or \code{FineMappingResult}. -#' @param ... Class-specific selection arguments plus \code{coverage}. -#' @return A data.frame of credible set information. -#' @export -setGeneric("getCs", function(x, ...) standardGeneric("getCs")) - -# ============================================================================= -# TwasWeights accessor generics -# ============================================================================= - -#' @title Get TWAS Weights -#' @description Extract weights from a \code{TwasWeightsEntry} or from -#' one entry of a \code{TwasWeights} collection. -#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. -#' @param ... Class-specific selection arguments. -#' @return A numeric vector or matrix of weights. -#' @export -setGeneric("getWeights", function(x, ...) standardGeneric("getWeights")) - -#' @title Get Standardized Flag -#' @description Check whether weights are on the standardized scale. -#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. -#' @param ... Class-specific selection arguments. -#' @return Logical. -#' @export -setGeneric("getStandardized", - function(x, ...) standardGeneric("getStandardized")) - -#' @title Get CV Performance -#' @description Extract cross-validation performance metrics. -#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. -#' @param ... Class-specific selection arguments. -#' @return Method-specific (typically a list). -#' @export -setGeneric("getCvPerformance", - function(x, ...) standardGeneric("getCvPerformance")) - -#' @title Get Model Fits -#' @description Extract fitted model objects. -#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. -#' @param ... Class-specific selection arguments. -#' @return Method-specific (typically a list). -#' @export -setGeneric("getFits", function(x, ...) standardGeneric("getFits")) - -#' @title Get Method Names -#' @description Extract method names from a collection class. -#' @param x A \code{FineMappingResult} or \code{TwasWeights} object. -#' @return Character vector. -#' @export -setGeneric("getMethodNames", function(x) standardGeneric("getMethodNames")) - -#' @title Get Data Type -#' @description Extract the data-type tag. -#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. -#' @param ... Class-specific selection arguments. -#' @return A character vector or NULL. -#' @export -setGeneric("getDataType", function(x, ...) standardGeneric("getDataType")) - -# ============================================================================= -# AlleleQcResult accessor generics -# QcResult accessor generics -# ============================================================================= -# VCF/BCF writer generic -# ============================================================================= - -#' Write summary statistics or fine-mapping results to VCF/BCF -#' -#' Creates a VCF object from GWAS summary statistics or fine-mapping results -#' and writes it to disk. Supports bgzipped VCF (.vcf.gz/.vcf.bgz) and -#' BCF (.bcf) output formats via VariantAnnotation and Rsamtools. -#' -#' @param x Input data: a \code{GwasSumStats} object, a -#' \code{FineMappingResult} object, or a data.frame with columns -#' \code{chrom}, \code{pos}, \code{ref}, \code{alt}. -#' @param outputPath File path for output. Extension determines format: -#' \code{.vcf.gz} or \code{.vcf.bgz} for bgzipped VCF, -#' \code{.bcf} for BCF, \code{.vcf} for uncompressed VCF. -#' @param sampleName Name for the VCF sample column (default: trait name or -#' method name from the S4 object). -#' @param ... Additional arguments passed to methods. -#' @return Invisible path to the written file. -#' @export -setGeneric("writeSumstatsVcf", - function(x, outputPath, sampleName = NULL, ...) standardGeneric("writeSumstatsVcf")) - -# ============================================================================= -# QtlDataset accessor generics -# ============================================================================= - -#' @title Get Study Identifier -#' @description Return the study identifier carried by a \code{QtlDataset}. -#' @param x A \code{QtlDataset} object. -#' @return Character (length 1). -#' @export -setGeneric("getStudy", function(x) standardGeneric("getStudy")) - -#' @title Get Context Names -#' @description Return the names of all contexts carried by an object -#' (e.g., the keys of the \code{phenotypes} list on a \code{QtlDataset}, -#' or the unique \code{context} values of a \code{QtlSumStats}). -#' @param x The object. -#' @return Character vector of context names. -#' @export -setGeneric("getContexts", function(x) standardGeneric("getContexts")) - -#' @title Get Unique Trait Names -#' @description Return the unique trait identifiers carried by a -#' collection class (e.g., \code{QtlSumStats}). -#' @param x The object. -#' @return Character vector of unique trait names. -#' @export -setGeneric("getTraits", function(x) standardGeneric("getTraits")) - -#' @title Get Residualized Genotypes -#' @description Residualize the genotype matrix against the per-context -#' phenotype covariates and the genotype covariates, optionally -#' subsetting variants to those falling within a trait's cis-window or -#' an explicit region. -#' @param x A \code{QtlDataset} object. -#' @param ... Selection arguments: \code{traitId}, \code{region}, -#' \code{cisWindow}, \code{phenotypeCovariatesToRemove}, -#' \code{genotypeCovariatesToRemove}. -#' @return A numeric matrix (samples x variants). -#' @export -setGeneric("getResidualizedGenotypes", - function(x, ...) standardGeneric("getResidualizedGenotypes")) - -#' @title Get Residualized Phenotypes -#' @description Residualize the per-context phenotype matrices against -#' the per-context phenotype covariates and the genotype covariates, -#' for one or more requested contexts. -#' @param x A \code{QtlDataset} object. -#' @param ... Selection arguments: \code{contexts} (required), -#' \code{traitId}, \code{region}, -#' \code{phenotypeCovariatesToRemove}, -#' \code{genotypeCovariatesToRemove}. -#' @return A named list of numeric matrices keyed by context. -#' @export -setGeneric("getResidualizedPhenotypes", - function(x, ...) standardGeneric("getResidualizedPhenotypes")) - -#' @title Get Per-Context Phenotype Covariates -#' @description Return per-context phenotype covariate matrices, taken -#' from the \code{colData} of each context's \code{SummarizedExperiment}. -#' @param x A \code{QtlDataset} object. -#' @param contexts Character vector of context names (subset of -#' \code{names(getPhenotypes(x))}). -#' @return A named list of matrices keyed by context. -#' @export -setGeneric("getPhenotypeCovariates", - function(x, contexts) standardGeneric("getPhenotypeCovariates")) - -#' @title Get Genotype Covariates -#' @description Return the single genotype-derived covariate matrix -#' carried by a \code{QtlDataset} (e.g., ancestry PCs). -#' @param x A \code{QtlDataset} object. -#' @return Numeric matrix (samples x covariates). -#' @export -setGeneric("getGenotypeCovariates", - function(x) standardGeneric("getGenotypeCovariates")) - -#' @title Get scaleResiduals Flag -#' @description Whether residualization accessors scale residuals to unit -#' variance. -#' @param x A \code{QtlDataset} object. -#' @return Logical (length 1). -#' @export -setGeneric("getScaleResiduals", - function(x) standardGeneric("getScaleResiduals")) - -# ============================================================================= -# GenotypeHandle / LD-statistic / Annotation / LdData / H2Estimate accessors -# ============================================================================= - -#' @title Get SNP Info -#' @description Return the cached SNP metadata data.frame -#' (columns: SNP, CHR, BP, A1, A2, optionally MAF). -#' @param x A \code{GenotypeHandle} or \code{LdStatistic}. -#' @return A data.frame. -#' @export -setGeneric("getSnpInfo", function(x) standardGeneric("getSnpInfo")) - -#' @title Get Genotype Storage Format -#' @description Return the detected genotype storage format. -#' @param x A \code{GenotypeHandle}. -#' @return Character (length 1): one of "gds", "vcf", "plink1", "plink2". -#' @export -setGeneric("getFormat", function(x) standardGeneric("getFormat")) - -#' @title Get File Path -#' @description Return the underlying genotype file path or stem. -#' @param x A \code{GenotypeHandle}. -#' @return Character (length 1). -#' @export -setGeneric("getPath", function(x) standardGeneric("getPath")) - -#' @title Get Sample Identifiers -#' @description Return the sample-id vector. -#' @param x A \code{GenotypeHandle}. -#' @return Character vector. -#' @export -setGeneric("getSampleIds", function(x) standardGeneric("getSampleIds")) - -#' @title Get plink2 pgen Pointer -#' @description Return the cached external pointer to the plink2 pgen -#' handle (NULL when the handle is not pgen-backed). -#' @param x A \code{GenotypeHandle}. -#' @return An external pointer or NULL. -#' @export -setGeneric("getPgenPtr", function(x) standardGeneric("getPgenPtr")) - -#' @title Get Sample Count -#' @description Return the number of samples carried by a -#' \code{GenotypeHandle}. -#' @param x A \code{GenotypeHandle}. -#' @return Integer (length 1). -#' @export -setGeneric("getNSamples", function(x) standardGeneric("getNSamples")) - -#' @title Get Per-Block Eigendecompositions -#' @description Return the per-block eigendecomposition list carried by -#' an \code{LdEigen} object. -#' @param x An \code{LdEigen}. -#' @return List of per-block eigen decompositions. -#' @export -setGeneric("getEigenList", function(x) standardGeneric("getEigenList")) - -#' @title Get LD Reference Panel Size -#' @description Return the reference-panel sample size used to compute -#' an \code{LdStatistic} or carried by an \code{LdData}. -#' @param x An \code{LdStatistic} or \code{LdData}. -#' @return Integer (length 1). -#' @export -setGeneric("getNRef", function(x) standardGeneric("getNRef")) - -#' @title Get In-Sample Flag -#' @description Whether the LD reference panel is from the same cohort -#' as the GWAS (affects bias correction). -#' @param x An \code{LdStatistic}. -#' @return Logical (length 1). -#' @export -setGeneric("getInSample", function(x) standardGeneric("getInSample")) - -#' @title Get LD Scores -#' @description Return the per-SNP LD score matrix carried by an -#' \code{LdScore} object. -#' @param x An \code{LdScore}. -#' @return Numeric matrix (SNPs x annotations+1). -#' @export -setGeneric("getLdScores", function(x) standardGeneric("getLdScores")) - -#' @title Get LD-Score Regression Weights -#' @description Return the per-SNP regression weights vector carried by -#' an \code{LdScore} object. -#' @param x An \code{LdScore}. -#' @return Numeric vector. -#' @export -setGeneric("getLdScoreWeights", - function(x) standardGeneric("getLdScoreWeights")) - -#' @title Get Per-Block LD Matrix List -#' @description Return the list of per-block LD (R^2) matrices used for -#' the FGLS residual covariance in g-LDSC. -#' @param x An \code{LdScore}. -#' @return List of matrices (empty list for S-LDSC). -#' @export -setGeneric("getLdMatrixList", - function(x) standardGeneric("getLdMatrixList")) - -#' @title Get LD Block Container -#' @description Return the \code{LdBlocks} object carried by an -#' \code{LdStatistic}. -#' @param x An \code{LdStatistic}. -#' @return An \code{LdBlocks} object. -#' @export -setGeneric("getLdBlocks", function(x) standardGeneric("getLdBlocks")) - -#' @title Get Annotation Matrix -#' @description Return the (SNPs x annotations) annotation matrix. -#' @param x An \code{AnnotationMatrix}. -#' @return Numeric matrix or dgCMatrix. -#' @export -setGeneric("getAnnotations", - function(x) standardGeneric("getAnnotations")) - -#' @title Get Annotation Metadata -#' @description Return the per-annotation metadata data.frame (columns -#' \code{name}, \code{tier}, \code{type}). -#' @param x An \code{AnnotationMatrix}. -#' @return A data.frame. -#' @export -setGeneric("getAnnotationMeta", - function(x) standardGeneric("getAnnotationMeta")) - -#' @title Get SNP Ranges -#' @description Return the per-SNP \code{GRanges} carried by an -#' \code{AnnotationMatrix}. -#' @param x An \code{AnnotationMatrix}. -#' @return A \code{GRanges} object. -#' @export -setGeneric("getSnpRanges", function(x) standardGeneric("getSnpRanges")) - -#' @title Get LD Block Ranges -#' @description Return the per-block \code{GRanges} carried by an -#' \code{LdBlocks} object. -#' @param x An \code{LdBlocks}. -#' @return A \code{GRanges} object. -#' @export -setGeneric("getBlocks", function(x) standardGeneric("getBlocks")) - -#' @title Get GenotypeHandle from LdData -#' @description Return the \code{GenotypeHandle} (or list of handles for -#' mixture panels) carried by an \code{LdData}. -#' @param x An \code{LdData}. -#' @return A \code{GenotypeHandle}, a list of them, or NULL. -#' @export -setGeneric("getGenotypeHandle", - function(x) standardGeneric("getGenotypeHandle")) - -#' @title Get Mixture Weights -#' @description Return the per-panel mixing proportions carried by an -#' \code{LdData} when its \code{genotypeHandle} slot is a list of -#' panels. NULL for single-panel objects. -#' @param x An \code{LdData}. -#' @return Numeric vector or NULL. -#' @export -setGeneric("getMixtureWeights", - function(x) standardGeneric("getMixtureWeights")) - -#' @title Get SNP Indices -#' @description Return the integer indices into the handle's snpInfo -#' carried by an \code{LdData}. -#' @param x An \code{LdData}. -#' @return Integer vector or NULL. -#' @export -setGeneric("getSnpIdx", function(x) standardGeneric("getSnpIdx")) - -#' @title Get Variant GRanges -#' @description Return the variant metadata \code{GRanges} of an -#' \code{LdData}. -#' @param x An \code{LdData}. -#' @return A \code{GRanges}. -#' @export -setGeneric("getVariantInfo", function(x) standardGeneric("getVariantInfo")) - -#' @title Get Block Metadata -#' @description Return the block metadata (\code{LdBlocks} or -#' \code{data.frame}) carried by an \code{LdData}. -#' @param x An \code{LdData}. -#' @return An \code{LdBlocks} or \code{data.frame}. -#' @export -setGeneric("getBlockMetadata", - function(x) standardGeneric("getBlockMetadata")) - -#' @title Get Reference Panel (data.frame) -#' @description Flatten the variant \code{GRanges} of an \code{LdData} -#' into a reference-panel data.frame. -#' @param x An \code{LdData}. -#' @return A data.frame. -#' @export -setGeneric("getRefPanel", function(x) standardGeneric("getRefPanel")) - -#' @title Get Per-Block tau Matrix -#' @description Return the per-block jackknife tau matrix carried by an -#' \code{H2Estimate}. -#' @param x An \code{H2Estimate}. -#' @return A numeric matrix or NULL. -#' @export -setGeneric("getTauBlocks", function(x) standardGeneric("getTauBlocks")) - -#' @title Get Global SNP Heritability -#' @description Return the global SNP heritability estimate carried by -#' an \code{H2Estimate}. -#' @param x An \code{H2Estimate}. -#' @return Numeric (length 1). -#' @export -setGeneric("getH2", function(x) standardGeneric("getH2"))